fix(llm+worker+deploy): 兼容老 prompt 模板 + 消除 startup_run 日志噪音

- enrichment: 新增 _safe_format (基于 _SafeDict),缺失占位符保留原样不抛 KeyError。
  _enrich_format / _enrich_classify / _enrich_image / _enrich_commentary
  全部走 _safe_format,数据库里老 prompt(不支持 {body})不再让整条 article 卡住。
  复现: 388183 classify 一直 KeyError,enrichment_loop 反复重试它,316 篇全卡在 n/a。
- workers.__main__: startup_run 从 IntervalTrigger(minutes=0) 改成 DateTrigger
  (只跑一次),消除 'maximum number of running instances reached' 刷屏 WARNING。
- deploy_pull: 改 _connect 自动识别 RSA / Ed25519 / ECDSA key(原硬编码 Ed25519Key)
This commit is contained in:
Mavis
2026-06-08 21:20:43 +08:00
parent 380e8b124e
commit 8d73f4fb28
3 changed files with 57 additions and 23 deletions

View File

@@ -18,12 +18,13 @@
- 任务间互不影响:每个任务独立 try/except + 写 status - 任务间互不影响:每个任务独立 try/except + 写 status
- 全部任务共走 LlmClient 的全局限速 - 全部任务共走 LlmClient 的全局限速
- 若设置 enabled=False,只跳过(不调 LLM) - 若设置 enabled=False,只跳过(不调 LLM)
- 用户提示词模板可能不包含全部占位符,用 _safe_format 容错
""" """
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import logging import logging
from typing import Any from typing import Any, Mapping
from sqlalchemy import select from sqlalchemy import select
@@ -55,6 +56,27 @@ DEFAULT_IMAGE_FIRST_PARA_CHARS = 400 # 提取第一段最多用这么多字
DEFAULT_IMAGE_MAX_TAGS = 5 # 分类标签上限(多标签) DEFAULT_IMAGE_MAX_TAGS = 5 # 分类标签上限(多标签)
class _SafeDict(dict):
"""missing 返回 {key} 本身(占位符原样保留),不抛 KeyError。"""
def __missing__(self, key: str) -> str: # type: ignore[override]
return "{" + key + "}"
def _safe_format(template: str, vars_: Mapping[str, Any]) -> str:
    """Fill ``template`` via ``str.format_map`` without failing on bad templates.

    Prompt templates already stored in the database may be older versions
    that only know a subset of the placeholders; passing more variables than
    the template uses, or fewer than it references, must not crash the caller.

    Args:
        template: Format string with ``{name}`` placeholders.
        vars_: Values available for substitution.

    Returns:
        The formatted text, with placeholders that have no value left as
        ``{name}``. If the template cannot be processed at all, the template
        is returned unchanged and a warning is logged.
    """
    try:
        return template.format_map(_SafeDict(vars_))
    except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
        # format_map reports malformed templates as ValueError: a bare "{}"
        # or "{0}" positional field, a lone "{" or "}", or an unescaped JSON
        # sample whose text after ":" is read as a format spec.
        # AttributeError / TypeError / IndexError come from attribute or
        # index access ("{x.y}", "{x[k]}") on a placeholder that fell back
        # to its literal text.
        logger.warning("_safe_format 解析失败,按原文返回: %s", e)
        return template
# === 获取当前设置(行锁 + 缓存刷新)=== # === 获取当前设置(行锁 + 缓存刷新)===
async def get_setting() -> LlmSetting: async def get_setting() -> LlmSetting:
"""读 llm_settings 单行;不存在则用默认值插入。""" """读 llm_settings 单行;不存在则用默认值插入。"""
@@ -77,9 +99,8 @@ async def get_setting() -> LlmSetting:
# === 单任务:format === # === 单任务:format ===
async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClient) -> None: async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClient) -> None:
prompt = (setting.format_prompt or get_default_prompts()["format_prompt"]).format( template = setting.format_prompt or get_default_prompts()["format_prompt"]
body=(article.body_zh_text or "")[:6000] prompt = _safe_format(template, {"body": (article.body_zh_text or "")[:6000]})
)
text = await client.chat( text = await client.chat(
system="你是中文新闻排版助手,只输出排版后的纯文本。", system="你是中文新闻排版助手,只输出排版后的纯文本。",
user=prompt, user=prompt,
@@ -114,11 +135,14 @@ def _wrap_article_body(inner_html: str) -> str:
# === 单任务:classify === # === 单任务:classify ===
async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None: async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None:
prompt = (setting.classify_prompt or get_default_prompts()["classify_prompt"]).format( template = setting.classify_prompt or get_default_prompts()["classify_prompt"]
title=(article.title_zh or article.title)[:200], # 老 prompt 可能只支持 {title}/{summary},不支持 {body} —— _safe_format 兜底
summary=(article.summary_zh or "")[:400], vars_ = {
body=(article.body_zh_text or "")[:1500], "title": (article.title_zh or article.title)[:200],
) "summary": (article.summary_zh or "")[:400],
"body": (article.body_zh_text or "")[:1500],
}
prompt = _safe_format(template, vars_)
result = await client.classify_json( result = await client.classify_json(
system="你是新闻分类助手,只返回 JSON。", system="你是新闻分类助手,只返回 JSON。",
user=prompt, user=prompt,
@@ -133,18 +157,14 @@ async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmCli
# === 单任务:image === # === 单任务:image ===
async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None: async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None:
template = (setting.image_prompt_template or get_default_prompts()["image_prompt_template"]) template = setting.image_prompt_template or get_default_prompts()["image_prompt_template"]
# 用正文第一段作为 prompt(英文 prompt 走 title 仍可工作,所以 title 也带上作 fallback) # 用正文第一段作为 prompt(英文 prompt 走 title 仍可工作,所以 title 也带上作 fallback)
first_para = _first_paragraph(article.body_zh_text or "", max_chars=DEFAULT_IMAGE_FIRST_PARA_CHARS) first_para = _first_paragraph(article.body_zh_text or "", max_chars=DEFAULT_IMAGE_FIRST_PARA_CHARS)
if not first_para: if not first_para:
first_para = (article.title_zh or article.title or "")[:200] first_para = (article.title_zh or article.title or "")[:200]
title_for_prompt = (article.title_zh or article.title or "")[:200] title_for_prompt = (article.title_zh or article.title or "")[:200]
# template 同时支持 {body} 和 {title} 两种占位符 # template 同时支持 {body} 和 {title} 两种占位符;老的只支持 {title} 也能跑
try: prompt = _safe_format(template, {"body": first_para, "title": title_for_prompt})
prompt = template.format(body=first_para, title=title_for_prompt)
except (KeyError, IndexError):
# 用户改坏了 template,fallback 用 {title} 模式
prompt = template.format(title=title_for_prompt)
url = await client.generate_image(prompt, size=DEFAULT_IMAGE_SIZE) url = await client.generate_image(prompt, size=DEFAULT_IMAGE_SIZE)
article.image_ai_url = url article.image_ai_url = url
article.image_ai_status = "ok" article.image_ai_status = "ok"
@@ -163,9 +183,13 @@ def _first_paragraph(text: str, max_chars: int) -> str:
# === 单任务:commentary === # === 单任务:commentary ===
async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None: async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None:
prompt = (setting.commentary_prompt or get_default_prompts()["commentary_prompt"]).format( template = setting.commentary_prompt or get_default_prompts()["commentary_prompt"]
title=(article.title_zh or article.title)[:200], prompt = _safe_format(
body=(article.body_zh_text or "")[:3000], template,
{
"title": (article.title_zh or article.title)[:200],
"body": (article.body_zh_text or "")[:3000],
},
) )
text = await client.chat( text = await client.chat(
system="你是资深新闻评论员。", system="你是资深新闻评论员。",

View File

@@ -11,6 +11,7 @@ from datetime import datetime, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.date import DateTrigger
from apscheduler.triggers.interval import IntervalTrigger from apscheduler.triggers.interval import IntervalTrigger
from sqlalchemy import select from sqlalchemy import select
@@ -79,12 +80,11 @@ async def main() -> None:
id="rebuild_jobs", id="rebuild_jobs",
replace_existing=True, replace_existing=True,
) )
# 启动时立即跑一次 # 启动时立即跑一次(只一次,用 DateTrigger 避免 IntervalTrigger 被 max_instances 拒绝刷日志)
scheduler.add_job( scheduler.add_job(
run_once, run_once,
trigger=IntervalTrigger(minutes=0), trigger=DateTrigger(run_date=datetime.now(timezone.utc)),
id="startup_run", id="startup_run",
next_run_time=datetime.now(timezone.utc),
) )
scheduler.start() scheduler.start()

View File

@@ -44,7 +44,17 @@ def _run(c: paramiko.SSHClient, cmd: str, timeout: int = 60) -> tuple[int, str,
def _connect(host: str, port: int, user: str, ssh_key: str) -> paramiko.SSHClient: def _connect(host: str, port: int, user: str, ssh_key: str) -> paramiko.SSHClient:
pkey = paramiko.Ed25519Key.from_private_key_file(ssh_key) # 依次尝试 RSA / Ed25519 / ECDSA(paramiko 5 没有统一入口)
pkey: Any = None
last_err: Exception | None = None
for loader in (paramiko.RSAKey, paramiko.Ed25519Key, paramiko.ECDSAKey):
try:
pkey = loader.from_private_key_file(ssh_key)
break
except Exception as e:
last_err = e
if pkey is None:
raise RuntimeError(f"无法解析 SSH key {ssh_key}: {last_err}")
c = paramiko.SSHClient() c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(host, port=port, username=user, pkey=pkey, timeout=30, c.connect(host, port=port, username=user, pkey=pkey, timeout=30,