diff --git a/backend/app/services/llm/enrichment.py b/backend/app/services/llm/enrichment.py index 25ea80e..54e948d 100644 --- a/backend/app/services/llm/enrichment.py +++ b/backend/app/services/llm/enrichment.py @@ -18,12 +18,13 @@ - 任务间互不影响:每个任务独立 try/except + 写 status - 全部任务共走 LlmClient 的全局限速 - 若设置 enabled=False,只跳过(不调 LLM) +- 用户提示词模板可能不包含全部占位符,用 _safe_format 容错 """ from __future__ import annotations import asyncio import logging -from typing import Any +from typing import Any, Mapping from sqlalchemy import select @@ -55,6 +56,27 @@ DEFAULT_IMAGE_FIRST_PARA_CHARS = 400 # 提取第一段最多用这么多字 DEFAULT_IMAGE_MAX_TAGS = 5 # 分类标签上限(多标签) +class _SafeDict(dict): + """missing 返回 {key} 本身(占位符原样保留),不抛 KeyError。""" + + def __missing__(self, key: str) -> str: # type: ignore[override] + return "{" + key + "}" + + +def _safe_format(template: str, vars_: Mapping[str, Any]) -> str: + """用 _SafeDict 跑 str.format,缺失的占位符保留原样而不是 KeyError。 + + 用途:数据库里用户已存的 prompt 模板可能是旧版的(只支持部分占位符), + 新代码传了更多变量也不应崩。 + """ + try: + return template.format_map(_SafeDict(vars_)) + except (KeyError, IndexError) as e: + # 极端情况(比如 {} 这种非法占位符)兜底 + logger.warning("_safe_format 解析失败,按原文返回: %s", e) + return template + + # === 获取当前设置(行锁 + 缓存刷新)=== async def get_setting() -> LlmSetting: """读 llm_settings 单行;不存在则用默认值插入。""" @@ -77,9 +99,8 @@ async def get_setting() -> LlmSetting: # === 单任务:format === async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClient) -> None: - prompt = (setting.format_prompt or get_default_prompts()["format_prompt"]).format( - body=(article.body_zh_text or "")[:6000] - ) + template = setting.format_prompt or get_default_prompts()["format_prompt"] + prompt = _safe_format(template, {"body": (article.body_zh_text or "")[:6000]}) text = await client.chat( system="你是中文新闻排版助手,只输出排版后的纯文本。", user=prompt, @@ -114,11 +135,14 @@ def _wrap_article_body(inner_html: str) -> str: # === 单任务:classify === async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None: - prompt = (setting.classify_prompt or get_default_prompts()["classify_prompt"]).format( - title=(article.title_zh or article.title)[:200], - summary=(article.summary_zh or "")[:400], - body=(article.body_zh_text or "")[:1500], - ) + template = setting.classify_prompt or get_default_prompts()["classify_prompt"] + # 老 prompt 可能只支持 {title}/{summary},不支持 {body} —— _safe_format 兜底 + vars_ = { + "title": (article.title_zh or article.title)[:200], + "summary": (article.summary_zh or "")[:400], + "body": (article.body_zh_text or "")[:1500], + } + prompt = _safe_format(template, vars_) result = await client.classify_json( system="你是新闻分类助手,只返回 JSON。", user=prompt, @@ -133,18 +157,14 @@ async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmCli # === 单任务:image === async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None: - template = (setting.image_prompt_template or get_default_prompts()["image_prompt_template"]) + template = setting.image_prompt_template or get_default_prompts()["image_prompt_template"] # 用正文第一段作为 prompt(英文 prompt 走 title 仍可工作,所以 title 也带上作 fallback) first_para = _first_paragraph(article.body_zh_text or "", max_chars=DEFAULT_IMAGE_FIRST_PARA_CHARS) if not first_para: first_para = (article.title_zh or article.title or "")[:200] title_for_prompt = (article.title_zh or article.title or "")[:200] - # template 同时支持 {body} 和 {title} 两种占位符 - try: - prompt = template.format(body=first_para, title=title_for_prompt) - except (KeyError, IndexError): - # 用户改坏了 template,fallback 用 {title} 模式 - prompt = template.format(title=title_for_prompt) + # template 同时支持 {body} 和 {title} 两种占位符;老的只支持 {title} 也能跑 + prompt = _safe_format(template, {"body": first_para, "title": title_for_prompt}) url = await client.generate_image(prompt, size=DEFAULT_IMAGE_SIZE) article.image_ai_url = url article.image_ai_status = "ok" @@ -163,9 +183,13 @@ def _first_paragraph(text: str, max_chars: int) -> str: # === 单任务:commentary === async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None: - prompt = (setting.commentary_prompt or get_default_prompts()["commentary_prompt"]).format( - title=(article.title_zh or article.title)[:200], - body=(article.body_zh_text or "")[:3000], + template = setting.commentary_prompt or get_default_prompts()["commentary_prompt"] + prompt = _safe_format( + template, + { + "title": (article.title_zh or article.title)[:200], + "body": (article.body_zh_text or "")[:3000], + }, ) text = await client.chat( system="你是资深新闻评论员。", diff --git a/backend/app/workers/__main__.py b/backend/app/workers/__main__.py index bd6c853..ab5b419 100644 --- a/backend/app/workers/__main__.py +++ b/backend/app/workers/__main__.py @@ -11,6 +11,7 @@ from datetime import datetime, timezone from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.cron import CronTrigger +from apscheduler.triggers.date import DateTrigger from apscheduler.triggers.interval import IntervalTrigger from sqlalchemy import select @@ -79,12 +80,11 @@ async def main() -> None: id="rebuild_jobs", replace_existing=True, ) - # 启动时立即跑一次 + # 启动时立即跑一次(只一次,用 DateTrigger 避免 IntervalTrigger 被 max_instances 拒绝刷日志) scheduler.add_job( run_once, - trigger=IntervalTrigger(minutes=0), + trigger=DateTrigger(run_date=datetime.now(timezone.utc)), id="startup_run", - next_run_time=datetime.now(timezone.utc), ) scheduler.start() diff --git a/scripts/deploy_pull.py b/scripts/deploy_pull.py index d017e19..5c02a02 100644 --- a/scripts/deploy_pull.py +++ b/scripts/deploy_pull.py @@ -44,7 +44,17 @@ def _run(c: paramiko.SSHClient, cmd: str, timeout: int = 60) -> tuple[int, str, def _connect(host: str, port: int, user: str, ssh_key: str) -> paramiko.SSHClient: - pkey = paramiko.Ed25519Key.from_private_key_file(ssh_key) + # 依次尝试 RSA / Ed25519 / ECDSA(paramiko 5 没有统一入口) + pkey: Any = None + last_err: Exception | None = None + for loader in (paramiko.RSAKey, paramiko.Ed25519Key, paramiko.ECDSAKey): + try: + pkey = loader.from_private_key_file(ssh_key) + break + except Exception as e: + last_err = e + if pkey is None: + raise RuntimeError(f"无法解析 SSH key {ssh_key}: {last_err}") c = paramiko.SSHClient() c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) c.connect(host, port=port, username=user, pkey=pkey, timeout=30,