fix(llm+worker+deploy): 兼容老 prompt 模板 + 消除 startup_run 日志噪音
- enrichment: 新增 _safe_format (基于 _SafeDict),缺失占位符保留原样不抛 KeyError。
_enrich_format / _enrich_classify / _enrich_image / _enrich_commentary
全部走 _safe_format,数据库里老 prompt(不支持 {body})不再让整条 article 卡住。
复现: 388183 classify 一直 KeyError,enrichment_loop 反复重试它,316 篇全卡在 n/a。
- workers.__main__: startup_run 从 IntervalTrigger(minutes=0) 改成 DateTrigger
(只跑一次),消除 'maximum number of running instances reached' 刷屏 WARNING。
- deploy_pull: 改 _connect 自动识别 RSA / Ed25519 / ECDSA key(原硬编码 Ed25519Key)
This commit is contained in:
@@ -18,12 +18,13 @@
|
||||
- 任务间互不影响:每个任务独立 try/except + 写 status
|
||||
- 全部任务共走 LlmClient 的全局限速
|
||||
- 若设置 enabled=False,只跳过(不调 LLM)
|
||||
- 用户提示词模板可能不包含全部占位符,用 _safe_format 容错
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any
|
||||
from typing import Any, Mapping
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
@@ -55,6 +56,27 @@ DEFAULT_IMAGE_FIRST_PARA_CHARS = 400 # 提取第一段最多用这么多字
|
||||
DEFAULT_IMAGE_MAX_TAGS = 5 # 分类标签上限(多标签)
|
||||
|
||||
|
||||
class _SafeDict(dict):
|
||||
"""missing 返回 {key} 本身(占位符原样保留),不抛 KeyError。"""
|
||||
|
||||
def __missing__(self, key: str) -> str: # type: ignore[override]
|
||||
return "{" + key + "}"
|
||||
|
||||
|
||||
def _safe_format(template: str, vars_: Mapping[str, Any]) -> str:
    """Fill ``template`` with ``vars_`` without ever raising on a bad template.

    The template is rendered with ``str.format_map`` over a ``_SafeDict``,
    so a placeholder whose name is absent from ``vars_`` is emitted as its
    own ``{name}`` text rather than raising ``KeyError``.

    Purpose: prompt templates already stored in the database may have been
    written for a different set of placeholders than the current code
    supplies; such a mismatch must not crash the caller.

    Args:
        template: Format string using ``{name}`` placeholders.
        vars_: Values available for substitution.

    Returns:
        The rendered string, or ``template`` unchanged when it cannot be
        formatted at all (a warning is logged in that case).
    """
    try:
        return template.format_map(_SafeDict(vars_))
    except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
        # Last-resort fallback for templates that str.format_map rejects:
        #   - ValueError: positional fields ("{}" / "{0}"), stray or
        #     unbalanced braces, invalid format specs;
        #   - AttributeError / TypeError: attribute or index access on a
        #     value that does not support it, e.g. "{missing.attr}", or a
        #     format spec the value's type rejects;
        #   - KeyError / IndexError: failed "[...]" lookups inside a field.
        logger.warning("_safe_format 解析失败,按原文返回: %s", e)
        return template
|
||||
|
||||
|
||||
# === 获取当前设置(行锁 + 缓存刷新)===
|
||||
async def get_setting() -> LlmSetting:
|
||||
"""读 llm_settings 单行;不存在则用默认值插入。"""
|
||||
@@ -77,9 +99,8 @@ async def get_setting() -> LlmSetting:
|
||||
|
||||
# === 单任务:format ===
|
||||
async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClient) -> None:
|
||||
prompt = (setting.format_prompt or get_default_prompts()["format_prompt"]).format(
|
||||
body=(article.body_zh_text or "")[:6000]
|
||||
)
|
||||
template = setting.format_prompt or get_default_prompts()["format_prompt"]
|
||||
prompt = _safe_format(template, {"body": (article.body_zh_text or "")[:6000]})
|
||||
text = await client.chat(
|
||||
system="你是中文新闻排版助手,只输出排版后的纯文本。",
|
||||
user=prompt,
|
||||
@@ -114,11 +135,14 @@ def _wrap_article_body(inner_html: str) -> str:
|
||||
|
||||
# === 单任务:classify ===
|
||||
async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None:
|
||||
prompt = (setting.classify_prompt or get_default_prompts()["classify_prompt"]).format(
|
||||
title=(article.title_zh or article.title)[:200],
|
||||
summary=(article.summary_zh or "")[:400],
|
||||
body=(article.body_zh_text or "")[:1500],
|
||||
)
|
||||
template = setting.classify_prompt or get_default_prompts()["classify_prompt"]
|
||||
# 老 prompt 可能只支持 {title}/{summary},不支持 {body} —— _safe_format 兜底
|
||||
vars_ = {
|
||||
"title": (article.title_zh or article.title)[:200],
|
||||
"summary": (article.summary_zh or "")[:400],
|
||||
"body": (article.body_zh_text or "")[:1500],
|
||||
}
|
||||
prompt = _safe_format(template, vars_)
|
||||
result = await client.classify_json(
|
||||
system="你是新闻分类助手,只返回 JSON。",
|
||||
user=prompt,
|
||||
@@ -133,18 +157,14 @@ async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmCli
|
||||
|
||||
# === 单任务:image ===
|
||||
async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None:
|
||||
template = (setting.image_prompt_template or get_default_prompts()["image_prompt_template"])
|
||||
template = setting.image_prompt_template or get_default_prompts()["image_prompt_template"]
|
||||
# 用正文第一段作为 prompt(英文 prompt 走 title 仍可工作,所以 title 也带上作 fallback)
|
||||
first_para = _first_paragraph(article.body_zh_text or "", max_chars=DEFAULT_IMAGE_FIRST_PARA_CHARS)
|
||||
if not first_para:
|
||||
first_para = (article.title_zh or article.title or "")[:200]
|
||||
title_for_prompt = (article.title_zh or article.title or "")[:200]
|
||||
# template 同时支持 {body} 和 {title} 两种占位符
|
||||
try:
|
||||
prompt = template.format(body=first_para, title=title_for_prompt)
|
||||
except (KeyError, IndexError):
|
||||
# 用户改坏了 template,fallback 用 {title} 模式
|
||||
prompt = template.format(title=title_for_prompt)
|
||||
# template 同时支持 {body} 和 {title} 两种占位符;老的只支持 {title} 也能跑
|
||||
prompt = _safe_format(template, {"body": first_para, "title": title_for_prompt})
|
||||
url = await client.generate_image(prompt, size=DEFAULT_IMAGE_SIZE)
|
||||
article.image_ai_url = url
|
||||
article.image_ai_status = "ok"
|
||||
@@ -163,9 +183,13 @@ def _first_paragraph(text: str, max_chars: int) -> str:
|
||||
|
||||
# === 单任务:commentary ===
|
||||
async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None:
|
||||
prompt = (setting.commentary_prompt or get_default_prompts()["commentary_prompt"]).format(
|
||||
title=(article.title_zh or article.title)[:200],
|
||||
body=(article.body_zh_text or "")[:3000],
|
||||
template = setting.commentary_prompt or get_default_prompts()["commentary_prompt"]
|
||||
prompt = _safe_format(
|
||||
template,
|
||||
{
|
||||
"title": (article.title_zh or article.title)[:200],
|
||||
"body": (article.body_zh_text or "")[:3000],
|
||||
},
|
||||
)
|
||||
text = await client.chat(
|
||||
system="你是资深新闻评论员。",
|
||||
|
||||
@@ -11,6 +11,7 @@ from datetime import datetime, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.date import DateTrigger
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from sqlalchemy import select
|
||||
|
||||
@@ -79,12 +80,11 @@ async def main() -> None:
|
||||
id="rebuild_jobs",
|
||||
replace_existing=True,
|
||||
)
|
||||
# 启动时立即跑一次
|
||||
# 启动时立即跑一次(只一次,用 DateTrigger 避免 IntervalTrigger 被 max_instances 拒绝刷日志)
|
||||
scheduler.add_job(
|
||||
run_once,
|
||||
trigger=IntervalTrigger(minutes=0),
|
||||
trigger=DateTrigger(run_date=datetime.now(timezone.utc)),
|
||||
id="startup_run",
|
||||
next_run_time=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
scheduler.start()
|
||||
|
||||
Reference in New Issue
Block a user