" for p in text.split("\n\n") if p.strip()]
if not parts:
article.body_zh_formatted = None
else:
article.body_zh_formatted = _wrap_article_body("\n".join(parts))
article.format_status = "ok"
def _wrap_article_body(inner_html: str) -> str:
"""Wrap the already-formatted paragraphs in a container that carries the fixed article CSS.
The CSS is applied both inline through the style attribute (so the styling survives
sharing/export) and through a class name (so a global frontend class can override it).
NOTE(review): the container markup itself is not readable in this view of the file;
confirm the exact tag and class name against the real source.
"""
# Container-level style, assembled from the module-level ARTICLE_BODY_* constants.
inline_style = (
f"font-family:{ARTICLE_BODY_FONT_FAMILY};"
f"font-size:{ARTICLE_BODY_FONT_SIZE};"
f"line-height:{ARTICLE_BODY_LINE_HEIGHT};"
f"color:{ARTICLE_BODY_COLOR};"
)
# Paragraph styles are inlined too, so they always take effect when rendered via v-html.
p_style = f"margin:0 0 {ARTICLE_BODY_P_MARGIN_BOTTOM} 0;"
# NOTE(review): the string literals below look like they lost their HTML tags in this
# view (presumably an opening paragraph tag and the wrapping container) - confirm
# against the real file before touching them.
inner_with_p_style = inner_html.replace("
", f'
')
return f'
{inner_with_p_style}
'
# === 单任务:classify ===
async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None:
    """Ask the LLM for category tags and store them on ``article``.

    The prompt is ``setting.classify_prompt`` (or the default classify prompt
    when that is empty), filled with the truncated title, summary and body.
    The reply is read from the ``"categories"`` key, falling back to ``"tags"``.

    Args:
        article: Article to classify; ``category`` and ``classify_status`` are
            written in place.
        setting: LLM settings providing the optional custom prompt.
        client: LLM client used for the JSON classification call.

    Raises:
        Whatever ``str.format`` (broken custom prompt) or
        ``client.classify_json`` raise; the caller is expected to handle it.
    """
    # Stored value is capped at 64 characters - presumably the column width; confirm.
    max_category_len = 64

    template = setting.classify_prompt or get_default_prompts()["classify_prompt"]
    prompt = template.format(
        # `or ""` keeps a missing title from raising TypeError on the slice,
        # matching how _enrich_image builds its title.
        title=(article.title_zh or article.title or "")[:200],
        summary=(article.summary_zh or "")[:400],
        body=(article.body_zh_text or "")[:1500],
    )
    result = await client.classify_json(
        system="你是新闻分类助手,只返回 JSON。",
        user=prompt,
    )
    cats = result.get("categories") or result.get("tags") or []
    if isinstance(cats, list) and cats:
        # Multi-label (2-5 tags), stored comma-separated in the category field
        # (which already has an index).
        tags = [
            tag
            for tag in (str(c).strip() for c in cats[:DEFAULT_IMAGE_MAX_TAGS])
            if tag
        ]
        # Drop whole trailing tags instead of cutting the last one in half.
        while len(tags) > 1 and len(",".join(tags)) > max_category_len:
            tags.pop()
        # A single over-long tag is still hard-truncated, as before.
        article.category = ",".join(tags)[:max_category_len] or None
    article.classify_status = "ok"
# === 单任务:image ===
async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None:
    """Generate an AI illustration for ``article`` and store its URL.

    The image prompt is built from ``setting.image_prompt_template`` (or the
    default template when that is empty). The template may use ``{body}``
    (first paragraph of the translated body) and/or ``{title}``.

    Args:
        article: Article to illustrate; ``image_ai_url`` and
            ``image_ai_status`` are written in place.
        setting: LLM settings providing the optional custom template.
        client: LLM client used for image generation.

    Raises:
        KeyError, IndexError, ValueError: if the default template itself
            cannot be formatted.
        Whatever ``client.generate_image`` raises.
    """
    custom_template = setting.image_prompt_template
    template = custom_template or get_default_prompts()["image_prompt_template"]

    title_for_prompt = (article.title_zh or article.title or "")[:200]
    # Use the first body paragraph as the prompt text; fall back to the title
    # when there is no translated body.
    first_para = (
        _first_paragraph(article.body_zh_text or "", max_chars=DEFAULT_IMAGE_FIRST_PARA_CHARS)
        or title_for_prompt
    )

    # The template supports both {body} and {title} placeholders.
    try:
        prompt = template.format(body=first_para, title=title_for_prompt)
    except (KeyError, IndexError, ValueError) as exc:
        # The user-edited template is broken (unknown/positional placeholder or
        # unbalanced braces). Retrying the same template with fewer arguments
        # can never succeed, so fall back to the built-in default instead.
        if not custom_template:
            raise
        logger.warning(
            "image_prompt_template is invalid (%s: %s); using default template",
            type(exc).__name__,
            exc,
        )
        prompt = get_default_prompts()["image_prompt_template"].format(
            body=first_para, title=title_for_prompt
        )

    url = await client.generate_image(prompt, size=DEFAULT_IMAGE_SIZE)
    article.image_ai_url = url
    article.image_ai_status = "ok"
def _first_paragraph(text: str, max_chars: int) -> str:
"""取正文第一段(按 \\n\\n 切)。如果首段超长就截断。"""
if not text:
return ""
for p in text.split("\n\n"):
p = p.strip()
if p:
return p[:max_chars]
return ""
# === 单任务:commentary ===
async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None:
    """Ask the LLM for a short editorial commentary and store it on ``article``.

    The prompt is ``setting.commentary_prompt`` (or the default commentary
    prompt when that is empty), filled with the truncated title and body.

    Args:
        article: Article to comment on; ``commentary`` and
            ``commentary_status`` are written in place.
        setting: LLM settings providing the optional custom prompt.
        client: LLM client used for the chat call.

    Raises:
        Whatever ``str.format`` (broken custom prompt) or ``client.chat``
        raise; the caller is expected to handle it.
    """
    template = setting.commentary_prompt or get_default_prompts()["commentary_prompt"]
    prompt = template.format(
        # `or ""` keeps a missing title from raising TypeError on the slice,
        # matching how _enrich_image builds its title.
        title=(article.title_zh or article.title or "")[:200],
        body=(article.body_zh_text or "")[:3000],
    )
    text = await client.chat(
        system="你是资深新闻评论员。",
        user=prompt,
        temperature=0.6,
        max_tokens=600,
    )
    # An empty reply is stored as None rather than "".
    article.commentary = text or None
    article.commentary_status = "ok"
# === 总编排:enrich_article ===
async def enrich_article(article_id: int) -> dict[str, str]:
    """Run the four LLM enrichment steps on a single article.

    The steps (format, classify, image, commentary) run one after another.
    Each is isolated: a failure is logged, recorded in the result and in the
    matching ``*_status`` column, and does not stop the remaining steps.

    Args:
        article_id: Primary key of the article to enrich.

    Returns:
        ``{task: status}`` for logging, where status is ``"ok"`` or
        ``"failed:<ExceptionName>"``. Every task maps to ``"skipped"`` when
        the LLM is disabled in the settings. An empty dict is returned when
        the article does not exist or has no translation yet.
    """
    lookup = select(Article).where(Article.id == article_id)

    # Pre-check in a short-lived session so nothing is held open while the
    # settings are loaded.
    async with AsyncSessionLocal() as session:
        art = (await session.execute(lookup)).scalar_one_or_none()
        if not art:
            logger.warning("enrich_article: id=%s not found", article_id)
            return {}
        if not (art.title_zh or art.body_zh_text):
            logger.info("enrich_article: id=%s no translation yet, skip", article_id)
            return {}

    # Load the settings.
    setting = await get_setting()
    if not setting.enabled:
        logger.info("enrich_article: llm disabled, skip id=%s", article_id)
        return {"format": "skipped", "classify": "skipped", "image": "skipped", "commentary": "skipped"}

    # Build the client from the current settings so changes apply without a restart.
    client = LlmClient(
        chat_model=setting.chat_model,
        image_model=setting.image_model,
        interval_sec=setting.interval_sec,
    )

    # (task name, step function, status attribute set to "failed" on error).
    # Step functions are stored uncalled: each coroutine is created right
    # before it is awaited, so none is left un-awaited if the loop is
    # interrupted (e.g. by cancellation).
    steps = (
        ("format", _enrich_format, "format_status"),
        ("classify", _enrich_classify, "classify_status"),
        ("image", _enrich_image, "image_ai_status"),
        ("commentary", _enrich_commentary, "commentary_status"),
    )

    results: dict[str, str] = {}
    async with AsyncSessionLocal() as session:
        art = (await session.execute(lookup)).scalar_one_or_none()
        if not art:
            return {}
        # Run the steps serially, each inside its own try/except so they
        # cannot affect each other.
        for name, step, status_attr in steps:
            try:
                await step(art, setting, client)
            except Exception as e:
                logger.exception("enrich %s failed for article %s: %s", name, article_id, e)
                results[name] = f"failed:{type(e).__name__}"
                setattr(art, status_attr, "failed")
            else:
                results[name] = "ok"
        await session.commit()

    logger.info("enrich_article id=%s: %s", article_id, results)
    return results
# === 后台循环 ===
# 与 translation_loop 一样,常驻从队列里取文章
ENRICHMENT_INTERVAL_SEC = 5.0  # idle wait (seconds) when there is nothing to do
ENRICHMENT_BATCH_SIZE = 1
# Seconds to leave an article alone after a run that did not fully succeed, so
# a persistently failing article cannot monopolise the loop.
ENRICHMENT_RETRY_COOLDOWN_SEC = 300.0


async def enrichment_loop() -> None:
    """Continuously enrich translated articles that still need enrichment.

    An article qualifies when ``translation_status == "ok"``, ``title_zh`` is
    set, and at least one of its four ``*_status`` columns is NULL, empty,
    ``"pending"``, ``"failed"`` or ``"n/a"``. Oldest translations are handled
    first. The loop never returns; unexpected errors are logged and followed
    by an idle wait.

    Articles whose last run did not end with every task ``"ok"`` are skipped
    for ``ENRICHMENT_RETRY_COOLDOWN_SEC`` seconds. This cooldown is kept in
    memory only and is lost on restart.
    """
    retry_statuses = ("pending", "failed", "n/a", "")

    def _needs_run(column):
        # NULL and "" are treated as "pending".
        return column.is_(None) | column.in_(retry_statuses)

    # Filter in SQL rather than after LIMIT; otherwise a window of
    # already-enriched old rows would hide newer articles forever.
    needs_enrichment = (
        _needs_run(Article.format_status)
        | _needs_run(Article.classify_status)
        | _needs_run(Article.image_ai_status)
        | _needs_run(Article.commentary_status)
    )
    # article id -> event-loop time before which it must not be retried
    cooldown_until: dict[int, float] = {}

    logger.info("enrichment_loop started")
    # Wait a little so translation gets a head start.
    await asyncio.sleep(10)
    clock = asyncio.get_running_loop().time

    while True:
        try:
            now = clock()
            cooldown_until = {aid: t for aid, t in cooldown_until.items() if t > now}

            conditions = [
                Article.translation_status == "ok",
                Article.title_zh.is_not(None),
                needs_enrichment,
            ]
            if cooldown_until:
                conditions.append(Article.id.not_in(list(cooldown_until)))

            async with AsyncSessionLocal() as session:
                rows = await session.execute(
                    select(Article.id)
                    .where(*conditions)
                    .order_by(Article.translated_at.asc().nullslast(), Article.id.asc())
                    .limit(ENRICHMENT_BATCH_SIZE)
                )
                todo_ids: list[int] = list(rows.scalars())

            if not todo_ids:
                await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)
                continue

            llm_disabled = False
            for aid in todo_ids:
                try:
                    results = await enrich_article(aid)
                except Exception as e:
                    logger.exception("enrich_article %s in loop failed: %s", aid, e)
                    results = {}
                outcomes = set(results.values())
                if outcomes == {"skipped"}:
                    # enrich_article reports all-"skipped" when the LLM is
                    # disabled; no point trying further articles right now.
                    llm_disabled = True
                    break
                if outcomes != {"ok"}:
                    # Failed, partially failed, or nothing done: back off.
                    cooldown_until[aid] = clock() + ENRICHMENT_RETRY_COOLDOWN_SEC
                await asyncio.sleep(0.5)  # light throttle between articles

            if llm_disabled:
                await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)
        except Exception as e:
            logger.exception("enrichment_loop error: %s", e)
            await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)