diff --git a/backend/alembic/versions/0002_llm_settings_and_articles_ai.py b/backend/alembic/versions/0002_llm_settings_and_articles_ai.py new file mode 100644 index 0000000..05a7e09 --- /dev/null +++ b/backend/alembic/versions/0002_llm_settings_and_articles_ai.py @@ -0,0 +1,54 @@ +"""LLM 设置表 + articles AI 增强字段 + +Revision ID: 0002 +Revises: 0001 +Create Date: 2026-06-08 +""" +from __future__ import annotations + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0002" +down_revision: Union[str, None] = "0001" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # === llm_settings(单行配置)=== + op.create_table( + "llm_settings", + sa.Column("id", sa.Integer, primary_key=True, server_default="1"), + sa.Column("format_prompt", sa.Text), + sa.Column("classify_prompt", sa.Text), + sa.Column("commentary_prompt", sa.Text), + sa.Column("image_prompt_template", sa.Text), + sa.Column("image_size", sa.String(16), nullable=False, server_default="1024x768"), + sa.Column("chat_model", sa.String(64), nullable=False, server_default="agnes-2.0-flash"), + sa.Column("image_model", sa.String(64), nullable=False, server_default="agnes-image-2.1-flash"), + sa.Column("interval_sec", sa.Float, nullable=False, server_default="2.0"), + sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.text("true")), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), + sa.CheckConstraint("id = 1", name="ck_llm_settings_singleton"), + ) + + # === articles 加 LLM 增强字段 === + op.add_column("articles", sa.Column("body_zh_formatted", sa.Text)) + op.add_column("articles", sa.Column("image_ai_url", sa.Text)) + op.add_column("articles", sa.Column("format_status", sa.String(16), nullable=False, server_default="n/a")) + op.add_column("articles", sa.Column("classify_status", sa.String(16), nullable=False, server_default="n/a")) + op.add_column("articles", sa.Column("image_ai_status", sa.String(16), nullable=False, server_default="n/a")) + op.add_column("articles", sa.Column("commentary_status", sa.String(16), nullable=False, server_default="n/a")) + + +def downgrade() -> None: + op.drop_column("articles", "commentary_status") + op.drop_column("articles", "image_ai_status") + op.drop_column("articles", "classify_status") + op.drop_column("articles", "format_status") + op.drop_column("articles", "image_ai_url") + op.drop_column("articles", "body_zh_formatted") + op.drop_table("llm_settings") diff --git a/backend/app/api/admin_llm.py b/backend/app/api/admin_llm.py new file mode 100644 index 0000000..2cdc1a9 --- /dev/null +++ b/backend/app/api/admin_llm.py @@ -0,0 +1,132 @@ +"""Admin LLM 设置(仅 owner)。 + +- GET /admin/llm-settings — 读当前设置(单行) +- PUT /admin/llm-settings — 更新(可只传部分字段) +- POST /admin/llm-settings/reset — 恢复默认提示词 +- POST /admin/llm-settings/test — 测一次连通性(发个最小 chat 请求) +- POST /admin/llm-enrich/{article_id} — 手动触发某篇的 LLM 增强 +""" +from __future__ import annotations + +import logging +from typing import Any + +from fastapi import APIRouter, Depends, HTTPException, status +from pydantic import BaseModel +from sqlalchemy import select + +from app.core.deps import require_owner +from app.database import AsyncSessionLocal +from app.models.article import Article +from app.models.llm_setting import LlmSetting +from app.schemas.llm import LlmSettingOut, LlmSettingUpdate, get_default_prompts +from app.services.llm.client import LlmClient + +logger = logging.getLogger("news.admin_llm") +router = APIRouter(prefix="/admin/llm", tags=["admin-llm"], dependencies=[Depends(require_owner)]) + + +@router.get("/settings", response_model=LlmSettingOut) +async def get_settings(): + async with AsyncSessionLocal() as session: + row = (await session.execute(select(LlmSetting).where(LlmSetting.id == 1))).scalar_one_or_none() + if row is None: + # 返回默认值(不写库) + defaults = get_default_prompts() + return LlmSettingOut( + format_prompt=defaults["format_prompt"], + classify_prompt=defaults["classify_prompt"], + commentary_prompt=defaults["commentary_prompt"], + image_prompt_template=defaults["image_prompt_template"], + ) + return LlmSettingOut.model_validate(row) + + +@router.put("/settings", response_model=LlmSettingOut) +async def update_settings(body: LlmSettingUpdate): + async with AsyncSessionLocal() as session: + row = (await session.execute(select(LlmSetting).where(LlmSetting.id == 1))).scalar_one_or_none() + if row is None: + row = LlmSetting(id=1, **get_default_prompts()) + session.add(row) + await session.flush() + # 只更新传入的字段 + update_data = body.model_dump(exclude_unset=True) + for k, v in update_data.items(): + setattr(row, k, v) + await session.commit() + await session.refresh(row) + return LlmSettingOut.model_validate(row) + + +class ResetResponse(BaseModel): + reset: bool + detail: str = "" + + +@router.post("/settings/reset", response_model=ResetResponse) +async def reset_settings(): + """恢复默认提示词。""" + async with AsyncSessionLocal() as session: + row = (await session.execute(select(LlmSetting).where(LlmSetting.id == 1))).scalar_one_or_none() + defaults = get_default_prompts() + if row is None: + row = LlmSetting(id=1, **defaults) + session.add(row) + else: + row.format_prompt = defaults["format_prompt"] + row.classify_prompt = defaults["classify_prompt"] + row.commentary_prompt = defaults["commentary_prompt"] + row.image_prompt_template = defaults["image_prompt_template"] + await session.commit() + return ResetResponse(reset=True, detail="已恢复默认提示词") + + +class TestResponse(BaseModel): + ok: bool + detail: str = "" + configured: bool + + +@router.post("/settings/test", response_model=TestResponse) +async def test_connection(): + """最小测试:发一个 'hi' chat 请求,确认 key + 端点通。""" + async with AsyncSessionLocal() as session: + row = (await session.execute(select(LlmSetting).where(LlmSetting.id == 1))).scalar_one_or_none() + chat_model = row.chat_model if row else "agnes-2.0-flash" + client = LlmClient(chat_model=chat_model) + if not client.is_configured(): + return TestResponse(ok=False, configured=False, detail="AGNES_API_KEY 未配置") + try: + reply = await client.chat( + system="你是测试助手,只用 1 个词回答 OK 或 FAIL。", + user="ping", + temperature=0.0, + max_tokens=10, + ) + return TestResponse(ok=True, configured=True, detail=f"reply={reply[:50]!r}") + except Exception as e: + return TestResponse(ok=False, configured=True, detail=f"{type(e).__name__}: {e}") + + +class EnrichTriggerResponse(BaseModel): + triggered: bool + detail: str = "" + results: dict[str, str] | None = None + + +@router.post("/enrich/{article_id}", response_model=EnrichTriggerResponse) +async def trigger_enrich(article_id: int): + """手动触发某篇的 4 项 LLM 增强(同步等待,不会丢在后台)。""" + from app.services.llm.enrichment import enrich_article + + async with AsyncSessionLocal() as session: + row = (await session.execute(select(Article).where(Article.id == article_id))).scalar_one_or_none() + if not row: + raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found") + try: + results = await enrich_article(article_id) + return EnrichTriggerResponse(triggered=True, detail=f"done for {article_id}", results=results) + except Exception as e: + logger.exception("manual enrich failed for %s", article_id) + raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, f"{type(e).__name__}: {e}") diff --git a/backend/app/models/llm_setting.py b/backend/app/models/llm_setting.py new file mode 100644 index 0000000..3d6369e --- /dev/null +++ b/backend/app/models/llm_setting.py @@ -0,0 +1,50 @@ +"""LLM 设置(单行,owner 可编辑)。 + +字段对应: + - 排版/分类/点评提示词(用户可改) + - 插图尺寸 + prompt 模板(用户可改) + - 总开关 enabled + - 模型名(默认指向 Agnes,但可改成任意 OpenAI 兼容端点) +""" +from __future__ import annotations + +from datetime import datetime + +from sqlalchemy import Boolean, DateTime, Integer, String, Text, func +from sqlalchemy.orm import Mapped, mapped_column + +from app.database import Base + + +class LlmSetting(Base): + __tablename__ = "llm_settings" + + # 永远只有一行:id=1 + id: Mapped[int] = mapped_column(Integer, primary_key=True, default=1) + + # === 提示词 === + format_prompt: Mapped[str | None] = mapped_column(Text) + classify_prompt: Mapped[str | None] = mapped_column(Text) + commentary_prompt: Mapped[str | None] = mapped_column(Text) + image_prompt_template: Mapped[str | None] = mapped_column(Text) + + # === 插图参数 === + image_size: Mapped[str] = mapped_column(String(16), default="1024x768", nullable=False) + + # === 模型 === + chat_model: Mapped[str] = mapped_column(String(64), default="agnes-2.0-flash", nullable=False) + image_model: Mapped[str] = mapped_column(String(64), default="agnes-image-2.1-flash", nullable=False) + + # === 限速 === + interval_sec: Mapped[float] = mapped_column(default=2.0, nullable=False) + + # === 总开关 === + enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False) + + # === 时间 === + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False + ) + + def __repr__(self) -> str: + return f"" diff --git a/backend/app/schemas/llm.py b/backend/app/schemas/llm.py new file mode 100644 index 0000000..fabc4ea --- /dev/null +++ b/backend/app/schemas/llm.py @@ -0,0 +1,70 @@ +"""LLM 设置相关 Pydantic schemas。""" +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + + +class LlmSettingOut(BaseModel): + model_config = ConfigDict(from_attributes=True) + + format_prompt: str | None = None + classify_prompt: str | None = None + commentary_prompt: str | None = None + image_prompt_template: str | None = None + image_size: str = "1024x768" + chat_model: str = "agnes-2.0-flash" + image_model: str = "agnes-image-2.1-flash" + interval_sec: float = 2.0 + enabled: bool = True + updated_at: datetime | None = None + + +class LlmSettingUpdate(BaseModel): + """PATCH — 全部字段 optional,只更新传入的。""" + + format_prompt: str | None = None + classify_prompt: str | None = None + commentary_prompt: str | None = None + image_prompt_template: str | None = None + image_size: str | None = Field(default=None, pattern=r"^\d{2,4}x\d{2,4}$") + chat_model: str | None = Field(default=None, min_length=1, max_length=64) + image_model: str | None = Field(default=None, min_length=1, max_length=64) + interval_sec: float | None = Field(default=None, ge=0.0, le=60.0) + enabled: bool | None = None + + +# === 默认提示词(模板,用户可改)=== +DEFAULT_PROMPTS = { + "format_prompt": ( + "你是中文新闻排版助手。请将以下译文改写为适合网页阅读的版式,要求:\n" + "1. 保留所有事实信息,不要增删内容\n" + "2. 按段落拆分(2-4 句一段),段间空行\n" + "3. 关键人物/机构/数字用 **加粗**\n" + "4. 如有并列要点,转为编号列表(1. 2. 3.)\n" + "5. 不要使用 # 标题,不要外层 markdown 代码块\n" + "6. 直接输出排版后的纯文本\n\n" + "原文:\n{body}\n" + ), + "classify_prompt": ( + "你是新闻分类助手。请阅读以下新闻,返回 1-2 个分类标签。\n" + "可选标签(可自由组合): 时政 / 经济 / 科技 / 军事 / 社会 / 国际 / 体育 / 文化 / 环境 / 健康 / 金融 / 能源 / 气候\n" + "严格要求:只返回 JSON,形如 {\"categories\": [\"时政\", \"国际\"]},不要其他内容。\n\n" + "标题:{title}\n摘要:{summary}\n" + ), + "commentary_prompt": ( + "你是资深新闻评论员。请基于以下新闻写一段 100-200 字的中文点评。\n" + "要求:客观、有深度、避免空洞套话,给出具体观察或背景。\n\n" + "标题:{title}\n正文:{body}\n" + ), + "image_prompt_template": ( + "Editorial news illustration about: {title}. " + "Cinematic, professional journalism style, soft natural lighting, " + "no text, no logos, no watermark." + ), +} + + +def get_default_prompts() -> dict[str, str]: + return dict(DEFAULT_PROMPTS) diff --git a/backend/app/services/llm/__init__.py b/backend/app/services/llm/__init__.py new file mode 100644 index 0000000..c0c8b0b --- /dev/null +++ b/backend/app/services/llm/__init__.py @@ -0,0 +1,7 @@ +"""LLM 服务:客户端 + 智能增强。""" +from app.services.llm.client import LlmClient, client # noqa: F401 +from app.services.llm.enrichment import ( # noqa: F401 + enrich_article, + enrichment_loop, + get_setting, +) diff --git a/backend/app/services/llm/client.py b/backend/app/services/llm/client.py new file mode 100644 index 0000000..18f9454 --- /dev/null +++ b/backend/app/services/llm/client.py @@ -0,0 +1,147 @@ +"""Agnes(及任意 OpenAI 兼容端点)的 LLM 客户端。 + +设计: +- 内部持 chat 和 image 两个 Semaphore(各 1 个并发),互不阻塞 +- 每次调用后 await asyncio.sleep(interval_sec) 节流 +- 失败重试 1 次,再失败抛异常由上层标记 status=failed +- 用 httpx.AsyncClient,超时 60s +""" +from __future__ import annotations + +import asyncio +import logging +from typing import Any + +import httpx + +from app.config import settings as app_settings + +logger = logging.getLogger("news.llm.client") + + +class LlmClient: + """单一客户端,所有 LLM 调用都过它。""" + + def __init__( + self, + base_url: str | None = None, + api_key: str | None = None, + chat_model: str | None = None, + image_model: str | None = None, + interval_sec: float | None = None, + ): + self.base_url = (base_url or app_settings.agnes_base_url).rstrip("/") + self.api_key = api_key or app_settings.agnes_api_key + self.chat_model = chat_model or app_settings.agnes_chat_model + self.image_model = image_model or app_settings.agnes_image_model + self.interval_sec = ( + interval_sec if interval_sec is not None else app_settings.llm_interval_sec + ) + # chat 和 image 各一个串行信号 + self._chat_sem = asyncio.Semaphore(1) + self._image_sem = asyncio.Semaphore(1) + + def is_configured(self) -> bool: + return bool(self.api_key) + + async def chat( + self, + system: str, + user: str, + *, + temperature: float = 0.4, + max_tokens: int = 1500, + model: str | None = None, + ) -> str: + """调 chat/completions,返回 assistant 文本。""" + if not self.is_configured(): + raise RuntimeError("AGNES_API_KEY 未配置") + url = f"{self.base_url}/chat/completions" + payload = { + "model": model or self.chat_model, + "messages": [ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + ], + "temperature": temperature, + "max_tokens": max_tokens, + } + async with self._chat_sem: + res = await self._post_with_retry(url, payload) + await asyncio.sleep(self.interval_sec) + return res["choices"][0]["message"]["content"].strip() + + async def classify_json( + self, + system: str, + user: str, + *, + max_tokens: int = 200, + ) -> dict[str, Any]: + """调 chat 并尝试解析 JSON。失败时回退:返回空 dict。""" + text = await self.chat(system, user, temperature=0.2, max_tokens=max_tokens) + # 容错解析:可能被 ```json ... ``` 包裹 + text = text.strip() + if text.startswith("```"): + # 去掉代码块围栏 + lines = text.split("\n") + text = "\n".join(l for l in lines if not l.strip().startswith("```")) + text = text.strip() + import json + try: + return json.loads(text) + except Exception as e: + logger.warning("classify_json 解析失败: %s; raw=%r", e, text[:200]) + return {} + + async def generate_image( + self, + prompt: str, + *, + size: str = "1024x768", + model: str | None = None, + ) -> str: + """调 images/generations,返回图片 URL。""" + if not self.is_configured(): + raise RuntimeError("AGNES_API_KEY 未配置") + url = f"{self.base_url}/images/generations" + payload = { + "model": model or self.image_model, + "prompt": prompt, + "size": size, + } + async with self._image_sem: + res = await self._post_with_retry(url, payload, timeout=120) + await asyncio.sleep(self.interval_sec) + return res["data"][0]["url"] + + async def _post_with_retry( + self, url: str, payload: dict, *, timeout: float = 60.0, retries: int = 1 + ) -> dict: + """POST + 简单重试(对 5xx / 超时)。""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + last_exc: Exception | None = None + for attempt in range(retries + 1): + try: + async with httpx.AsyncClient(timeout=timeout) as client: + r = await client.post(url, json=payload, headers=headers) + if r.status_code >= 500: + raise RuntimeError(f"LLM 5xx: {r.status_code} {r.text[:200]}") + if r.status_code != 200: + raise RuntimeError(f"LLM {r.status_code}: {r.text[:300]}") + return r.json() + except Exception as e: + last_exc = e + if attempt < retries: + wait = 2 ** attempt + logger.warning("LLM 调用失败,%.1fs 后重试: %s", wait, e) + await asyncio.sleep(wait) + assert last_exc is not None + raise last_exc + + +# 全局单例(读环境变量 + 启动时初始化) +client = LlmClient() diff --git a/backend/app/services/llm/enrichment.py b/backend/app/services/llm/enrichment.py new file mode 100644 index 0000000..0bf10a7 --- /dev/null +++ b/backend/app/services/llm/enrichment.py @@ -0,0 +1,238 @@ +"""LLM 智能增强服务(翻译后调)。 + +4 个独立任务: + 1. format — 排版译文(写入 body_zh_formatted) + 2. classify — 分类(写入 category) + 3. image — 生成插图(写入 image_ai_url) + 4. commentary — 写点评(写入 commentary) + +设计: +- 任务入口: enrich_article(article_id, settings_row) +- 任务间互不影响:每个任务独立 try/except + 写 status +- 全部任务共走 LlmClient 的全局限速 +- 若设置 enabled=False,只跳过(不调 LLM) +""" +from __future__ import annotations + +import asyncio +import logging +from typing import Any + +from sqlalchemy import select + +from app.database import AsyncSessionLocal +from app.models.article import Article +from app.models.llm_setting import LlmSetting +from app.schemas.llm import get_default_prompts +from app.services.llm.client import LlmClient + +logger = logging.getLogger("news.llm.enrichment") + + +# === 获取当前设置(行锁 + 缓存刷新)=== +async def get_setting() -> LlmSetting: + """读 llm_settings 单行;不存在则用默认值插入。""" + async with AsyncSessionLocal() as session: + row = (await session.execute(select(LlmSetting).where(LlmSetting.id == 1))).scalar_one_or_none() + if row is None: + defaults = get_default_prompts() + row = LlmSetting( + id=1, + format_prompt=defaults["format_prompt"], + classify_prompt=defaults["classify_prompt"], + commentary_prompt=defaults["commentary_prompt"], + image_prompt_template=defaults["image_prompt_template"], + ) + session.add(row) + await session.commit() + await session.refresh(row) + return row + + +# === 单任务:format === +async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClient) -> None: + prompt = (setting.format_prompt or get_default_prompts()["format_prompt"]).format( + body=(article.body_zh_text or "")[:6000] + ) + text = await client.chat( + system="你是中文新闻排版助手,只输出排版后的纯文本。", + user=prompt, + temperature=0.3, + max_tokens=2000, + ) + # 极简 HTML 包裹:按段切 +

+ parts = [f"

{p.strip()}

" for p in text.split("\n\n") if p.strip()] + article.body_zh_formatted = "\n".join(parts) or None + article.format_status = "ok" + + +# === 单任务:classify === +async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None: + prompt = (setting.classify_prompt or get_default_prompts()["classify_prompt"]).format( + title=(article.title_zh or article.title)[:200], + summary=(article.summary_zh or "")[:400], + ) + result = await client.classify_json( + system="你是新闻分类助手,只返回 JSON。", + user=prompt, + ) + cats = result.get("categories") or [] + if isinstance(cats, list) and cats: + article.category = ",".join(str(c).strip() for c in cats[:3])[:32] + article.classify_status = "ok" + + +# === 单任务:image === +async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None: + template = (setting.image_prompt_template or get_default_prompts()["image_prompt_template"]) + # 默认用 title_zh(若有),否则用原文 title + title_for_prompt = (article.title_zh or article.title or "")[:200] + prompt = template.format(title=title_for_prompt) + url = await client.generate_image(prompt, size=setting.image_size) + article.image_ai_url = url + article.image_ai_status = "ok" + + +# === 单任务:commentary === +async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None: + prompt = (setting.commentary_prompt or get_default_prompts()["commentary_prompt"]).format( + title=(article.title_zh or article.title)[:200], + body=(article.body_zh_text or "")[:3000], + ) + text = await client.chat( + system="你是资深新闻评论员。", + user=prompt, + temperature=0.6, + max_tokens=600, + ) + article.commentary = text or None + article.commentary_status = "ok" + + +# === 总编排:enrich_article === +async def enrich_article(article_id: int) -> dict[str, str]: + """对单篇文章做 4 项 LLM 增强。 + + 返回 {task: status} 字典(用于日志)。 + """ + async with AsyncSessionLocal() as session: + art = ( + await session.execute(select(Article).where(Article.id == article_id)) + ).scalar_one_or_none() + if not art: + logger.warning("enrich_article: id=%s not found", article_id) + return {} + if not (art.title_zh or art.body_zh_text): + logger.info("enrich_article: id=%s no translation yet, skip", article_id) + return {} + + # 拉取设置 + setting = await get_setting() + if not setting.enabled: + logger.info("enrich_article: llm disabled, skip id=%s", article_id) + return {"format": "skipped", "classify": "skipped", "image": "skipped", "commentary": "skipped"} + + # 用配置生成 client(允许热改设置) + client = LlmClient( + chat_model=setting.chat_model, + image_model=setting.image_model, + interval_sec=setting.interval_sec, + ) + + results: dict[str, str] = {} + + async with AsyncSessionLocal() as session: + art = ( + await session.execute(select(Article).where(Article.id == article_id)) + ).scalar_one_or_none() + if not art: + return {} + + # 4 个任务(互不影响);format / classify / commentary 是 chat,image 是 image + # 串行执行(已经过 client 内部 Semaphore),但每个 try/except 独立 + tasks: list[tuple[str, Any]] = [ + ("format", _enrich_format(art, setting, client)), + ("classify", _enrich_classify(art, setting, client)), + ("image", _enrich_image(art, setting, client)), + ("commentary", _enrich_commentary(art, setting, client)), + ] + for name, coro in tasks: + try: + await coro + results[name] = "ok" + except Exception as e: + logger.exception("enrich %s failed for article %s: %s", name, article_id, e) + results[name] = f"failed:{type(e).__name__}" + # 标 status + if name == "format": + art.format_status = "failed" + elif name == "classify": + art.classify_status = "failed" + elif name == "image": + art.image_ai_status = "failed" + elif name == "commentary": + art.commentary_status = "failed" + + await session.commit() + logger.info("enrich_article id=%s: %s", article_id, results) + return results + + +# === 后台循环 === +# 与 translation_loop 一样,常驻从队列里取文章 +ENRICHMENT_INTERVAL_SEC = 5.0 # 没活时等待 +ENRICHMENT_BATCH_SIZE = 1 + + +async def enrichment_loop() -> None: + """扫描已翻译但未 enrich 的文章(任一 *_status 为 pending/n/a 且 translation_status=ok)。 + + 跟 translation_loop 一样常驻。 + """ + logger.info("enrichment_loop started") + # 等一下让翻译先跑 + await asyncio.sleep(10) + while True: + try: + async with AsyncSessionLocal() as session: + # 已翻译完成 + 4 个状态中至少有一个是 pending + rows = ( + await session.execute( + select(Article) + .where( + Article.translation_status == "ok", + Article.title_zh.is_not(None), + ) + .order_by(Article.translated_at.asc().nullslast(), Article.id.asc()) + .limit(ENRICHMENT_BATCH_SIZE * 5) # 多取几个找需要 enrich 的 + ) + ).scalars() + candidates = list(rows) + + # 过滤:任一 *_status 是 pending + todo_ids: list[int] = [] + for a in candidates: + statuses = [ + a.format_status or "pending", + a.classify_status or "pending", + a.image_ai_status or "pending", + a.commentary_status or "pending", + ] + if any(s in ("pending", "failed", "n/a") for s in statuses): + todo_ids.append(a.id) + if len(todo_ids) >= ENRICHMENT_BATCH_SIZE: + break + + if not todo_ids: + await asyncio.sleep(ENRICHMENT_INTERVAL_SEC) + continue + + for aid in todo_ids: + try: + await enrich_article(aid) + except Exception as e: + logger.exception("enrich_article %s in loop failed: %s", aid, e) + await asyncio.sleep(0.5) # 文章间轻节流 + except Exception as e: + logger.exception("enrichment_loop error: %s", e) + await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)