diff --git a/backend/app/workers/pipeline.py b/backend/app/workers/pipeline.py index 2afe823..e470b80 100644 --- a/backend/app/workers/pipeline.py +++ b/backend/app/workers/pipeline.py @@ -9,7 +9,7 @@ import asyncio import logging from datetime import datetime, timezone -from sqlalchemy import select +from sqlalchemy import or_, select from sqlalchemy.dialects.postgresql import insert as pg_insert from app.config import settings @@ -153,6 +153,17 @@ async def translate_article(article_id: int) -> None: return if art.translation_status not in ("pending", "failed"): return + # 防御性 guard:中文源头(原文就是中文)不应走翻译。 + # 正常路径 translation_loop SQL 已经过滤(commit 7),这里兜底。 + # 直接把 status 改 n/a 避免反复入队。 + if art.lang_src and art.lang_src.lower().startswith("zh"): + logger.info( + "translate_article id=%s skipped: lang_src=%s (中文源,无需翻译)", + article_id, art.lang_src, + ) + art.translation_status = "n/a" + await session.commit() + return title = art.title body_text = (art.body_text or "")[:TRANSLATE_BODY_MAX] # lang_src 优先级:article.lang_src > source.language_src > "auto" @@ -322,10 +333,19 @@ async def translation_loop() -> None: while True: try: async with AsyncSessionLocal() as session: + # 中文源头跳过 — 原文就是中文,TMT 中翻中浪费配额。 + # lang_src 为 NULL 时不跳过(可能是英文 RSS 没设 language_src,走翻译正确) + # 前端详情页同步隐藏了"译文"板块(commit 6) rows = ( await session.execute( select(Article) - .where(Article.translation_status.in_(("pending", "failed"))) + .where( + Article.translation_status.in_(("pending", "failed")), + or_( + Article.lang_src.is_(None), + ~Article.lang_src.like("zh%"), + ), + ) .order_by(Article.fetched_at.asc(), Article.id.asc()) .limit(TRANSLATION_BATCH_SIZE) )