fix(translate): 拦截引擎错误 marker + pipeline 严格 status 判定,避免 TMT AuthFailure 伪装 ok

This commit is contained in:
Mavis
2026-06-11 10:01:19 +08:00
parent 6293f82a3a
commit fd7817b881
2 changed files with 43 additions and 4 deletions

View File

@@ -176,15 +176,40 @@ async def translate_article(article_id: int) -> None:
# body 段落切分 + 重组
chunks = _chunk_text(body_text, max_chars=settings.tencent_tmt_max_chars_per_req)
translated_chunks: list[str] = []
last_engine: str | None = None
for ch in chunks:
tr = await translation_service.translate(ch, source=lang_src, target=target)
total_chars += tr.chars
translated_chunks.append(tr.text)
last_engine = tr.engine
tr_body = "\n\n".join(translated_chunks)
# 用 service 返回的 engine 标签(spark / tencent / tencent_maas / agnes / nllb / cache)
engine_label = tr_title.engine or "tencent"
status = "ok" if (tr_title.text and tr_body) else "partial"
# 引擎名取 body 最后一段(更准 — 失败 fallback 后会用 fallback 的引擎)
engine_label = last_engine or tr_title.engine or "tencent"
# === 严格 status 判定 ===
# 防御性:即使 service.py 已经主动检测 marker 并抛异常,
# 万一上游漏了,这里再补一刀 — 不让错误 marker 文本伪装成 ok。
# 判定规则(failed 优先于 partial):
# 1) 含错误 marker ([翻译失败 / [本条未翻译 / AuthFailure / TencentCloudSDKException) → failed
# 2) body 完全等于 body_text(翻译没起作用,虽然理论上 service 不会返回原文) → failed
# 3) 未命中 1)、2),但标题或正文为空 → partial;其余情况 → ok
bad_markers = ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException")
combined = (tr_title.text or "") + "\n" + (tr_body or "")
has_marker = any(m in combined for m in bad_markers)
has_content = bool(tr_title.text) and bool(tr_body)
body_untranslated = bool(tr_body) and tr_body == (body_text or "")
if has_marker or body_untranslated:
status = "failed"
logger.warning(
"article %s translation marked failed: marker=%s body_untranslated=%s",
article_id, has_marker, body_untranslated,
)
elif not has_content:
status = "partial"
else:
status = "ok"
except Exception as e:
logger.exception("translate article %s failed: %s", article_id, e)
async with AsyncSessionLocal() as session: