fix(translate): 拦截引擎错误 marker + pipeline 严格 status 判定,避免 TMT AuthFailure 伪装 ok
This commit is contained in:
@@ -234,7 +234,21 @@ class TranslationService:
|
|||||||
if res is None:
|
if res is None:
|
||||||
raise RuntimeError(f"translation failed for {chars} chars (engine={engine.name})")
|
raise RuntimeError(f"translation failed for {chars} chars (engine={engine.name})")
|
||||||
|
|
||||||
# 4) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天)
|
# 4) 校验翻译结果 — 如果文本里包含错误 marker(腾讯 TMT SDK
|
||||||
|
# 异常时偶尔把错误信息当作"翻译结果"返回,导致 pipeline 误判为 ok)
|
||||||
|
# 这种情况下我们要主动抛异常,触发 fallback 或标 failed
|
||||||
|
if res.engine != "cache" and res.engine != "skip":
|
||||||
|
for marker in ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException"):
|
||||||
|
if marker in res.text:
|
||||||
|
logger.warning(
|
||||||
|
"engine %s returned error-marker text (marker=%s), treating as failure",
|
||||||
|
res.engine, marker,
|
||||||
|
)
|
||||||
|
raise RuntimeError(
|
||||||
|
f"engine={res.engine} returned error-marker '{marker}': {res.text[:120]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 5) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天)
|
||||||
if res.engine in ("spark", "zhipu", "tencent", "tencent_maas", "agnes", "nllb") and not res.cached:
|
if res.engine in ("spark", "zhipu", "tencent", "tencent_maas", "agnes", "nllb") and not res.cached:
|
||||||
if "[翻译失败" not in res.text and "[本条未翻译" not in res.text:
|
if "[翻译失败" not in res.text and "[本条未翻译" not in res.text:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -176,15 +176,40 @@ async def translate_article(article_id: int) -> None:
|
|||||||
# body 段落切分 + 重组
|
# body 段落切分 + 重组
|
||||||
chunks = _chunk_text(body_text, max_chars=settings.tencent_tmt_max_chars_per_req)
|
chunks = _chunk_text(body_text, max_chars=settings.tencent_tmt_max_chars_per_req)
|
||||||
translated_chunks: list[str] = []
|
translated_chunks: list[str] = []
|
||||||
|
last_engine: str | None = None
|
||||||
for ch in chunks:
|
for ch in chunks:
|
||||||
tr = await translation_service.translate(ch, source=lang_src, target=target)
|
tr = await translation_service.translate(ch, source=lang_src, target=target)
|
||||||
total_chars += tr.chars
|
total_chars += tr.chars
|
||||||
translated_chunks.append(tr.text)
|
translated_chunks.append(tr.text)
|
||||||
|
last_engine = tr.engine
|
||||||
tr_body = "\n\n".join(translated_chunks)
|
tr_body = "\n\n".join(translated_chunks)
|
||||||
|
|
||||||
# 用 service 返回的 engine 标签(spark / tencent / tencent_maas / agnes / nllb / cache)
|
# 引擎名取 body 最后一段(更准 — 失败 fallback 后会用 fallback 的引擎)
|
||||||
engine_label = tr_title.engine or "tencent"
|
engine_label = last_engine or tr_title.engine or "tencent"
|
||||||
status = "ok" if (tr_title.text and tr_body) else "partial"
|
|
||||||
|
# === 严格 status 判定 ===
|
||||||
|
# 防御性:即使 service.py 已经主动检测 marker 并抛异常,
|
||||||
|
# 万一上游漏了,这里再补一刀 — 不让错误 marker 文本伪装成 ok。
|
||||||
|
# 出现以下任一情况都视为 failed:
|
||||||
|
# 1) 标题或正文为空(注意:若无错误 marker 且正文并非原文,此情况实际标为 partial,而非 failed)
|
||||||
|
# 2) 含错误 marker ([翻译失败 / [本条未翻译 / AuthFailure / TencentCloudSDKException)
|
||||||
|
# 3) body 完全等于 body_text(翻译没起作用,虽然理论上 service 不会返回原文)
|
||||||
|
bad_markers = ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException")
|
||||||
|
combined = (tr_title.text or "") + "\n" + (tr_body or "")
|
||||||
|
has_marker = any(m in combined for m in bad_markers)
|
||||||
|
has_content = bool(tr_title.text) and bool(tr_body)
|
||||||
|
body_untranslated = bool(tr_body) and tr_body == (body_text or "")
|
||||||
|
|
||||||
|
if has_marker or body_untranslated:
|
||||||
|
status = "failed"
|
||||||
|
logger.warning(
|
||||||
|
"article %s translation marked failed: marker=%s body_untranslated=%s",
|
||||||
|
article_id, has_marker, body_untranslated,
|
||||||
|
)
|
||||||
|
elif not has_content:
|
||||||
|
status = "partial"
|
||||||
|
else:
|
||||||
|
status = "ok"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("translate article %s failed: %s", article_id, e)
|
logger.exception("translate article %s failed: %s", article_id, e)
|
||||||
async with AsyncSessionLocal() as session:
|
async with AsyncSessionLocal() as session:
|
||||||
|
|||||||
Reference in New Issue
Block a user