fix(translate): 拦截引擎错误 marker + pipeline 严格 status 判定,避免 TMT AuthFailure 伪装 ok
This commit is contained in:
@@ -234,7 +234,21 @@ class TranslationService:
|
||||
if res is None:
|
||||
raise RuntimeError(f"translation failed for {chars} chars (engine={engine.name})")
|
||||
|
||||
# 4) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天)
|
||||
# 4) 校验翻译结果 — 如果文本里包含错误 marker(腾讯 TMT SDK
|
||||
# 异常时偶尔把错误信息当作"翻译结果"返回,导致 pipeline 误判为 ok)
|
||||
# 这种情况下我们要主动抛异常,触发 fallback 或标 failed
|
||||
if res.engine != "cache" and res.engine != "skip":
|
||||
for marker in ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException"):
|
||||
if marker in res.text:
|
||||
logger.warning(
|
||||
"engine %s returned error-marker text (marker=%s), treating as failure",
|
||||
res.engine, marker,
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"engine={res.engine} returned error-marker '{marker}': {res.text[:120]}"
|
||||
)
|
||||
|
||||
# 5) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天)
|
||||
if res.engine in ("spark", "zhipu", "tencent", "tencent_maas", "agnes", "nllb") and not res.cached:
|
||||
if "[翻译失败" not in res.text and "[本条未翻译" not in res.text:
|
||||
try:
|
||||
|
||||
@@ -176,15 +176,40 @@ async def translate_article(article_id: int) -> None:
|
||||
# body 段落切分 + 重组
|
||||
chunks = _chunk_text(body_text, max_chars=settings.tencent_tmt_max_chars_per_req)
|
||||
translated_chunks: list[str] = []
|
||||
last_engine: str | None = None
|
||||
for ch in chunks:
|
||||
tr = await translation_service.translate(ch, source=lang_src, target=target)
|
||||
total_chars += tr.chars
|
||||
translated_chunks.append(tr.text)
|
||||
last_engine = tr.engine
|
||||
tr_body = "\n\n".join(translated_chunks)
|
||||
|
||||
# 用 service 返回的 engine 标签(spark / tencent / tencent_maas / agnes / nllb / cache)
|
||||
engine_label = tr_title.engine or "tencent"
|
||||
status = "ok" if (tr_title.text and tr_body) else "partial"
|
||||
# 引擎名取 body 最后一段(更准 — 失败 fallback 后会用 fallback 的引擎)
|
||||
engine_label = last_engine or tr_title.engine or "tencent"
|
||||
|
||||
# === 严格 status 判定 ===
|
||||
# 防御性:即使 service.py 已经主动检测 marker 并抛异常,
|
||||
# 万一上游漏了,这里再补一刀 — 不让错误 marker 文本伪装成 ok。
|
||||
# 出现以下任一情况都视为 failed:
|
||||
# 1) title or body is empty — NOTE: the code below marks this case "partial", not "failed"
|
||||
# 2) 含错误 marker ([翻译失败 / [本条未翻译 / AuthFailure / TencentCloudSDKException)
|
||||
# 3) body 完全等于 body_text(翻译没起作用,虽然理论上 service 不会返回原文)
|
||||
bad_markers = ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException")
|
||||
combined = (tr_title.text or "") + "\n" + (tr_body or "")
|
||||
has_marker = any(m in combined for m in bad_markers)
|
||||
has_content = bool(tr_title.text) and bool(tr_body)
|
||||
body_untranslated = bool(tr_body) and tr_body == (body_text or "")
|
||||
|
||||
if has_marker or body_untranslated:
|
||||
status = "failed"
|
||||
logger.warning(
|
||||
"article %s translation marked failed: marker=%s body_untranslated=%s",
|
||||
article_id, has_marker, body_untranslated,
|
||||
)
|
||||
elif not has_content:
|
||||
status = "partial"
|
||||
else:
|
||||
status = "ok"
|
||||
except Exception as e:
|
||||
logger.exception("translate article %s failed: %s", article_id, e)
|
||||
async with AsyncSessionLocal() as session:
|
||||
|
||||
Reference in New Issue
Block a user