From fd7817b881f64e65769f079d40794a2848ba1a18 Mon Sep 17 00:00:00 2001 From: Mavis Date: Thu, 11 Jun 2026 10:01:19 +0800 Subject: [PATCH] =?UTF-8?q?fix(translate):=20=E6=8B=A6=E6=88=AA=E5=BC=95?= =?UTF-8?q?=E6=93=8E=E9=94=99=E8=AF=AF=20marker=20+=20pipeline=20=E4=B8=A5?= =?UTF-8?q?=E6=A0=BC=20status=20=E5=88=A4=E5=AE=9A,=E9=81=BF=E5=85=8D=20TM?= =?UTF-8?q?T=20AuthFailure=20=E4=BC=AA=E8=A3=85=20ok?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/services/translation/service.py | 16 ++++++++++- backend/app/workers/pipeline.py | 31 +++++++++++++++++++-- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/backend/app/services/translation/service.py b/backend/app/services/translation/service.py index 4cf18b0..e718125 100644 --- a/backend/app/services/translation/service.py +++ b/backend/app/services/translation/service.py @@ -234,7 +234,21 @@ class TranslationService: if res is None: raise RuntimeError(f"translation failed for {chars} chars (engine={engine.name})") - # 4) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天) + # 4) 校验翻译结果 — 如果文本里包含错误 marker(腾讯 TMT SDK + # 异常时偶尔把错误信息当作"翻译结果"返回,导致 pipeline 误判为 ok) + # 这种情况下我们要主动抛异常,触发 fallback 或标 failed + if res.engine != "cache" and res.engine != "skip": + for marker in ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException"): + if marker in res.text: + logger.warning( + "engine %s returned error-marker text (marker=%s), treating as failure", + res.engine, marker, + ) + raise RuntimeError( + f"engine={res.engine} returned error-marker '{marker}': {res.text[:120]}" + ) + + # 5) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天) if res.engine in ("spark", "zhipu", "tencent", "tencent_maas", "agnes", "nllb") and not res.cached: if "[翻译失败" not in res.text and "[本条未翻译" not in res.text: try: diff --git a/backend/app/workers/pipeline.py b/backend/app/workers/pipeline.py index 896a03a..112b373 100644 --- a/backend/app/workers/pipeline.py +++ b/backend/app/workers/pipeline.py @@ 
-176,15 +176,40 @@ async def translate_article(article_id: int) -> None: # body 段落切分 + 重组 chunks = _chunk_text(body_text, max_chars=settings.tencent_tmt_max_chars_per_req) translated_chunks: list[str] = [] + last_engine: str | None = None for ch in chunks: tr = await translation_service.translate(ch, source=lang_src, target=target) total_chars += tr.chars translated_chunks.append(tr.text) + last_engine = tr.engine tr_body = "\n\n".join(translated_chunks) - # 用 service 返回的 engine 标签(spark / tencent / tencent_maas / agnes / nllb / cache) - engine_label = tr_title.engine or "tencent" - status = "ok" if (tr_title.text and tr_body) else "partial" + # 引擎名取 body 最后一段(更准 — 失败 fallback 后会用 fallback 的引擎) + engine_label = last_engine or tr_title.engine or "tencent" + + # === Strict status determination === + # Defensive: service.py already detects error markers and raises, but if + # anything slips through upstream, re-check here so marker text never passes as ok. + # Status is "failed" if 1) or 2) holds, else "partial" if 3) holds, else "ok": + # 1) title or body contains an error marker ([翻译失败 / [本条未翻译 / AuthFailure / TencentCloudSDKException) + # 2) translated body is identical to body_text (translation had no effect; in theory service never returns the source text) + # 3) title or body is empty + bad_markers = ("[翻译失败", "[本条未翻译", "AuthFailure", "TencentCloudSDKException") + combined = (tr_title.text or "") + "\n" + (tr_body or "") + has_marker = any(m in combined for m in bad_markers) + has_content = bool(tr_title.text) and bool(tr_body) + body_untranslated = bool(tr_body) and tr_body == (body_text or "") + + if has_marker or body_untranslated: + status = "failed" + logger.warning( + "article %s translation marked failed: marker=%s body_untranslated=%s", + article_id, has_marker, body_untranslated, + ) + elif not has_content: + status = "partial" + else: + status = "ok" except Exception as e: logger.exception("translate article %s failed: %s", article_id, e) async with AsyncSessionLocal() as session: