From 6b5828c1c074d9824387e7c21c54a643ba5f63f8 Mon Sep 17 00:00:00 2001
From: Mavis
Date: Mon, 8 Jun 2026 15:49:03 +0800
Subject: [PATCH] =?UTF-8?q?fix(translation):=20=E8=A7=84=E8=8C=83=E5=8C=96?=
 =?UTF-8?q?=20BCP-47=20lang=5Fsrc(=E9=81=BF=E5=85=8D=20en-gb/zh-cn=20?=
 =?UTF-8?q?=E7=AD=89=E8=A2=AB=20TMT=20=E6=8B=92)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/app/services/translation/tencent.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/backend/app/services/translation/tencent.py b/backend/app/services/translation/tencent.py
index 41d48f6..58b53e4 100644
--- a/backend/app/services/translation/tencent.py
+++ b/backend/app/services/translation/tencent.py
@@ -17,7 +17,34 @@ from app.services.translation.base import BaseTranslator, TranslationResult
 
 logger = logging.getLogger("news.translate.tencent")
 
-# 常见语种映射
+
+def _normalize_lang(code: str) -> str:
+    """Reduce a BCP-47 or locale-style tag to a TMT language code.
+
+    Region and script subtags are dropped, except that Traditional Chinese
+    variants become ``zh-TW``, which TMT lists as a language separate from
+    ``zh`` (Simplified Chinese).
+
+    Examples:
+        en-gb / en-US / en_AU   -> en
+        zh-cn / zh-Hans         -> zh
+        zh-tw / zh-HK / zh-Hant -> zh-TW
+        ja-jp                   -> ja
+        "" / auto               -> returned unchanged
+    """
+    if not code or code == "auto":
+        return code
+    # Accept POSIX-style locales (``en_US``) as well as BCP-47 tags.
+    primary, *subtags = code.strip().lower().replace("_", "-").split("-")
+    # An explicit Simplified script subtag wins over a region hint.
+    if primary == "zh" and "hans" not in subtags:
+        if not {"hant", "tw", "hk", "mo"}.isdisjoint(subtags):
+            return "zh-TW"
+    return primary
+
+
+# Common language mapping, applied after normalization; the explicit
+# entries are kept as a fallback.
 _LANG_MAP = {
     "en": "en",
     "zh": "zh",
@@ -48,6 +75,9 @@ class TencentTranslator(BaseTranslator):
         if not text.strip():
             return TranslationResult(text=text, engine=self.name, chars=0)
 
+        # Normalize BCP-47 tags first; TMT rejects en-gb / zh-cn / ja-jp etc.
+        source = _normalize_lang(source)
+        target = _normalize_lang(target)
         source = _LANG_MAP.get(source, source if source != "auto" else "auto")
         target = _LANG_MAP.get(target, target)
 