fix(translation): 规范化 BCP-47 lang_src(避免 en-gb/zh-cn 等被 TMT 拒)
This commit is contained in:
@@ -17,7 +17,22 @@ from app.services.translation.base import BaseTranslator, TranslationResult
|
|||||||
|
|
||||||
logger = logging.getLogger("news.translate.tencent")
|
logger = logging.getLogger("news.translate.tencent")
|
||||||
|
|
||||||
# 常见语种映射
|
|
||||||
|
def _normalize_lang(code: str) -> str:
|
||||||
|
"""把 BCP-47(可能带地区后缀)简化成主语言,TMT 用的 2 字母 code。
|
||||||
|
|
||||||
|
例:
|
||||||
|
en-gb / en-us / en-au -> en
|
||||||
|
zh-cn / zh-tw -> zh
|
||||||
|
ja-jp -> ja
|
||||||
|
"" / auto -> 原样
|
||||||
|
"""
|
||||||
|
if not code or code == "auto":
|
||||||
|
return code
|
||||||
|
return code.split("-")[0].lower()
|
||||||
|
|
||||||
|
|
||||||
|
# 常见语种映射(归一化后再过这层;保留覆盖是兜底)
|
||||||
_LANG_MAP = {
|
_LANG_MAP = {
|
||||||
"en": "en",
|
"en": "en",
|
||||||
"zh": "zh",
|
"zh": "zh",
|
||||||
@@ -48,6 +63,9 @@ class TencentTranslator(BaseTranslator):
|
|||||||
if not text.strip():
|
if not text.strip():
|
||||||
return TranslationResult(text=text, engine=self.name, chars=0)
|
return TranslationResult(text=text, engine=self.name, chars=0)
|
||||||
|
|
||||||
|
# 关键:BCP-47 归一化(避免 en-gb / en-us / zh-cn / ja-jp 等被 TMT 拒)
|
||||||
|
source = _normalize_lang(source)
|
||||||
|
target = _normalize_lang(target)
|
||||||
source = _LANG_MAP.get(source, source if source != "auto" else "auto")
|
source = _LANG_MAP.get(source, source if source != "auto" else "auto")
|
||||||
target = _LANG_MAP.get(target, target)
|
target = _LANG_MAP.get(target, target)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user