93 lines
2.9 KiB
Python
93 lines
2.9 KiB
Python
"""腾讯云文本翻译 TMT。"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import random
|
|
from typing import Any
|
|
|
|
from tencentcloud.common import credential
|
|
from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
|
|
TencentCloudSDKException,
|
|
)
|
|
from tencentcloud.tmt.v20180321 import models, tmt_client
|
|
|
|
from app.config import settings
|
|
from app.services.translation.base import BaseTranslator, TranslationResult
|
|
|
|
logger = logging.getLogger("news.translate.tencent")
|
|
|
|
|
|
def _normalize_lang(code: str) -> str:
    """Reduce a BCP-47 style language tag to its lowercase primary subtag.

    TMT is addressed with bare two-letter codes, so any region suffix is
    dropped. Both ``-`` and ``_`` are accepted as subtag separators and
    surrounding whitespace is ignored.

    Examples:
        en-gb / en-us / en-au / en_US -> en
        zh-cn / zh-tw                 -> zh
        ja-jp                         -> ja
        "" / auto                     -> returned unchanged

    Args:
        code: Language tag, possibly carrying a region suffix.

    Returns:
        The lowercase primary language subtag, or ``code`` itself when it is
        empty or the literal ``"auto"``.
    """
    if not code or code == "auto":
        return code
    # Locale identifiers are often written POSIX-style ("en_US"); treat the
    # underscore like a hyphen so those collapse to the primary subtag too.
    primary = code.strip().replace("_", "-").split("-", 1)[0]
    return primary.lower()
|
|
|
|
|
|
# Common language codes (applied after normalization; the explicit table is
# kept as a fallback override layer).
_LANG_MAP = {
    lang: lang
    for lang in ("en", "zh", "ja", "ko", "fr", "de", "es", "ru", "ar")
}
|
|
|
|
|
|
class TencentTranslator(BaseTranslator):
    """Translator that calls the Tencent Cloud TMT ``TextTranslate`` API."""

    name = "tencent"

    def __init__(self):
        """Create the TMT client from the credentials and region in ``settings``.

        Raises:
            RuntimeError: If the secret id or the secret key is not configured.
        """
        if not settings.tencentcloud_secret_id or not settings.tencentcloud_secret_key:
            raise RuntimeError("Tencent Cloud credentials missing")
        self.cred = credential.Credential(
            settings.tencentcloud_secret_id, settings.tencentcloud_secret_key
        )
        self.client = tmt_client.TmtClient(self.cred, settings.tencentcloud_region)

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text`` from ``source`` to ``target`` via TMT.

        Args:
            text: Text to translate. Blank or whitespace-only text is returned
                unchanged without calling the API.
            source: Source language tag; region suffixes are dropped. An empty
                value falls back to ``"auto"``.
            target: Target language tag; region suffixes are dropped.

        Returns:
            A ``TranslationResult`` holding the translated text, this engine's
            name and the character count of the input (``0`` for blank input).

        Raises:
            TencentCloudSDKException: If the call fails on both attempts.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        # Key step: collapse BCP-47 tags (en-gb / en-us / zh-cn / ja-jp ...) to
        # the primary language so TMT does not reject them.
        # An empty source would otherwise be sent as an empty ``Source`` field
        # (presumably rejected by TMT), so fall back to auto-detection.
        source = _normalize_lang(source) or "auto"
        target = _normalize_lang(target)
        source = _LANG_MAP.get(source, source)
        target = _LANG_MAP.get(target, target)

        # The request does not change between attempts, so build it once.
        req = models.TextTranslateRequest()
        req.SourceText = text
        req.Source = source
        req.Target = target
        req.ProjectId = 0

        # Simple retry: one extra attempt after a short randomized pause.
        for attempt in range(2):
            try:
                # The SDK call is synchronous, so run it in a worker thread.
                resp: Any = await asyncio.to_thread(self.client.TextTranslate, req)
            except TencentCloudSDKException as e:
                logger.warning("tencent translate attempt %s failed: %s", attempt, e)
                if attempt != 0:
                    raise
                await asyncio.sleep(0.5 + random.random())
            else:
                out = getattr(resp, "TargetText", "") or ""
                return TranslationResult(
                    text=out, engine=self.name, chars=len(text), cached=False
                )
        raise RuntimeError("unreachable")
|