Files
diary-news/backend/app/services/translation/local.py

63 lines
2.0 KiB
Python
Raw Normal View History

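"""Local translation (fallback engine; requires transformers + model files).

Disabled by default. To enable:
- set LOCAL_TRANSLATE_ENABLED=true
- pre-install the model inside the container (volume mount)
"""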
from __future__ import annotations
import logging
from app.config import settings
from app.services.translation.base import BaseTranslator, TranslationResult
logger = logging.getLogger("news.translate.local")
class LocalTranslator(BaseTranslator):
    """Translator backed by a locally loaded Hugging Face seq2seq model.

    The model named by ``settings.local_translate_model`` is loaded lazily on
    the first non-empty ``translate`` call, not at import or construction
    time, because loading is expensive. Both loading and inference run in the
    event loop's default executor so the loop itself is never blocked.

    Raises:
        RuntimeError: from ``__init__`` when
            ``settings.local_translate_enabled`` is falsy.
    """

    name = "nllb"

    # Short language codes accepted by callers -> NLLB (FLORES-200) codes.
    # Codes missing from this table are passed to the pipeline unchanged, so
    # callers may also supply full NLLB codes directly.
    _NLLB_CODES = {
        "en": "eng_Latn",
        "zh": "zho_Hans",
        "ja": "jpn_Jpan",
        "ko": "kor_Hang",
        "fr": "fra_Latn",
        "de": "deu_Latn",
        "es": "spa_Latn",
        "ru": "rus_Cyrl",
    }

    def __init__(self):
        if not settings.local_translate_enabled:
            raise RuntimeError("LocalTranslator disabled in settings")
        import threading

        # Lazily populated by _ensure_loaded(); None means "not loaded yet".
        self._pipe = None
        # Serialises model loading across executor worker threads.
        self._load_lock = threading.Lock()

    def _ensure_loaded(self):
        """Load the tokenizer, model and pipeline once; later calls are no-ops.

        Safe to call from several worker threads at the same time: the lock
        plus the re-check inside it ensure the model is loaded only once.
        """
        if self._pipe is not None:
            return
        with self._load_lock:
            # Another thread may have finished loading while we were waiting.
            if self._pipe is not None:
                return
            # Imported here so that importing this module does not require
            # transformers to be installed.
            from transformers import (
                AutoModelForSeq2SeqLM,
                AutoTokenizer,
                pipeline,
            )

            model_name = settings.local_translate_model
            logger.info("loading local translation model: %s", model_name)
            tok = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
            self._pipe = pipeline(
                "translation",
                model=model,
                tokenizer=tok,
                device=settings.local_translate_device,
            )

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text`` and return the result.

        Args:
            text: Text to translate. Blank or whitespace-only text is returned
                unchanged with ``chars=0`` and the model is not loaded.
            source: Source language. ``"auto"`` is treated as English (no
                language detection is performed); short codes listed in
                ``_NLLB_CODES`` are mapped; anything else is passed through.
            target: Target language, mapped the same way as ``source``
                (except that ``"auto"`` has no special meaning here).

        Returns:
            A ``TranslationResult`` holding the translated text, this engine's
            name, and ``len(text)`` as the character count.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        import asyncio

        if source == "auto":
            src = "eng_Latn"
        else:
            src = self._NLLB_CODES.get(source, source)
        tgt = self._NLLB_CODES.get(target, target)

        def _run():
            # Loading happens here, on the worker thread, so that the first
            # call does not stall the event loop while the model is read.
            self._ensure_loaded()
            return self._pipe(
                text, src_lang=src, tgt_lang=tgt, max_length=2000
            )

        loop = asyncio.get_running_loop()
        out = await loop.run_in_executor(None, _run)
        return TranslationResult(
            text=out[0]["translation_text"], engine=self.name, chars=len(text)
        )