- backend: FastAPI + SQLAlchemy 2.0(async) + asyncpg + Alembic - 7 API routes: auth/me/articles/sources/bookmarks/subscriptions/admin - models: User/Source/Article/Bookmark/Subscription/ApiToken - services: RSS fetcher (feedparser) + Tencent TMT translator with quota + cache + local NLLB fallback - workers: APScheduler + asyncio pipeline (fetch -> dedupe -> insert -> translate) - seed scripts: create_user, seed_sources (5 RSS: Reuters/BBC/Al Jazeera/NHK/DW) - frontend: Vue 3 + Vite + Naive UI + Pinia + vue-router - pages: Login, Feed (24h), ArticleDetail, Sources, Bookmarks, AdminSources - deploy: docker-compose (postgres/redis/api/worker/frontend/caddy) - docs: README, DEPLOY, architecture, acceptance
63 lines
2.0 KiB
Python
63 lines
2.0 KiB
Python
"""本地翻译(降级用,需要 transformers + 模型文件)。
|
|
|
|
默认关闭。启用方式:
|
|
- LOCAL_TRANSLATE_ENABLED=true
|
|
- 容器内预装模型(Volume 挂载)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
from app.config import settings
|
|
from app.services.translation.base import BaseTranslator, TranslationResult
|
|
|
|
logger = logging.getLogger("news.translate.local")
|
|
|
|
|
|
class LocalTranslator(BaseTranslator):
    """Translator that runs a local seq2seq model through a transformers pipeline.

    The model named by ``settings.local_translate_model`` is loaded lazily on
    first use, so neither importing this module nor constructing the class
    pays the model-loading cost.
    """

    name = "nllb"

    # Short language codes accepted from callers -> FLORES-200 codes used by
    # NLLB. Codes not listed here are passed through unchanged, so a caller
    # may also supply a full NLLB code (e.g. "por_Latn") directly.
    _NLLB_CODES = {
        "en": "eng_Latn",
        "zh": "zho_Hans",
        "ja": "jpn_Jpan",
        "ko": "kor_Hang",
        "de": "deu_Latn",
        "fr": "fra_Latn",
        "es": "spa_Latn",
        "ru": "rus_Cyrl",
        "ar": "arb_Arab",
    }

    def __init__(self):
        """Create the translator without loading the model.

        Raises:
            RuntimeError: if ``settings.local_translate_enabled`` is falsy.
        """
        import threading

        if not settings.local_translate_enabled:
            raise RuntimeError("LocalTranslator disabled in settings")
        # Lazy model loading (avoid loading a large model at import time).
        self._pipe = None
        # Serialises the one-time load: translate() triggers it from executor
        # worker threads, and several first calls may arrive concurrently.
        self._load_lock = threading.Lock()

    def _ensure_loaded(self):
        """Build the translation pipeline on first call; no-op afterwards.

        Safe to call from multiple threads: the load happens at most once.
        """
        if self._pipe is not None:
            return
        with self._load_lock:
            # Re-check under the lock: another thread may have finished the
            # load while this one was waiting.
            if self._pipe is not None:
                return
            # Imported here so transformers is only required when the local
            # translator is actually used.
            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

            model_name = settings.local_translate_model
            logger.info("loading local translation model: %s", model_name)
            tok = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
            self._pipe = pipeline(
                "translation",
                model=model,
                tokenizer=tok,
                device=settings.local_translate_device,
            )

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text`` with the local model.

        Args:
            text: Text to translate. Whitespace-only text is returned as-is
                with ``chars=0`` and the model is not touched.
            source: Source language. ``"auto"`` is treated as English because
                no language detection is performed here; known short codes
                are mapped to NLLB codes; anything else is passed through.
            target: Target language, mapped the same way (no ``"auto"``
                special case).

        Returns:
            A ``TranslationResult`` carrying the translated text, this
            engine's name, and ``len(text)`` as the character count.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        import asyncio

        # NLLB uses long language codes (e.g. en -> eng_Latn, zh -> zho_Hans).
        if source == "auto":
            src = "eng_Latn"
        else:
            src = self._NLLB_CODES.get(source, source)
        tgt = self._NLLB_CODES.get(target, target)

        def _run():
            # Load the model (first call only) in the worker thread as well,
            # so the event loop is never blocked by model loading or inference.
            self._ensure_loaded()
            return self._pipe(text, src_lang=src, tgt_lang=tgt, max_length=2000)

        loop = asyncio.get_running_loop()
        out = await loop.run_in_executor(None, _run)
        return TranslationResult(
            text=out[0]["translation_text"], engine=self.name, chars=len(text)
        )
|