feat: initial MVP - FastAPI backend + Vue3 frontend + docker-compose

- backend: FastAPI + SQLAlchemy 2.0(async) + asyncpg + Alembic
- 7 API routes: auth/me/articles/sources/bookmarks/subscriptions/admin
- models: User/Source/Article/Bookmark/Subscription/ApiToken
- services: RSS fetcher (feedparser) + Tencent TMT translator with quota + cache + local NLLB fallback
- workers: APScheduler + asyncio pipeline (fetch -> dedupe -> insert -> translate)
- seed scripts: create_user, seed_sources (5 RSS: Reuters/BBC/Al Jazeera/NHK/DW)
- frontend: Vue 3 + Vite + Naive UI + Pinia + vue-router
- pages: Login, Feed (24h), ArticleDetail, Sources, Bookmarks, AdminSources
- deploy: docker-compose (postgres/redis/api/worker/frontend/caddy)
- docs: README, DEPLOY, architecture, acceptance
This commit is contained in:
Mavis
2026-06-07 21:51:01 +08:00
commit 60b062daf2
81 changed files with 5540 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Translation services."""

View File

@@ -0,0 +1,26 @@
"""翻译后端抽象。"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass
class TranslationResult:
    """Outcome of a single translation request."""

    # Translated text (or whatever text the producer decided to hand back).
    text: str
    # Label of the engine/path that produced ``text``.
    engine: str
    # Number of source characters attributed to this request.
    chars: int
    # True when the text was served from a cache instead of an engine call.
    cached: bool = False
class BaseTranslator(ABC):
    """Abstract interface every translation backend implements."""

    # Engine label; concrete subclasses override it.
    name: str = "base"

    @abstractmethod
    async def translate(
        self,
        text: str,
        source: str = "auto",
        target: str = "zh",
    ) -> TranslationResult:
        """Translate ``text`` from ``source`` to ``target``.

        The call is awaited by the caller; implementations signal failure
        by raising an exception.
        """
def count_chars(s: str) -> int:
    """Return the approximate character count of ``s`` (Unicode code points).

    Tencent TMT bills by number of characters.
    """
    code_points = len(s)
    return code_points

View File

@@ -0,0 +1,62 @@
"""本地翻译(降级用,需要 transformers + 模型文件)。
默认关闭。启用方式:
- LOCAL_TRANSLATE_ENABLED=true
- 容器内预装模型(Volume 挂载)
"""
from __future__ import annotations
import logging
from app.config import settings
from app.services.translation.base import BaseTranslator, TranslationResult
logger = logging.getLogger("news.translate.local")
class LocalTranslator(BaseTranslator):
    """Local NLLB translator used as a fallback engine.

    Requires ``transformers`` and the model files to be available. The model
    is loaded lazily on first use, inside the worker thread that performs the
    translation, so the event loop is never blocked by model loading.

    Raises:
        RuntimeError: from ``__init__`` when ``settings.local_translate_enabled``
            is falsy.
    """

    name = "nllb"

    # Short language codes used by callers -> FLORES-200 codes expected by
    # NLLB. Codes that are not listed are passed to the model unchanged, so
    # callers may also supply full NLLB codes directly.
    _NLLB_CODES = {
        "en": "eng_Latn",
        "zh": "zho_Hans",
        "ja": "jpn_Jpan",
        "ko": "kor_Hang",
        "fr": "fra_Latn",
        "de": "deu_Latn",
        "es": "spa_Latn",
        "ru": "rus_Cyrl",
        "ar": "arb_Arab",
    }

    def __init__(self):
        if not settings.local_translate_enabled:
            raise RuntimeError("LocalTranslator disabled in settings")
        import threading

        # Lazy model loading (avoid loading a large model at import time).
        self._pipe = None
        # Loading now happens in executor threads; the lock keeps concurrent
        # first calls from loading the model more than once.
        self._load_lock = threading.Lock()

    def _ensure_loaded(self):
        """Build the translation pipeline once; later calls return at once."""
        if self._pipe is not None:
            return
        with self._load_lock:
            # Re-check: another thread may have finished loading while this
            # one was waiting for the lock.
            if self._pipe is not None:
                return
            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

            model_name = settings.local_translate_model
            logger.info("loading local translation model: %s", model_name)
            tok = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
            self._pipe = pipeline(
                "translation",
                model=model,
                tokenizer=tok,
                device=settings.local_translate_device,
            )

    def _translate_sync(self, text: str, src: str, tgt: str) -> str:
        """Blocking part of a translation: load the model if needed, then run it."""
        self._ensure_loaded()
        out = self._pipe(text, src_lang=src, tgt_lang=tgt, max_length=2000)
        return out[0]["translation_text"]

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text`` with the local model.

        Args:
            text: Source text. Blank text is returned unchanged with ``chars=0``.
            source: Short code, full NLLB code, or ``"auto"``. The model gets
                no language detection here, so ``"auto"`` is treated as English.
            target: Short code or full NLLB code.

        Returns:
            A ``TranslationResult`` whose ``engine`` is ``"nllb"`` and whose
            ``chars`` is ``len(text)``.

        Raises:
            Whatever model loading or inference raises; the caller decides how
            to degrade.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        import asyncio

        src = "eng_Latn" if source == "auto" else self._NLLB_CODES.get(source, source)
        tgt = self._NLLB_CODES.get(target, target)

        loop = asyncio.get_running_loop()
        # Model loading and inference are both blocking, so both run in the
        # default executor instead of on the event loop.
        translated = await loop.run_in_executor(
            None, self._translate_sync, text, src, tgt
        )
        return TranslationResult(text=translated, engine=self.name, chars=len(text))

View File

@@ -0,0 +1,146 @@
"""翻译服务门面:配额检查 + 缓存 + 引擎选择 + 月度计数。"""
from __future__ import annotations
import asyncio
import hashlib
import logging
from datetime import datetime, timezone
from typing import Protocol
from app.config import settings
from app.redis_client import get_redis
from app.services.translation.base import BaseTranslator, TranslationResult
from app.services.translation.local import LocalTranslator
from app.services.translation.tencent import TencentTranslator
logger = logging.getLogger("news.translate.service")
# Cache key helpers
def _cache_key(text: str, src: str, tgt: str) -> str:
    """Build the cache key for one (source, target, text) triple.

    The key is a fixed prefix followed by the SHA-1 hex digest of the
    ``src|tgt|text`` string.
    """
    payload = f"{src}|{tgt}|{text}".encode()
    digest = hashlib.sha1(payload).hexdigest()
    return "translation:cache:" + digest
def _month_key() -> str:
    """Return the usage-counter key for the current UTC month (``YYYYMM``)."""
    stamp = datetime.now(timezone.utc).strftime("%Y%m")
    return "translation:month:" + stamp
class TranslationService:
    """Translation facade: quota check, cache, engine selection, monthly usage count.

    ``translate`` is the entry point. It looks up the cache, picks Tencent
    while the monthly quota allows it (the local translator otherwise), runs
    the engine under a concurrency limit, caches successful results and
    records Tencent usage.
    """

    def __init__(self):
        # Engines are created lazily so that constructing the service never
        # requires credentials or model files.
        self._tencent: BaseTranslator | None = None
        self._local: BaseTranslator | None = None
        self._sem = asyncio.Semaphore(3)  # limit concurrent engine calls

    def _primary(self) -> BaseTranslator:
        """Return the Tencent translator, creating it on first use."""
        if self._tencent is None:
            self._tencent = TencentTranslator()
        return self._tencent

    def _fallback(self) -> BaseTranslator | None:
        """Return the local translator, or ``None`` when disabled or unavailable.

        An initialisation failure is logged and treated as "no fallback";
        creation is attempted again on the next call.
        """
        if self._local is None and settings.local_translate_enabled:
            try:
                self._local = LocalTranslator()
            except Exception as e:
                logger.warning("local translator init failed: %s", e)
                self._local = None
        return self._local

    async def can_use_tencent(self, chars: int) -> bool:
        """Tell whether ``chars`` more characters fit in this month's Tencent budget.

        Returns ``False`` when either Tencent credential is missing (the
        translator refuses to start without both), or when the month's usage
        plus ``chars`` would exceed the quota reduced by the safety buffer.
        """
        if not (settings.tencentcloud_secret_id and settings.tencentcloud_secret_key):
            return False
        r = get_redis()
        used = int(await r.get(_month_key()) or 0)
        buffered = int(
            settings.tencent_tmt_quota_month * (1 - settings.tencent_tmt_quota_buffer)
        )
        return (used + chars) <= buffered

    async def add_usage(self, chars: int) -> None:
        """Add ``chars`` to the current month's counter and refresh its expiry."""
        r = get_redis()
        key = _month_key()
        now = datetime.now(timezone.utc)
        # Start of next month (UTC midnight). year/month/day are replaced in a
        # single call so e.g. Jan 31 never passes through an invalid date.
        if now.month == 12:
            next_month = now.replace(
                year=now.year + 1, month=1, day=1,
                hour=0, minute=0, second=0, microsecond=0,
            )
        else:
            next_month = now.replace(
                month=now.month + 1, day=1,
                hour=0, minute=0, second=0, microsecond=0,
            )
        # Keep the counter one extra day past the end of the month.
        ttl = int((next_month - now).total_seconds()) + 86400
        async with r.pipeline(transaction=False) as pipe:
            pipe.incrby(key, chars)
            pipe.expire(key, ttl)
            await pipe.execute()

    async def _recover(
        self,
        failed: BaseTranslator,
        error: Exception,
        text: str,
        source: str,
        target: str,
        chars: int,
    ) -> TranslationResult:
        """Produce a result after ``failed`` raised ``error``.

        Tries the local fallback when one exists and is not the engine that
        just failed; if that is impossible or fails too, returns the original
        text with a failure note and ``engine="skip"``.
        """
        fb = self._fallback()
        if fb is not None and failed is not fb:
            try:
                return await fb.translate(text, source=source, target=target)
            except Exception as fb_error:
                logger.exception(
                    "fallback translate failed with %s: %s", fb.name, fb_error
                )
                error = fb_error
        return TranslationResult(
            text=text + f"\n\n[翻译失败: {error}]",
            engine="skip",
            chars=chars,
        )

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text``, using the cache and degrading gracefully.

        Args:
            text: Source text; blank text is returned as-is with ``engine="skip"``.
            source: Source language code, ``"auto"`` by default.
            target: Target language code, ``"zh"`` by default.

        Returns:
            A ``TranslationResult``. ``engine`` is ``"cache"`` for cache hits,
            ``"skip"`` when no translation was produced (the text then carries
            an explanatory note), otherwise the name of the engine used.

        Raises:
            Errors from the cache lookup, the quota lookup or the creation of
            the primary engine propagate; engine failures do not.
        """
        if not text.strip():
            return TranslationResult(text=text, engine="skip", chars=0)
        chars = len(text)

        # 1) Cache lookup.
        # NOTE(review): the cached value is used directly as text, which
        # presumes the Redis client decodes responses to str - confirm.
        r = get_redis()
        ck = _cache_key(text, source, target)
        cached = await r.get(ck)
        if cached is not None:
            return TranslationResult(text=cached, engine="cache", chars=chars, cached=True)

        # 2) Engine selection.
        engine: BaseTranslator
        if await self.can_use_tencent(chars):
            engine = self._primary()
        else:
            fb = self._fallback()
            if fb is None:
                # No local engine: hand back the original text with a marker.
                return TranslationResult(
                    text=text + "\n\n[本条未翻译:配额耗尽且未启用本地翻译]",
                    engine="skip",
                    chars=chars,
                )
            engine = fb
            logger.info("fallback to local translator for %d chars", chars)

        # 3) Engine call, degrading on failure.
        async with self._sem:
            try:
                res = await engine.translate(text, source=source, target=target)
            except Exception as e:
                logger.exception("translate failed with %s: %s", engine.name, e)
                res = await self._recover(engine, e, text, source, target, chars)

        # 4) Cache real translations only. A "skip" result is a failure
        #    placeholder; caching it would keep serving the error text for 30
        #    days and block any retry.
        if res.engine != "skip":
            try:
                await r.set(ck, res.text, ex=60 * 60 * 24 * 30)  # 30 days
            except Exception as e:
                # Caching is best-effort; the translation is still returned.
                logger.warning("translation cache write failed: %s", e)

        # 5) Usage accounting (Tencent only).
        if res.engine == "tencent":
            try:
                await self.add_usage(res.chars or chars)
            except Exception as e:
                logger.warning("add_usage failed: %s", e)
        return res
# Global singleton instance.
service = TranslationService()
# Structural type so the backend worker can call a translator directly.
class _Protocol(Protocol):
    """Anything exposing an awaitable ``translate(text, source, target)``."""

    async def translate(
        self,
        text: str,
        source: str = "auto",
        target: str = "zh",
    ) -> TranslationResult:
        ...

View File

@@ -0,0 +1,74 @@
"""腾讯云文本翻译 TMT。"""
from __future__ import annotations
import asyncio
import logging
import random
from typing import Any
from tencentcloud.common import credential
from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
TencentCloudSDKException,
)
from tencentcloud.tmt.v20180321 import models, tmt_client
from app.config import settings
from app.services.translation.base import BaseTranslator, TranslationResult
logger = logging.getLogger("news.translate.tencent")
# Common language-code mapping. Every listed code currently maps to itself;
# the table is the place to add entries whose code differs.
_LANG_MAP = {
    code: code
    for code in ("en", "zh", "ja", "ko", "fr", "de", "es", "ru", "ar")
}
class TencentTranslator(BaseTranslator):
    """Translator backed by the Tencent Cloud TMT ``TextTranslate`` API."""

    name = "tencent"

    def __init__(self):
        """Create the SDK client; raise ``RuntimeError`` if a credential is missing."""
        if not settings.tencentcloud_secret_id or not settings.tencentcloud_secret_key:
            raise RuntimeError("Tencent Cloud credentials missing")
        self.cred = credential.Credential(
            settings.tencentcloud_secret_id,
            settings.tencentcloud_secret_key,
        )
        self.client = tmt_client.TmtClient(self.cred, settings.tencentcloud_region)

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text``, retrying once after an SDK error.

        Blank text is returned unchanged with ``chars=0``. Otherwise up to two
        attempts are made, with a randomised 0.5-1.5 s pause between them; the
        SDK exception of the second attempt is re-raised.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        # Unlisted codes (including "auto") pass through unchanged.
        src_code = _LANG_MAP.get(source, source)
        tgt_code = _LANG_MAP.get(target, target)

        total_attempts = 2
        for attempt in range(total_attempts):
            final_attempt = attempt == total_attempts - 1
            try:
                request = models.TextTranslateRequest()
                request.SourceText = text
                request.Source = src_code
                request.Target = tgt_code
                request.ProjectId = 0
                # The SDK call is synchronous, so run it in a worker thread.
                response = await asyncio.to_thread(self.client.TextTranslate, request)
                translated = getattr(response, "TargetText", "") or ""
                return TranslationResult(
                    text=translated,
                    engine=self.name,
                    chars=len(text),
                    cached=False,
                )
            except TencentCloudSDKException as exc:
                logger.warning("tencent translate attempt %s failed: %s", attempt, exc)
                if final_attempt:
                    raise
                await asyncio.sleep(0.5 + random.random())
        raise RuntimeError("unreachable")