Compare commits
3 Commits
76e95908e8
...
3e56fed541
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3e56fed541 | ||
|
|
a5bfb7d49a | ||
|
|
474299baf9 |
14
.env.example
14
.env.example
@@ -36,11 +36,21 @@ TENCENT_TMT_QUOTA_BUFFER=0.05
|
||||
TENCENT_TMT_MAX_CHARS_PER_REQ=4500
|
||||
|
||||
# ===== 本地翻译(降级) =====
|
||||
# 不启用就留空:不会用本地模型
|
||||
LOCAL_TRANSLATE_ENABLED=false
|
||||
# 不启用就留空:不会用本地模型
LOCAL_TRANSLATE_ENABLED=false
|
||||
LOCAL_TRANSLATE_MODEL=nllb-200-distilled-600M
|
||||
LOCAL_TRANSLATE_DEVICE=cpu
|
||||
|
||||
# ===== 腾讯 MaaS 翻译(备用通道,OpenAI 兼容协议)=====
|
||||
# 申请:https://console.cloud.tencent.com/maas 或 hivoice 控制台
|
||||
# 留空 api_key = 不启用(只在 TMT 配额耗尽/TMT 失败时启用)
|
||||
# 端点固定为 https://maas-api.hivoice.cn/v1(腾讯 MaaS 翻译服务)
|
||||
# 模型:u2(翻译专用,支持多语种)
|
||||
TENCENT_MAAS_API_KEY=
|
||||
TENCENT_MAAS_BASE_URL=https://maas-api.hivoice.cn/v1
|
||||
TENCENT_MAAS_MODEL=u2
|
||||
# 单次调用间隔(秒),避免被 MaaS 限流
|
||||
TENCENT_MAAS_INTERVAL_SEC=1.0
|
||||
|
||||
# ===== 抓取 =====
|
||||
# 全局 QPS 上限
|
||||
FETCH_GLOBAL_QPS=4
|
||||
|
||||
@@ -127,6 +127,10 @@ async def list_articles(
|
||||
published_at=art.published_at,
|
||||
fetched_at=art.fetched_at,
|
||||
image_url=art.image_url,
|
||||
# 列表预览钩子:分类 + LLM 点评 + AI 插图 缩略图
|
||||
commentary=art.commentary,
|
||||
commentary_status=art.commentary_status,
|
||||
image_ai_url=art.image_ai_url,
|
||||
is_starred=art.id in starred_ids,
|
||||
)
|
||||
items.append(item)
|
||||
|
||||
@@ -82,6 +82,15 @@ class Settings(BaseSettings):
|
||||
local_translate_model: str = "nllb-200-distilled-600M"
|
||||
local_translate_device: str = "cpu"
|
||||
|
||||
# ===== 腾讯 MaaS(OpenAI 兼容翻译备用通道)=====
|
||||
# 用法:腾讯云 MaaS 提供的翻译模型,通过 OpenAI 协议调用
|
||||
# 留空 api_key = 不启用该 provider
|
||||
tencent_maas_api_key: str = ""
|
||||
tencent_maas_base_url: str = "https://maas-api.hivoice.cn/v1"
|
||||
tencent_maas_model: str = "u2"
|
||||
# 每篇调用间隔(秒),与 LLM 客户端解耦
|
||||
tencent_maas_interval_sec: float = 1.0
|
||||
|
||||
# ===== 抓取 =====
|
||||
fetch_global_qps: int = 4
|
||||
fetch_timeout: int = 20
|
||||
|
||||
@@ -16,7 +16,7 @@ class SourceBrief(BaseModel):
|
||||
|
||||
|
||||
class ArticleListItem(BaseModel):
|
||||
"""列表项:精简字段。"""
|
||||
"""列表项:精简字段(首页只露钩子,详细阅读进详情页)。"""
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
@@ -31,6 +31,10 @@ class ArticleListItem(BaseModel):
|
||||
published_at: datetime | None = None
|
||||
fetched_at: datetime
|
||||
image_url: str | None = None
|
||||
# === 列表预览钩子:点击进详情前的"诱导点" ===
|
||||
commentary: str | None = None # LLM 点评(列表里截断显示)
|
||||
commentary_status: str | None = None # ok/failed/pending/n/a
|
||||
image_ai_url: str | None = None # AI 插图(列表里缩略图)
|
||||
is_starred: bool = False
|
||||
|
||||
|
||||
|
||||
@@ -390,7 +390,7 @@ async def enrich_article(article_id: int) -> dict[str, str]:
|
||||
# === 后台循环 ===
|
||||
# 与 translation_loop 一样,常驻从队列里取文章
|
||||
ENRICHMENT_INTERVAL_SEC = 5.0 # 没活时等待
|
||||
ENRICHMENT_BATCH_SIZE = 1
|
||||
ENRICHMENT_BATCH_SIZE = 3 # 每轮并发拉取候选,然后顺序处理(LLM 客户端本身有节流)
|
||||
|
||||
|
||||
async def enrichment_loop() -> None:
|
||||
@@ -405,6 +405,8 @@ async def enrichment_loop() -> None:
|
||||
try:
|
||||
async with AsyncSessionLocal() as session:
|
||||
# 已翻译完成 + 4 个状态中至少有一个是 pending
|
||||
# 关键:不能按 translated_at 升序 — 老文章已 enrich,新文章 translated_at=NULL(被排到最后)
|
||||
# 改为按 id 升序(新文章 id 大),循环里再过滤 status
|
||||
rows = (
|
||||
await session.execute(
|
||||
select(Article)
|
||||
@@ -412,8 +414,8 @@ async def enrichment_loop() -> None:
|
||||
Article.translation_status == "ok",
|
||||
Article.title_zh.is_not(None),
|
||||
)
|
||||
.order_by(Article.translated_at.asc().nullslast(), Article.id.asc())
|
||||
.limit(ENRICHMENT_BATCH_SIZE * 5) # 多取几个找需要 enrich 的
|
||||
.order_by(Article.id.asc())
|
||||
.limit(ENRICHMENT_BATCH_SIZE * 20) # 多取一些找需要 enrich 的
|
||||
)
|
||||
).scalars()
|
||||
candidates = list(rows)
|
||||
@@ -441,7 +443,7 @@ async def enrichment_loop() -> None:
|
||||
await enrich_article(aid)
|
||||
except Exception as e:
|
||||
logger.exception("enrich_article %s in loop failed: %s", aid, e)
|
||||
await asyncio.sleep(0.5) # 文章间轻节流
|
||||
await asyncio.sleep(0.2) # 文章间轻节流(LLM 内部还有 interval_sec)
|
||||
except Exception as e:
|
||||
logger.exception("enrichment_loop error: %s", e)
|
||||
await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
"""翻译服务门面:配额检查 + 缓存 + 引擎选择 + 月度计数。"""
|
||||
"""翻译服务门面:配额检查 + 缓存 + 引擎选择 + 月度计数。
|
||||
|
||||
引擎链路(优先级降序):
|
||||
1. tencent TMT(主,按月配额)
|
||||
2. tencent_maas(备用,OpenAI 兼容,无配额;主失败/TMT 配额耗尽时启用)
|
||||
3. local(最后兜底,需 settings.local_translate_enabled=true)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
@@ -12,6 +18,7 @@ from app.redis_client import get_redis
|
||||
from app.services.translation.base import BaseTranslator, TranslationResult
|
||||
from app.services.translation.local import LocalTranslator
|
||||
from app.services.translation.tencent import TencentTranslator
|
||||
from app.services.translation.tencent_maas import TencentMaaSTranslator
|
||||
|
||||
logger = logging.getLogger("news.translate.service")
|
||||
|
||||
@@ -30,16 +37,33 @@ def _month_key() -> str:
|
||||
class TranslationService:
|
||||
def __init__(self):
|
||||
self._tencent: BaseTranslator | None = None
|
||||
self._tencent_maas: BaseTranslator | None = None
|
||||
self._local: BaseTranslator | None = None
|
||||
# 串行:1 个并发;避免触发腾讯 TMT 限速
|
||||
self._sem = asyncio.Semaphore(1)
|
||||
|
||||
def _primary(self) -> BaseTranslator:
|
||||
def _primary(self) -> BaseTranslator | None:
|
||||
"""主引擎:腾讯 TMT(初始化失败返回 None 表示不可用)。"""
|
||||
if self._tencent is None:
|
||||
self._tencent = TencentTranslator()
|
||||
try:
|
||||
self._tencent = TencentTranslator()
|
||||
except Exception as e:
|
||||
logger.warning("tencent TMT init failed: %s", e)
|
||||
self._tencent = None
|
||||
return self._tencent
|
||||
|
||||
def _fallback(self) -> BaseTranslator | None:
|
||||
def _maas(self) -> BaseTranslator | None:
|
||||
"""备用引擎:腾讯 MaaS(OpenAI 兼容,无配额)。"""
|
||||
if self._tencent_maas is None and settings.tencent_maas_api_key:
|
||||
try:
|
||||
self._tencent_maas = TencentMaaSTranslator()
|
||||
except Exception as e:
|
||||
logger.warning("tencent MaaS init failed: %s", e)
|
||||
self._tencent_maas = None
|
||||
return self._tencent_maas
|
||||
|
||||
def _local_translator(self) -> BaseTranslator | None:
|
||||
"""最后兜底:本地模型(需开关)。"""
|
||||
if self._local is None and settings.local_translate_enabled:
|
||||
try:
|
||||
self._local = LocalTranslator()
|
||||
@@ -48,6 +72,13 @@ class TranslationService:
|
||||
self._local = None
|
||||
return self._local
|
||||
|
||||
# 兼容旧调用点:返回第一个可用的 fallback(优先 maas,次 local)
|
||||
def _fallback(self) -> BaseTranslator | None:
|
||||
m = self._maas()
|
||||
if m is not None:
|
||||
return m
|
||||
return self._local_translator()
|
||||
|
||||
async def can_use_tencent(self, chars: int) -> bool:
|
||||
if not settings.tencentcloud_secret_id:
|
||||
return False
|
||||
@@ -87,52 +118,62 @@ class TranslationService:
|
||||
if cached is not None:
|
||||
return TranslationResult(text=cached, engine="cache", chars=chars, cached=True)
|
||||
|
||||
# 2) 选引擎
|
||||
# 2) 选引擎(主 → maas 备用 → local 兜底)
|
||||
use_tencent = await self.can_use_tencent(chars)
|
||||
engine: BaseTranslator
|
||||
if use_tencent:
|
||||
engine = self._primary()
|
||||
engine: BaseTranslator | None = self._primary()
|
||||
if engine is None:
|
||||
# TMT 配了 key 但初始化失败 → 直接走 maas
|
||||
logger.warning("TMT unavailable, falling back to MaaS")
|
||||
engine = self._maas()
|
||||
else:
|
||||
fb = self._fallback()
|
||||
if fb is None:
|
||||
# 没本地:返回原文 + 标记
|
||||
engine = None
|
||||
|
||||
if engine is None:
|
||||
# 配额耗尽 / TMT 不可用:走备用链
|
||||
engine = self._maas() or self._local_translator()
|
||||
if engine is None:
|
||||
# 全无可用:返回原文 + 标记
|
||||
return TranslationResult(
|
||||
text=text + "\n\n[本条未翻译:配额耗尽且未启用本地翻译]",
|
||||
text=text + "\n\n[本条未翻译:所有翻译通道不可用]",
|
||||
engine="skip",
|
||||
chars=chars,
|
||||
)
|
||||
engine = fb
|
||||
logger.info("fallback to local translator for %d chars", chars)
|
||||
if engine.name == "tencent_maas":
|
||||
logger.info("tencent quota exhausted, fallback to tencent_maas for %d chars", chars)
|
||||
else:
|
||||
logger.info("fallback to local translator for %d chars", chars)
|
||||
|
||||
# 3) 调用
|
||||
# 3) 调用(失败时降级)
|
||||
async with self._sem:
|
||||
res = None
|
||||
res: TranslationResult | None = None
|
||||
try:
|
||||
res = await engine.translate(text, source=source, target=target)
|
||||
except Exception as e:
|
||||
# 失败:降级
|
||||
logger.exception("translate failed with %s: %s", engine.name, e)
|
||||
fb = self._fallback()
|
||||
if fb is not None and engine is not fb:
|
||||
# 按 maas → local 顺序找一个不同的 fallback
|
||||
fb = self._maas() if engine.name != "tencent_maas" else None
|
||||
if fb is None and settings.local_translate_enabled and engine.name != "local":
|
||||
fb = self._local_translator()
|
||||
if fb is not None:
|
||||
try:
|
||||
res = await fb.translate(text, source=source, target=target)
|
||||
logger.info("fallback %s succeeded after %s failed", fb.name, engine.name)
|
||||
except Exception as e2:
|
||||
logger.exception("fallback %s also failed: %s", fb.name, e2)
|
||||
res = None
|
||||
if res is None:
|
||||
# 主 + fallback 都失败:抛异常,让上层标记 status=failed
|
||||
raise RuntimeError(f"translation failed for {chars} chars (engine={engine.name})")
|
||||
|
||||
# 4) 写缓存 — 只缓存真实翻译结果;失败/降级文本不缓存(避免污染 30 天)
|
||||
if res.engine in ("tencent", "nllb", "cache") and not res.cached:
|
||||
# 二次保险:如果文本里仍含错误标记,也不缓存
|
||||
if res.engine in ("tencent", "tencent_maas", "nllb") and not res.cached:
|
||||
if "[翻译失败" not in res.text and "[本条未翻译" not in res.text:
|
||||
try:
|
||||
await r.set(ck, res.text, ex=60 * 60 * 24 * 30) # 30 天
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 5) 计数(只在 tencent 上计)
|
||||
# 5) 计数(只在 tencent TMT 上计;maas/local 都不计腾讯云配额)
|
||||
if res.engine == "tencent":
|
||||
try:
|
||||
await self.add_usage(res.chars or chars)
|
||||
|
||||
143
backend/app/services/translation/tencent_maas.py
Normal file
143
backend/app/services/translation/tencent_maas.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""腾讯 MaaS 翻译(OpenAI 兼容协议)。
|
||||
|
||||
- 端点:https://maas-api.hivoice.cn/v1
|
||||
- 模型:u2(翻译专用)
|
||||
- 鉴权:Bearer token(api_key 直接当 Bearer)
|
||||
- 请求:POST /chat/completions,system prompt 告诉模型做翻译
|
||||
|
||||
设计上独立于 LlmClient(不走 agnes_* 配置),专门走 tencent_maas_* 配置,
|
||||
避免和 LLM 智能增强共用 client 的节流。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
from app.services.translation.base import BaseTranslator, TranslationResult
|
||||
|
||||
logger = logging.getLogger("news.translate.tencent_maas")
|
||||
|
||||
|
||||
# 简单的源/目标语言映射(MaaS 模型期望 ISO 639-1 代码)
|
||||
_LANG_MAP = {
|
||||
"en": "English",
|
||||
"zh": "Chinese",
|
||||
"ja": "Japanese",
|
||||
"ko": "Korean",
|
||||
"fr": "French",
|
||||
"de": "German",
|
||||
"es": "Spanish",
|
||||
"ru": "Russian",
|
||||
"ar": "Arabic",
|
||||
}
|
||||
|
||||
|
||||
def _lang_label(code: str) -> str:
|
||||
"""把 ISO 639-1 转成自然语言名(给模型做 prompt 用)。"""
|
||||
if not code or code == "auto":
|
||||
return "the source language"
|
||||
c = code.split("-")[0].lower()
|
||||
return _LANG_MAP.get(c, c)
|
||||
|
||||
|
||||
# 经过反复测试,这个 prompt 是云知声 u2 模型的关键:
|
||||
# - 明确禁止 reasoning / 分析 / 注释(否则模型会把译文放进 reasoning_content)
|
||||
# - 限定只接 EN/JA → ZH(对应用户场景)
|
||||
# - 非英日输入时返回固定拒绝文案(便于上层识别)
|
||||
_SYSTEM_PROMPT = """你是一个即时翻译助手。对于用户输入的英文或日文文章,请直接输出对应的简体中文译文。严格遵守:
|
||||
|
||||
不要进行任何分步思考、不要输出分析、不要添加注释或说明。
|
||||
|
||||
只输出中文译文本身,不包含任何额外文字(如"翻译结果:"、"以下是中文:"等)。
|
||||
|
||||
如果输入内容既非英文也非日文,仅回复:"仅支持英文或日文翻译为中文。" """
|
||||
|
||||
|
||||
class TencentMaaSTranslator(BaseTranslator):
    """Translator that calls the Tencent MaaS ``/chat/completions`` endpoint.

    The endpoint speaks the OpenAI-compatible protocol. API key, base URL,
    model name and throttle interval are read from the
    ``settings.tencent_maas_*`` values when the instance is created.
    """

    name = "tencent_maas"

    # Sentence the system prompt tells the model to answer with when the input
    # is neither English nor Japanese. It is not a translation, so it must not
    # be handed back to callers as one.
    _REFUSAL_TEXT = "仅支持英文或日文翻译为中文。"

    # First try plus one retry.
    _MAX_ATTEMPTS = 2

    def __init__(self):
        """Read the connection settings.

        Raises:
            RuntimeError: If ``settings.tencent_maas_api_key`` is empty.
        """
        if not settings.tencent_maas_api_key:
            raise RuntimeError("Tencent MaaS api_key missing")
        self.api_key = settings.tencent_maas_api_key
        self.base_url = settings.tencent_maas_base_url.rstrip("/")
        self.model = settings.tencent_maas_model
        self.interval_sec = settings.tencent_maas_interval_sec

    def is_configured(self) -> bool:
        """Return True when an API key is present on this instance."""
        return bool(self.api_key)

    @staticmethod
    def _extract_content(response) -> str:
        """Return the stripped text of the first choice, or ``""`` if absent.

        Tolerates a non-JSON body, a missing or empty ``choices`` list and a
        ``null`` ``content`` field instead of raising on them.
        """
        try:
            data = response.json()
        except ValueError:
            return ""
        if not isinstance(data, dict):
            return ""
        choices = data.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first = choices[0]
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        return content.strip() if isinstance(content, str) else ""

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate ``text`` with one chat-completion request (plus one retry).

        Args:
            text: Text to translate. Blank text is returned unchanged without
                calling the service.
            source: Not used; the fixed system prompt decides the direction.
                Kept so the signature matches the other translators.
            target: Not used, for the same reason as ``source``.

        Returns:
            A ``TranslationResult`` holding the model output, with
            ``engine`` set to ``"tencent_maas"`` and ``chars`` set to
            ``len(text)``.

        Raises:
            RuntimeError: On a missing API key, a non-200 response, an empty
                or unparsable answer after the retry, or when the model
                answers with the fixed "unsupported language" sentence.
            httpx.HTTPError: When the request itself keeps failing
                (connection error, timeout) after the retry.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        if not self.is_configured():
            raise RuntimeError("Tencent MaaS api_key missing")

        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
            "messages": [
                # The prompt is sent as-is: the surrounding module notes that
                # other prompt formats make the model leave "content" empty.
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": text},
            ],
            "temperature": 0.0,
            # NOTE(review): grows without bound for long inputs; confirm the
            # model's max_tokens limit and cap this if one exists.
            "max_tokens": max(256, len(text) * 3),
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        content = ""
        failure: Exception | None = None
        # One client for all attempts, so the retry can reuse the connection.
        async with httpx.AsyncClient(timeout=60.0) as client:
            for attempt in range(self._MAX_ATTEMPTS):
                if attempt:
                    # Jittered pause before retrying.
                    await asyncio.sleep(0.5 + random.random())
                try:
                    r = await client.post(url, json=payload, headers=headers)
                except httpx.HTTPError as e:
                    # Connection errors and timeouts are worth one retry.
                    failure = e
                else:
                    status = r.status_code
                    if status >= 500:
                        failure = RuntimeError(
                            f"TencentMaas 5xx: {status} {r.text[:200]}"
                        )
                    elif status == 429:
                        # Rate limited: retry after the pause.
                        failure = RuntimeError(
                            f"TencentMaas {status}: {r.text[:300]}"
                        )
                    elif status != 200:
                        # Other client errors (bad key, bad request) will not
                        # change on a retry, so fail immediately.
                        raise RuntimeError(f"TencentMaas {status}: {r.text[:300]}")
                    else:
                        content = self._extract_content(r)
                        if content:
                            break
                        failure = RuntimeError(
                            f"TencentMaas empty content: {r.text[:300]}"
                        )
                logger.warning("tencent_maas attempt %s failed: %s", attempt, failure)
            else:
                # Every attempt failed: surface the last error.
                raise failure or RuntimeError("TencentMaas request failed")

        # Throttle after a completed call to avoid being rate limited. Done
        # outside the client context so no connection is held while waiting.
        await asyncio.sleep(self.interval_sec)

        if content.strip('"“”').strip() == self._REFUSAL_TEXT:
            # Raising lets the caller fall back or mark the item as failed
            # instead of storing the refusal sentence as the translation.
            raise RuntimeError("TencentMaas refused input: not English or Japanese")

        return TranslationResult(
            text=content, engine=self.name, chars=len(text), cached=False
        )
|
||||
@@ -31,6 +31,10 @@ export interface ArticleListItem {
|
||||
published_at?: string | null
|
||||
fetched_at: string
|
||||
image_url?: string | null
|
||||
// 列表预览钩子(首页展示用,详情页看完整版)
|
||||
commentary?: string | null
|
||||
commentary_status?: string | null
|
||||
image_ai_url?: string | null
|
||||
is_starred: boolean
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,26 @@ function fmtTime(s?: string | null) {
|
||||
return dayjs(s).fromNow()
|
||||
}
|
||||
|
||||
// category 是逗号分隔字符串(LLM 输出),拆成多个 tag
|
||||
function splitCategory(c?: string | null): string[] {
|
||||
if (!c) return []
|
||||
return c.split(',').map((s) => s.trim()).filter(Boolean)
|
||||
}
|
||||
|
||||
// 评论预览:长文截断,带状态点
|
||||
function previewCommentary(c?: string | null, max = 120): string {
|
||||
if (!c) return ''
|
||||
const trimmed = c.replace(/\s+/g, ' ').trim()
|
||||
return trimmed.length > max ? trimmed.slice(0, max) + '…' : trimmed
|
||||
}
|
||||
|
||||
function commentaryStatusType(s?: string | null): 'success' | 'warning' | 'error' | 'default' {
|
||||
if (s === 'ok') return 'success'
|
||||
if (s === 'failed') return 'error'
|
||||
if (s === 'pending') return 'warning'
|
||||
return 'default'
|
||||
}
|
||||
|
||||
onMounted(async () => {
|
||||
await loadSources()
|
||||
await load()
|
||||
@@ -112,6 +132,15 @@ onMounted(async () => {
|
||||
<NTag v-if="a.translation_status !== 'ok'" size="small" type="warning">
|
||||
{{ a.translation_status }}
|
||||
</NTag>
|
||||
<!-- 分类标签(LLM classify 输出,多分类逗号分隔) -->
|
||||
<NTag
|
||||
v-for="c in splitCategory(a.category)"
|
||||
:key="c"
|
||||
size="small"
|
||||
type="success"
|
||||
>
|
||||
{{ c }}
|
||||
</NTag>
|
||||
<NText depth="3" style="font-size: 12px">{{ fmtTime(a.published_at || a.fetched_at) }}</NText>
|
||||
</NSpace>
|
||||
<div style="font-size: 16px; font-weight: 600; color: #333">{{ a.title }}</div>
|
||||
@@ -121,6 +150,28 @@ onMounted(async () => {
|
||||
<div v-if="a.summary_zh" style="color: #666; font-size: 13px; margin-top: 4px">
|
||||
{{ a.summary_zh.slice(0, 200) }}{{ a.summary_zh.length > 200 ? '…' : '' }}
|
||||
</div>
|
||||
<!-- 评论预览(列表钩子,详情页有完整版) -->
|
||||
<div
|
||||
v-if="a.commentary"
|
||||
style="
|
||||
margin-top: 8px;
|
||||
padding: 8px 10px;
|
||||
background: #f6f8ff;
|
||||
border-left: 3px solid #2080f0;
|
||||
border-radius: 4px;
|
||||
color: #444;
|
||||
font-size: 13px;
|
||||
line-height: 1.6;
|
||||
"
|
||||
>
|
||||
<NSpace align="center" :size="6" style="margin-bottom: 4px">
|
||||
<NText depth="2" style="font-size: 12px; font-weight: 600">💬 评论</NText>
|
||||
<NTag size="tiny" :type="commentaryStatusType(a.commentary_status)">
|
||||
{{ a.commentary_status || 'n/a' }}
|
||||
</NTag>
|
||||
</NSpace>
|
||||
<span>{{ previewCommentary(a.commentary, 140) }}</span>
|
||||
</div>
|
||||
</NSpace>
|
||||
</NCard>
|
||||
<NSpace v-if="!exhausted" justify="center" style="margin: 16px 0">
|
||||
|
||||
Reference in New Issue
Block a user