perf: 翻译独立后台循环(1 篇/秒)+ Semaphore 1

之前 fetch_one_source 入库后立即调翻译(可能并发触发腾讯 TMT 限速)
改为独立 translation_loop 后台循环:
- 翻译不再由 RSS 抓取任务触发(避免多个抓取任务并发调用翻译)
- 1 篇/秒节拍(Semaphore 1 + sleep 1.0)
- 没活时空闲 5 秒再轮询
- pending/failed 都重试
This commit is contained in:
Mavis
2026-06-08 00:27:09 +08:00
parent e79cfaa5f7
commit 9862a92423
6 changed files with 203 additions and 39 deletions

View File

@@ -51,10 +51,7 @@ async def fetch_one_source(source_id: int) -> None:
n_new = await _bulk_insert(src, items)
await _mark_success(source_id, n_new=n_new)
logger.info("source %s: %d new articles", src.slug, n_new)
# 入库后,挑高优先级 / 没翻译的开始翻译
await _translate_recent_for_source(source_id, max_n=20)
logger.info("source %s: %d new articles (translation deferred to background loop)", src.slug, n_new)
async def _mark_failure(source_id: int, status: str) -> None:
@@ -271,3 +268,47 @@ async def run_once() -> None:
logger.info("run_once: %d enabled sources", len(sources))
tasks = [fetch_one_source(s.id) for s in sources]
await asyncio.gather(*tasks, return_exceptions=True)
# === Translation background loop ===
# One article per second. Per the original author's note, a Semaphore(1)
# already lives inside the translation service; this value is only the pacing.
TRANSLATION_INTERVAL_SEC = 1.0
# Sleep used by translation_loop when no work is found or after a loop-level error.
TRANSLATION_IDLE_INTERVAL_SEC = 5.0
TRANSLATION_BATCH_SIZE = 1  # translate at most 1 article per round
async def translation_loop() -> None:
    """Run the standalone translation worker; never returns normally.

    Each iteration:

    1. Selects the ids of up to ``TRANSLATION_BATCH_SIZE`` articles whose
       ``translation_status`` is ``"pending"`` or ``"failed"``, oldest
       ``fetched_at`` first (ties broken by ``id``).
    2. If nothing is found, sleeps ``TRANSLATION_IDLE_INTERVAL_SEC`` and polls
       again.
    3. Otherwise awaits ``translate_article`` for each id, sleeping
       ``TRANSLATION_INTERVAL_SEC`` after every article to pace the calls.

    Exceptions raised by a single ``translate_article`` call are logged and do
    not stop the batch. Any other exception in an iteration is logged and
    followed by an idle sleep before the loop resumes. Only ``Exception`` is
    caught, so task cancellation (``asyncio.CancelledError`` is not an
    ``Exception`` subclass on Python 3.8+) still propagates and ends the loop.
    """
    logger.info("translation_loop started (interval=%.1fs)", TRANSLATION_INTERVAL_SEC)
    while True:
        try:
            async with AsyncSessionLocal() as session:
                # Only the primary keys are needed, so select the id column
                # instead of loading full Article entities.
                # NOTE(review): "failed" rows are re-selected with the same
                # oldest-first ordering, so an article that keeps failing would
                # be retried on every iteration ahead of newer ones. Consider a
                # retry cap or backoff; confirm how translate_article records
                # failures before changing this.
                result = await session.execute(
                    select(Article.id)
                    .where(Article.translation_status.in_(("pending", "failed")))
                    .order_by(Article.fetched_at.asc(), Article.id.asc())
                    .limit(TRANSLATION_BATCH_SIZE)
                )
                aids = list(result.scalars())

            if not aids:
                # Nothing to do: wait longer before polling again.
                await asyncio.sleep(TRANSLATION_IDLE_INTERVAL_SEC)
                continue

            for aid in aids:
                try:
                    await translate_article(aid)
                except Exception as e:
                    logger.exception("translate_article %s failed: %s", aid, e)
                # Pacing: one article per TRANSLATION_INTERVAL_SEC, whether or
                # not the translation succeeded.
                await asyncio.sleep(TRANSLATION_INTERVAL_SEC)
        except Exception as e:
            # Loop-level failure (e.g. the query itself raised): log it and
            # back off before retrying so the worker keeps running.
            logger.exception("translation_loop error: %s", e)
            await asyncio.sleep(TRANSLATION_IDLE_INTERVAL_SEC)