perf: 翻译独立后台循环(1 篇/秒)+ Semaphore 1

之前 fetch_one_source 入库后立即调翻译(可能并发触发腾讯 TMT 限速)
改为独立 translation_loop 后台循环:
- 与 RSS 抓取解耦:抓取入库后不再直接触发翻译,翻译由独立的后台任务完成
- 1 篇/秒节拍(Semaphore 1 + sleep 1.0)
- 没活时空闲 5 秒再轮询
- pending/failed 都重试
This commit is contained in:
Mavis
2026-06-08 00:27:09 +08:00
parent e79cfaa5f7
commit 9862a92423
6 changed files with 203 additions and 39 deletions

View File

@@ -17,7 +17,7 @@ from sqlalchemy import select
from app.config import settings
from app.database import AsyncSessionLocal
from app.models.source import Source
from app.workers.pipeline import fetch_one_source, run_once
from app.workers.pipeline import fetch_one_source, run_once, translation_loop
logger = logging.getLogger("news.worker")
logging.basicConfig(
@@ -89,6 +89,10 @@ async def main() -> None:
scheduler.start()
logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))
# 独立的翻译后台循环(不和 RSS 抓取并行;1 篇/秒)
translation_task = asyncio.create_task(translation_loop(), name="translation_loop")
logger.info("translation_loop task scheduled (1 article/sec)")
stop = asyncio.Event()
def _signal_handler():
@@ -104,7 +108,12 @@ async def main() -> None:
pass
await stop.wait()
logger.info("stopping scheduler")
logger.info("stopping scheduler and translation loop")
translation_task.cancel()
try:
await translation_task
except asyncio.CancelledError:
pass
scheduler.shutdown(wait=False)

View File

@@ -51,10 +51,7 @@ async def fetch_one_source(source_id: int) -> None:
n_new = await _bulk_insert(src, items)
await _mark_success(source_id, n_new=n_new)
logger.info("source %s: %d new articles", src.slug, n_new)
# 入库后,挑高优先级 / 没翻译的开始翻译
await _translate_recent_for_source(source_id, max_n=20)
logger.info("source %s: %d new articles (translation deferred to background loop)", src.slug, n_new)
async def _mark_failure(source_id: int, status: str) -> None:
@@ -271,3 +268,47 @@ async def run_once() -> None:
logger.info("run_once: %d enabled sources", len(sources))
tasks = [fetch_one_source(s.id) for s in sources]
await asyncio.gather(*tasks, return_exceptions=True)
# === Translation background loop ===
# One article per second (the Semaphore(1) already lives inside the service; this is only the pacing)
# Seconds slept after each article handled by translation_loop.
TRANSLATION_INTERVAL_SEC = 1.0
# Seconds slept by translation_loop when no article is waiting, and after an unexpected loop error.
TRANSLATION_IDLE_INTERVAL_SEC = 5.0
TRANSLATION_BATCH_SIZE = 1 # at most 1 article translated per pass
async def translation_loop() -> None:
    """Run forever as the standalone translation worker.

    Each pass selects at most ``TRANSLATION_BATCH_SIZE`` article ids (oldest
    ``fetched_at`` first, ``id`` as tie-breaker), awaits ``translate_article``
    for each of them, and sleeps ``TRANSLATION_INTERVAL_SEC`` after every
    article. When nothing is waiting it sleeps
    ``TRANSLATION_IDLE_INTERVAL_SEC`` before polling again.

    ``pending`` articles are always served before ``failed`` ones. Both
    statuses use the same oldest-first ordering, so selecting them in a single
    query would let one article that keeps failing stay at the head of the
    queue and starve every newer article.

    The coroutine never returns on its own. It ends only when its task is
    cancelled: ``asyncio.CancelledError`` is not an ``Exception`` subclass on
    Python 3.8+, so neither ``except Exception`` below swallows it.
    """
    logger.info("translation_loop started (interval=%.1fs)", TRANSLATION_INTERVAL_SEC)
    while True:
        try:
            aids = []
            # The session is only needed to pick ids; it is closed before any
            # translation call is awaited.
            async with AsyncSessionLocal() as session:
                # Fresh work first, retries only when no pending article exists.
                # NOTE(review): among 'failed' rows the oldest one is still
                # retried first on every pass; confirm translate_article
                # eventually moves a hopeless article out of 'failed'.
                for status in ("pending", "failed"):
                    result = await session.execute(
                        select(Article.id)
                        .where(Article.translation_status == status)
                        .order_by(Article.fetched_at.asc(), Article.id.asc())
                        .limit(TRANSLATION_BATCH_SIZE)
                    )
                    aids = list(result.scalars())
                    if aids:
                        break
            if not aids:
                # Nothing to do: poll less aggressively.
                await asyncio.sleep(TRANSLATION_IDLE_INTERVAL_SEC)
                continue
            for aid in aids:
                try:
                    await translate_article(aid)
                except Exception as e:
                    # One bad article must not stop the worker.
                    logger.exception("translate_article %s failed: %s", aid, e)
                # Pacing: one article per interval.
                await asyncio.sleep(TRANSLATION_INTERVAL_SEC)
        except Exception as e:
            # Top-level boundary (e.g. database errors): log, back off, keep looping.
            logger.exception("translation_loop error: %s", e)
            await asyncio.sleep(TRANSLATION_IDLE_INTERVAL_SEC)