perf: 翻译独立后台循环(1 篇/秒)+ Semaphore 1
之前 fetch_one_source 入库后立即调用翻译(可能并发触发腾讯 TMT 限速)。现改为独立的 translation_loop 后台循环: - 完全不和 RSS 抓取并行 - 1 篇/秒节拍(Semaphore 1 + sleep 1.0) - 没活时空闲 5 秒再轮询 - pending/failed 状态的文章都会重试
This commit is contained in:
@@ -17,7 +17,7 @@ from sqlalchemy import select
|
||||
from app.config import settings
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.source import Source
|
||||
from app.workers.pipeline import fetch_one_source, run_once
|
||||
from app.workers.pipeline import fetch_one_source, run_once, translation_loop
|
||||
|
||||
logger = logging.getLogger("news.worker")
|
||||
logging.basicConfig(
|
||||
@@ -89,6 +89,10 @@ async def main() -> None:
|
||||
scheduler.start()
|
||||
logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))
|
||||
|
||||
# 独立的翻译后台循环(不和 RSS 抓取并行;1 篇/秒)
|
||||
translation_task = asyncio.create_task(translation_loop(), name="translation_loop")
|
||||
logger.info("translation_loop task scheduled (1 article/sec)")
|
||||
|
||||
stop = asyncio.Event()
|
||||
|
||||
def _signal_handler():
|
||||
@@ -104,7 +108,12 @@ async def main() -> None:
|
||||
pass
|
||||
|
||||
await stop.wait()
|
||||
logger.info("stopping scheduler")
|
||||
logger.info("stopping scheduler and translation loop")
|
||||
translation_task.cancel()
|
||||
try:
|
||||
await translation_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
scheduler.shutdown(wait=False)
|
||||
|
||||
|
||||
|
||||
@@ -51,10 +51,7 @@ async def fetch_one_source(source_id: int) -> None:
|
||||
|
||||
n_new = await _bulk_insert(src, items)
|
||||
await _mark_success(source_id, n_new=n_new)
|
||||
logger.info("source %s: %d new articles", src.slug, n_new)
|
||||
|
||||
# 入库后,挑高优先级 / 没翻译的开始翻译
|
||||
await _translate_recent_for_source(source_id, max_n=20)
|
||||
logger.info("source %s: %d new articles (translation deferred to background loop)", src.slug, n_new)
|
||||
|
||||
|
||||
async def _mark_failure(source_id: int, status: str) -> None:
|
||||
@@ -271,3 +268,47 @@ async def run_once() -> None:
|
||||
logger.info("run_once: %d enabled sources", len(sources))
|
||||
tasks = [fetch_one_source(s.id) for s in sources]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
|
||||
# === 翻译后台循环 ===
|
||||
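# Pacing: one article per second. The translation service already limits itself with an internal Semaphore(1); the interval below only spaces the calls out.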
|
||||
TRANSLATION_INTERVAL_SEC = 1.0
|
||||
TRANSLATION_IDLE_INTERVAL_SEC = 5.0
|
||||
TRANSLATION_BATCH_SIZE = 1 # 每轮最多翻译 1 篇
|
||||
|
||||
|
||||
# Minimum delay before an article that was just attempted may be selected
# again. Without it, the oldest article that keeps failing is re-selected on
# every tick (the query is oldest-first with a small LIMIT) and starves every
# article queued behind it.
TRANSLATION_RETRY_COOLDOWN_SEC = 60.0


async def translation_loop() -> None:
    """Run the standalone translation worker until the task is cancelled.

    Each round selects up to ``TRANSLATION_BATCH_SIZE`` article ids whose
    ``translation_status`` is ``"pending"`` or ``"failed"``, oldest first
    (``fetched_at``, then ``id``), and hands them to ``translate_article`` one
    at a time, sleeping ``TRANSLATION_INTERVAL_SEC`` after each call.

    When nothing is eligible, or a round raises unexpectedly, the loop sleeps
    ``TRANSLATION_IDLE_INTERVAL_SEC`` before polling again.

    An id that has just been attempted is excluded from selection for
    ``TRANSLATION_RETRY_COOLDOWN_SEC`` seconds. Failed articles are still
    retried, but a single article that keeps failing can no longer block the
    rest of the queue. The cooldown bookkeeping is in-memory only and is lost
    on restart.
    """
    logger.info("translation_loop started (interval=%.1fs)", TRANSLATION_INTERVAL_SEC)

    # Monotonic clock of the running event loop; immune to wall-clock jumps.
    clock = asyncio.get_running_loop().time
    # article id -> clock() value of its most recent attempt.
    last_attempt = {}

    while True:
        try:
            # Forget attempts older than the cooldown so the map stays bounded
            # and those ids become eligible again.
            cutoff = clock() - TRANSLATION_RETRY_COOLDOWN_SEC
            last_attempt = {
                aid: ts for aid, ts in last_attempt.items() if ts > cutoff
            }

            # Only the primary keys are needed, so do not load full rows.
            stmt = select(Article.id).where(
                Article.translation_status.in_(("pending", "failed"))
            )
            if last_attempt:
                stmt = stmt.where(Article.id.not_in(list(last_attempt)))
            stmt = stmt.order_by(
                Article.fetched_at.asc(), Article.id.asc()
            ).limit(TRANSLATION_BATCH_SIZE)

            async with AsyncSessionLocal() as session:
                aids = list((await session.execute(stmt)).scalars())

            if not aids:
                # Nothing eligible right now: poll less aggressively.
                await asyncio.sleep(TRANSLATION_IDLE_INTERVAL_SEC)
                continue

            for aid in aids:
                # Record the attempt before the call so the cooldown applies
                # whether translate_article raises or records the failure
                # itself and returns normally.
                last_attempt[aid] = clock()
                try:
                    await translate_article(aid)
                except Exception as e:
                    logger.exception("translate_article %s failed: %s", aid, e)
                # Pacing between consecutive translation calls.
                await asyncio.sleep(TRANSLATION_INTERVAL_SEC)
        except Exception as e:
            # Top-level boundary: log and keep the worker alive. Task
            # cancellation is not an Exception subclass, so it still
            # propagates and stops the loop.
            logger.exception("translation_loop error: %s", e)
            await asyncio.sleep(TRANSLATION_IDLE_INTERVAL_SEC)
|
||||
|
||||
Reference in New Issue
Block a user