fix(worker): enrichment_loop 永远只扫老文章(已 enrich),新文章被排到最后
bug 复现:
- order_by translated_at asc nullslast
- 老文章已 enrich,translated_at 有值,排在前面
- 新文章 translated_at=NULL,被 nullslast 排到最后,limit 5 永远取不到

修复:
- order_by 改为 Article.id.asc()(新文章 id 大)
- ENRICHMENT_BATCH_SIZE 1→3(并发候选)
- 候选查询 limit 由 ENRICHMENT_BATCH_SIZE * 5 改为 ENRICHMENT_BATCH_SIZE * 20
- 文章间 sleep 0.5→0.2s

效果:enrichment_loop 现在会持续 enrich 新进文章,首页列表会逐步有分类/评论
This commit is contained in:
@@ -390,7 +390,7 @@ async def enrich_article(article_id: int) -> dict[str, str]:
|
|||||||
# === 后台循环 ===
|
# === 后台循环 ===
|
||||||
# 与 translation_loop 一样,常驻从队列里取文章
|
# 与 translation_loop 一样,常驻从队列里取文章
|
||||||
ENRICHMENT_INTERVAL_SEC = 5.0 # 没活时等待
|
ENRICHMENT_INTERVAL_SEC = 5.0 # 没活时等待
|
||||||
ENRICHMENT_BATCH_SIZE = 1
|
ENRICHMENT_BATCH_SIZE = 3 # 每轮并发拉取候选,然后顺序处理(LLM 客户端本身有节流)
|
||||||
|
|
||||||
|
|
||||||
async def enrichment_loop() -> None:
|
async def enrichment_loop() -> None:
|
||||||
@@ -405,6 +405,8 @@ async def enrichment_loop() -> None:
|
|||||||
try:
|
try:
|
||||||
async with AsyncSessionLocal() as session:
|
async with AsyncSessionLocal() as session:
|
||||||
# 已翻译完成 + 4 个状态中至少有一个是 pending
|
# 已翻译完成 + 4 个状态中至少有一个是 pending
|
||||||
|
# 关键:不能按 translated_at 升序 — 老文章已 enrich,新文章 translated_at=NULL(被排到最后)
|
||||||
|
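# 改为按 id 升序(新文章 id 大),循环里再过滤 status
# NOTE(review): 升序 + limit 会先返回 id 最小(最老)的文章;若前 ENRICHMENT_BATCH_SIZE * 20 条都已 enrich,新文章仍然取不到 — 待确认是否应改为 id 降序,或在 where 中直接过滤 pending 状态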
|
||||||
rows = (
|
rows = (
|
||||||
await session.execute(
|
await session.execute(
|
||||||
select(Article)
|
select(Article)
|
||||||
@@ -412,8 +414,8 @@ async def enrichment_loop() -> None:
|
|||||||
Article.translation_status == "ok",
|
Article.translation_status == "ok",
|
||||||
Article.title_zh.is_not(None),
|
Article.title_zh.is_not(None),
|
||||||
)
|
)
|
||||||
.order_by(Article.translated_at.asc().nullslast(), Article.id.asc())
|
.order_by(Article.id.asc())
|
||||||
.limit(ENRICHMENT_BATCH_SIZE * 5) # 多取几个找需要 enrich 的
|
.limit(ENRICHMENT_BATCH_SIZE * 20) # 多取一些找需要 enrich 的
|
||||||
)
|
)
|
||||||
).scalars()
|
).scalars()
|
||||||
candidates = list(rows)
|
candidates = list(rows)
|
||||||
@@ -441,7 +443,7 @@ async def enrichment_loop() -> None:
|
|||||||
await enrich_article(aid)
|
await enrich_article(aid)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("enrich_article %s in loop failed: %s", aid, e)
|
logger.exception("enrich_article %s in loop failed: %s", aid, e)
|
||||||
await asyncio.sleep(0.5) # 文章间轻节流
|
await asyncio.sleep(0.2) # 文章间轻节流(LLM 内部还有 interval_sec)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("enrichment_loop error: %s", e)
|
logger.exception("enrichment_loop error: %s", e)
|
||||||
await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)
|
await asyncio.sleep(ENRICHMENT_INTERVAL_SEC)
|
||||||
|
|||||||
Reference in New Issue
Block a user