From 82a92032bb77ecdb10a66f4630e3410f890fbe30 Mon Sep 17 00:00:00 2001 From: Mavis Date: Mon, 8 Jun 2026 16:09:22 +0800 Subject: [PATCH] =?UTF-8?q?fix(scripts):=20backfill=5Fbody=20=E4=B9=9F?= =?UTF-8?q?=E5=9B=9E=E5=A1=AB=E7=BF=BB=E8=AF=91=E8=BF=87=E4=BD=86=20body?= =?UTF-8?q?=20=E7=9F=AD=E7=9A=84=E6=96=87=E7=AB=A0(=E8=AE=A9=20worker=20?= =?UTF-8?q?=E9=87=8D=E8=AF=91)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/backfill_body.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/backfill_body.py b/scripts/backfill_body.py index f903939..0c9c85a 100644 --- a/scripts/backfill_body.py +++ b/scripts/backfill_body.py @@ -52,15 +52,10 @@ async def backfill( stats = {"scanned": 0, "matched": 0, "updated": 0, "skipped": 0, "failed": 0} async with AsyncSessionLocal() as session: - # 1) 找短 body 文章 - stmt = ( - select(Article) - .where((Article.body_text.is_(None)) | (Article.body_text == "") | (Article.translation_status != "ok")) - # 简化:只看 body 短的(无论翻译状态) - ) + # 1) 找短 body 文章(不管翻译状态 — 已翻译的也要回填,重置让 worker 用完整 body 重译) + stmt = select(Article) if source_slug: stmt = stmt.join(Source, Source.id == Article.source_id).where(Source.slug == source_slug) - # 全部扫一次(在内存里过滤 short body,避免 SQL 长度函数复杂) rows = (await session.execute(stmt.order_by(Article.id.desc()))).scalars() all_articles = list(rows) # 过滤 short body