Files
diary-news/scripts/_redeploy_worker.py
Mavis 639562593e fix: 翻译失败/降级文本不再写 cache(避免 30 天污染)
之前 service.translate 写 cache 无条件,导致:
- 第一次翻译失败时,'[翻译失败: ...]' 占位符被写进 cache
- 30 天内相同文本的请求(新文章 title 与老文章 title 相同时)全部返回占位符
- 触发 200+ 文章 title_zh 字段被永久污染

修法:仅在 engine ∈ {tencent, nllb, cache} 且文本不含错误标记时,才写 cache。
2026-06-08 00:48:36 +08:00

53 lines
2.7 KiB
Python

"""pull + 重建 worker + 扫描 DB 把翻译失败的改回 pending + 看新 worker 跑起来。"""
import os, paramiko, json, time
# Remote login password; read from the REMOTE_PASS environment variable so it
# is not stored in the script (raises KeyError when the variable is unset).
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client shared by run() and closed at the end of the script.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts host keys that are not already known,
# so the server identity is not verified on first contact — confirm this is
# acceptable for this host.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only authentication: the SSH agent and local key files are skipped.
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=60):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    The remote stdout is echoed to local stdout. Remote stderr is echoed to
    local stderr with warning lines removed. A non-zero remote exit status is
    reported on local stderr instead of being silently discarded.

    Args:
        cmd: Shell command line handed to ``exec_command``.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The decoded remote stdout (undecodable bytes are replaced).
    """
    import sys

    _, so, se = c.exec_command(cmd, timeout=t)
    out = so.read().decode("utf-8", "replace")
    err = se.read().decode("utf-8", "replace")
    rc = so.channel.recv_exit_status()

    if out:
        print(out, end="")

    # Filter warnings line by line: one warning line must not hide real
    # error lines that arrived on stderr together with it.
    shown = [
        line
        for line in err.splitlines(keepends=True)
        if "Warning" not in line and "warn" not in line
    ]
    if shown:
        print("".join(shown), end="", file=sys.stderr)

    # The command text is deliberately not echoed here because callers may
    # embed credentials in it.
    if rc != 0:
        print(f"[run] remote command exited with status {rc}", file=sys.stderr)

    return out
# Shared command fragments, hoisted so the psql calls and the two
# placeholder queries cannot drift apart.
_PSQL = "docker exec news-aggregator-postgres-1 psql -U news -d news"
_FAILED = "title_zh LIKE '%[翻译失败:%' OR body_zh_text LIKE '%[翻译失败:%' OR body_zh_html LIKE '%[翻译失败:%'"

try:
    # 1) Pull the latest code as the `news` user.
    print("--- pull ---")
    run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")

    # 2) Rebuild and recreate only the worker service.
    print("--- 重建 worker ---")
    run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker 2>&1 | tail -5", t=120)
    time.sleep(5)

    # 3) Scan the DB: rows whose title_zh / body_zh_text / body_zh_html hold
    #    the translation-failure placeholder are reset to 'pending'.
    print("--- DB 扫描 ---")
    out = run(f"{_PSQL} -tA -c \"SELECT count(*) FROM articles WHERE {_FAILED};\"")
    print(f"'翻译失败' 占位符的文章数: {out.strip()}")
    n = run(f"{_PSQL} -tA -c \"UPDATE articles SET translation_status='pending', title_zh=NULL, body_zh_text=NULL, body_zh_html=NULL, translated_at=NULL, translation_engine=NULL, translation_chars=0 WHERE {_FAILED};\"")
    print(f" UPDATE 状态: {n.strip()}")

    # 4) Check whether the worker is running translation_loop.
    #    run() already echoes the output, so it is not printed a second time.
    print("\n--- worker 日志(看 translation_loop 启动 + 节奏)---")
    time.sleep(15)
    run("docker logs --tail=50 news-aggregator-worker-1 2>&1 | tail -30", t=15)

    # 5) Wait 30 s and look again (about 30 articles should be translated by then).
    print("\n--- 等 30 秒看翻译进度 ---")
    time.sleep(30)
    run(f"{_PSQL} -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")

    # 6) Redis usage counter.
    #    The password is expanded by the remote shell, so it is never fetched
    #    through run() (which would echo it to local stdout) and never has to
    #    be quoted by hand. `cut -f2-` keeps passwords that contain '='.
    #    NOTE(review): the month key 202606 is hard-coded — update it when
    #    running in another month.
    out = run('docker exec news-aggregator-redis-1 redis-cli -a "$(grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2-)" GET translation:month:202606 2>/dev/null')
    print(f"\n--- redis usage: {out.strip()}")

    # 7) Verify that fetch_one_source no longer auto-translates (confirm by
    #    reading the worker log above); this step only lists the worker
    #    container and its status.
    print("\n--- worker 进程信息 ---")
    run("docker ps --filter 'name=news-aggregator-worker' --format 'table {{.Names}}\\t{{.Status}}'")
finally:
    # Always release the SSH connection, even when a step raises.
    c.close()