之前 service.translate 无条件写入 cache,导致:
- 第一次翻译失败时,'[翻译失败: ...]' 占位符被写进 cache
- 30 天内相同文本的请求(新文章 title 与老文章 title 相同时)全部返回占位符
- 触发 200+ 文章 title_zh 字段被永久污染
修法:仅在 engine ∈ {tencent, nllb, cache} 且文本不含错误标记时,才写 cache。
40 lines
2.1 KiB
Python
40 lines
2.1 KiB
Python
"""找一篇英文(非 NHK 日文)已翻译文章,看 body_zh_text 长度。"""
import json
import os

import paramiko

# The SSH password comes from the environment; a missing REMOTE_PASS raises
# KeyError here, before any connection is attempted.
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client shared by the rest of the script.
c = paramiko.SSHClient()
# AutoAddPolicy accepts host keys that are not yet known instead of rejecting
# the connection.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and on-disk key lookup are both disabled.
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)


def run(cmd, t=15):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    The command's stdout is echoed to the local stdout (without adding a
    trailing newline) and returned. When the command exits with a non-zero
    status, the status and any captured stderr are written to the local
    stderr so that failures are visible instead of being silently dropped.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes
        replaced.
    """
    import sys  # local import keeps this block self-contained

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Ask for the exit status only after both streams have been drained.
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if rc != 0:
        # The command text is deliberately not echoed: callers embed
        # credentials and bearer tokens in it.
        print(f"[run] remote command exited with status {rc}", file=sys.stderr)
        if err:
            print(err, end="", file=sys.stderr)
    return out


# Diagnostic flow: list the five translated articles with the longest
# body_zh_text, then fetch the longest one through the HTTP API and print its
# original and translated text. The queries exclude source_id 4 (presumably
# the NHK Japanese source mentioned in the module docstring -- confirm).
# NOTE(review): the API login credentials are hard-coded in the curl command
# below; consider reading them from the environment like REMOTE_PASS.
try:
    # Look for translated BBC/Al Jazeera/DW articles (large body, after translation).
    print("--- 英文(非日文)文章 body 长度 top 5 ---")
    out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, source_id, LEFT(title, 60) AS title, length(body_text) AS txt, length(body_zh_text) AS zh, translation_status FROM articles WHERE translation_status = 'ok' AND source_id != 4 AND length(body_zh_text) > 200 ORDER BY length(body_zh_text) DESC LIMIT 5;\"")
    # NOTE: run() already echoes stdout, so the table is shown a second time here.
    print(out)

    # Fetch the longest one to look at the actual translation.
    print("\n--- 拉一篇最长的英文文章详情 ---")
    out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
    token = json.loads(out)["access_token"]

    # Find the ID (-tA makes psql print the bare value only).
    out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT id FROM articles WHERE translation_status = 'ok' AND source_id != 4 AND length(body_zh_text) > 200 ORDER BY length(body_zh_text) DESC LIMIT 1;\"")
    aid = out.strip()
    print(f"article id = {aid}")
    if not aid:
        # Without an id the request below would hit the collection URL
        # instead of a single article, so stop with a clear message.
        raise SystemExit("no matching translated article found")

    out = run("curl -s -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/" + aid)
    det = json.loads(out)

    print(f"\ntitle: {det['title'][:80]}")
    # `or` (rather than a .get default) also covers a title_zh that is present
    # but null, matching how body_zh_text is handled below.
    print(f"title_zh: {(det.get('title_zh') or '—')[:80]}")
    print(f"body_text: {len(det['body_text'])} 字符")
    print(f"body_zh_text: {len(det.get('body_zh_text') or '')} 字符")

    print("\n--- body 原文(前 400 字符) ---")
    print(det['body_text'][:400])

    print("\n--- body 译文(前 500 字符) ---")
    print((det.get('body_zh_text') or '—')[:500])
finally:
    # Always release the SSH connection, even when a step above fails.
    c.close()