fix: NHK 源配置改 ja(seed 写错了); translate_article 加强 lang_src 兜底

This commit is contained in:
Mavis
2026-06-08 00:54:02 +08:00
parent 639562593e
commit 523c82f7a5
3 changed files with 68 additions and 2 deletions

61
scripts/_check_nhk.py Normal file
View File

@@ -0,0 +1,61 @@
"""查 NHK 源配置 + 已入库文章 lang_src 实际值。"""
import os, paramiko
# The remote password is taken from the environment; a missing REMOTE_PASS
# raises KeyError before any connection attempt is made.
PW = os.environ["REMOTE_PASS"]

# Single SSH session shared by every remote command issued below.
c = paramiko.SSHClient()
# Unknown host keys are accepted automatically instead of being rejected.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are both disabled.
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=15):
    """Run ``cmd`` over the module-level SSH connection ``c``.

    Args:
        cmd: Shell command line to execute on the remote host.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes are
        replaced). Non-empty stdout is also echoed to the local stdout.
    """
    import sys  # local import: only the diagnostics below need it

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    # Surface stderr and a non-zero exit status so that a failed remote
    # command cannot be mistaken for one that simply produced no output.
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[exit status {rc}]", file=sys.stderr)
    return out
# Sections 1 and 2 both query the database through psql inside the postgres
# container, so the shared command prefix is spelled out once.
_PSQL = "docker exec news-aggregator-postgres-1 psql -U news -d news -c "
_DB_CHECKS = (
    # 1) NHK source configuration
    (
        "--- 1. NHK 源配置 ---",
        "SELECT id, slug, language_src FROM sources WHERE slug = 'nhk-world';",
    ),
    # 2) lang_src distribution of the NHK articles already stored
    (
        "\n--- 2. 已入库 NHK 文章 lang_src 分布 ---",
        "SELECT lang_src, count(*) FROM articles WHERE source_id = "
        "(SELECT id FROM sources WHERE slug = 'nhk-world') GROUP BY 1;",
    ),
)
for _heading, _sql in _DB_CHECKS:
    print(_heading)
    # The SQL text is wrapped in double quotes for the remote shell.
    run(_PSQL + '"' + _sql + '"')
# 3) Inspect the language field that the NHK RSS feed actually reports.
print("\n--- 3. NHK RSS 实际 language 字段 ---")
import base64

# Probe script executed inside the worker container. It is uploaded exactly
# once; the body of main() and of the `if` must be indented inside the
# literal, otherwise the remote interpreter rejects the file.
script = (
    "import feedparser, httpx, asyncio\n"
    "async def main():\n"
    "    f = feedparser.parse('https://www3.nhk.or.jp/rss/news/cat0.xml')\n"
    "    print('feed.feed.language:', f.feed.get('language'))\n"
    "    if f.entries:\n"
    "        e = f.entries[0]\n"
    "        print('entry.language:', e.get('language'))\n"
    "        print('title:', e.title)\n"
    "asyncio.run(main())\n"
)
# Ship the script base64-encoded: the encoded text contains no quotes or
# newlines, so it passes through the nested shell quoting unchanged.
b64 = base64.b64encode(script.encode()).decode()
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
# stderr is merged into stdout so tracebacks show up; only the tail is kept.
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -10", t=20)
# 4) Check which source-language value translate_article passes on to the
#    translation service.
print("\n--- 4. service.translate 实际调用时 source 参数是什么? ---")
# Dump translate_article's source code from the worker container and keep only
# the lines that mention lang_src or contain a translate( call. run() already
# echoes the command's stdout, so the result is not printed a second time.
run("docker exec news-aggregator-worker-1 python -c 'import app.workers.pipeline; import inspect; print(inspect.getsource(app.workers.pipeline.translate_article))' 2>&1 | grep -E 'lang_src|translate\\(' | head -10")
c.close()