fix: NHK 源配置改 ja(seed 写错了); translate_article 加强 lang_src 兜底
This commit is contained in:
61
scripts/_check_nhk.py
Normal file
61
scripts/_check_nhk.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""查 NHK 源配置 + 已入库文章 lang_src 实际值。"""
|
||||
import os, paramiko
|
||||
# The SSH password comes from the environment; indexing os.environ raises
# KeyError when REMOTE_PASS is not set.
PW = os.environ["REMOTE_PASS"]

# Single SSH client shared by every remote command in this script.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts host keys that are not already known;
# confirm this is acceptable for a root login before reusing this script.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only authentication: skip the SSH agent and local key files.
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd, t=15):
    """Run ``cmd`` on the remote host through the shared SSH client ``c``.

    The command's stdout is echoed locally and returned. Its stderr and any
    non-zero exit status are reported on the local stderr stream so that a
    failing remote command is not mistaken for one that produced no output.

    Args:
        cmd: Command line to execute on the remote host.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The decoded stdout of the command; undecodable bytes are replaced.
    """
    import sys  # local import keeps this block self-contained

    _, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[exit status {rc}] {cmd}", file=sys.stderr)
    return out
|
||||
|
||||
# Steps 1 and 2 both run one SQL statement through psql inside the postgres
# container, so they are expressed as (heading, SQL) pairs:
#   1) the NHK source configuration row;
#   2) the distribution of lang_src over the stored NHK articles.
_db_checks = (
    (
        "--- 1. NHK 源配置 ---",
        "SELECT id, slug, language_src FROM sources WHERE slug = 'nhk-world';",
    ),
    (
        "\n--- 2. 已入库 NHK 文章 lang_src 分布 ---",
        "SELECT lang_src, count(*) FROM articles WHERE source_id = "
        "(SELECT id FROM sources WHERE slug = 'nhk-world') GROUP BY 1;",
    ),
)
for _heading, _sql in _db_checks:
    print(_heading)
    run(f'docker exec news-aggregator-postgres-1 psql -U news -d news -c "{_sql}"')
|
||||
|
||||
# 3) Inspect the <language> field that the NHK RSS feed actually reports.
import base64

print("\n--- 3. NHK RSS 实际 language 字段 ---")

# Probe script executed inside the worker container. It is built from
# explicit lines so the indentation of the generated file is unambiguous.
# (An earlier draft of this script, which lacked `import asyncio` and was
# overwritten before it ever ran, has been removed.)
script = (
    "import feedparser, httpx, asyncio\n"
    "async def main():\n"
    "    f = feedparser.parse('https://www3.nhk.or.jp/rss/news/cat0.xml')\n"
    "    print('feed.feed.language:', f.feed.get('language'))\n"
    "    if f.entries:\n"
    "        e = f.entries[0]\n"
    "        print('entry.language:', e.get('language'))\n"
    "        print('title:', e.title)\n"
    "asyncio.run(main())\n"
)

# Base64-encode the script so its quotes and newlines survive the nested
# `sh -c '...'` quoting, then decode it into /app/_t.py in the container.
b64 = base64.b64encode(script.encode()).decode()
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")

# Run the probe; stderr is merged into stdout and only the last 10 lines are kept.
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -10", t=20)
|
||||
|
||||
# 4) Check which source value the translation service receives, i.e. how
#    lang_src is passed from translate_article to the service.
print("\n--- 4. service.translate 实际调用时 source 参数是什么? ---")

# Dump the source of translate_article and keep only the lines that mention
# lang_src or call translate(. run() already echoes the command's stdout, so
# the result is not printed a second time here.
run("docker exec news-aggregator-worker-1 python -c 'import app.workers.pipeline; import inspect; print(inspect.getsource(app.workers.pipeline.translate_article))' 2>&1 | grep -E 'lang_src|translate\\(' | head -10")

# Release the SSH connection opened at the top of the script.
c.close()
|
||||
Reference in New Issue
Block a user