"""Check the NHK source configuration and the actual lang_src values of stored articles."""

import base64
import os
import sys

import paramiko

# The SSH password is taken from the environment; a missing REMOTE_PASS
# raises KeyError here, before any connection is attempted.
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client shared by run() and closed at the end of the script.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification;
# confirm that is acceptable for this host.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are not consulted.
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)

def run(cmd, t=15):
    """Run *cmd* over the shared SSH client, echo its output, return stdout.

    Args:
        cmd: Command line passed to ``c.exec_command``.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes
        replaced.
    """
    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    # stderr and the exit status used to be read and then dropped, which made
    # a failing remote command indistinguishable from one with no output.
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print("[exit status %d] %s" % (rc, cmd), file=sys.stderr)
    return out


# Steps 1 and 2 both run a SQL statement through psql inside the postgres
# container:
#   1) the NHK source configuration row
#   2) the distribution of lang_src over the NHK articles already stored
_PSQL = "docker exec news-aggregator-postgres-1 psql -U news -d news -c "
_queries = (
    (
        "--- 1. NHK 源配置 ---",
        "SELECT id, slug, language_src FROM sources WHERE slug = 'nhk-world';",
    ),
    (
        "\n--- 2. 已入库 NHK 文章 lang_src 分布 ---",
        "SELECT lang_src, count(*) FROM articles WHERE source_id = "
        "(SELECT id FROM sources WHERE slug = 'nhk-world') GROUP BY 1;",
    ),
)
for _heading, _sql in _queries:
    print(_heading)
    # The statement is wrapped in double quotes for the remote shell.
    run(_PSQL + '"' + _sql + '"')

# 3) Inspect the <language> field that the NHK RSS feed actually reports.
print("\n--- 3. NHK RSS 实际 language 字段 ---")

# Probe script executed inside the worker container. An earlier draft of this
# script (missing `import asyncio`) used to be uploaded first and then
# immediately overwritten; only the working version is kept. The body lines
# carry explicit 4/8-space indentation so the remote file is valid Python.
script = (
    "import feedparser, httpx, asyncio\n"
    "async def main():\n"
    "    f = feedparser.parse('https://www3.nhk.or.jp/rss/news/cat0.xml')\n"
    "    print('feed.feed.language:', f.feed.get('language'))\n"
    "    if f.entries:\n"
    "        e = f.entries[0]\n"
    "        print('entry.language:', e.get('language'))\n"
    "        print('title:', e.title)\n"
    "asyncio.run(main())\n"
)

# Ship the script as base64 so its quotes and newlines need no shell escaping.
b64 = base64.b64encode(script.encode()).decode()
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
# stderr is merged into stdout and only the last 10 lines are shown.
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -10", t=20)

# 4) Check which `source` argument the translation service receives, i.e. how
#    lang_src is passed on to the service.
print("\n--- 4. service.translate 实际调用时 source 参数是什么? ---")

# Dump the source of translate_article inside the worker container and keep
# only the lines mentioning lang_src or a translate( call (first 10 matches).
# run() already echoes the command's stdout, so the result is not printed
# again here; the extra print(out) used to duplicate every matched line.
run("docker exec news-aggregator-worker-1 python -c 'import app.workers.pipeline; import inspect; print(inspect.getsource(app.workers.pipeline.translate_article))' 2>&1 | grep -E 'lang_src|translate\\(' | head -10")

c.close()