fix: NHK 源配置改 ja(seed 写错了); translate_article 加强 lang_src 兜底
This commit is contained in:
@@ -64,7 +64,7 @@ SEEDS = [
|
||||
"kind": "rss",
|
||||
"url": "https://www3.nhk.or.jp/rss/news/cat0.xml",
|
||||
"region": "asia",
|
||||
"language_src": "en",
|
||||
"language_src": "ja",
|
||||
"priority": 70,
|
||||
"fetch_interval_min": 60,
|
||||
"translate_to": "zh",
|
||||
|
||||
@@ -155,7 +155,12 @@ async def translate_article(article_id: int) -> None:
|
||||
return
|
||||
title = art.title
|
||||
body_text = (art.body_text or "")[:TRANSLATE_BODY_MAX]
|
||||
lang_src = art.lang_src or "auto"
|
||||
# lang_src 优先级:article.lang_src > source.language_src > "auto"
|
||||
# (article 入库时已经优先用了 feedparser 的 lang,这里再做一次兜底)
|
||||
if not art.lang_src and art.source and art.source.language_src:
|
||||
lang_src = art.source.language_src
|
||||
else:
|
||||
lang_src = art.lang_src or "auto"
|
||||
target = "zh"
|
||||
article_id_ref = art.id
|
||||
|
||||
|
||||
61
scripts/_check_nhk.py
Normal file
61
scripts/_check_nhk.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Inspect the NHK source configuration and the lang_src values of stored articles."""
import os

import paramiko

# The SSH password comes from the environment; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# One SSH client shared by every remote command issued below.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification —
# confirm this is acceptable for this host.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd, t=15):
    """Run *cmd* on the remote host through the shared SSH client and echo its output.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    import sys

    _, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    # stderr and the exit status used to be read and then dropped, which made a
    # failing command indistinguishable from one that produced no output.
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[exit status {rc}] {cmd}", file=sys.stderr)
    return out
|
||||
|
||||
def _psql(sql):
    """Run one SQL statement through psql inside the postgres container."""
    return run(f'docker exec news-aggregator-postgres-1 psql -U news -d news -c "{sql}"')


# 1) NHK source configuration
print("--- 1. NHK 源配置 ---")
_psql("SELECT id, slug, language_src FROM sources WHERE slug = 'nhk-world';")

# 2) Distribution of lang_src across the NHK articles already stored
print("\n--- 2. 已入库 NHK 文章 lang_src 分布 ---")
_psql(
    "SELECT lang_src, count(*) FROM articles "
    "WHERE source_id = (SELECT id FROM sources WHERE slug = 'nhk-world') "
    "GROUP BY 1;"
)
|
||||
|
||||
# 3) Inspect the <language> field that the NHK RSS feed actually reports
print("\n--- 3. NHK RSS 实际 language 字段 ---")
import base64

# Probe script executed inside the worker container. An earlier draft of this
# string omitted `import asyncio` and was uploaded only to be overwritten right
# away; that dead draft and its extra remote write are removed.
script = (
    "import feedparser, httpx, asyncio\n"
    "async def main():\n"
    "    f = feedparser.parse('https://www3.nhk.or.jp/rss/news/cat0.xml')\n"
    "    print('feed.feed.language:', f.feed.get('language'))\n"
    "    if f.entries:\n"
    "        e = f.entries[0]\n"
    "        print('entry.language:', e.get('language'))\n"
    "        print('title:', e.title)\n"
    "asyncio.run(main())\n"
)
# Ship the script as base64 so its quotes and newlines survive the nested shells.
b64 = base64.b64encode(script.encode()).decode()
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -10", t=20)
|
||||
|
||||
# 4) Check which source language translate_article passes to the translation
#    service: dump the deployed function's source and keep only the lines that
#    mention lang_src or call translate(.
print("\n--- 4. service.translate 实际调用时 source 参数是什么? ---")
# run() already echoes the command's stdout, so the result is not printed a
# second time (the previous print(out) duplicated every line).
run("docker exec news-aggregator-worker-1 python -c 'import app.workers.pipeline; import inspect; print(inspect.getsource(app.workers.pipeline.translate_article))' 2>&1 | grep -E 'lang_src|translate\\(' | head -10")

c.close()
|
||||
Reference in New Issue
Block a user