# scripts/_check_body.py

"""查 Ronaldo 那篇文章的 body 字段。"""
import os, paramiko
# SSH password is read from the REMOTE_PASS environment variable
# (raises KeyError when the variable is not set).
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client shared by run() below.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# instead of rejecting them -- confirm this is acceptable for this host.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: skip the SSH agent and on-disk private keys.
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=15):
    """Execute ``cmd`` over the module-level SSH client ``c`` and echo its output.

    Args:
        cmd: Shell command line handed to ``c.exec_command``.
        t: Timeout in seconds handed to ``c.exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.

    Remote stdout is printed to local stdout unchanged. Remote stderr and a
    non-zero exit status are reported on local stderr so that a failing
    command is not mistaken for one that simply produced no rows/output.
    """
    import sys  # local import so the file's top-level import line is untouched

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # Both streams are read before recv_exit_status(); paramiko documents that
    # asking for the exit status before draining large output can hang.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        # Truncate the command: some callers pass a long base64 payload.
        print(f"[exit status {rc}] {cmd[:120]}", file=sys.stderr)
    return out

import base64

# Probe that is executed inside the worker container. It downloads the
# Al Jazeera RSS feed and, for the first three entries, prints the title, the
# link, and whether "content" / "summary" are present (with a 200-character
# preview of each). Kept as bytes because base64.b64encode() takes bytes.
script = b'''
import asyncio, feedparser, httpx
async def main():
    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
        r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
    f = feedparser.parse(r.text)
    for e in f.entries[:3]:
        print("---")
        print("title:", e.title)
        print("link:", e.link)
        print("has content:", bool(e.get("content")))
        if e.get("content"):
            print("content[0] keys:", list(e["content"][0].keys()))
            print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])
        print("has summary:", bool(e.get("summary")))
        if e.get("summary"):
            print("summary[:200]:", e["summary"][:200])
asyncio.run(main())
'''


def _psql(sql):
    """Run one SQL statement through psql in the Postgres container via run()."""
    return run(
        'docker exec news-aggregator-postgres-1 psql -U news -d news -c "' + sql + '"'
    )


def _body_lengths(source_id):
    """Show body-column lengths of the 5 most recently fetched articles of a source."""
    return _psql(
        "SELECT id, LEFT(title, 50) title, length(body_html) html, "
        "length(body_text) txt, length(body_zh_text) zh FROM articles "
        f"WHERE source_id = {source_id} ORDER BY fetched_at DESC LIMIT 5;"
    )


# try/finally so the SSH connection is closed even when a remote command
# raises (for example on a timeout).
try:
    # 1) Look at the body columns of one specific article.
    print("--- 文章 body 字段(可能是空)---")
    _psql(
        "SELECT id, title, length(body_html) as html_len, "
        "length(body_text) as text_len, length(body_zh_text) as zh_len, "
        "lang_src, translation_status, url FROM articles WHERE id = 175177;"
    )

    # 2) Sample recent Al Jazeera articles (source_id 3 per the header text).
    # NOTE(review): the header says 3 articles but the query uses LIMIT 5.
    print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")
    _body_lengths(3)

    # 3) Sample recent BBC articles (source_id 2 per the header text);
    # the original note expects these to be the richest. Same 3-vs-5 mismatch.
    print("\n--- 抽 3 篇 BBC 看 body ---")
    _body_lengths(2)

    # 4) Fetch the RSS feed itself to see whether Al Jazeera ships any content.
    print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")
    # base64 output contains no quote characters, so the payload can sit
    # inside the single-quoted `sh -c` argument without further escaping.
    b64 = base64.b64encode(script).decode()
    run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
    run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30)
finally:
    c.close()
fix: articles.py get_article 链式 await coroutine 报错(.first()) 2026-06-08 00:19:03 +08:00			`"""查 Ronaldo 那篇文章的 body 字段。"""`
			`import os, paramiko`
			`PW = os.environ["REMOTE_PASS"]`
			`c = paramiko.SSHClient()`
			`c.set_missing_host_key_policy(paramiko.AutoAddPolicy())`
			`c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)`
			`def run(cmd, t=15):`
			`si, so, se = c.exec_command(cmd, timeout=t)`
			`out = so.read().decode("utf-8", "replace")`
			`err = se.read().decode("utf-8", "replace")`
			`rc = so.channel.recv_exit_status()`
			`if out: print(out, end="")`
			`return out`

			`# 1) 看 body 字段`
			`print("--- 文章 body 字段(可能是空)---")`
			`run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, title, length(body_html) as html_len, length(body_text) as text_len, length(body_zh_text) as zh_len, lang_src, translation_status, url FROM articles WHERE id = 175177;\"")`

			`# 2) 看 3 篇典型 aljazeera 文章`
			`print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")`
			`run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 3 ORDER BY fetched_at DESC LIMIT 5;\"")`

			`# 3) 抽 BBC(可能是最丰富的)`
			`print("\n--- 抽 3 篇 BBC 看 body ---")`
			`run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 2 ORDER BY fetched_at DESC LIMIT 5;\"")`

			`# 4) 拉 RSS 源看看,Al Jazeera 到底有没有内容`
			`print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")`
			`script = b'''`
			`import asyncio, feedparser, httpx`
			`async def main():`
			`async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:`
			`r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")`
			`f = feedparser.parse(r.text)`
			`for e in f.entries[:3]:`
			`print("---")`
			`print("title:", e.title)`
			`print("link:", e.link)`
			`print("has content:", bool(e.get("content")))`
			`if e.get("content"):`
			`print("content[0] keys:", list(e["content"][0].keys()))`
			`print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])`
			`print("has summary:", bool(e.get("summary")))`
			`if e.get("summary"):`
			`print("summary[:200]:", e["summary"][:200])`
			`asyncio.run(main())`
			`'''`
			`import base64`
			`b64 = base64.b64encode(script).decode()`
			`run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " \| base64 -d > /app/_t.py'")`
			`run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 \| tail -40", t=30)`
			`c.close()`