"""Inspect the stored body fields of the Ronaldo article (id 175177).

Connects to the news-aggregator host over SSH and, inside its Docker
containers:

1. prints the body-length columns of article 175177,
2. prints the same columns for the five most recently fetched articles of
   source 3 and of source 2 (labelled aljazeera and BBC by the headers this
   script prints),
3. fetches the Al Jazeera RSS feed from the worker container and prints
   whether the first three entries carry ``content`` / ``summary`` fields.

The SSH password is read from the ``REMOTE_PASS`` environment variable.
"""
import base64
import os
import shlex
import sys

import paramiko

HOST = "207.57.129.228"
PORT = 19717
USER = "root"
PG_CONTAINER = "news-aggregator-postgres-1"
WORKER_CONTAINER = "news-aggregator-worker-1"
REMOTE_SCRIPT = "/app/_t.py"

ARTICLE_SQL = (
    "SELECT id, title, length(body_html) as html_len, "
    "length(body_text) as text_len, length(body_zh_text) as zh_len, "
    "lang_src, translation_status, url FROM articles WHERE id = 175177;"
)

# NOTE(review): the headers printed below announce "3" articles, but the
# query returns 5 rows. Left as-is; confirm which number is intended.
RECENT_BY_SOURCE_SQL = (
    "SELECT id, LEFT(title, 50) title, length(body_html) html, "
    "length(body_text) txt, length(body_zh_text) zh "
    "FROM articles WHERE source_id = {source_id} "
    "ORDER BY fetched_at DESC LIMIT 5;"
)

# Probe executed inside the worker container: downloads the Al Jazeera RSS
# feed and reports which body-like fields the first three entries expose.
RSS_PROBE = b'''
import asyncio, feedparser, httpx
async def main():
    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
        r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
    f = feedparser.parse(r.text)
    for e in f.entries[:3]:
        print("---")
        print("title:", e.title)
        print("link:", e.link)
        print("has content:", bool(e.get("content")))
        if e.get("content"):
            print("content[0] keys:", list(e["content"][0].keys()))
            print("content[0].value[:200]:",
                  (e["content"][0].get("value") or "")[:200])
        print("has summary:", bool(e.get("summary")))
        if e.get("summary"):
            print("summary[:200]:", e["summary"][:200])
asyncio.run(main())
'''

# Created unconnected; main() connects it and always closes it.
c = paramiko.SSHClient()


def run(cmd: str, t: int = 15) -> str:
    """Run *cmd* on the remote host, echo its output, and return its stdout.

    Args:
        cmd: Shell command line executed through the module-level SSH client.
        t: Channel timeout in seconds, passed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    # Surface failures instead of discarding them: without this a broken
    # docker/psql invocation looks exactly like an empty result.
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        # Truncate the echo so a base64 payload does not flood the terminal.
        print(f"[exit {rc}] {cmd[:120]}", file=sys.stderr)
    return out


def psql(sql: str, t: int = 15) -> str:
    """Run one SQL statement via ``psql`` in the postgres container.

    The statement is shell-quoted with ``shlex.quote`` so it reaches ``psql``
    as a single argument regardless of the characters it contains.
    """
    return run(
        f"docker exec {PG_CONTAINER} psql -U news -d news -c {shlex.quote(sql)}",
        t,
    )


def probe_rss() -> None:
    """Upload the RSS probe into the worker container, run it, and remove it."""
    # base64 keeps the payload free of characters the remote shell would
    # interpret, so it can sit safely inside the single-quoted sh -c string.
    b64 = base64.b64encode(RSS_PROBE).decode()
    run(
        f"docker exec {WORKER_CONTAINER} sh -c "
        f"'echo {b64} | base64 -d > {REMOTE_SCRIPT}'"
    )
    try:
        run(
            f"docker exec -w /app {WORKER_CONTAINER} python {REMOTE_SCRIPT} "
            "2>&1 | tail -40",
            t=30,
        )
    finally:
        # Do not leave the temporary probe behind in the container.
        run(f"docker exec {WORKER_CONTAINER} rm -f {REMOTE_SCRIPT}")


def main() -> None:
    """Connect to the host, run every diagnostic step, and always disconnect."""
    password = os.environ["REMOTE_PASS"]
    # NOTE(review): AutoAddPolicy trusts any host key on first contact. With
    # root password authentication that exposes the password to a
    # man-in-the-middle; prefer load_system_host_keys() with the default
    # RejectPolicy once the host key has been recorded.
    c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        c.connect(
            HOST,
            port=PORT,
            username=USER,
            password=password,
            timeout=15,
            allow_agent=False,
            look_for_keys=False,
        )

        # 1) Inspect the body fields of the article.
        print("--- 文章 body 字段(可能是空)---")
        psql(ARTICLE_SQL)

        # 2) Look at typical aljazeera articles.
        print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")
        psql(RECENT_BY_SOURCE_SQL.format(source_id=3))

        # 3) Sample BBC (probably the richest source).
        print("\n--- 抽 3 篇 BBC 看 body ---")
        psql(RECENT_BY_SOURCE_SQL.format(source_id=2))

        # 4) Pull the RSS feed to see whether Al Jazeera ships any content.
        print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")
        probe_rss()
    finally:
        c.close()


if __name__ == "__main__":
    main()