53 lines
2.6 KiB
Python
53 lines
2.6 KiB
Python
|
|
"""查 Ronaldo 那篇文章的 body 字段。"""
|
||
|
|
import os, paramiko
|
||
|
|
# SSH password is taken from the environment; a missing REMOTE_PASS raises
# KeyError before any connection attempt is made.
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client shared by the rest of the script.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# without verifying them -- confirm this is acceptable for this host.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: do not consult an SSH agent or local key files.
    allow_agent=False,
    look_for_keys=False,
)
|
||
|
|
def run(cmd, t=15):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    The command's stdout is echoed to local stdout and returned. Its stderr
    and any non-zero exit status are reported on local stderr, so a failing
    remote command is visible instead of being silently dropped.

    Args:
        cmd: Command line to execute on the remote host.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes
        replaced.
    """
    import sys  # local import so this block does not depend on the file header

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Blocks until the remote command has finished and reported its status.
    rc = stdout.channel.recv_exit_status()

    if out:
        print(out, end="")
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[remote exit status {rc}]", file=sys.stderr)
    return out
|
||
|
|
|
||
|
|
def _psql(sql):
    """Run *sql* through psql inside the Postgres container and echo the result."""
    return run(
        "docker exec news-aggregator-postgres-1 psql -U news -d news -c "
        f'"{sql}"'
    )


# 1) Inspect the body columns of a single article (id 175177).
print("--- 文章 body 字段(可能是空)---")
_psql(
    "SELECT id, title, length(body_html) as html_len, "
    "length(body_text) as text_len, length(body_zh_text) as zh_len, "
    "lang_src, translation_status, url FROM articles WHERE id = 175177;"
)

# 2) and 3) The same body-length query for two sources. Going by the headers
# below, source_id 3 is Al Jazeera and source_id 2 is BBC.
# NOTE(review): the headers say "3" articles but the query uses LIMIT 5.
for header, source_id in (
    ("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---", 3),
    ("\n--- 抽 3 篇 BBC 看 body ---", 2),
):
    print(header)
    _psql(
        "SELECT id, LEFT(title, 50) title, length(body_html) html, "
        "length(body_text) txt, length(body_zh_text) zh FROM articles "
        f"WHERE source_id = {source_id} ORDER BY fetched_at DESC LIMIT 5;"
    )
|
||
|
|
|
||
|
|
# 4) Fetch the Al Jazeera RSS feed from inside the worker container and print,
#    for the first three entries, whether they carry "content" / "summary".
print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")
script = b'''
import asyncio, feedparser, httpx
async def main():
    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
        r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
    f = feedparser.parse(r.text)
    for e in f.entries[:3]:
        print("---")
        print("title:", e.title)
        print("link:", e.link)
        print("has content:", bool(e.get("content")))
        if e.get("content"):
            print("content[0] keys:", list(e["content"][0].keys()))
            print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])
        print("has summary:", bool(e.get("summary")))
        if e.get("summary"):
            print("summary[:200]:", e["summary"][:200])
asyncio.run(main())
'''
import base64

# Base64 carries the script through the nested shell quoting unchanged.
b64 = base64.b64encode(script).decode()
try:
    run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
    run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30)
finally:
    # Do not leave the probe script behind in the worker container, and always
    # release the SSH connection even if one of the commands above raised.
    try:
        run("docker exec news-aggregator-worker-1 rm -f /app/_t.py")
    finally:
        c.close()
|