fix: articles.py get_article 链式 await coroutine 报错(.first())
This commit is contained in:
52
scripts/_check_body.py
Normal file
52
scripts/_check_body.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""查 Ronaldo 那篇文章的 body 字段。"""
|
||||
import os, paramiko
|
||||
# The SSH password comes from the environment rather than being hard-coded;
# a missing REMOTE_PASS variable raises KeyError right here.
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client shared by run() below.
c = paramiko.SSHClient()
# Unknown host keys are accepted automatically instead of being rejected.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are both disabled.
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd, t=15):
    """Run *cmd* on the remote host and echo its output locally.

    The command is executed through the module-level SSH client ``c``.
    Standard output is printed to stdout and returned.  Standard error and a
    non-zero exit status are reported on local stderr, so a failing command
    is not mistaken for one that simply produced no output.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The command's standard output decoded as UTF-8, with undecodable
        bytes replaced.
    """
    import sys  # local import so the file's import block stays untouched

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # NOTE(review): stdout is drained fully before stderr is read; a command
    # that writes a very large amount to stderr might stall here - confirm
    # if this helper is ever used for such commands.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Output has been consumed above, so waiting for the exit status is safe.
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[exit status {rc}] {cmd}", file=sys.stderr)
    return out
|
||||
|
||||
# 1) Look at the body columns of article 175177: the lengths of the HTML,
#    text and zh-text bodies plus its language / translation metadata.
print("--- 文章 body 字段(可能是空)---")
run(
    "docker exec news-aggregator-postgres-1 psql -U news -d news -c "
    '"SELECT id, title, length(body_html) as html_len, '
    "length(body_text) as text_len, length(body_zh_text) as zh_len, "
    'lang_src, translation_status, url FROM articles WHERE id = 175177;"'
)
|
||||
|
||||
# 2) Sample the most recently fetched articles with source_id = 3
#    (Al Jazeera, per the heading below) and compare their body lengths.
# NOTE(review): the heading says 3 articles but the query uses LIMIT 5.
print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")
run(
    "docker exec news-aggregator-postgres-1 psql -U news -d news -c "
    '"SELECT id, LEFT(title, 50) title, length(body_html) html, '
    "length(body_text) txt, length(body_zh_text) zh FROM articles "
    'WHERE source_id = 3 ORDER BY fetched_at DESC LIMIT 5;"'
)
|
||||
|
||||
# 3) Same sample for source_id = 2 (BBC, per the heading below), which is
#    expected to have the richest bodies.
# NOTE(review): the heading says 3 articles but the query uses LIMIT 5.
print("\n--- 抽 3 篇 BBC 看 body ---")
run(
    "docker exec news-aggregator-postgres-1 psql -U news -d news -c "
    '"SELECT id, LEFT(title, 50) title, length(body_html) html, '
    "length(body_text) txt, length(body_zh_text) zh FROM articles "
    'WHERE source_id = 2 ORDER BY fetched_at DESC LIMIT 5;"'
)
|
||||
|
||||
# 4) Fetch the Al Jazeera RSS feed directly to see whether it carries any
#    body content at all.
print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")

# Probe script executed inside the worker container. For the first three feed
# entries it prints the title, the link, and whether `content` / `summary`
# are present, with a 200-character preview of each. An HTTP error status
# raises instead of silently producing an empty feed.
script = b'''
import asyncio, feedparser, httpx
async def main():
    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
        r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
        r.raise_for_status()
    f = feedparser.parse(r.text)
    for e in f.entries[:3]:
        print("---")
        print("title:", e.title)
        print("link:", e.link)
        print("has content:", bool(e.get("content")))
        if e.get("content"):
            print("content[0] keys:", list(e["content"][0].keys()))
            print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])
        print("has summary:", bool(e.get("summary")))
        if e.get("summary"):
            print("summary[:200]:", e["summary"][:200])
asyncio.run(main())
'''
import base64

# Base64 keeps the script intact through the nested shell quoting
# (ssh -> docker exec -> sh -c -> echo).
b64 = base64.b64encode(script).decode()
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
# stderr is merged into stdout so tracebacks from the probe show up as well.
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30)
# Remove the temporary probe so it does not linger in the container.
run("docker exec news-aggregator-worker-1 rm -f /app/_t.py")

c.close()
|
||||
Reference in New Issue
Block a user