40 lines
2.0 KiB
Python
40 lines
2.0 KiB
Python
|
|
"""DELETE 后看新数据(30 秒后)。"""
|
||
|
|
import os, paramiko
|
||
|
|
# The SSH password comes from the REMOTE_PASS environment variable; indexing
# os.environ directly raises KeyError when it is not set.
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client used for the remote commands issued below.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy is installed as the missing-host-key policy, so
# an unknown host key does not abort the connection -- confirm this is
# acceptable for the target host.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    # Password login only: no SSH agent and no key-file discovery.
    allow_agent=False,
    look_for_keys=False,
    timeout=15,
)
|
||
|
|
def run(cmd, t=30):
    """Run ``cmd`` on the remote host through the module-level SSH client ``c``.

    Remote stdout is echoed to local stdout and returned. Remote stderr and a
    non-zero exit status are reported on local stderr so that a failing
    command is not mistaken for one that simply produced no output.

    Args:
        cmd: Command line handed to ``c.exec_command``.
        t: Timeout value forwarded to ``exec_command`` (default 30).

    Returns:
        Remote stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    import sys  # local import: only needed for the diagnostics below

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Query the exit status only after both streams have been read.
    rc = stdout.channel.recv_exit_status()

    if out:
        print(out, end="")
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[exit status {rc}] {cmd}", file=sys.stderr)
    return out
|
||
|
|
|
||
|
|
import time

# Template for running one SQL statement through psql inside the postgres
# container; the statement is wrapped in double quotes for the remote shell.
PSQL = 'docker exec news-aggregator-postgres-1 psql -U news -d news -c "{}"'

# (heading printed locally, SQL statement) pairs, executed in this order.
REPORTS = [
    # Overall article statistics.
    (
        "\n--- 文章统计 ---",
        "SELECT count(*) AS total, "
        "count(CASE WHEN length(body_text) > 1000 THEN 1 END) AS long_body, "
        "avg(length(body_text))::int AS avg_len, "
        "max(length(body_text)) AS max_len FROM articles;",
    ),
    # RSS summary vs. full text (body_text > 1000 means trafilatura worked).
    (
        "\n--- body_text 长度分布 ---",
        "SELECT CASE WHEN length(body_text) < 200 THEN '<200' "
        "WHEN length(body_text) < 1000 THEN '200-1k' ELSE '>1k' END AS bucket, "
        "count(*) FROM articles GROUP BY 1 ORDER BY 1;",
    ),
    # Translation status breakdown.
    (
        "\n--- 翻译状态 ---",
        "SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;",
    ),
    # Body length and source of the first 5 articles.
    (
        "\n--- 前 5 篇 ---",
        "SELECT id, source_id, LEFT(title, 50) AS title, "
        "length(body_text) AS body_len FROM articles ORDER BY id LIMIT 5;",
    ),
]

try:
    # Start run_once in the background inside the worker container. Its output
    # is redirected to /tmp/run_once.log on the remote side, and the shell
    # echoes the PID of the backgrounded job, which is what we read back.
    si, so, se = c.exec_command(
        "nohup docker exec news-aggregator-worker-1 python -c "
        "'import asyncio; from app.workers.pipeline import run_once; "
        "asyncio.run(run_once())' "
        "> /tmp/run_once.log 2>&1 & echo $!",
        timeout=10,
    )
    pid = so.read().decode().strip()
    print(f"run_once started, PID={pid}")

    # Wait 90 seconds (full-text fetching is slow).
    time.sleep(90)

    # Look at the new data.
    for heading, sql in REPORTS:
        print(heading)
        run(PSQL.format(sql))
finally:
    # Close the SSH connection even if a step above raised.
    c.close()
|