"""One-off probe: run a trafilatura extraction inside the worker container.

Connects to the remote host over SSH, writes a small Python script into the
``news-aggregator-worker-1`` container, executes it there and prints the
last 50 lines of its output.
"""
import base64
import os
import sys

import paramiko

PW = os.environ["REMOTE_PASS"]

c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts any unknown host key without
# verification, which leaves the password login open to MITM. Prefer
# load_system_host_keys() + RejectPolicy when the host key can be pinned.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)


def run(cmd, t=30, *, check=False):
    """Execute *cmd* on the remote host and echo its output locally.

    Args:
        cmd: Shell command line passed to ``exec_command``.
        t: Timeout in seconds passed to ``exec_command``.
        check: When true, raise ``RuntimeError`` if the command exits with
            a non-zero status.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).

    Raises:
        RuntimeError: If *check* is true and the exit status is non-zero.
    """
    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    # Surface stderr and failures instead of silently discarding them.
    if err:
        print(err, end="", file=sys.stderr)
    if rc != 0:
        print(f"[exit status {rc}] {cmd[:120]}", file=sys.stderr)
        if check:
            raise RuntimeError(f"remote command failed with exit status {rc}")
    return out


# Try fetching the full text of an Al Jazeera article with trafilatura.
script = '''
import asyncio, httpx, trafilatura

async def main():
    url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity"
    async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c:
        r = await c.get(url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36"})
    print("status:", r.status_code, "len:", len(r.text))
    extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True, output_format="json")
    print("---JSON---")
    print((extracted or "")[:2000])
    print()
    print("---TEXT---")
    text = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True, output_format="text")
    print((text or "")[:2000])

asyncio.run(main())
'''

# Base64 keeps the payload free of quotes/newlines so it survives the nested
# ssh -> docker exec -> sh -c quoting untouched.
b64 = base64.b64encode(script.encode()).decode()

try:
    # Abort if the upload fails; otherwise a stale /app/_tr.py could be run.
    run(
        "docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'",
        check=True,
    )
    # The pipeline's exit status is presumably that of `tail`, so a failing
    # python run only shows up in the printed output (stderr is merged in).
    run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -50", t=60)
finally:
    # Always release the SSH connection, even on timeout or failure.
    c.close()