"""Inspect the complete content the API returns for article 175177."""
import base64
import json
import os
import shlex
import sys

import paramiko
# The SSH password is read from the environment; a missing REMOTE_PASS
# raises KeyError immediately instead of attempting a login without it.
PW = os.environ["REMOTE_PASS"]

# Module-level SSH client used for every remote command in this script.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts host keys that are not already known,
# so the server identity is not verified — confirm this is acceptable for
# the environment this script is run against.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: skip the SSH agent and local key files.
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=15):
    """Run ``cmd`` over the module-level SSH client and return its stdout.

    Stdout is echoed to the local stdout unchanged. Anything the remote
    command writes to stderr, and any non-zero exit status, is reported on
    the local stderr so that a failed command is not mistaken for one that
    simply produced no output.

    Args:
        cmd: Shell command line passed to ``exec_command``.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes
        replaced.
    """
    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()

    if out:
        print(out, end="")

    # Diagnostics go to stderr only, so callers that parse the returned
    # stdout (e.g. with json.loads) see exactly what they saw before.
    if err:
        print(err, end="" if err.endswith("\n") else "\n", file=sys.stderr)
    if rc != 0:
        # The command text is deliberately not echoed: it may embed
        # credentials or tokens.
        print(f"[remote command exited with status {rc}]", file=sys.stderr)

    return out
# Source of a helper program that is copied into the worker container and run
# there; it downloads one Al Jazeera page and prints what trafilatura extracts.
# Kept at module level so the literal's lines start at column 0, which is what
# the remote interpreter needs.
script = '''
import asyncio, httpx, trafilatura
async def main():
    url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity"
    async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c:
        r = await c.get(url, headers={"User-Agent": "Mozilla/5.0"})
    print("status:", r.status_code, "len:", len(r.text))
    extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True)
    print("extracted len:", len(extracted or ""))
    print("---")
    print((extracted or "")[:1000])
asyncio.run(main())
'''

# try/finally guarantees the SSH session is closed even when a step fails
# (for example when the login response is not the expected JSON).
try:
    # 1) Pull the detailed JSON for the article.
    print("--- /api/v1/articles/175177 详情 ---")
    # NOTE(review): the API password is hard-coded in this command; consider
    # moving it to an environment variable like REMOTE_PASS.
    login_cmd = (
        "curl -s -X POST http://localhost/api/v1/auth/login"
        " -H 'Content-Type: application/json'"
        " -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'"
    )
    out = run(login_cmd)
    token = json.loads(out)["access_token"]

    # shlex.quote produces the same single-quoted header for ordinary tokens
    # and stays correct if the token ever contains shell metacharacters.
    auth_header = shlex.quote("Authorization: Bearer " + token)
    out = run("curl -s -H " + auth_header + " http://localhost/api/v1/articles/175177")
    det = json.loads(out)
    print(json.dumps(det, ensure_ascii=False, indent=2))

    # 2) Try fetching the Al Jazeera full text with trafilatura.
    print("\n\n--- 试 trafilatura 抓 Ronaldo 全文 ---")
    # Ship the helper as base64 so its quotes and newlines survive the nested
    # ssh -> docker exec -> sh -c quoting untouched.
    b64 = base64.b64encode(script.encode()).decode()
    run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'")
    run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -30", t=30)
finally:
    c.close()