diff --git a/backend/app/workers/pipeline.py b/backend/app/workers/pipeline.py
index bf7aab7..d92d596 100644
--- a/backend/app/workers/pipeline.py
+++ b/backend/app/workers/pipeline.py
@@ -113,7 +113,6 @@ async def _bulk_insert(src: Source, items: list[FetchedItem]) -> int:
                 "image_url": it.image_url,
                 "published_at": it.published_at,
                 "translation_status": "pending",
-                "translate_to": src.translate_to,
             }
         )
     if not rows:
diff --git a/scripts/_e2e.py b/scripts/_e2e.py
new file mode 100644
index 0000000..3158a39
--- /dev/null
+++ b/scripts/_e2e.py
@@ -0,0 +1,79 @@
+import os, paramiko, urllib.request, urllib.error, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+
+def run(cmd, t=15):
+    """Run cmd over the SSH connection, echo its stdout/stderr and exit code, and return stdout."""
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    print(f"$ {cmd}")
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    print(f" rc={rc}")
+    return out
+
+# Pull on the server and rebuild the api container
+run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")
+run("cd /srv/news && sg docker -c 'docker compose up -d --force-recreate --no-deps --build api' 2>&1 | tail -5", t=120)
+
+# Reset the owner user's role back to owner
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'UPDATE users SET role = '\\''owner'\\'' WHERE username = '\\''owner'\\'';'\" 2>&1 | tail -3")
+
+# Trigger one fetch run (allow a longer timeout)
+print("\n=== 触发抓取(等 60 秒)===")
+import time
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -15", t=180)
+time.sleep(5)
+
+# Count articles and how many have a translated title
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) AS total, count(title_zh) AS translated FROM articles;'\" 2>&1 | tail -5")
+
+# Owner password: OWNER_PASS if set, otherwise the previous hard-coded value
+new_pw = os.environ.get("OWNER_PASS", "owner_pass_2026")
+
+# Log in, then query the API
+req = urllib.request.Request(
+    "http://localhost/api/v1/auth/login",
+    data=json.dumps({"username": "owner", "password": new_pw}).encode(),
+    headers={"Content-Type": "application/json"},
+)
+try:
+    resp = urllib.request.urlopen(req, timeout=10)
+    data = json.loads(resp.read())
+    print(f"\n=== 登录 OK!token 前 40: {data['access_token'][:40]} ===")
+    # Fetch a few articles
+    req2 = urllib.request.Request(
+        "http://localhost/api/v1/articles?limit=5",
+        headers={"Authorization": f"Bearer {data['access_token']}"},
+    )
+    resp2 = urllib.request.urlopen(req2, timeout=10)
+    ad = json.loads(resp2.read())
+    print(f" articles: {len(ad.get('items', []))} 条")
+    for a in ad.get("items", [])[:3]:
+        print(f" - {a['source']['name']:20s} [{a['translation_status']:7s}] {a['title'][:50]}")
+        if a.get("title_zh"):
+            print(f" zh: {a['title_zh'][:50]}")
+    # Check /me
+    req3 = urllib.request.Request(
+        "http://localhost/api/v1/me",
+        headers={"Authorization": f"Bearer {data['access_token']}"},
+    )
+    me = json.loads(urllib.request.urlopen(req3, timeout=10).read())
+    print(f"\n /me: {me}")
+    # Check /me/usage
+    req4 = urllib.request.Request(
+        "http://localhost/api/v1/me/usage",
+        headers={"Authorization": f"Bearer {data['access_token']}"},
+    )
+    usage = json.loads(urllib.request.urlopen(req4, timeout=10).read())
+    print(f" /me/usage: {usage}")
+except urllib.error.HTTPError as e:
+    print(f"\n[FAIL] {e.code}")
+    print(e.read().decode())
+except Exception as e:
+    print(f"\n[ERR] {e}")
+c.close()
diff --git a/scripts/_kick2.py b/scripts/_kick2.py
new file mode 100644
index 0000000..f111eca
--- /dev/null
+++ b/scripts/_kick2.py
@@ -0,0 +1,29 @@
+import os, paramiko
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=30):
+    """Run cmd over the SSH connection, echo its stdout/stderr and exit code, and return stdout."""
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    print(f"$ {cmd}")
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    print(f" rc={rc}")
+    return out
+
+# Fetch the BBC source on its own (source id 2) with more log output
+print("--- BBC 单独抓取 ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import fetch_one_source; asyncio.run(fetch_one_source(2))'\" 2>&1 | tail -30", t=60)
+
+# Fetch the BBC feed directly inside the container; sync httpx because `python -c` on one line cannot hold `async def`
+print("\n--- 容器内 curl bbc ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import httpx, feedparser; r = httpx.get(\\\"https://feeds.bbci.co.uk/news/world/rss.xml\\\", follow_redirects=True); print(\\\"status:\\\", r.status_code, \\\"len:\\\", len(r.text)); f = feedparser.parse(r.text); print(\\\"entries:\\\", len(f.entries)); print(\\\"first title:\\\", f.entries[0].title if f.entries else None)'\" 2>&1 | tail -10")
+
+# Look at a few stored articles
+print("\n--- 查 article ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, source_id, title, translation_status, published_at FROM articles LIMIT 3;'\" 2>&1 | tail -10")
+c.close()
diff --git a/scripts/_kick_off.py b/scripts/_kick_off.py
new file mode 100644
index 0000000..67d97ca
--- /dev/null
+++ b/scripts/_kick_off.py
@@ -0,0 +1,43 @@
+import os, paramiko
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=60):
+    """Run cmd over the SSH connection, echo its stdout/stderr and exit code, and return stdout."""
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    print(f"$ {cmd}")
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    print(f" rc={rc}")
+    return out
+
+# 1) Show source status
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, slug, url, enabled, last_status, fetch_interval_min FROM sources;'\" 2>&1 | tail -10")
+
+# 2) Fix the Reuters URL (check newsgroups / Google News for an alternative)
+# Reuters moved its RSS feed to a new domain; alternatively use Google News directly
+# Simple approach: change the url of the row with slug=reuters-world
+# Try https://www.reutersagency.com/feed/?best-topics=top-news
+print("\n--- 更新 reuters url ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c \\\"UPDATE sources SET url = 'https://www.reutersagency.com/feed/?best-topics=top-news&posttype=post', last_status = NULL, consecutive_failures = 0, fetch_interval_min = 30 WHERE slug = 'reuters-world';\\\"\" 2>&1 | tail -3")
+
+# 3) Check that the new host resolves
+print("\n--- 测新 url 解析 ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts www.reutersagency.com\" 2>&1 | tail -3")
+
+# 4) Trigger a fetch of all sources
+print("\n--- 触发抓取 ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -20", t=180)
+
+# 5) Show article counts
+print("\n--- article 数 ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) total, count(title_zh) translated FROM articles;'\" 2>&1 | tail -5")
+
+# 6) Show source status
+print("\n--- 源状态 ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT slug, last_status, consecutive_failures FROM sources ORDER BY id;'\" 2>&1 | tail -10")
+c.close()
diff --git a/scripts/_net_check.py b/scripts/_net_check.py
new file mode 100644
index 0000000..9d45b16
--- /dev/null
+++ b/scripts/_net_check.py
@@ -0,0 +1,27 @@
+import os, paramiko
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=15):
+    """Run cmd over the SSH connection, echo its stdout/stderr and exit code, and return stdout."""
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    print(f"$ {cmd}")
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    print(f" rc={rc}")
+    return out
+
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker cat /etc/resolv.conf\" 2>&1 | tail -5")
+print("---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts google.com\" 2>&1 | tail -5")
+print("---")
+# Resolve one specific domain
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import socket; print(socket.gethostbyname(\\\"feeds.reuters.com\\\"))'\" 2>&1 | tail -5")
+print("---")
+# Check network access from the host itself
+run("curl -s -o /dev/null -w '%{http_code}\\n' https://feeds.reuters.com/Reuters/worldNews 2>&1")
+c.close()
diff --git a/scripts/_net_check2.py b/scripts/_net_check2.py
new file mode 100644
index 0000000..375fa14
--- /dev/null
+++ b/scripts/_net_check2.py
@@ -0,0 +1,26 @@
+import os, paramiko
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=20):
+    """Run cmd over the SSH connection, echo its stdout/stderr and exit code, and return stdout."""
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    print(f"$ {cmd}")
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    print(f" rc={rc}")
+    return out
+
+# Check DNS resolution for several feed domains
+for domain in ["feeds.reuters.com", "feeds.bbci.co.uk", "www.aljazeera.com", "www3.nhk.or.jp", "rss.dw.com"]:
+    r = run(f"cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts {domain}\" 2>&1 | tail -2", t=10)
+    print(f" => {domain}: {'OK' if 'Address' in r or any(ch.isdigit() for ch in r) else 'FAIL'}")
+
+# Fetch the BBC feed inside the container; sync httpx because top-level `await` is a SyntaxError under `python -c`
+print("\n--- 容器内 fetch bbc ---")
+run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import httpx; r = httpx.get(\\\"https://feeds.bbci.co.uk/news/world/rss.xml\\\"); print(\\\"status:\\\", r.status_code, \\\"len:\\\", len(r.text))'\" 2>&1 | tail -5")
+c.close()