fix: pipeline INSERT 去掉不存在的 translate_to 字段
This commit is contained in:
78
scripts/_e2e.py
Normal file
78
scripts/_e2e.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import os, paramiko, urllib.request, urllib.error, json
|
||||
# SSH password is taken from the environment; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# One SSH client shared by every command in this script.
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# instead of rejecting them — confirm this is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: no SSH agent and no key files from disk.
    allow_agent=False,
    look_for_keys=False,
)
|
||||
|
||||
def run(cmd: str, t: int = 15) -> str:
    """Run *cmd* over the shared SSH client ``c`` and echo what came back.

    Prints the command line, the captured stdout, the captured stderr (to this
    process's stderr, prefixed with ``[err]``) and the remote exit status.

    Args:
        cmd: Shell command line handed to ``exec_command``.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, undecodable bytes replaced.
    """
    import sys  # imported here because the script's import line does not include sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # The exit status is collected only after both streams have been read.
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
|
||||
|
||||
import time

# NOTE(review): the HTTP checks in this script hit localhost on the machine that
# runs the script, whereas run() executes on the SSH host — confirm a local
# proxy/tunnel (or running on the server itself) is what is intended.
API_BASE = "http://localhost/api/v1"


def _read_json(request):
    """Send *request* with a 10 s timeout and return its JSON body; the response is closed."""
    with urllib.request.urlopen(request, timeout=10) as resp:
        return json.loads(resp.read())


def _bearer_get(path, token):
    """Build a request for API_BASE + *path* carrying *token* as a Bearer credential."""
    return urllib.request.Request(
        f"{API_BASE}{path}",
        headers={"Authorization": f"Bearer {token}"},
    )


try:
    # Pull the latest code on the server and rebuild the api container.
    run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")
    run("cd /srv/news && sg docker -c 'docker compose up -d --force-recreate --no-deps --build api' 2>&1 | tail -5", t=120)

    # Reset the role of the user named "owner" back to owner.
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'UPDATE users SET role = '\\''owner'\\'' WHERE username = '\\''owner'\\'';'\" 2>&1 | tail -3")

    # Trigger one fetch run (with a longer timeout), then pause briefly.
    print("\n=== 触发抓取(等 60 秒)===")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -15", t=180)
    time.sleep(5)

    # Count articles, and how many of them have a translated title.
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) AS total, count(title_zh) AS translated FROM articles;'\" 2>&1 | tail -5")

    # Password used for the owner login below.
    # NOTE(review): this credential is committed in source — consider moving it
    # to the environment like REMOTE_PASS.
    new_pw = "owner_pass_2026"

    # Log in, then exercise the authenticated endpoints.
    req = urllib.request.Request(
        f"{API_BASE}/auth/login",
        data=json.dumps({"username": "owner", "password": new_pw}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        data = _read_json(req)
        token = data["access_token"]
        print(f"\n=== 登录 OK!token 前 40: {token[:40]} ===")

        # Article listing: print the count and a short summary of the first three.
        ad = _read_json(_bearer_get("/articles?limit=5", token))
        print(f" articles: {len(ad.get('items', []))} 条")
        for a in ad.get("items", [])[:3]:
            print(f" - {a['source']['name']:20s} [{a['translation_status']:7s}] {a['title'][:50]}")
            if a.get("title_zh"):
                print(f" zh: {a['title_zh'][:50]}")

        # /me
        me = _read_json(_bearer_get("/me", token))
        print(f"\n /me: {me}")

        # /me/usage
        usage = _read_json(_bearer_get("/me/usage", token))
        print(f" /me/usage: {usage}")
    except urllib.error.HTTPError as e:
        # The server answered with an error status: show the code and the body.
        print(f"\n[FAIL] {e.code}")
        print(e.read().decode())
    except Exception as e:
        # Anything else (connection refused, bad JSON, missing key, ...) is
        # reported rather than raised, so the script still finishes.
        print(f"\n[ERR] {e}")
finally:
    # Close the SSH connection even when one of the steps above raised.
    c.close()
|
||||
28
scripts/_kick2.py
Normal file
28
scripts/_kick2.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os, paramiko
|
||||
# SSH password is taken from the environment; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# One SSH client shared by every command in this script.
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# instead of rejecting them — confirm this is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: no SSH agent and no key files from disk.
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd: str, t: int = 30) -> str:
    """Run *cmd* over the shared SSH client ``c`` and echo what came back.

    Prints the command line, the captured stdout, the captured stderr (to this
    process's stderr, prefixed with ``[err]``) and the remote exit status.

    Args:
        cmd: Shell command line handed to ``exec_command``.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, undecodable bytes replaced.
    """
    import sys  # imported here because the script's import line does not include sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # The exit status is collected only after both streams have been read.
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
|
||||
|
||||
try:
    # Run the fetch for a single source (id 2) on its own and show more log lines.
    print("--- BBC 单独抓取 ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import fetch_one_source; asyncio.run(fetch_one_source(2))'\" 2>&1 | tail -30", t=60)

    # Fetch and parse the BBC feed directly from inside the worker container.
    # The snippet must stay a chain of simple statements: `python -c` receives
    # it as one line, and a compound statement such as `async def` cannot
    # follow a `;`, so the synchronous httpx API is used here.
    print("\n--- 容器内 curl bbc ---")
    probe = (
        'import httpx, feedparser; '
        'r = httpx.get("https://feeds.bbci.co.uk/news/world/rss.xml", follow_redirects=True); '
        'print("status:", r.status_code, "len:", len(r.text)); '
        'f = feedparser.parse(r.text); '
        'print("entries:", len(f.entries)); '
        'print("first title:", f.entries[0].title if f.entries else None)'
    )
    # The snippet sits inside the double-quoted `sg docker -c "..."` argument,
    # so its double quotes have to be backslash-escaped for the outer shell.
    probe = probe.replace('"', '\\"')
    run(f"cd /srv/news && sg docker -c \"docker compose exec -T worker python -c '{probe}'\" 2>&1 | tail -10")

    # Show a few rows from the articles table.
    print("\n--- 查 article ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, source_id, title, translation_status, published_at FROM articles LIMIT 3;'\" 2>&1 | tail -10")
finally:
    # Close the SSH connection even when one of the steps above raised.
    c.close()
|
||||
42
scripts/_kick_off.py
Normal file
42
scripts/_kick_off.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import os, paramiko
|
||||
# SSH password is taken from the environment; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# One SSH client shared by every command in this script.
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# instead of rejecting them — confirm this is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: no SSH agent and no key files from disk.
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd: str, t: int = 60) -> str:
    """Run *cmd* over the shared SSH client ``c`` and echo what came back.

    Prints the command line, the captured stdout, the captured stderr (to this
    process's stderr, prefixed with ``[err]``) and the remote exit status.

    Args:
        cmd: Shell command line handed to ``exec_command``.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, undecodable bytes replaced.
    """
    import sys  # imported here because the script's import line does not include sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # The exit status is collected only after both streams have been read.
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
|
||||
|
||||
def _compose_exec(service, inner, tail):
    """Build the shell line that runs *inner* in the *service* container, keeping the last *tail* lines."""
    return f'cd /srv/news && sg docker -c "docker compose exec -T {service} {inner}" 2>&1 | tail -{tail}'


def _psql(sql, tail, quote="'"):
    """Build the shell line that passes *sql* to psql in the postgres container.

    *quote* is the delimiter placed around the SQL text on the psql command line.
    """
    return _compose_exec("postgres", f"psql -U news -d news -c {quote}{sql}{quote}", tail)


# 1) Show the current state of the sources table.
run(_psql("SELECT id, slug, url, enabled, last_status, fetch_interval_min FROM sources;", 10))

# 2) Repair the Reuters URL. Reuters moved its RSS feed to a new domain (Google
#    News would be another option); the simple approach taken here is to point
#    the reuters-world source at
#    https://www.reutersagency.com/feed/?best-topics=top-news
print("\n--- 更新 reuters url ---")
reuters_update = (
    "UPDATE sources SET url = 'https://www.reutersagency.com/feed/?best-topics=top-news&posttype=post', "
    "last_status = NULL, consecutive_failures = 0, fetch_interval_min = 30 "
    "WHERE slug = 'reuters-world';"
)
# The SQL contains single quotes, so it is wrapped in backslash-escaped double quotes instead.
run(_psql(reuters_update, 3, quote='\\"'))

# 3) Check that the new host name resolves inside the worker container.
print("\n--- 测新 url 解析 ---")
run(_compose_exec("worker", "getent hosts www.reutersagency.com", 3))

# 4) Trigger a fetch of every source.
print("\n--- 触发抓取 ---")
run_once_snippet = "python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'"
run(_compose_exec("worker", run_once_snippet, 20), t=180)

# 5) Count articles, and how many have a translated title.
print("\n--- article 数 ---")
run(_psql("SELECT count(*) total, count(title_zh) translated FROM articles;", 5))

# 6) Show the per-source status after the fetch.
print("\n--- 源状态 ---")
run(_psql("SELECT slug, last_status, consecutive_failures FROM sources ORDER BY id;", 10))

c.close()
|
||||
26
scripts/_net_check.py
Normal file
26
scripts/_net_check.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import os, paramiko
|
||||
# SSH password is taken from the environment; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# One SSH client shared by every command in this script.
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# instead of rejecting them — confirm this is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: no SSH agent and no key files from disk.
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd: str, t: int = 15) -> str:
    """Run *cmd* over the shared SSH client ``c`` and echo what came back.

    Prints the command line, the captured stdout, the captured stderr (to this
    process's stderr, prefixed with ``[err]``) and the remote exit status.

    Args:
        cmd: Shell command line handed to ``exec_command``.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, undecodable bytes replaced.
    """
    import sys  # imported here because the script's import line does not include sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # The exit status is collected only after both streams have been read.
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
|
||||
|
||||
# Shell template for a command executed inside the worker container.
in_worker = 'cd /srv/news && sg docker -c "docker compose exec -T worker {}" 2>&1 | tail -5'

probes = (
    # Resolver configuration seen by the container.
    in_worker.format("cat /etc/resolv.conf"),
    # Name lookup through getent.
    in_worker.format("getent hosts google.com"),
    # Name lookup for one specific feed host through Python's socket module.
    in_worker.format("python -c 'import socket; print(socket.gethostbyname(\\\"feeds.reuters.com\\\"))'"),
    # Same feed host requested from the SSH host itself (outside the container).
    "curl -s -o /dev/null -w '%{http_code}\\n' https://feeds.reuters.com/Reuters/worldNews 2>&1",
)

for position, command in enumerate(probes):
    # A separator line goes between consecutive probes, not before the first.
    if position:
        print("---")
    run(command)

c.close()
|
||||
25
scripts/_net_check2.py
Normal file
25
scripts/_net_check2.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import os, paramiko
|
||||
# SSH password is taken from the environment; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# One SSH client shared by every command in this script.
# NOTE(review): AutoAddPolicy accepts host keys that are not already known
# instead of rejecting them — confirm this is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    # Password-only login: no SSH agent and no key files from disk.
    allow_agent=False,
    look_for_keys=False,
)
|
||||
def run(cmd: str, t: int = 20) -> str:
    """Run *cmd* over the shared SSH client ``c`` and echo what came back.

    Prints the command line, the captured stdout, the captured stderr (to this
    process's stderr, prefixed with ``[err]``) and the remote exit status.

    Args:
        cmd: Shell command line handed to ``exec_command``.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, undecodable bytes replaced.
    """
    import sys  # imported here because the script's import line does not include sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # The exit status is collected only after both streams have been read.
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
|
||||
|
||||
import ipaddress


def _has_address_line(output):
    """Return True when some line of *output* begins with an IPv4 or IPv6 address.

    A successful ``getent hosts`` lookup prints ``<address> <name>``; checking for
    a real address avoids counting stray digits in warnings or error text as success.
    """
    for line in output.splitlines():
        fields = line.split()
        if not fields:
            continue
        try:
            ipaddress.ip_address(fields[0])
        except ValueError:
            continue
        return True
    return False


try:
    # Check name resolution inside the worker container for each feed host.
    for domain in ["feeds.reuters.com", "feeds.bbci.co.uk", "www.aljazeera.com", "www3.nhk.or.jp", "rss.dw.com"]:
        r = run(f"cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts {domain}\" 2>&1 | tail -2", t=10)
        print(f" => {domain}: {'OK' if _has_address_line(r) else 'FAIL'}")

    # Fetch the BBC feed from inside the container. `python -c` runs the
    # snippet as ordinary module code, where a bare `await` is a SyntaxError,
    # so the synchronous httpx API is used.
    print("\n--- 容器内 fetch bbc ---")
    probe = (
        'import httpx; '
        'r = httpx.get("https://feeds.bbci.co.uk/news/world/rss.xml"); '
        'print("status:", r.status_code, "len:", len(r.text))'
    )
    # The snippet sits inside the double-quoted `sg docker -c "..."` argument,
    # so its double quotes have to be backslash-escaped for the outer shell.
    probe = probe.replace('"', '\\"')
    run(f"cd /srv/news && sg docker -c \"docker compose exec -T worker python -c '{probe}'\" 2>&1 | tail -5")
finally:
    # Close the SSH connection even when one of the steps above raised.
    c.close()
|
||||
Reference in New Issue
Block a user