fix: pipeline INSERT 去掉不存在的 translate_to 字段

This commit is contained in:
Mavis
2026-06-07 23:32:13 +08:00
parent 30acd6af54
commit 3ebf280278
6 changed files with 199 additions and 1 deletions

78
scripts/_e2e.py Normal file
View File

@@ -0,0 +1,78 @@
import os, paramiko, urllib.request, urllib.error, json
# SSH password for the remote host; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# NOTE(review): AutoAddPolicy accepts unknown host keys without verification
# -- confirm that is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are not consulted.
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=15):
    """Execute ``cmd`` on the remote host via the module-level SSH client ``c``.

    The command line, its stdout, its stderr (prefixed with ``[err]`` and
    written to the local stderr) and its exit status are echoed locally.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    import sys  # local import instead of the inline __import__("sys") hack

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # Read both streams to the end before asking for the exit status.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
import time  # kept inside this section; the original imported it mid-script


def _api_json(url, token=None, body=None):
    """Call ``url`` and return the decoded JSON response.

    Args:
        url: Full URL to request.
        token: When given, sent as an ``Authorization: Bearer`` header.
        body: When given, JSON-encoded and sent as the request payload.

    Raises:
        urllib.error.HTTPError: propagated from ``urlopen`` on HTTP errors.
    """
    headers = {}
    payload = None
    if body is not None:
        payload = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"
    if token is not None:
        headers["Authorization"] = f"Bearer {token}"
    request = urllib.request.Request(url, data=payload, headers=headers)
    # Close the response deterministically instead of leaking the socket.
    with urllib.request.urlopen(request, timeout=10) as response:
        return json.loads(response.read())


try:
    # Pull the latest code on the server and rebuild the api container.
    run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")
    run("cd /srv/news && sg docker -c 'docker compose up -d --force-recreate --no-deps --build api' 2>&1 | tail -5", t=120)
    # Reset the role of the user named "owner" to "owner".
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'UPDATE users SET role = '\\''owner'\\'' WHERE username = '\\''owner'\\'';'\" 2>&1 | tail -3")
    # Trigger one fetch run; the banner says 60 s but the command timeout is 180 s.
    print("\n=== 触发抓取(等 60 秒)===")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -15", t=180)
    time.sleep(5)
    # Count articles and how many of them have a translated title.
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) AS total, count(title_zh) AS translated FROM articles;'\" 2>&1 | tail -5")

    # Password used for the login check below.
    # NOTE(review): hard-coded credential -- consider reading it from the environment.
    new_pw = "owner_pass_2026"

    # NOTE(review): these requests target localhost on the machine running this
    # script, not the SSH session -- presumably it runs on the server or behind
    # a tunnel; confirm.
    try:
        # Log in, then exercise the authenticated endpoints with the token.
        data = _api_json(
            "http://localhost/api/v1/auth/login",
            body={"username": "owner", "password": new_pw},
        )
        token = data["access_token"]
        print(f"\n=== 登录 OK!token 前 40: {token[:40]} ===")

        # Fetch a few articles and show the first three.
        ad = _api_json("http://localhost/api/v1/articles?limit=5", token=token)
        print(f" articles: {len(ad.get('items', []))}")
        for a in ad.get("items", [])[:3]:
            print(f" - {a['source']['name']:20s} [{a['translation_status']:7s}] {a['title'][:50]}")
            if a.get("title_zh"):
                print(f" zh: {a['title_zh'][:50]}")

        # Check /me.
        me = _api_json("http://localhost/api/v1/me", token=token)
        print(f"\n /me: {me}")

        # Check /me/usage.
        usage = _api_json("http://localhost/api/v1/me/usage", token=token)
        print(f" /me/usage: {usage}")
    except urllib.error.HTTPError as e:
        print(f"\n[FAIL] {e.code}")
        print(e.read().decode())
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on to cleanup.
        print(f"\n[ERR] {e}")
finally:
    # Always release the SSH connection, even when a remote step raises.
    c.close()

28
scripts/_kick2.py Normal file
View File

@@ -0,0 +1,28 @@
import os, paramiko
# SSH password for the remote host; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# NOTE(review): AutoAddPolicy accepts unknown host keys without verification
# -- confirm that is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are not consulted.
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=30):
    """Execute ``cmd`` on the remote host via the module-level SSH client ``c``.

    The command line, its stdout, its stderr (prefixed with ``[err]`` and
    written to the local stderr) and its exit status are echoed locally.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    import sys  # local import instead of the inline __import__("sys") hack

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # Read both streams to the end before asking for the exit status.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
try:
    # Run the fetch for source id 2 (BBC) on its own and show the full log tail.
    print("--- BBC 单独抓取 ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import fetch_one_source; asyncio.run(fetch_one_source(2))'\" 2>&1 | tail -30", t=60)

    # Fetch the BBC feed from inside the worker container and check that
    # feedparser can parse it. The one-liner must consist of simple statements
    # only: `async def` after `;` is a SyntaxError under `python -c`, so the
    # synchronous httpx API is used instead.
    print("\n--- 容器内 curl bbc ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import httpx, feedparser; r = httpx.get(\\\"https://feeds.bbci.co.uk/news/world/rss.xml\\\", follow_redirects=True); print(\\\"status:\\\", r.status_code, \\\"len:\\\", len(r.text)); f = feedparser.parse(r.text); print(\\\"entries:\\\", len(f.entries)); print(\\\"first title:\\\", f.entries[0].title if f.entries else None)'\" 2>&1 | tail -10")

    # Show a few stored articles.
    print("\n--- 查 article ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, source_id, title, translation_status, published_at FROM articles LIMIT 3;'\" 2>&1 | tail -10")
finally:
    # Always release the SSH connection, even when a remote step raises.
    c.close()

42
scripts/_kick_off.py Normal file
View File

@@ -0,0 +1,42 @@
import os, paramiko
# SSH password for the remote host; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# NOTE(review): AutoAddPolicy accepts unknown host keys without verification
# -- confirm that is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are not consulted.
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=60):
    """Execute ``cmd`` on the remote host via the module-level SSH client ``c``.

    The command line, its stdout, its stderr (prefixed with ``[err]`` and
    written to the local stderr) and its exit status are echoed locally.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    import sys  # local import instead of the inline __import__("sys") hack

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # Read both streams to the end before asking for the exit status.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
try:
    # 1) Show the current state of the sources table.
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, slug, url, enabled, last_status, fetch_interval_min FROM sources;'\" 2>&1 | tail -10")

    # 2) Fix the Reuters URL (a replacement could also come from Google News).
    #    Reuters moved its RSS feed to a new domain; the simple approach is to
    #    change the url of the row with slug=reuters-world, trying
    #    https://www.reutersagency.com/feed/?best-topics=top-news
    print("\n--- 更新 reuters url ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c \\\"UPDATE sources SET url = 'https://www.reutersagency.com/feed/?best-topics=top-news&posttype=post', last_status = NULL, consecutive_failures = 0, fetch_interval_min = 30 WHERE slug = 'reuters-world';\\\"\" 2>&1 | tail -3")

    # 3) Check that the new host name resolves from inside the worker container.
    print("\n--- 测新 url 解析 ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts www.reutersagency.com\" 2>&1 | tail -3")

    # 4) Trigger a fetch of all sources.
    print("\n--- 触发抓取 ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -20", t=180)

    # 5) Count articles and how many of them have a translated title.
    print("\n--- article 数 ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) total, count(title_zh) translated FROM articles;'\" 2>&1 | tail -5")

    # 6) Show the per-source status after the fetch.
    print("\n--- 源状态 ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT slug, last_status, consecutive_failures FROM sources ORDER BY id;'\" 2>&1 | tail -10")
finally:
    # Always release the SSH connection, even when a remote step raises.
    c.close()

26
scripts/_net_check.py Normal file
View File

@@ -0,0 +1,26 @@
import os, paramiko
# SSH password for the remote host; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# NOTE(review): AutoAddPolicy accepts unknown host keys without verification
# -- confirm that is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are not consulted.
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=15):
    """Execute ``cmd`` on the remote host via the module-level SSH client ``c``.

    The command line, its stdout, its stderr (prefixed with ``[err]`` and
    written to the local stderr) and its exit status are echoed locally.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    import sys  # local import instead of the inline __import__("sys") hack

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # Read both streams to the end before asking for the exit status.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
try:
    # Show the DNS resolver configuration inside the worker container.
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker cat /etc/resolv.conf\" 2>&1 | tail -5")
    print("---")
    # Resolve a well-known name from inside the container.
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts google.com\" 2>&1 | tail -5")
    print("---")
    # Resolve one specific feed domain through Python's socket module.
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import socket; print(socket.gethostbyname(\\\"feeds.reuters.com\\\"))'\" 2>&1 | tail -5")
    print("---")
    # Check the network from the host itself (outside the container).
    run("curl -s -o /dev/null -w '%{http_code}\\n' https://feeds.reuters.com/Reuters/worldNews 2>&1")
finally:
    # Always release the SSH connection, even when a remote step raises.
    c.close()

25
scripts/_net_check2.py Normal file
View File

@@ -0,0 +1,25 @@
import os, paramiko
# SSH password for the remote host; a missing REMOTE_PASS raises KeyError.
PW = os.environ["REMOTE_PASS"]

# NOTE(review): AutoAddPolicy accepts unknown host keys without verification
# -- confirm that is acceptable for this host.
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are not consulted.
c.connect(
    hostname="207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
def run(cmd, t=20):
    """Execute ``cmd`` on the remote host via the module-level SSH client ``c``.

    The command line, its stdout, its stderr (prefixed with ``[err]`` and
    written to the local stderr) and its exit status are echoed locally.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    import sys  # local import instead of the inline __import__("sys") hack

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    # Read both streams to the end before asking for the exit status.
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()

    print(f"$ {cmd}")
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    print(f" rc={rc}")
    return out
import ipaddress  # local to this script section; used to recognise resolver output


def _resolved(output):
    """Return True when ``output`` looks like a successful host lookup.

    A lookup counts as successful when some line starts with a valid IPv4 or
    IPv6 address. The legacy ``"Address"`` marker is still accepted. Unlike
    the previous "any digit" test, an error message that merely contains a
    digit is not mistaken for success.
    """
    if "Address" in output:
        return True
    for line in output.splitlines():
        fields = line.split()
        if not fields:
            continue
        try:
            ipaddress.ip_address(fields[0])
        except ValueError:
            continue
        return True
    return False


try:
    # Resolve each feed domain from inside the worker container.
    for domain in ["feeds.reuters.com", "feeds.bbci.co.uk", "www.aljazeera.com", "www3.nhk.or.jp", "rss.dw.com"]:
        r = run(f"cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts {domain}\" 2>&1 | tail -2", t=10)
        print(f" => {domain}: {'OK' if _resolved(r) else 'FAIL'}")

    # Fetch the BBC feed from inside the container. A bare `await` is a
    # SyntaxError at the top level of `python -c`, so the synchronous httpx
    # API is used.
    print("\n--- 容器内 fetch bbc ---")
    run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import httpx; r = httpx.get(\\\"https://feeds.bbci.co.uk/news/world/rss.xml\\\"); print(\\\"status:\\\", r.status_code, \\\"len:\\\", len(r.text))'\" 2>&1 | tail -5")
finally:
    # Always release the SSH connection, even when a remote step raises.
    c.close()