diff --git a/.gitignore b/.gitignore index 6f77b8f..afd415d 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,10 @@ alembic/versions/__pycache__/ *.7z *.bak +# 临时调试脚本(下划线开头,不进仓库) +scripts/_*.py +scripts/_*/ + # 敏感 secrets/ *.pem diff --git a/scripts/_api_redis.py b/scripts/_api_redis.py deleted file mode 100644 index 3c9eb24..0000000 --- a/scripts/_api_redis.py +++ /dev/null @@ -1,41 +0,0 @@ -import os, paramiko, base64 -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 在 API 容器里直接读 redis -script = ( - "import asyncio\n" - "from app.redis_client import get_redis\n" - "async def main():\n" - " r = get_redis()\n" - " await r.ping()\n" - " v = await r.get('translation:month:202606')\n" - " print('api sees:', v)\n" - " # 手动设一个值再读\n" - " await r.set('translation:month:202606', 999)\n" - " v2 = await r.get('translation:month:202606')\n" - " print('after set 999:', v2)\n" - "asyncio.run(main())\n" -) -b64 = base64.b64encode(script.encode()).decode() -run(f"docker exec news-aggregator-api-1 sh -c 'echo {b64} | base64 -d > /app/_test_redis.py'") -print("--- api 容器读 redis ---") -run("docker exec -w /app news-aggregator-api-1 python /app/_test_redis.py", t=15) - -# 立即调 /me/usage -import json -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f"--- /me/usage: {u}") -c.close() diff --git a/scripts/_check_after.py b/scripts/_check_after.py deleted file mode 100644 index 1145796..0000000 --- a/scripts/_check_after.py +++ /dev/null @@ -1,39 +0,0 @@ -"""DELETE 后看新数据(30 秒后)。""" -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=30): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 后台启 run_once -si, so, se = c.exec_command("nohup docker exec news-aggregator-worker-1 python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())' > /tmp/run_once.log 2>&1 & echo $!", timeout=10) -pid = so.read().decode().strip() -print(f"run_once started, PID={pid}") - -# 等 90 秒(全文抓取慢) -import time -time.sleep(90) - -# 看新数据 -print("\n--- 文章统计 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT count(*) AS total, count(CASE WHEN length(body_text) > 1000 THEN 1 END) AS long_body, avg(length(body_text))::int AS avg_len, max(length(body_text)) AS max_len FROM articles;\"") - -# 看 RSS 摘要 vs 全文(body_text > 1000 = trafilatura 工作了) -print("\n--- body_text 长度分布 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT CASE WHEN length(body_text) < 200 THEN '<200' WHEN length(body_text) < 1000 THEN '200-1k' ELSE '>1k' END AS bucket, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"") - -# 看翻译状态 -print("\n--- 翻译状态 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"") - -# 看前 5 篇文章 body 长度 + 来源 -print("\n--- 前 5 篇 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, source_id, LEFT(title, 50) AS title, length(body_text) AS body_len FROM articles ORDER BY id LIMIT 5;\"") -c.close() diff --git a/scripts/_check_body.py b/scripts/_check_body.py deleted file mode 100644 index 65ac455..0000000 --- a/scripts/_check_body.py +++ /dev/null @@ -1,52 +0,0 @@ -"""查 Ronaldo 那篇文章的 body 字段。""" -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 1) 看 body 字段 -print("--- 文章 body 字段(可能是空)---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, title, length(body_html) as html_len, length(body_text) as text_len, length(body_zh_text) as zh_len, lang_src, translation_status, url FROM articles WHERE id = 175177;\"") - -# 2) 看 3 篇典型 aljazeera 文章 -print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 3 ORDER BY fetched_at DESC LIMIT 5;\"") - -# 3) 抽 BBC(可能是最丰富的) -print("\n--- 抽 3 篇 BBC 看 body ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 2 ORDER BY fetched_at DESC LIMIT 5;\"") - -# 4) 拉 RSS 源看看,Al Jazeera 到底有没有内容 -print("\n--- 拉 Al Jazeera RSS 原始内容看 ---") -script = b''' -import asyncio, feedparser, httpx -async def main(): - async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c: - r = await c.get("https://www.aljazeera.com/xml/rss/all.xml") - f = feedparser.parse(r.text) - for e in f.entries[:3]: - print("---") - print("title:", e.title) - print("link:", e.link) - print("has content:", bool(e.get("content"))) - if e.get("content"): - print("content[0] keys:", list(e["content"][0].keys())) - print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200]) - print("has summary:", bool(e.get("summary"))) - if e.get("summary"): - print("summary[:200]:", e["summary"][:200]) -asyncio.run(main()) -''' -import base64 -b64 = base64.b64encode(script).decode() -run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'") -run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30) -c.close() diff --git a/scripts/_check_count.py b/scripts/_check_count.py deleted file mode 100644 index c4b688d..0000000 --- a/scripts/_check_count.py +++ /dev/null @@ -1,56 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 找一条最新抓的 article,重译 -print("--- 找最新 article + 重译 ---") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT id FROM articles ORDER BY fetched_at DESC LIMIT 1;\"") -aid = out.strip() -print(f" article id: {aid}") - -# 改回 pending 然后触发翻译 -run(f"docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending' WHERE id = {aid};\" 2>&1 | tail -2") - -# 直接用 worker 调 translate -print("--- 触发翻译 ---") -script = f'''import asyncio -from app.workers.pipeline import translate_article -from app.services.translation.service import service - -async def main(): - # 调一次 service - res = await service.translate("Hello world, this is a test of translation.", source="en", target="zh") - print("res:", res.engine, "chars:", res.chars, "text:", res.text[:50]) - # 再调一次,会走 cache - res2 = await service.translate("Hello world, this is a test of translation.", source="en", target="zh") - print("res2:", res2.engine, "cached:", res2.cached) - -asyncio.run(main()) -''' -print("--- 测试 service.translate ---") -import time -# 写脚本文件 + docker cp -with open("/tmp/_test_translate.py", "w", encoding="utf-8") as f: - f.write(script) -# 用 stdin -run(f"docker exec -i news-aggregator-worker-1 python -u", t=30) # 这个不通,要传脚本 -# 改成 echo | 跑 -quoted = script.replace('"', '\\"').replace('\\n', '\\\\n') -run(f"docker exec news-aggregator-worker-1 python -c \"{quoted}\"", t=30) - -# 看 redis -print("\n--- redis usage ---") -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f" GET: {out.strip()}") -c.close() diff --git a/scripts/_check_count2.py b/scripts/_check_count2.py deleted file mode 100644 index ff4ada3..0000000 --- a/scripts/_check_count2.py +++ /dev/null @@ -1,40 +0,0 @@ -import os, paramiko, base64 -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -script = ( - "import asyncio, logging\n" - "from app.services.translation.service import service\n" - "from app.redis_client import get_redis\n" - "logging.basicConfig(level=logging.INFO)\n" - "async def main():\n" - " r = get_redis(); await r.ping()\n" - " print('before:', await r.get('translation:month:202606') or 0)\n" - " res = await service.translate('Hello, world. This is a short test message.', source='en', target='zh')\n" - " print(' result engine=', res.engine, 'chars=', res.chars, 'text=', res.text[:60])\n" - " print('after:', await r.get('translation:month:202606') or 0)\n" - " res2 = await service.translate('Hello, world. This is a short test message.', source='en', target='zh')\n" - " print(' cached:', res2.cached, 'engine=', res2.engine)\n" - " print('after cache:', await r.get('translation:month:202606') or 0)\n" - "asyncio.run(main())\n" -) -script_b64 = base64.b64encode(script.encode()).decode() -print("--- 写文件 ---") -run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /tmp/_t.py'") -print("--- 跑 ---") -run("docker exec news-aggregator-worker-1 python /tmp/_t.py", t=30) -print("\n--- redis ---") -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f" usage: {out.strip()}") -c.close() diff --git a/scripts/_check_count3.py b/scripts/_check_count3.py deleted file mode 100644 index a156734..0000000 --- a/scripts/_check_count3.py +++ /dev/null @@ -1,46 +0,0 @@ -import os, paramiko, base64 -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 重置 usage 到 0(我之前测试加了 100) -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning") -print("usage 重置为 0") - -# 写脚本到 /app/_t.py -script = ( - "import asyncio, logging\n" - "from app.services.translation.service import service\n" - "from app.redis_client import get_redis\n" - "logging.basicConfig(level=logging.INFO)\n" - "async def main():\n" - " r = get_redis(); await r.ping()\n" - " print('before:', await r.get('translation:month:202606') or 0)\n" - " res = await service.translate('Hello, world. This is a short test message.', source='en', target='zh')\n" - " print(' result engine=', res.engine, 'chars=', res.chars, 'text=', res.text[:60])\n" - " print('after:', await r.get('translation:month:202606') or 0)\n" - " res2 = await service.translate('Hello, world. This is a short test message.', source='en', target='zh')\n" - " print(' cached:', res2.cached, 'engine=', res2.engine)\n" - " print('after cache:', await r.get('translation:month:202606') or 0)\n" - "asyncio.run(main())\n" -) -script_b64 = base64.b64encode(script.encode()).decode() -run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_t.py && cat /app/_t.py | head -3'") - -print("\n--- 跑(在 /app 下)---") -run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py", t=30) - -print("\n--- redis ---") -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f" usage: {out.strip()}") -c.close() diff --git a/scripts/_check_nhk.py b/scripts/_check_nhk.py deleted file mode 100644 index cc50323..0000000 --- a/scripts/_check_nhk.py +++ /dev/null @@ -1,61 +0,0 @@ -"""查 NHK 源配置 + 已入库文章 lang_src 实际值。""" -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 1) NHK 源配置 -print("--- 1. NHK 源配置 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, slug, language_src FROM sources WHERE slug = 'nhk-world';\"") - -# 2) 实际入库的 NHK 文章 lang_src 分布 -print("\n--- 2. 已入库 NHK 文章 lang_src 分布 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT lang_src, count(*) FROM articles WHERE source_id = (SELECT id FROM sources WHERE slug = 'nhk-world') GROUP BY 1;\"") - -# 3) 看 NHK RSS feed 实际的 字段 -print("\n--- 3. NHK RSS 实际 language 字段 ---") -script = ''' -import feedparser, httpx -async def main(): - f = feedparser.parse("https://www3.nhk.or.jp/rss/news/cat0.xml") - print("feed.feed.language:", f.feed.get("language")) - if f.entries: - e = f.entries[0] - print("entry.language:", e.get("language")) - print("title:", e.title) -asyncio.run(main()) -''' -import base64 -b64 = base64.b64encode(script.encode()).decode() -run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'") -# 修语法:用 3 撇号 -script = ( - "import feedparser, httpx, asyncio\n" - "async def main():\n" - " f = feedparser.parse('https://www3.nhk.or.jp/rss/news/cat0.xml')\n" - " print('feed.feed.language:', f.feed.get('language'))\n" - " if f.entries:\n" - " e = f.entries[0]\n" - " print('entry.language:', e.get('language'))\n" - " print('title:', e.title)\n" - "asyncio.run(main())\n" -) -b64 = base64.b64encode(script.encode()).decode() -run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'") -run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -10", t=20) - -# 4) 看翻译 service 收到的 source 是什么(我打一行新文章,看 lang_src 传到 service) -print("\n--- 4. service.translate 实际调用时 source 参数是什么? ---") -# 看 translate_article 代码 -out = run("docker exec news-aggregator-worker-1 python -c 'import app.workers.pipeline; import inspect; print(inspect.getsource(app.workers.pipeline.translate_article))' 2>&1 | grep -E 'lang_src|translate\\(' | head -10") -print(out) - -c.close() diff --git a/scripts/_check_progress.py b/scripts/_check_progress.py deleted file mode 100644 index 28b446e..0000000 --- a/scripts/_check_progress.py +++ /dev/null @@ -1,41 +0,0 @@ -"""等 2 分钟后看翻译消化进度。""" -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 翻译统计 -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, translation_engine, count(*), sum(translation_chars) FROM articles GROUP BY 1, 2 ORDER BY 1, 2;\"") -print("--- 翻译后统计 ---") -print(out) - -# redis usage -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -out = run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' GET translation:month:202606 2>/dev/null") -print(f"\n--- redis usage (已用): {out.strip()}") - -# /me/usage -import json -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'")) -print(f"--- /me/usage ---\n {u}") - -# worker 日志最后几行(看 translation_loop 节奏) -print("\n--- worker 日志最后 20 行(看 translation_loop 节奏)---") -out = run("docker logs --tail=20 news-aggregator-worker-1 2>&1 | grep -E 'translated|translation_loop|run_once' | tail -10", t=15) -print(out) - -# 验证 fetch_one_source 不再调翻译 -print("\n--- 找 fetch_one_source 日志(看是否还有 'article X translated' 紧跟 'source Y: N new')---") -out = run("docker logs --tail=200 news-aggregator-worker-1 2>&1 | grep -E 'new articles|article .+ translated' | tail -10", t=15) -print(out) -c.close() diff --git a/scripts/_check_sshd.py b/scripts/_check_sshd.py deleted file mode 100644 index f1433ea..0000000 --- a/scripts/_check_sshd.py +++ /dev/null @@ -1,16 +0,0 @@ -import os, sys, paramiko -PW = os.environ.get("REMOTE_PASS", "") -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd): - si, so, se = c.exec_command(cmd, timeout=15) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=sys.stderr) -run("ls -la /root/.ssh/ && echo --- && cat /root/.ssh/authorized_keys | head -1 | cut -c1-100") -run("sshd -T 2>/dev/null | grep -iE 'pubkeyauth|permitroot|authentic' | head -20") -run("grep -E 'PubkeyAuthentication|PermitRootLogin|PasswordAuthentication|AuthorizedKeysFile' /etc/ssh/sshd_config 2>/dev/null; echo --- && ls -la /etc/ssh/sshd_config.d/ 2>/dev/null") -c.close() diff --git a/scripts/_check_user.py b/scripts/_check_user.py deleted file mode 100644 index a2dab7c..0000000 --- a/scripts/_check_user.py +++ /dev/null @@ -1,35 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 直接 docker exec(不用 sg) -print("--- users ---") -run('docker exec news-aggregator-postgres-1 psql -U news -d news -c "SELECT id, username, role FROM users;"') -print("--- articles count ---") -run('docker exec news-aggregator-postgres-1 psql -U news -d news -c "SELECT count(*), count(title_zh) FROM articles;"') - -# 重设 owner 为已知密码 -print("--- 重设 owner 密码 ---") -import secrets -new_pw = "Owner@" + secrets.token_hex(4) -run(f'docker exec news-aggregator-api-1 python -m app.scripts.create_user --username owner --password "{new_pw}" 2>&1 | tail -3') -# 但因为已存在,create_user 会拒绝;改用直接 update -run(f'docker exec news-aggregator-postgres-1 psql -U news -d news -c "UPDATE users SET password_hash = (SELECT password_hash FROM users WHERE username = (SELECT username FROM users LIMIT 1)) WHERE id = 1;" 2>&1') -# 用 python 重设 hash -import hashlib -hash_v = hashlib.sha256(("Owner@2026_" + secrets.token_hex(4)).encode()).hexdigest() -print(f" new pw: Owner@2026_{secrets.token_hex(4)}") - -# 写文件 -run(f'echo "Owner@2026_test123" > /root/.owner_pass && chmod 600 /root/.owner_pass') -print(" written to /root/.owner_pass") -c.close() diff --git a/scripts/_clean_cache.py b/scripts/_clean_cache.py deleted file mode 100644 index 444f56e..0000000 --- a/scripts/_clean_cache.py +++ /dev/null @@ -1,17 +0,0 @@ - -import redis -r = redis.Redis(host="localhost", port=6379, password="b5eb4d10f12a5b1f82ab0a581105d5192a0a0b22366934dc", decode_responses=True) -to_del = [] -n = 0 -for k in r.scan_iter("translation:cache:*", count=200): - v = r.get(k) - if v and ("[翻译失败" in v or "[本条未翻译" in v): - to_del.append(k) - n += 1 -print(f" found {n} bad keys, deleting...") -if to_del: - r.delete(*to_del) -print(f" deleted {len(to_del)}") -# 总数 -total = sum(1 for _ in r.scan_iter("translation:cache:*", count=200)) -print(f" remaining cache keys: {total}") diff --git a/scripts/_count_test.py b/scripts/_count_test.py deleted file mode 100644 index a2778d5..0000000 --- a/scripts/_count_test.py +++ /dev/null @@ -1,42 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -# 重置 usage -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning") -print("usage 重置为 0") - -# 找一条 article 重译 -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT id FROM articles WHERE translation_status = 'ok' ORDER BY translation_chars DESC LIMIT 1;\"") -aid = out.strip() -print(f"\n重译 article {aid}...") -run(f"docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending' WHERE id = {aid};\" 2>&1 | tail -2") -run(f"cd /srv/news && docker exec news-aggregator-worker-1 python -c 'import asyncio; from app.workers.pipeline import translate_article; asyncio.run(translate_article({aid}))' 2>&1 | tail -5", t=60) - -# 看 usage -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f"\n--- redis usage: {out.strip()}") - -# 看 article 的 translation_chars -out = run(f"docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT translation_chars FROM articles WHERE id = {aid};\"") -print(f"--- article {aid} translation_chars (DB): {out.strip()}") - -# 实际值对比 -print("\n--- /me/usage ---") -import json -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(json.dumps(u, indent=2)) -c.close() diff --git a/scripts/_direct2.py b/scripts/_direct2.py deleted file mode 100644 index 758b767..0000000 --- a/scripts/_direct2.py +++ /dev/null @@ -1,55 +0,0 @@ -"""重置 + 直接调 service 测 usage 链路。 - -实现:用 paramiko 写脚本到容器临时文件,然后 docker exec 跑。""" -import os, paramiko, base64, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null") -print("--- usage 重置 0 ---") - -# 在本机写脚本,scp 到容器(不行,容器是 worker 容器,用 docker cp) -script = ( - "import asyncio\n" - "from app.services.translation.service import service\n" - "from app.redis_client import get_redis\n" - "async def main():\n" - " r = get_redis(); await r.ping()\n" - " print('before:', await r.get('translation:month:202606') or 0, flush=True)\n" - " res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n" - " print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)\n" - " print('after 1:', await r.get('translation:month:202606') or 0, flush=True)\n" - " res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')\n" - " print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)\n" - " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)\n" - " res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n" - " print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)\n" - " print('after 3:', await r.get('translation:month:202606') or 0, flush=True)\n" - "asyncio.run(main())\n" -) -local_path = "D:/selftools/diary-news/scripts/_t_direct.py" -with open(local_path, "w", encoding="utf-8") as f: - f.write(script) -# docker cp 进 worker 容器 -run("docker cp " + local_path + " news-aggregator-worker-1:/app/_td.py") -print("--- 跑 ---") -run("docker exec -w /app news-aggregator-worker-1 python /app/_td.py 2>&1 | tail -15", t=30) - -# /me/usage -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'")) -print("\n--- /me/usage ---") -print(" ", u) -c.close() diff --git a/scripts/_direct3.py b/scripts/_direct3.py deleted file mode 100644 index dd30c18..0000000 --- a/scripts/_direct3.py +++ /dev/null @@ -1,57 +0,0 @@ -"""重置 + 直接调 service 测 usage 链路 — 用 docker exec -i 传脚本。""" -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null") -print("--- usage 重置 0 ---") - -# 用 stdin 喂脚本 -script = '''import asyncio -from app.services.translation.service import service -from app.redis_client import get_redis -async def main(): - r = get_redis(); await r.ping() - print("before:", await r.get("translation:month:202606") or 0, flush=True) - res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh") - print(" call 1: engine=", res1.engine, "chars=", res1.chars, "text=", res1.text[:40], flush=True) - print("after 1:", await r.get("translation:month:202606") or 0, flush=True) - res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh") - print(" call 2: engine=", res2.engine, "chars=", res2.chars, flush=True) - print("after 2:", await r.get("translation:month:202606") or 0, flush=True) - res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh") - print(" call 3 (cache): cached=", res3.cached, "engine=", res3.engine, flush=True) - print("after 3:", await r.get("translation:month:202606") or 0, flush=True) -asyncio.run(main()) -''' -# 写到 worker 容器内的 /app 目录 -# docker exec -i 把脚本从 stdin 写入 -run("docker exec -i -w /app news-aggregator-worker-1 sh -c 'cat > /app/_t.py' 2>/dev/null", t=5) # 这个会 hang - -# 改:用 docker exec 的 stdin (paramiko 可以发 stdin) -si, so, se = c.exec_command("docker exec -i -w /app news-aggregator-worker-1 sh -c 'cat > /app/_t.py && python /app/_t.py'", timeout=30) -si.sendall(script.encode("utf-8")) -si.channel.shutdown_write() # 关闭 stdin 告诉 docker 没更多输入 -out = so.read().decode("utf-8", "replace") -err = se.read().decode("utf-8", "replace") -print(f"--- 跑 ---\n{out}") -if err and "Warning" not in err: print("err:", err) - -# /me/usage -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'")) -print("\n--- /me/usage ---") -print(" ", u) -c.close() diff --git a/scripts/_direct4.py b/scripts/_direct4.py deleted file mode 100644 index ae13202..0000000 --- a/scripts/_direct4.py +++ /dev/null @@ -1,66 +0,0 @@ -"""最简单的方式:把脚本内容写到容器内,再 docker exec 跑。""" -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() - -# 1) 重置 -run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null") -print("--- usage 重置 0 ---") - -# 2) 把脚本写到 server 本地 /tmp(用 heredoc 一次性写完) -script_lines = [ - "import asyncio", - "from app.services.translation.service import service", - "from app.redis_client import get_redis", - "async def main():", - " r = get_redis(); await r.ping()", - " print('before:', await r.get('translation:month:202606') or 0, flush=True)", - " res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')", - " print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)", - " print('after 1:', await r.get('translation:month:202606') or 0, flush=True)", - " res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')", - " print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)", - " print('after 2:', await r.get('translation:month:2026') or 0, flush=True)" if False else " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)", - " res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')", - " print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)", - " print('after 3:', await r.get('translation:month:202606') or 0, flush=True)", - "asyncio.run(main())", -] -script = "\n".join(script_lines) -# 写到 server /tmp -local = "D:/selftools/diary-news/scripts/_tscript.py" -with open(local, "w", encoding="utf-8") as f: - f.write(script) - -# 复制到 server -si, so, se = c.exec_command("cat > /tmp/_t.py", timeout=10) -with open(local, "r", encoding="utf-8") as f: - si.write(f.read().encode()) -si.channel.shutdown_write() -so.read() -print("--- script 写到 /tmp/_t.py ---") - -# 复制到 worker 容器 -run("docker cp /tmp/_t.py news-aggregator-worker-1:/app/_t.py") -print("--- 跑 ---") -run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -15", t=30) - -# /me/usage -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = __import__("json").loads(out)["access_token"] -u = __import__("json").loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'")) -print("\n--- /me/usage ---") -print(" ", u) -c.close() diff --git a/scripts/_direct_test.py b/scripts/_direct_test.py deleted file mode 100644 index b38cbfb..0000000 --- a/scripts/_direct_test.py +++ /dev/null @@ -1,54 +0,0 @@ -import os, paramiko, base64, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr") - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() - -# 重置 -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning") -print("--- usage 重置 0 ---") - -# 在 worker 进程内直接调 service.translate 两次(确认链路) -script_b64 = base64.b64encode(b''' -import asyncio, sys -from app.services.translation.service import service -from app.redis_client import get_redis - -async def main(): - r = get_redis() - await r.ping() - print(f"before: {await r.get('translation:month:202606') or 0}", flush=True) - # 1) 全新字符串 -> 走 tencent - res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh") - print(f" call 1: engine={res1.engine} chars={res1.chars} text={res1.text[:40]!r}", flush=True) - print(f"after 1: {await r.get('translation:month:202606') or 0}", flush=True) - # 2) 另一段 - res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh") - print(f" call 2: engine={res2.engine} chars={res2.chars}", flush=True) - print(f"after 2: {await r.get('translation:month:202606') or 0}", flush=True) - # 3) 重复 1 的文本 -> 走 cache - res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh") - print(f" call 3 (cache): cached={res3.cached} engine={res3.engine}", flush=True) - print(f"after 3: {await r.get('translation:month:202606') or 0}", flush=True) -asyncio.run(main()) -''').decode() -run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt2.py'") -print("--- worker 跑 ---") -run("docker exec -w /app news-aggregator-worker-1 python /app/_tt2.py 2>&1 | tail -15", t=30) - -# /me/usage -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f"\n--- /me/usage ---\n {u}") -c.close() diff --git a/scripts/_e2e.py b/scripts/_e2e.py deleted file mode 100644 index 3158a39..0000000 --- a/scripts/_e2e.py +++ /dev/null @@ -1,78 +0,0 @@ -import os, paramiko, urllib.request, urllib.error, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) - -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 服务器 pull + 重建 api -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") -run("cd /srv/news && sg docker -c 'docker compose up -d --force-recreate --no-deps --build api' 2>&1 | tail -5", t=120) - -# 重设 owner 角色为 owner -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'UPDATE users SET role = '\\''owner'\\'' WHERE username = '\\''owner'\\'';'\" 2>&1 | tail -3") - -# 触发一次抓取(等久点) -print("\n=== 触发抓取(等 60 秒)===") -import time -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -15", t=180) -time.sleep(5) - -# 查 article 数 -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) AS total, count(title_zh) AS translated FROM articles;'\" 2>&1 | tail -5") - -# 拿新密码 -new_pw = "owner_pass_2026" - -# 登录 + 拉 -req = urllib.request.Request( - "http://localhost/api/v1/auth/login", - data=json.dumps({"username": "owner", "password": new_pw}).encode(), - headers={"Content-Type": "application/json"}, -) -try: - resp = urllib.request.urlopen(req, timeout=10) - data = json.loads(resp.read()) - print(f"\n=== 登录 OK!token 前 40: {data['access_token'][:40]} ===") - # 测拉 articles - req2 = urllib.request.Request( - "http://localhost/api/v1/articles?limit=5", - headers={"Authorization": f"Bearer {data['access_token']}"}, - ) - resp2 = urllib.request.urlopen(req2, timeout=10) - ad = json.loads(resp2.read()) - print(f" articles: {len(ad.get('items', []))} 条") - for a in ad.get("items", [])[:3]: - print(f" - {a['source']['name']:20s} [{a['translation_status']:7s}] {a['title'][:50]}") - if a.get("title_zh"): - print(f" zh: {a['title_zh'][:50]}") - # 测 /me - req3 = urllib.request.Request( - "http://localhost/api/v1/me", - headers={"Authorization": f"Bearer {data['access_token']}"}, - ) - me = json.loads(urllib.request.urlopen(req3, timeout=10).read()) - print(f"\n /me: {me}") - # 测 /me/usage - req4 = urllib.request.Request( - "http://localhost/api/v1/me/usage", - headers={"Authorization": f"Bearer {data['access_token']}"}, - ) - usage = json.loads(urllib.request.urlopen(req4, timeout=10).read()) - print(f" /me/usage: {usage}") -except urllib.error.HTTPError as e: - print(f"\n[FAIL] {e.code}") - print(e.read().decode()) -except Exception as e: - print(f"\n[ERR] {e}") -c.close() diff --git a/scripts/_enable_pubkey.py b/scripts/_enable_pubkey.py deleted file mode 100644 index b51dcb3..0000000 --- a/scripts/_enable_pubkey.py +++ /dev/null @@ -1,37 +0,0 @@ -import os, sys, paramiko -PW = os.environ.get("REMOTE_PASS", "") -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, allow_fail=False): - print(f"$ {cmd}") - si, so, se = c.exec_command(cmd, timeout=20) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=sys.stderr) - print(f" -> rc={rc}") - if rc != 0 and not allow_fail: - raise SystemExit(f"failed: {cmd}") - return out, err, rc - -# 1) 备份 -run("cp -a /etc/ssh/sshd_config /etc/ssh/sshd_config.bak.$(date +%s)") -# 2) 改 PubkeyAuthentication -run("sed -i -E 's/^#?\\s*PubkeyAuthentication.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config") -# 3) 确认 -run("grep -n '^[^#]*PubkeyAuthentication' /etc/ssh/sshd_config") -# 4) 语法检查 -run("sshd -t && echo 'sshd config OK'") -# 5) 重启(用 service 或 systemctl,Ubuntu 24 用 systemd) -# 先试 systemctl,失败回退 service -out, _, _ = run("systemctl is-active ssh 2>/dev/null || systemctl is-active sshd 2>/dev/null || echo NONE", allow_fail=True) -if "active" in out: - run("systemctl restart ssh || systemctl restart sshd") -else: - run("service ssh restart || service sshd restart") -# 6) 再确认 sshd 配置生效 -run("sshd -T 2>/dev/null | grep -i pubkeyauth") -c.close() -print("DONE") diff --git a/scripts/_final2.py b/scripts/_final2.py deleted file mode 100644 index 3c0d729..0000000 --- a/scripts/_final2.py +++ /dev/null @@ -1,77 +0,0 @@ -"""直接用 paramiko + 容器内 Python 重置 owner 密码为固定值,然后验证登录。""" -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -NEW_PW = "Owner2026!" - -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=30): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 在 api 容器内用 python 算 bcrypt hash + update -print("--- 重设密码 ---") -quoted = NEW_PW.replace('"', '\\"') -cmd = f'''docker exec news-aggregator-api-1 python -c " -from app.core.security import hash_password -from app.database import AsyncSessionLocal -from app.models.user import User -from sqlalchemy import select -import asyncio - -async def main(): - async with AsyncSessionLocal() as s: - r = await s.execute(select(User).where(User.username == 'owner')) - u = r.scalar_one_or_none() - if u is None: - print('NO USER') - return - u.password_hash = hash_password('{NEW_PW}') - u.role = 'owner' - await s.commit() - print('OK', u.id, u.username, u.role.value) - -asyncio.run(main()) -"''' -out = run(cmd, t=30) -print(out) - -# 写文件 -run(f'echo "{NEW_PW}" > /root/.owner_pass && chmod 600 /root/.owner_pass') -print(f" /root/.owner_pass = {NEW_PW}") - -# 登录 -print("\n--- 登录 ---") -import urllib.parse -body = json.dumps({"username": "owner", "password": NEW_PW}) -out = run(f"curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{body}'") -try: - data = json.loads(out) - token = data.get("access_token") - if not token: - print(f"登录失败: {out}") - else: - print(f"登录 OK, token 前 30: {token[:30]}...") - # 拉 articles - out2 = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles?limit=3'") - ad = json.loads(out2) - print(f"\n/articles 返回 {len(ad['items'])} 条:") - for a in ad['items'][:3]: - print(f" [{a['translation_status']:8s}] {a['source']['name']:14s} | {a['title'][:50]}") - if a.get('title_zh'): - print(f" zh: {a['title_zh'][:50]}") - # /me - me = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me'")) - print(f"\n/me: {me}") - # /me/usage - u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) - print(f"/me/usage: {u}") -except Exception as e: - print(f"parse err: {e}\n raw: {out}") -c.close() diff --git a/scripts/_final3.py b/scripts/_final3.py deleted file mode 100644 index f74bb10..0000000 --- a/scripts/_final3.py +++ /dev/null @@ -1,62 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -NEW_PW = "Owner2026!" - -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# pull + 重建 api -print("--- pull + 重建 api ---") -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") -run('docker compose -f /srv/news/docker-compose.yml up -d --force-recreate --no-deps --build api', t=120) -import time -time.sleep(6) - -# 登录 -print("\n--- 登录 ---") -body = json.dumps({"username": "owner", "password": NEW_PW}) -out = run(f"curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{body}'") -data = json.loads(out) -token = data.get("access_token") -print(f" 登录 OK, token: {token[:30]}...") - -# 拉 articles -print("\n--- /api/v1/articles ---") -out = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles?limit=3'") -try: - ad = json.loads(out) - print(f" 返回 {len(ad.get('items', []))} 条:") - for a in ad.get("items", [])[:3]: - print(f" [{a['translation_status']:8s}] {a['source']['name']:14s} | {a['title'][:50]}") - if a.get("title_zh"): - print(f" zh: {a['title_zh'][:50]}") -except Exception as e: - print(f" parse err: {e}\n raw: {out[:300]}") - -# /me -print("\n--- /me ---") -me = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me'")) -print(f" {me}") - -# /me/usage -print("\n--- /me/usage ---") -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f" {u}") - -# /sources -print("\n--- /sources ---") -sl = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/sources'")) -print(f" {len(sl)} 个源:") -for s in sl: - en = "✓" if s["enabled"] else "✗" - print(f" {en} [{s['priority']:3d}] {s['slug']:18s} {s['name']}") -c.close() diff --git a/scripts/_final4.py b/scripts/_final4.py deleted file mode 100644 index ebb5c9a..0000000 --- a/scripts/_final4.py +++ /dev/null @@ -1,73 +0,0 @@ -import os, paramiko, base64, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=120): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() - -# 1) 服务器 pull -print("--- pull ---") -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") - -# 2) 重建 worker + api -print("--- 重建 ---") -run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker api 2>&1 | tail -8", t=120) -import time -time.sleep(8) - -# 3) 重置 usage = 0 -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning") -print("--- usage reset to 0 ---") - -# 4) 把 5 篇文章重置为 pending 触发翻译 -print("--- 触发翻译(5 篇)---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending' WHERE id IN (SELECT id FROM articles WHERE translation_status = 'ok' ORDER BY id LIMIT 5);\" 2>&1 | tail -2") - -# 5) 跑 worker pipeline 重译 -script_b64 = base64.b64encode(b''' -import asyncio -from app.workers.pipeline import translate_article -from app.database import AsyncSessionLocal -from app.models.article import Article -from sqlalchemy import select - -async def main(): - async with AsyncSessionLocal() as s: - rows = (await s.execute(select(Article.id).where(Article.translation_status == 'pending').limit(10))).all() - ids = [r[0] for r in rows] - print(f"translating {len(ids)} pending") - for aid in ids: - await translate_article(aid) -asyncio.run(main()) -''').decode() -run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'") -run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -10", t=120) - -# 6) 看 usage -print("\n--- redis usage ---") -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f" usage: {out.strip()}") - -# 7) /me/usage -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f"--- /me/usage ---\n {u}") - -# 8) 容器状态 -print("\n--- docker ps ---") -run("docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}' 2>&1 | tail -10") - -# 9) 翻译后统计 -print("\n--- 翻译统计 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"") -c.close() diff --git a/scripts/_final5.py b/scripts/_final5.py deleted file mode 100644 index b056880..0000000 --- a/scripts/_final5.py +++ /dev/null @@ -1,67 +0,0 @@ -import os, paramiko, base64, json, time -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=120): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() - -# 强制重置 -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status='pending' WHERE id IN (SELECT id FROM articles WHERE translation_status='ok' ORDER BY id LIMIT 3);\" 2>&1 | tail -2") - -# 等 -time.sleep(3) - -# 查 pending -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE translation_status='pending';\"") -print(f"pending articles: {out.strip()}") - -# 重置 usage -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning") - -# 跑 worker 重译 -script_b64 = base64.b64encode(b''' -import asyncio -from app.workers.pipeline import translate_article -from app.database import AsyncSessionLocal -from app.models.article import Article -from sqlalchemy import select - -async def main(): - async with AsyncSessionLocal() as s: - rows = (await s.execute(select(Article).where(Article.translation_status=='pending').limit(5))).all() - for r in rows: r[0] - ids = [r[0].id for r in rows] - print(f"translating {len(ids)}") - for aid in ids: - try: - await translate_article(aid) - except Exception as e: - print(f" err on {aid}: {e}") - print("done") -asyncio.run(main()) -''').decode() -run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'") -run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -20", t=180) - -# 看 usage -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f"\n--- redis usage: {out.strip()}") - -# /me/usage -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f"--- /me/usage: {u}") - -# 翻译后统计 -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"") -c.close() diff --git a/scripts/_final_check.py b/scripts/_final_check.py deleted file mode 100644 index 3a2ea2c..0000000 --- a/scripts/_final_check.py +++ /dev/null @@ -1,60 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=30): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - return out, err, rc - -pw = run("cat /root/.owner_pass")[0].strip() -print(f"owner 密码: {pw}\n") - -# 1) 登录 -out, _, _ = run(f"curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{{\"username\":\"owner\",\"password\":\"{pw}\"}}'") -data = json.loads(out) -token = data["access_token"] -print(f"=== 登录 OK ===\n expires_in: {data['expires_in']}s\n token 前 40: {token[:40]}...\n") - -# 2) 拉 articles (5 条) -out, _, _ = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles?limit=5'") -ad = json.loads(out) -print(f"=== /api/v1/articles 返回 {len(ad['items'])} 条 ===") -for a in ad["items"][:5]: - print(f" [{a['translation_status']:8s}] {a['source']['name']:14s} | {a['title'][:55]}") - if a.get("title_zh"): - print(f" zh: {a['title_zh'][:55]}") - -# 3) /me/usage -out, _, _ = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'") -u = json.loads(out) -print(f"\n=== /me/usage ===\n {json.dumps(u, indent=2, ensure_ascii=False)}") - -# 4) 详情 -if ad["items"]: - aid = ad["items"][0]["id"] - out, _, _ = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles/{aid}'") - det = json.loads(out) - print(f"\n=== /articles/{aid} 详情 ===") - print(f" title (en): {det['title'][:60]}") - print(f" title (zh): {(det.get('title_zh') or '—')[:60]}") - print(f" body_text: {len(det['body_text'])} 字符") - print(f" body_zh_text: {len(det.get('body_zh_text') or '')} 字符") - print(f" status: {det['translation_status']}") - print(f" engine: {det.get('translation_engine', '—')}") - -# 5) sources -out, _, _ = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/sources'") -slist = json.loads(out) -print(f"\n=== /api/v1/sources ({len(slist)} 个) ===") -for s in slist: - enabled = "✓" if s["enabled"] else "✗" - print(f" {enabled} [{s['priority']:3d}] {s['slug']:18s} | {s['name']:25s} | {s['region'] or '—':8s} | {s['fetch_interval_min']}m") - -# 6) 容器状态 -out, _, _ = run("cd /srv/news && sg docker -c 'docker compose ps --format \"table {{.Name}}\\t{{.Status}}\\t{{.Ports}}\"' 2>&1 | tail -10") -print(f"\n=== Docker 状态 ===\n{out}") -c.close() diff --git a/scripts/_fix_scalars.py b/scripts/_fix_scalars.py deleted file mode 100644 index 4a61c8e..0000000 --- a/scripts/_fix_scalars.py +++ /dev/null @@ -1,63 +0,0 @@ -"""批量修 API 文件:把 `(await ...).scalars()` 改成显式两步走。""" -import re -from pathlib import Path - -api_dir = Path("D:/selftools/diary-news/backend/app/api") -files = list(api_dir.glob("*.py")) - -# 模式 1: user = (await ...).scalars().first() (多行括号形式) -# 模式 2: rows = (await ...).scalars() (单行) -# 都改成 result = await ...; user = result.scalars().first() 这种 - -for f in files: - src = f.read_text(encoding="utf-8") - orig = src - changed = False - - # 模式 1:跨行的( await ... ).scalars().first() - # 匹配:任意前缀(空白)+ ( 多行 await session.execute(...) ) .scalars() .first() - pat1 = re.compile( - r'(\s+)([\w_]+)\s*=\s*\(\s*\n' - r'(\s+)await\s+session\.execute\((.*?)\)\s*\n' - r'\s+\)\s*' - r'\.scalars\(\)\s*' - r'\.first\(\)', - re.DOTALL, - ) - def repl1(m): - indent = m.group(1) - var = m.group(2) - inner_indent = m.group(3) - exec_arg = m.group(4) - return ( - f"{indent}result = await session.execute({exec_arg})\n" - f"{indent}{var} = result.scalars().first()" - ) - new = pat1.sub(repl1, src) - if new != src: - changed = True - src = new - - # 模式 2:单行 (await session.execute(...)).scalars() - pat2 = re.compile( - r'(\s+)([\w_]+)\s*=\s*\(await\s+session\.execute\((.*?)\)\)\s*\.scalars\(\)', - re.DOTALL, - ) - def repl2(m): - indent = m.group(1) - var = m.group(2) - exec_arg = m.group(3) - return ( - f"{indent}result = await session.execute({exec_arg})\n" - f"{indent}{var} = result.scalars()" - ) - new = pat2.sub(repl2, src) - if new != src: - changed = True - src = new - - if changed: - f.write_text(src, encoding="utf-8") - print(f"[ok] {f.name}") - else: - print(f" {f.name}") diff --git a/scripts/_force_refetch.py b/scripts/_force_refetch.py deleted file mode 100644 index 9f22378..0000000 --- a/scripts/_force_refetch.py +++ /dev/null @@ -1,55 +0,0 @@ -"""强制全文重抓: -1. 备份 209 篇到 /tmp/articles_backup.json -2. DELETE FROM articles -3. 触发 run_once 让 worker 重抓(trafilatura 抓全文) -4. 等 1 分钟看新数据 -""" -import os, paramiko, json, time -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr) - return out - -# 1) 备份 -print("=== 1. 备份 209 篇文章到 /tmp/articles_backup.json ===") -run("docker exec news-aggregator-postgres-1 pg_dump -U news -d news -t articles --data-only --column-inserts > /tmp/articles_backup.sql") -out = run("ls -la /tmp/articles_backup.sql | awk '{print $5, $9}'") -print(f" 备份文件: {out.strip()}") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles;\"") -print(f" 当前文章数: {out.strip()}") - -# 2) DELETE 全部 -print("\n=== 2. DELETE 所有文章 ===") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"DELETE FROM articles;\"") -print(out) -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles;\"") -print(f" 删后文章数: {out.strip()}") - -# 3) 触发 run_once -print("\n=== 3. 触发 worker run_once(4 源重新 fetch) ===") -run("docker exec news-aggregator-worker-1 python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())' 2>&1 | tail -10", t=120) - -# 4) 等 30 秒看新文章入库 -print("\n=== 4. 30 秒后看新数据 ===") -time.sleep(30) -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT count(*) AS total, count(CASE WHEN length(body_text) > 1000 THEN 1 END) AS long_body, avg(length(body_text))::int AS avg_len FROM articles;\"") -print(out) - -# 5) 看 trafilatura 是否生效 -print("\n=== 5. 看 RSS 摘要 vs trafilatura 全文 ===") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, source_id, LEFT(title, 50) AS title, length(body_text) AS body_len FROM articles ORDER BY id LIMIT 10;\"") -print(out) - -# 6) translation_status 分布 -print("\n=== 6. 翻译状态 ===") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"") -print(out) -c.close() diff --git a/scripts/_http_check.py b/scripts/_http_check.py deleted file mode 100644 index 47bc228..0000000 --- a/scripts/_http_check.py +++ /dev/null @@ -1,81 +0,0 @@ -"""检查去重逻辑 + 启动 HTTP 实测。""" -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# ========== 1. 启动 HTTP 看看 ========== -print("=" * 60) -print("1. HTTP 实测") -print("=" * 60) - -# 首页 (Caddy 转发到 frontend) -out = run("curl -sS -o /tmp/idx.html -w 'status=%{http_code} size=%{size_download} type=%{content_type}\\n' http://207.57.129.228/") -print(f"\n[GET /]") -print(f" -> {out.strip()}") -out = run("head -c 200 /tmp/idx.html") -print(f" body[0:200]: {out}") - -# /api/v1/healthz -out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/healthz") -print(f"\n[GET /api/v1/healthz]") -print(f" -> {out.strip()}") - -# /api/v1/articles (没 token 应该 401) -out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/articles?limit=3") -print(f"\n[GET /api/v1/articles 无 token]") -print(f" -> {out.strip()[:300]}") - -# 登录 -out = run("curl -sS -X POST http://207.57.129.228/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -data = json.loads(out) -token = data.get("access_token", "") -print(f"\n[POST /api/v1/auth/login]") -print(f" -> token: {token[:40]}...") - -# /api/v1/articles 带 token -out = run("curl -sS -w '\nstatus=%{http_code}\n' -H 'Authorization: Bearer " + token + "' 'http://207.57.129.228/api/v1/articles?limit=2'") -print(f"\n[GET /api/v1/articles?limit=2 带 token]") -print(f" -> {out.strip()[:500]}") - -# 测静态资源(favicon) -out = run("curl -sS -o /dev/null -w 'status=%{http_code} type=%{content_type}\\n' http://207.57.129.228/favicon.svg") -print(f"\n[GET /favicon.svg]") -print(f" -> {out.strip()}") - -# ========== 2. 去重审计 ========== -print("\n" + "=" * 60) -print("2. 去重审计") -print("=" * 60) - -# a) 同一 url_hash 重复数(应该是 0,UNIQUE 约束) -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) - count(DISTINCT url_hash) FROM articles;\"") -print(f"\n[a) 重复 url_hash 数量(应为 0): {out.strip()}") - -# b) 同一 url 重复数(可能 url_hash 已经 normalize 过,检查原始 url) -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT url, count(*) FROM articles GROUP BY url HAVING count(*) > 1 LIMIT 5;\"") -print(f"\n[b) 重复 URL(可能含 utm_* 差异):") -print(f" {out if out.strip() else ' (无)'}") - -# c) 同源 / 同标题 / 同一天的,看是不是转载 -print("\n[c] 标题相似度去重检查(前 50 字符完全相同):") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT LEFT(title, 60), count(*), array_agg(DISTINCT source_id) FROM articles GROUP BY LEFT(title, 60) HAVING count(*) > 1 ORDER BY count(*) DESC LIMIT 5;\"") -print(f" {out if out.strip() else ' (无)'}") - -# d) duplicate_of 字段使用情况 -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE duplicate_of IS NOT NULL;\"") -print(f"\n[d) duplicate_of 非空的 article 数: {out.strip()}") - -# e) 抓取日志:reuters 失败时是不是会反复重试 -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT slug, last_status, consecutive_failures, fetch_interval_min FROM sources ORDER BY id;\"") -print(f"\n[e) 源状态(reuters 失败后 interval 翻倍,看是不是还在重试):") -print(out) -c.close() diff --git a/scripts/_kick2.py b/scripts/_kick2.py deleted file mode 100644 index f111eca..0000000 --- a/scripts/_kick2.py +++ /dev/null @@ -1,28 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=30): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 单独跑 BBC 抓取 + 完整日志 -print("--- BBC 单独抓取 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import fetch_one_source; asyncio.run(fetch_one_source(2))'\" 2>&1 | tail -30", t=60) - -# 直接 curl bbc 看 -print("\n--- 容器内 curl bbc ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio, httpx, feedparser; async def t(): r = await httpx.AsyncClient(follow_redirects=True).get(\\\"https://feeds.bbci.co.uk/news/world/rss.xml\\\"); print(\\\"status:\\\", r.status_code, \\\"len:\\\", len(r.text)); f = feedparser.parse(r.text); print(\\\"entries:\\\", len(f.entries)); print(\\\"first title:\\\", f.entries[0].title if f.entries else None); asyncio.run(t())'\" 2>&1 | tail -10") - -# 试 feedparser 能否解析 -print("\n--- 查 article ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, source_id, title, translation_status, published_at FROM articles LIMIT 3;'\" 2>&1 | tail -10") -c.close() diff --git a/scripts/_kick3.py b/scripts/_kick3.py deleted file mode 100644 index ed2f1b8..0000000 --- a/scripts/_kick3.py +++ /dev/null @@ -1,40 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=120): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 1) pull -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") - -# 2) 重建 worker -print("--- 重建 worker ---") -run("cd /srv/news && sg docker -c 'docker compose up -d --force-recreate --no-deps --build worker' 2>&1 | tail -5", t=120) -import time -time.sleep(5) - -# 3) 禁用 reuters(URL 不对) -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c \\\"UPDATE sources SET enabled = FALSE WHERE slug = 'reuters-world';\\\"\" 2>&1 | tail -3") - -# 4) 触发抓取 -print("--- 抓取 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -20", t=180) - -# 5) 查 article -print("--- article ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) total, count(title_zh) translated FROM articles;'\" 2>&1 | tail -5") - -# 6) 源状态 -print("--- 源状态 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT slug, last_status, consecutive_failures, fetch_interval_min FROM sources ORDER BY id;'\" 2>&1 | tail -10") -c.close() diff --git a/scripts/_kick_off.py b/scripts/_kick_off.py deleted file mode 100644 index 67d97ca..0000000 --- a/scripts/_kick_off.py +++ /dev/null @@ -1,42 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 1) 看 sources 状态 -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, slug, url, enabled, last_status, fetch_interval_min FROM sources;'\" 2>&1 | tail -10") - -# 2) 修 Reuters URL(看新闻组/Google News 找替代) -# Reuters 把 RSS feed 改成了新域名,或者直接用 Google News -# 简单方案: 改 slug=reuters-world 的 url -# 试 https://www.reutersagency.com/feed/?best-topics=top-news -print("\n--- 更新 reuters url ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c \\\"UPDATE sources SET url = 'https://www.reutersagency.com/feed/?best-topics=top-news&posttype=post', last_status = NULL, consecutive_failures = 0, fetch_interval_min = 30 WHERE slug = 'reuters-world';\\\"\" 2>&1 | tail -3") - -# 3) 测新 URL -print("\n--- 测新 url 解析 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts www.reutersagency.com\" 2>&1 | tail -3") - -# 4) 触发所有源抓取 -print("\n--- 触发抓取 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio; from app.workers.pipeline import run_once; asyncio.run(run_once())'\" 2>&1 | tail -20", t=180) - -# 5) 看 article 数 -print("\n--- article 数 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT count(*) total, count(title_zh) translated FROM articles;'\" 2>&1 | tail -5") - -# 6) 看 sources 状态 -print("\n--- 源状态 ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT slug, last_status, consecutive_failures FROM sources ORDER BY id;'\" 2>&1 | tail -10") -c.close() diff --git a/scripts/_logs.py b/scripts/_logs.py deleted file mode 100644 index c691d46..0000000 --- a/scripts/_logs.py +++ /dev/null @@ -1,12 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) - -# 用 invoke_shell 拉日志 -import time -si, so, se = c.exec_command("docker logs --tail=80 news-aggregator-api-1 2>&1", timeout=20) -out = so.read().decode("utf-8", "replace") -print(out) -c.close() diff --git a/scripts/_net_check.py b/scripts/_net_check.py deleted file mode 100644 index 9d45b16..0000000 --- a/scripts/_net_check.py +++ /dev/null @@ -1,26 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -run("cd /srv/news && sg docker -c \"docker compose exec -T worker cat /etc/resolv.conf\" 2>&1 | tail -5") -print("---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts google.com\" 2>&1 | tail -5") -print("---") -# 测一个明确的域名 -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import socket; print(socket.gethostbyname(\\\"feeds.reuters.com\\\"))'\" 2>&1 | tail -5") -print("---") -# 测宿主机的网络 -run("curl -s -o /dev/null -w '%{http_code}\\n' https://feeds.reuters.com/Reuters/worldNews 2>&1") -c.close() diff --git a/scripts/_net_check2.py b/scripts/_net_check2.py deleted file mode 100644 index 375fa14..0000000 --- a/scripts/_net_check2.py +++ /dev/null @@ -1,25 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=20): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 测不同 DNS -for domain in ["feeds.reuters.com", "feeds.bbci.co.uk", "www.aljazeera.com", "www3.nhk.or.jp", "rss.dw.com"]: - r = run(f"cd /srv/news && sg docker -c \"docker compose exec -T worker getent hosts {domain}\" 2>&1 | tail -2", t=10) - print(f" => {domain}: {'OK' if 'Address' in r or any(c.isdigit() for c in r) else 'FAIL'}") - -# 容器内抓一下 bbc -print("\n--- 容器内 fetch bbc ---") -run("cd /srv/news && sg docker -c \"docker compose exec -T worker python -c 'import asyncio, httpx; r = await httpx.AsyncClient().get(\\\"https://feeds.bbci.co.uk/news/world/rss.xml\\\"); print(\\\"status:\\\", r.status_code, \\\"len:\\\", len(r.text))'\" 2>&1 | tail -5") -c.close() diff --git a/scripts/_push_key.py b/scripts/_push_key.py deleted file mode 100644 index 14f1412..0000000 --- a/scripts/_push_key.py +++ /dev/null @@ -1,54 +0,0 @@ -import os, sys, paramiko -HOST = "207.57.129.228" -PORT = 19717 -USER = "root" -PW = os.environ.get("REMOTE_PASS", "") -PUB = os.path.expanduser("~/.ssh/id_rsa.pub") -if not PW: - print("REMOTE_PASS not set", file=sys.stderr); sys.exit(2) - -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -try: - c.connect(HOST, port=PORT, username=USER, password=PW, timeout=15, allow_agent=False, look_for_keys=False) -except Exception as e: - print("CONNECT FAIL:", e, file=sys.stderr); sys.exit(1) - -def run(cmd, check=False): - si, so, se = c.exec_command(cmd, timeout=15) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - if out: print(out, end="") - if err: print("[err]", err, end="", file=sys.stderr) - if check and (so.channel.recv_exit_status() != 0): - raise SystemExit(f"cmd failed: {cmd}") - -run("mkdir -p /root/.ssh && chmod 700 /root/.ssh") -run("touch /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys") - -pub = open(PUB, encoding="utf-8").read().strip() -marker = "news-deploy-key" -if marker not in pub: - pub = pub + " " + marker - -# 用 sftp 写文件(避免 shell 转义) -sftp = c.open_sftp() -ak_path = "/root/.ssh/authorized_keys" -existing = "" -try: - with sftp.open(ak_path, "r") as f: - existing = f.read().decode("utf-8", "replace") -except IOError: - pass - -if marker in existing: - print("[ok] public key already present, skip") -else: - with sftp.open(ak_path, "a") as f: - f.write(pub + "\n") - print("[ok] appended public key to", ak_path) - -sftp.close() -run("ls -la /root/.ssh/ && echo '---' && wc -l /root/.ssh/authorized_keys") -c.close() -print("DONE") diff --git a/scripts/_re_translate.py b/scripts/_re_translate.py deleted file mode 100644 index f13d078..0000000 --- a/scripts/_re_translate.py +++ /dev/null @@ -1,59 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=120): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 1) 找带错误信息的文章(翻译状态虽然 ok 但字段里带"翻译失败"字样) -print("--- 找出还残留错误标记的文章 ---") -n = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE title_zh LIKE '%[翻译失败:%' OR body_zh_text LIKE '%[翻译失败:%';\"") -print(f" 残留错误文章数: {n.strip()}") - -# 2) 改回 pending -print("\n--- 批量回退到 pending ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending', title_zh = NULL, body_zh_text = NULL, body_zh_html = NULL WHERE title_zh LIKE '%[翻译失败:%' OR body_zh_text LIKE '%[翻译失败:%';\" 2>&1 | tail -3") - -# 3) 触发 worker 翻译 -print("\n--- 触发翻译(120s 等待)---") -run("cd /srv/news && docker exec news-aggregator-worker-1 python -c 'import asyncio; from app.workers.pipeline import _translate_recent_for_source; async def t(): [await _translate_recent_for_source(sid, max_n=300) for sid in [2,3,4,5]]; asyncio.run(t())' 2>&1 | tail -10", t=180) -import time -time.sleep(10) - -# 4) 翻译后统计 -print("\n--- 翻译后统计 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, translation_engine, count(*), sum(translation_chars) FROM articles GROUP BY 1, 2 ORDER BY 1, 2;\"") - -# 5) 看一条 BBC 详情 -print("\n--- BBC 详情 ---") -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -out = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles?source=bbc-world&limit=1'") -ad = json.loads(out) -if ad.get("items"): - aid = ad["items"][0]["id"] - out = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles/{aid}'") - try: - det = json.loads(out) - print(f"\n=== {det['source']['name']} #{aid} ===") - print(f" title: {det['title'][:80]}") - print(f" title_zh: {(det.get('title_zh') or '—')[:80]}") - print(f" body_text 长度: {len(det['body_text'])}") - print(f" body_zh_text 长度: {len(det.get('body_zh_text') or '')}") - if det.get("body_zh_text"): - print(f"\n 译文(前 600 字符):") - print(f" {det['body_zh_text'][:600]}") - except Exception as e: - print(f" err: {e}\n raw: {out[:200]}") - -# 6) /me/usage -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f"\n--- /me/usage ---\n {u}") -c.close() diff --git a/scripts/_rebuild_test.py b/scripts/_rebuild_test.py deleted file mode 100644 index 449f535..0000000 --- a/scripts/_rebuild_test.py +++ /dev/null @@ -1,26 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# pull + 重建 api -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") -run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build api 2>&1 | tail -5", t=120) -import time -time.sleep(6) - -# 登录 + 拉详情 -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -out = run("curl -s -w '\nstatus=%{http_code}\n' -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/175177") -print("\n--- 详情响应 ---") -print(out[:1000]) -c.close() diff --git a/scripts/_recover.py b/scripts/_recover.py deleted file mode 100644 index 8c6659b..0000000 --- a/scripts/_recover.py +++ /dev/null @@ -1,36 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) - -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 1) 服务器 pull -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -5") - -# 2) 重建 api + worker 容器(代码变更需要重启) -run("cd /srv/news && sg docker -c 'docker compose up -d --build --no-deps api worker' 2>&1 | tail -10") - -# 3) 等启动 -import time -time.sleep(15) - -# 4) 跑 seed -run("cd /srv/news && sg docker -c \"docker compose exec -T api python -m app.scripts.seed_sources\" 2>&1 | tail -20") - -# 5) 看 docker ps -run("cd /srv/news && sg docker -c 'docker compose ps' 2>&1 | head -15") - -# 6) 健康 -run("curl -s http://localhost/api/v1/healthz 2>&1") -c.close() diff --git a/scripts/_redeploy_worker.py b/scripts/_redeploy_worker.py deleted file mode 100644 index 26d39a1..0000000 --- a/scripts/_redeploy_worker.py +++ /dev/null @@ -1,52 +0,0 @@ -"""pull + 重建 worker + 扫描 DB 把翻译失败的改回 pending + 看新 worker 跑起来。""" -import os, paramiko, json, time -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err and "Warning" not in err and "warn" not in err: print(err, end="", file=__import__("sys").stderr) - return out - -# 1) pull -print("--- pull ---") -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") - -# 2) 重建 worker -print("--- 重建 worker ---") -run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker 2>&1 | tail -5", t=120) -time.sleep(5) - -# 3) 扫描 DB:title_zh/body_zh_text 含 '翻译失败' 改回 pending -print("--- DB 扫描 ---") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE title_zh LIKE '%[翻译失败:%' OR body_zh_text LIKE '%[翻译失败:%' OR body_zh_html LIKE '%[翻译失败:%';\"") -print(f" 含 '翻译失败' 占位符的文章数: {out.strip()}") -n = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"UPDATE articles SET translation_status='pending', title_zh=NULL, body_zh_text=NULL, body_zh_html=NULL, translated_at=NULL, translation_engine=NULL, translation_chars=0 WHERE title_zh LIKE '%[翻译失败:%' OR body_zh_text LIKE '%[翻译失败:%' OR body_zh_html LIKE '%[翻译失败:%';\"") -print(f" UPDATE 状态: {n.strip()}") - -# 4) 看 worker 是否在跑 translation_loop -print("\n--- worker 日志(看 translation_loop 启动 + 节奏)---") -time.sleep(15) -out = run("docker logs --tail=50 news-aggregator-worker-1 2>&1 | tail -30", t=15) -print(out) - -# 5) 等 30 秒再看(应该已经翻译 30 篇左右) -print("\n--- 等 30 秒看翻译进度 ---") -time.sleep(30) -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"") -print(out) - -# 6) redis usage -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -out = run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' GET translation:month:202606 2>/dev/null") -print(f"\n--- redis usage: {out.strip()}") - -# 7) 验证 fetch_one_source 不再自动翻译(看 worker 日志确认) -print("\n--- worker 进程信息 ---") -run("docker ps --filter 'name=news-aggregator-worker' --format 'table {{.Names}}\\t{{.Status}}'") -c.close() diff --git a/scripts/_redis2.py b/scripts/_redis2.py deleted file mode 100644 index 8c304d1..0000000 --- a/scripts/_redis2.py +++ /dev/null @@ -1,22 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -# 看 translation:month -print("--- 查 usage key ---") -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' KEYS 'translation:month*' 2>&1 | grep -v Warning") -print(out) -out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning") -print(f" GET: {out.strip()}") -c.close() diff --git a/scripts/_redis_check.py b/scripts/_redis_check.py deleted file mode 100644 index dcfcaa8..0000000 --- a/scripts/_redis_check.py +++ /dev/null @@ -1,27 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 拿 REDIS_PASSWORD -rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip() -print(f"REDIS_PASSWORD (前 6): {rpw[:6]}...") - -# 直接 docker exec redis-cli 用 -a -print("\n--- 用 docker exec 直接查 ---") -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET translation:month:202606 2>&1") -run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' KEYS 'translation:*' 2>&1") - -# 看 API 容器里 service.py 调 add_usage 的逻辑 -print("\n--- 测试 add_usage ---") -run(f"docker exec news-aggregator-api-1 python -c \"import asyncio; from app.services.translation.service import service; asyncio.run(service.add_usage(100)); print('done')\"", t=15) -c.close() diff --git a/scripts/_restart_caddy.py b/scripts/_restart_caddy.py deleted file mode 100644 index ff8bc4b..0000000 --- a/scripts/_restart_caddy.py +++ /dev/null @@ -1,29 +0,0 @@ -import os, paramiko, time -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) - -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3") -# 重启 caddy + api -run("cd /srv/news && sg docker -c 'docker compose up -d --force-recreate caddy api' 2>&1 | tail -8") -time.sleep(8) - -print("\n=== 验证 ===") -run("curl -s -o /dev/null -w 'healthz: %{http_code}\\n' http://localhost/api/v1/healthz") -run("curl -s http://localhost/api/v1/healthz 2>&1") -run("curl -s -o /dev/null -w 'articles (no auth): %{http_code}\\n' http://localhost/api/v1/articles") -run("curl -s http://localhost/api/v1/articles 2>&1") -run("curl -s -o /dev/null -w 'login OPTIONS: %{http_code}\\n' -X OPTIONS http://localhost/api/v1/auth/login") -c.close() diff --git a/scripts/_run_deploy.py b/scripts/_run_deploy.py deleted file mode 100644 index 5a0edcf..0000000 --- a/scripts/_run_deploy.py +++ /dev/null @@ -1,24 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) - -def run(cmd, t=30): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - -# 1) 服务器 git pull -run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -10") - -# 2) 重跑部署脚本(直接重跑,前面的 docker 镜像已构建缓存) -run("pkill -f deploy_news.sh 2>/dev/null; sleep 2; rm -f /root/deploy_news.log; echo ===restart===") -si, so, se = c.exec_command("nohup env SSHD_PORT=19717 bash /root/deploy_news.sh > /root/deploy_news.log 2>&1 & echo $!", timeout=10) -print(f"deploy started, PID={so.read().decode().strip()}") -c.close() diff --git a/scripts/_set_tencent.py b/scripts/_set_tencent.py deleted file mode 100644 index 5bc7453..0000000 --- a/scripts/_set_tencent.py +++ /dev/null @@ -1,53 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -SECRET_ID = "AKIDy2Ln7OZaUPK5cv5GPXS9c4WpHlHdu035" -SECRET_KEY = "1CBxUmAWifQ1PYpNn9JEwTmqshJzRggS" - -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=60): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 1) 备份 -run("cp /srv/news/.env /srv/news/.env.bak.$(date +%s) 2>&1") - -# 2) 用 sed 替换 TENCENTCLOUD_SECRET_ID / KEY(用 | 分隔避免 / 冲突) -run(f"sed -i 's|^TENCENTCLOUD_SECRET_ID=.*|TENCENTCLOUD_SECRET_ID={SECRET_ID}|' /srv/news/.env") -run(f"sed -i 's|^TENCENTCLOUD_SECRET_KEY=.*|TENCENTCLOUD_SECRET_KEY={SECRET_KEY}|' /srv/news/.env") - -# 3) 确认 -print("\n--- 写入后 .env TENCENT 字段 ---") -run("grep TENCENTCLOUD /srv/news/.env") - -# 4) 重启 worker + api -print("\n--- 重启 worker + api ---") -run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker api 2>&1 | tail -8", t=120) -import time -time.sleep(8) - -# 5) 测翻译(取一条没翻译好的文章,重译) -print("\n--- 找一条 pending 状态的 article ---") -aid_out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT id FROM articles WHERE translation_status IN ('pending', 'failed') LIMIT 1;\"") -aid = aid_out.strip() -print(f" article id = {aid!r}") - -if aid: - print(f"\n--- 手动重译 article {aid} ---") - run(f"cd /srv/news && docker exec news-aggregator-api-1 python -c 'import asyncio; from app.workers.pipeline import translate_article; asyncio.run(translate_article({aid}))' 2>&1 | tail -15", t=120) - -# 6) 查翻译结果 -print("\n--- 看翻译结果 ---") -if aid: - run(f"docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, translation_status, translation_engine, translation_chars, left(title_zh, 80) as title_zh FROM articles WHERE id = {aid};\"") - -# 7) 全局统计 -print("\n--- 翻译统计 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, translation_engine, count(*), sum(translation_chars) FROM articles GROUP BY translation_status, translation_engine ORDER BY 1, 2;\"") -c.close() diff --git a/scripts/_show_detail.py b/scripts/_show_detail.py deleted file mode 100644 index dd9b8b7..0000000 --- a/scripts/_show_detail.py +++ /dev/null @@ -1,42 +0,0 @@ -"""直接看 API 返回的 article 175177 的完整内容。""" -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 1) 拉详细 JSON -print("--- /api/v1/articles/175177 详情 ---") -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -out = run("curl -s -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/175177") -det = json.loads(out) -print(json.dumps(det, ensure_ascii=False, indent=2)) - -# 2) 试 trafilatura 抓 Al Jazeera 全文 -print("\n\n--- 试 trafilatura 抓 Ronaldo 全文 ---") -script = ''' -import asyncio, httpx, trafilatura -async def main(): - url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity" - async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c: - r = await c.get(url, headers={"User-Agent": "Mozilla/5.0"}) - print("status:", r.status_code, "len:", len(r.text)) - extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True) - print("extracted len:", len(extracted or "")) - print("---") - print((extracted or "")[:1000]) -asyncio.run(main()) -''' -import base64 -b64 = base64.b64encode(script.encode()).decode() -run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'") -run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -30", t=30) -c.close() diff --git a/scripts/_show_full.py b/scripts/_show_full.py deleted file mode 100644 index bc32ece..0000000 --- a/scripts/_show_full.py +++ /dev/null @@ -1,39 +0,0 @@ -"""找一篇英文(非 NHK 日文)已翻译文章,看 body_zh_text 长度。""" -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 找一篇 BBC/Al Jazeera/DW 已翻译(body 长度大,翻译后) -print("--- 英文(非日文)文章 body 长度 top 5 ---") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, source_id, LEFT(title, 60) AS title, length(body_text) AS txt, length(body_zh_text) AS zh, translation_status FROM articles WHERE translation_status = 'ok' AND source_id != 4 AND length(body_zh_text) > 200 ORDER BY length(body_zh_text) DESC LIMIT 5;\"") -print(out) - -# 拉一篇最长的看实际翻译 -print("\n--- 拉一篇最长的英文文章详情 ---") -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] - -# 找 ID -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT id FROM articles WHERE translation_status = 'ok' AND source_id != 4 AND length(body_zh_text) > 200 ORDER BY length(body_zh_text) DESC LIMIT 1;\"") -aid = out.strip() -print(f"article id = {aid}") -out = run("curl -s -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/" + aid) -det = json.loads(out) -print(f"\ntitle: {det['title'][:80]}") -print(f"title_zh: {det.get('title_zh', '—')[:80]}") -print(f"body_text: {len(det['body_text'])} 字符") -print(f"body_zh_text: {len(det.get('body_zh_text') or '')} 字符") -print(f"\n--- body 原文(前 400 字符) ---") -print(det['body_text'][:400]) -print(f"\n--- body 译文(前 500 字符) ---") -print((det.get('body_zh_text') or '—')[:500]) -c.close() diff --git a/scripts/_show_translation.py b/scripts/_show_translation.py deleted file mode 100644 index 52f99a3..0000000 --- a/scripts/_show_translation.py +++ /dev/null @@ -1,51 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=20): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 拿一条已翻译的(随便挑) -print("--- 拉 3 篇文章看译文 ---") -out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, source_id, title, title_zh, translation_engine, translation_chars, lang_src FROM articles WHERE translation_status = 'ok' ORDER BY translation_chars DESC LIMIT 3;\"") -print(out) - -# 拿一条详情,看完整翻译 -print("\n--- 登录 + 拉详情 ---") -body = json.dumps({"username": "owner", "password": "Owner2026!"}) -out = run(f"curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{body}'") -token = json.loads(out)["access_token"] - -# 找一篇 BBC 的(大概率有图) -out = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles?source=bbc-world&limit=1'") -ad = json.loads(out) -if ad.get("items"): - aid = ad["items"][0]["id"] - out = run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/articles/{aid}'") - det = json.loads(out) - print(f"\n=== {det['source']['name']} - {det['title'][:60]} ===") - print(f"\n原文标题: {det['title'][:120]}") - print(f"中文标题: {(det.get('title_zh') or '—')[:120]}") - print(f"\n原文(前 300): {det['body_text'][:300]}") - print(f"\n译文(前 400): {(det.get('body_zh_text') or '—')[:400]}") - print(f"\nstatus: {det['translation_status']}") - print(f"engine: {det.get('translation_engine')}") - print(f"chars: {det.get('translation_chars', '?')}") - print(f"img: {det.get('image_url', '—')[:80]}") - -# 用量 -print("\n--- /me/usage ---") -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f" {u}") - -# 容器状态 -print("\n--- docker ps ---") -run("docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}' 2>&1 | tail -10") -c.close() diff --git a/scripts/_smoke_test.py b/scripts/_smoke_test.py deleted file mode 100644 index a000d35..0000000 --- a/scripts/_smoke_test.py +++ /dev/null @@ -1,60 +0,0 @@ -import os, paramiko -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) - -def run(cmd, t=15): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - print(f"$ {cmd}") - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - print(f" rc={rc}") - return out - -# 1) 看 users -run("cd /srv/news && sg docker -c \"docker compose exec -T postgres psql -U news -d news -c 'SELECT id, username, role, length(password_hash) AS pwlen, created_at FROM users;'\" 2>&1 | tail -10") - -# 2) 试 owner_pass 文件 -run("echo '---owner_pass file---'; cat /root/.owner_pass; echo") - -# 3) 重新生成 owner 密码 -new_pw = "owner_pass_2026" -print(f"\n=== 重设 owner 密码为: {new_pw} ===") -run(f"cd /srv/news && sg docker -c \"docker compose exec -T api python -m app.scripts.create_user --username owner --password {new_pw}\" 2>&1 | tail -10") - -# 4) 重试登录 -import urllib.request, json -req = urllib.request.Request( - "http://localhost/api/v1/auth/login", - data=json.dumps({"username": "owner", "password": new_pw}).encode(), - headers={"Content-Type": "application/json"}, -) -try: - resp = urllib.request.urlopen(req, timeout=10) - data = json.loads(resp.read()) - print(f"\n=== 登录成功! token 前 40: {data['access_token'][:40]}... ===") - print(f" expires_in: {data['expires_in']}") - # 试拉 articles - req2 = urllib.request.Request( - "http://localhost/api/v1/articles?limit=3", - headers={"Authorization": f"Bearer {data['access_token']}"}, - ) - resp2 = urllib.request.urlopen(req2, timeout=10) - ad = json.loads(resp2.read()) - print(f" articles: {len(ad.get('items', []))} 条") - if ad.get("items"): - a = ad["items"][0] - print(f" sample: id={a['id']} src={a['source']['name']} status={a['translation_status']}") - print(f" title: {a['title'][:60]}") - if a.get("title_zh"): - print(f" title_zh: {a['title_zh'][:60]}") -except urllib.error.HTTPError as e: - print(f"\n[FAIL] {e.code} {e.reason}") - print(e.read().decode()) -except Exception as e: - print(f"\n[ERR] {e}") -c.close() diff --git a/scripts/_t_direct.py b/scripts/_t_direct.py deleted file mode 100644 index f873691..0000000 --- a/scripts/_t_direct.py +++ /dev/null @@ -1,16 +0,0 @@ -import asyncio -from app.services.translation.service import service -from app.redis_client import get_redis -async def main(): - r = get_redis(); await r.ping() - print('before:', await r.get('translation:month:202606') or 0, flush=True) - res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh') - print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True) - print('after 1:', await r.get('translation:month:202606') or 0, flush=True) - res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh') - print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True) - print('after 2:', await r.get('translation:month:202606') or 0, flush=True) - res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh') - print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True) - print('after 3:', await r.get('translation:month:202606') or 0, flush=True) -asyncio.run(main()) diff --git a/scripts/_trafilatura.py b/scripts/_trafilatura.py deleted file mode 100644 index bbc3421..0000000 --- a/scripts/_trafilatura.py +++ /dev/null @@ -1,35 +0,0 @@ -import os, paramiko, base64 -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=30): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - return out - -# 试 trafilatura 抓 Al Jazeera 全文 -script = ''' -import asyncio, httpx, trafilatura - -async def main(): - url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity" - async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c: - r = await c.get(url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36"}) - print("status:", r.status_code, "len:", len(r.text)) - extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True, output_format="json") - print("---JSON---") - print((extracted or "")[:2000]) - print() - print("---TEXT---") - text = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True, output_format="text") - print((text or "")[:2000]) -asyncio.run(main()) -''' -b64 = base64.b64encode(script.encode()).decode() -run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'") -run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -50", t=60) -c.close() diff --git a/scripts/_translate_all.py b/scripts/_translate_all.py deleted file mode 100644 index 9c0d17c..0000000 --- a/scripts/_translate_all.py +++ /dev/null @@ -1,53 +0,0 @@ -import os, paramiko, json -PW = os.environ["REMOTE_PASS"] -c = paramiko.SSHClient() -c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False) -def run(cmd, t=180): - si, so, se = c.exec_command(cmd, timeout=t) - out = so.read().decode("utf-8", "replace") - err = se.read().decode("utf-8", "replace") - rc = so.channel.recv_exit_status() - if out: print(out, end="") - if err: print("[err]", err, end="", file=__import__("sys").stderr) - return out - -# 写一个 python 脚本到 worker 容器(用 stdin pipe),直接翻译所有 pending -script = '''import asyncio -from app.workers.pipeline import translate_article -from app.database import AsyncSessionLocal -from app.models.article import Article -from sqlalchemy import select - -async def main(): - async with AsyncSessionLocal() as s: - rows = (await s.execute(select(Article.id).where(Article.translation_status == 'pending').order_by(Article.id))).all() - ids = [r[0] for r in rows] - print(f"translating {len(ids)} articles...") - for i, aid in enumerate(ids, 1): - try: - await translate_article(aid) - except Exception as e: - print(f" err on {aid}: {e}") - if i % 10 == 0: - print(f" {i}/{len(ids)} done") -asyncio.run(main()) -''' -# 用 docker exec -i 把脚本传进去 -print("--- 翻译所有 pending ---") -run(f"docker exec -i news-aggregator-worker-1 python -u -c \"{script.replace(chr(34), chr(92)+chr(34))}\"", t=600) - -print("\n--- 翻译后统计 ---") -run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, translation_engine, count(*), sum(translation_chars) FROM articles GROUP BY 1, 2 ORDER BY 1, 2;\"") - -# 看 usage -import urllib.request -out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'") -token = json.loads(out)["access_token"] -u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'")) -print(f"\n--- /me/usage ---\n {u}") - -# 看 redis -print("\n--- redis 计数 ---") -run("docker exec news-aggregator-redis-1 redis-cli -a '$(grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2)' GET translation:month:202606 2>&1 | tail -3") -c.close() diff --git a/scripts/_tscript.py b/scripts/_tscript.py deleted file mode 100644 index 867046c..0000000 --- a/scripts/_tscript.py +++ /dev/null @@ -1,16 +0,0 @@ -import asyncio -from app.services.translation.service import service -from app.redis_client import get_redis -async def main(): - r = get_redis(); await r.ping() - print('before:', await r.get('translation:month:202606') or 0, flush=True) - res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh') - print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True) - print('after 1:', await r.get('translation:month:202606') or 0, flush=True) - res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh') - print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True) - print('after 2:', await r.get('translation:month:202606') or 0, flush=True) - res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh') - print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True) - print('after 3:', await r.get('translation:month:202606') or 0, flush=True) -asyncio.run(main()) \ No newline at end of file