diff --git a/backend/app/api/articles.py b/backend/app/api/articles.py
index a3a77f7..700d516 100644
--- a/backend/app/api/articles.py
+++ b/backend/app/api/articles.py
@@ -95,7 +95,8 @@ async def list_articles(
 
     stmt = stmt.order_by(desc(Article.published_at), desc(Article.id)).limit(limit + 1)
 
-    rows = (await session.execute(stmt)).all()
+    result = await session.execute(stmt)
+    rows = result.all()
     has_more = len(rows) > limit
     rows = rows[:limit]
 
@@ -140,14 +141,12 @@ async def get_article(
     user: User = Depends(get_current_user),
     session: AsyncSession = Depends(get_session),
 ):
-    art = (
-        await session.execute(
-            select(Article, Source)
-            .join(Source, Source.id == Article.source_id)
-            .where(Article.id == article_id)
-        )
-        .first()
+    result = await session.execute(
+        select(Article, Source)
+        .join(Source, Source.id == Article.source_id)
+        .where(Article.id == article_id)
     )
+    art = result.first()
     if not art:
         raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found")
     article, source = art
diff --git a/scripts/_check_body.py b/scripts/_check_body.py
new file mode 100644
index 0000000..65ac455
--- /dev/null
+++ b/scripts/_check_body.py
@@ -0,0 +1,52 @@
+"""Inspect the body fields of the Ronaldo article (id 175177)."""
+import os, paramiko
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=15):  # Run cmd on the server over SSH, echo its stdout, and return it (stderr is read but not shown).
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    return out
+
+# 1) Look at the body fields
+print("--- 文章 body 字段(可能是空)---")
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, title, length(body_html) as html_len, length(body_text) as text_len, length(body_zh_text) as zh_len, lang_src, translation_status, url FROM articles WHERE id = 175177;\"")
+
+# 2) Look at a few typical aljazeera articles (the query returns the 5 most recent)
+print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 3 ORDER BY fetched_at DESC LIMIT 5;\"")
+
+# 3) Sample BBC (probably the richest source)
+print("\n--- 抽 3 篇 BBC 看 body ---")
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 2 ORDER BY fetched_at DESC LIMIT 5;\"")
+
+# 4) Fetch the RSS feed to see whether Al Jazeera ships any content at all
+print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")
+script = b'''
+import asyncio, feedparser, httpx
+async def main():
+    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
+        r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
+    f = feedparser.parse(r.text)
+    for e in f.entries[:3]:
+        print("---")
+        print("title:", e.title)
+        print("link:", e.link)
+        print("has content:", bool(e.get("content")))
+        if e.get("content"):
+            print("content[0] keys:", list(e["content"][0].keys()))
+            print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])
+        print("has summary:", bool(e.get("summary")))
+        if e.get("summary"):
+            print("summary[:200]:", e["summary"][:200])
+asyncio.run(main())
+'''
+import base64
+b64 = base64.b64encode(script).decode()
+run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30)
+c.close()
diff --git a/scripts/_direct2.py b/scripts/_direct2.py
new file mode 100644
index 0000000..758b767
--- /dev/null
+++ b/scripts/_direct2.py
@@ -0,0 +1,55 @@
+"""Reset the usage counter, then call the translation service directly to test the usage path.
+
+Approach: write the script locally, upload it with paramiko, copy it into the worker container, run it via docker exec."""
+import os, paramiko, base64, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=60):  # Run cmd on the server over SSH, echo stdout (and stderr without "Warning"), return stdout.
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr)
+    return out
+
+rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
+run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
+print("--- usage 重置 0 ---")
+
+# Build the script on this machine; it is uploaded to the server and copied into the worker container below
+script = (
+    "import asyncio\n"
+    "from app.services.translation.service import service\n"
+    "from app.redis_client import get_redis\n"
+    "async def main():\n"
+    " r = get_redis(); await r.ping()\n"
+    " print('before:', await r.get('translation:month:202606') or 0, flush=True)\n"
+    " res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n"
+    " print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)\n"
+    " print('after 1:', await r.get('translation:month:202606') or 0, flush=True)\n"
+    " res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')\n"
+    " print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)\n"
+    " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)\n"
+    " res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n"
+    " print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)\n"
+    " print('after 3:', await r.get('translation:month:202606') or 0, flush=True)\n"
+    "asyncio.run(main())\n"
+)
+local_path = "D:/selftools/diary-news/scripts/_t_direct.py"
+with open(local_path, "w", encoding="utf-8") as f:
+    f.write(script)
+c.open_sftp().put(local_path, "/tmp/_td.py")  # docker cp runs on the server, so upload the local file there first
+run("docker cp /tmp/_td.py news-aggregator-worker-1:/app/_td.py")
+print("--- 跑 ---")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_td.py 2>&1 | tail -15", t=30)
+
+# /me/usage  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
+print("\n--- /me/usage ---")
+print(" ", u)
+c.close()
diff --git a/scripts/_direct3.py b/scripts/_direct3.py
new file mode 100644
index 0000000..dd30c18
--- /dev/null
+++ b/scripts/_direct3.py
@@ -0,0 +1,57 @@
+"""Reset + call the service directly to test the usage path - feed the script via docker exec -i."""
+import os, paramiko, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=60):  # Run cmd on the server over SSH, echo stdout (and stderr without "Warning"), return stdout.
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr)
+    return out
+
+rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
+run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
+print("--- usage 重置 0 ---")
+
+# Feed the script through stdin
+script = '''import asyncio
+from app.services.translation.service import service
+from app.redis_client import get_redis
+async def main():
+    r = get_redis(); await r.ping()
+    print("before:", await r.get("translation:month:202606") or 0, flush=True)
+    res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
+    print(" call 1: engine=", res1.engine, "chars=", res1.chars, "text=", res1.text[:40], flush=True)
+    print("after 1:", await r.get("translation:month:202606") or 0, flush=True)
+    res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh")
+    print(" call 2: engine=", res2.engine, "chars=", res2.chars, flush=True)
+    print("after 2:", await r.get("translation:month:202606") or 0, flush=True)
+    res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
+    print(" call 3 (cache): cached=", res3.cached, "engine=", res3.engine, flush=True)
+    print("after 3:", await r.get("translation:month:202606") or 0, flush=True)
+asyncio.run(main())
+'''
+# The script is written into the worker container's /app directory
+# by piping it into `docker exec -i` on stdin.
+# NOTE: a plain run() of `docker exec -i ... cat > /app/_t.py` never sees EOF on stdin and times out, so that call was dropped.
+
+# Instead, drive the command's stdin explicitly (paramiko can send stdin)
+si, so, se = c.exec_command("docker exec -i -w /app news-aggregator-worker-1 sh -c 'cat > /app/_t.py && python /app/_t.py'", timeout=30)
+si.channel.sendall(script.encode("utf-8"))
+si.channel.shutdown_write()  # close stdin so docker knows there is no more input
+out = so.read().decode("utf-8", "replace")
+err = se.read().decode("utf-8", "replace")
+print(f"--- 跑 ---\n{out}")
+if err and "Warning" not in err: print("err:", err)
+
+# /me/usage  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
+print("\n--- /me/usage ---")
+print(" ", u)
+c.close()
diff --git a/scripts/_direct4.py b/scripts/_direct4.py
new file mode 100644
index 0000000..ae13202
--- /dev/null
+++ b/scripts/_direct4.py
@@ -0,0 +1,66 @@
+"""Simplest approach: write the script into the container, then run it with docker exec."""
+import os, paramiko, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=60):  # Run cmd on the server over SSH, echo stdout (and stderr without "Warning"), return stdout.
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    if err and "Warning" not in err: print(err, end="", file=__import__("sys").stderr)
+    return out
+
+rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
+
+# 1) Reset
+run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
+print("--- usage 重置 0 ---")
+
+# 2) Build the script that will be written to /tmp on the server in one go
+script_lines = [
+    "import asyncio",
+    "from app.services.translation.service import service",
+    "from app.redis_client import get_redis",
+    "async def main():",
+    " r = get_redis(); await r.ping()",
+    " print('before:', await r.get('translation:month:202606') or 0, flush=True)",
+    " res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')",
+    " print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)",
+    " print('after 1:', await r.get('translation:month:202606') or 0, flush=True)",
+    " res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')",
+    " print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)",
+    " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)",
+    " res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')",
+    " print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)",
+    " print('after 3:', await r.get('translation:month:202606') or 0, flush=True)",
+    "asyncio.run(main())",
+]
+script = "\n".join(script_lines)
+# Keep a local copy first
+local = "D:/selftools/diary-news/scripts/_tscript.py"
+with open(local, "w", encoding="utf-8") as f:
+    f.write(script)
+
+# Copy it to the server by streaming it into `cat` over the SSH channel's stdin
+si, so, se = c.exec_command("cat > /tmp/_t.py", timeout=10)
+with open(local, "r", encoding="utf-8") as f:
+    si.write(f.read().encode())
+si.channel.shutdown_write()
+so.read()
+print("--- script 写到 /tmp/_t.py ---")
+
+# Copy it into the worker container
+run("docker cp /tmp/_t.py news-aggregator-worker-1:/app/_t.py")
+print("--- 跑 ---")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -15", t=30)
+
+# /me/usage  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
+print("\n--- /me/usage ---")
+print(" ", u)
+c.close()
diff --git a/scripts/_direct_test.py b/scripts/_direct_test.py
new file mode 100644
index 0000000..b38cbfb
--- /dev/null
+++ b/scripts/_direct_test.py
@@ -0,0 +1,54 @@
+import os, paramiko, base64, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=60):  # Run cmd on the server over SSH, echo stdout and "[err]"-prefixed stderr, return stdout.
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    return out
+
+rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
+
+# Reset
+run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
+print("--- usage 重置 0 ---")
+
+# Call service.translate directly inside the worker container to confirm the usage path (bytes literal: ASCII only)
+script_b64 = base64.b64encode(b'''
+import asyncio, sys
+from app.services.translation.service import service
+from app.redis_client import get_redis
+
+async def main():
+    r = get_redis()
+    await r.ping()
+    print(f"before: {await r.get('translation:month:202606') or 0}", flush=True)
+    # 1) brand-new string -> goes to tencent
+    res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
+    print(f" call 1: engine={res1.engine} chars={res1.chars} text={res1.text[:40]!r}", flush=True)
+    print(f"after 1: {await r.get('translation:month:202606') or 0}", flush=True)
+    # 2) another sentence
+    res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh")
+    print(f" call 2: engine={res2.engine} chars={res2.chars}", flush=True)
+    print(f"after 2: {await r.get('translation:month:202606') or 0}", flush=True)
+    # 3) repeat the text of call 1 -> served from cache
+    res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
+    print(f" call 3 (cache): cached={res3.cached} engine={res3.engine}", flush=True)
+    print(f"after 3: {await r.get('translation:month:202606') or 0}", flush=True)
+asyncio.run(main())
+''').decode()
+run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt2.py'")
+print("--- worker 跑 ---")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_tt2.py 2>&1 | tail -15", t=30)
+
+# /me/usage  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
+print(f"\n--- /me/usage ---\n {u}")
+c.close()
diff --git a/scripts/_final4.py b/scripts/_final4.py
new file mode 100644
index 0000000..ebb5c9a
--- /dev/null
+++ b/scripts/_final4.py
@@ -0,0 +1,73 @@
+import os, paramiko, base64, json, time
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=120):  # Run cmd on the server over SSH, echo stdout and "[err]"-prefixed stderr, return stdout.
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    return out
+
+rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
+
+# 1) Pull on the server
+print("--- pull ---")
+run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")
+
+# 2) Rebuild worker + api
+print("--- 重建 ---")
+run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker api 2>&1 | tail -8", t=120)
+# Pause before touching the freshly recreated containers
+time.sleep(8)
+
+# 3) Reset usage to 0
+run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
+print("--- usage reset to 0 ---")
+
+# 4) Flip 5 articles back to pending to trigger translation
+print("--- 触发翻译(5 篇)---")
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending' WHERE id IN (SELECT id FROM articles WHERE translation_status = 'ok' ORDER BY id LIMIT 5);\" 2>&1 | tail -2")
+
+# 5) Run the worker pipeline to re-translate them
+script_b64 = base64.b64encode(b'''
+import asyncio
+from app.workers.pipeline import translate_article
+from app.database import AsyncSessionLocal
+from app.models.article import Article
+from sqlalchemy import select
+
+async def main():
+    async with AsyncSessionLocal() as s:
+        rows = (await s.execute(select(Article.id).where(Article.translation_status == 'pending').limit(10))).all()
+        ids = [r[0] for r in rows]
+    print(f"translating {len(ids)} pending")
+    for aid in ids:
+        await translate_article(aid)
+asyncio.run(main())
+''').decode()
+run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -10", t=120)
+
+# 6) Check usage
+print("\n--- redis usage ---")
+out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning")
+print(f" usage: {out.strip()}")
+
+# 7) /me/usage  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
+print(f"--- /me/usage ---\n {u}")
+
+# 8) Container status
+print("\n--- docker ps ---")
+run("docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}' 2>&1 | tail -10")
+
+# 9) Translation statistics afterwards
+print("\n--- 翻译统计 ---")
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")
+c.close()
diff --git a/scripts/_final5.py b/scripts/_final5.py
new file mode 100644
index 0000000..b056880
--- /dev/null
+++ b/scripts/_final5.py
@@ -0,0 +1,67 @@
+import os, paramiko, base64, json, time
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=120):  # Run cmd on the server over SSH, echo stdout and "[err]"-prefixed stderr, return stdout.
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    if err: print("[err]", err, end="", file=__import__("sys").stderr)
+    return out
+
+rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
+
+# Force 3 translated articles back to pending
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status='pending' WHERE id IN (SELECT id FROM articles WHERE translation_status='ok' ORDER BY id LIMIT 3);\" 2>&1 | tail -2")
+
+# Wait
+time.sleep(3)
+
+# Count pending articles
+out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE translation_status='pending';\"")
+print(f"pending articles: {out.strip()}")
+
+# Reset usage
+run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
+
+# Run the worker to re-translate
+script_b64 = base64.b64encode(b'''
+import asyncio
+from app.workers.pipeline import translate_article
+from app.database import AsyncSessionLocal
+from app.models.article import Article
+from sqlalchemy import select
+
+async def main():
+    async with AsyncSessionLocal() as s:
+        rows = (await s.execute(select(Article).where(Article.translation_status=='pending').limit(5))).all()
+        # each row is a one-element tuple holding the Article; only its id is used
+        ids = [r[0].id for r in rows]
+    print(f"translating {len(ids)}")
+    for aid in ids:
+        try:
+            await translate_article(aid)
+        except Exception as e:
+            print(f" err on {aid}: {e}")
+    print("done")
+asyncio.run(main())
+''').decode()
+run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -20", t=180)
+
+# Check usage
+out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning")
+print(f"\n--- redis usage: {out.strip()}")
+
+# /me/usage  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
+print(f"--- /me/usage: {u}")
+
+# Translation statistics afterwards
+run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")
+c.close()
diff --git a/scripts/_http_check.py b/scripts/_http_check.py
new file mode 100644
index 0000000..47bc228
--- /dev/null
+++ b/scripts/_http_check.py
@@ -0,0 +1,81 @@
+"""Check the de-duplication logic + live HTTP smoke test."""
+import os, paramiko, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=15):  # Run cmd on the server over SSH, echo its stdout, and return it (stderr is read but not shown).
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    return out
+
+# ========== 1. Hit the HTTP endpoints ==========
+print("=" * 60)
+print("1. HTTP 实测")
+print("=" * 60)
+
+# Home page (Caddy forwards to the frontend)
+out = run("curl -sS -o /tmp/idx.html -w 'status=%{http_code} size=%{size_download} type=%{content_type}\\n' http://207.57.129.228/")
+print(f"\n[GET /]")
+print(f" -> {out.strip()}")
+out = run("head -c 200 /tmp/idx.html")
+print(f" body[0:200]: {out}")
+
+# /api/v1/healthz
+out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/healthz")
+print(f"\n[GET /api/v1/healthz]")
+print(f" -> {out.strip()}")
+
+# /api/v1/articles (without a token this should be 401)
+out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/articles?limit=3")
+print(f"\n[GET /api/v1/articles 无 token]")
+print(f" -> {out.strip()[:300]}")
+
+# Log in  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+out = run("curl -sS -X POST http://207.57.129.228/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+data = json.loads(out)
+token = data.get("access_token", "")
+print(f"\n[POST /api/v1/auth/login]")
+print(f" -> token: {token[:40]}...")
+
+# /api/v1/articles with a token
+out = run("curl -sS -w '\nstatus=%{http_code}\n' -H 'Authorization: Bearer " + token + "' 'http://207.57.129.228/api/v1/articles?limit=2'")
+print(f"\n[GET /api/v1/articles?limit=2 带 token]")
+print(f" -> {out.strip()[:500]}")
+
+# Static asset check (favicon)
+out = run("curl -sS -o /dev/null -w 'status=%{http_code} type=%{content_type}\\n' http://207.57.129.228/favicon.svg")
+print(f"\n[GET /favicon.svg]")
+print(f" -> {out.strip()}")
+
+# ========== 2. De-duplication audit ==========
+print("\n" + "=" * 60)
+print("2. 去重审计")
+print("=" * 60)
+
+# a) Number of duplicated url_hash values (should be 0 given the UNIQUE constraint)
+out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) - count(DISTINCT url_hash) FROM articles;\"")
+print(f"\n[a) 重复 url_hash 数量(应为 0): {out.strip()}")
+
+# b) Duplicated raw urls (url_hash may already be normalized, so check the original url)
+out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT url, count(*) FROM articles GROUP BY url HAVING count(*) > 1 LIMIT 5;\"")
+print(f"\n[b) 重复 URL(可能含 utm_* 差异):")
+print(f" {out if out.strip() else ' (无)'}")
+
+# c) Same title prefix across rows, to spot reposts (NOTE(review): the SQL compares 60 chars, the label says 50)
+print("\n[c] 标题相似度去重检查(前 50 字符完全相同):")
+out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT LEFT(title, 60), count(*), array_agg(DISTINCT source_id) FROM articles GROUP BY LEFT(title, 60) HAVING count(*) > 1 ORDER BY count(*) DESC LIMIT 5;\"")
+print(f" {out if out.strip() else ' (无)'}")
+
+# d) How much the duplicate_of column is used
+out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE duplicate_of IS NOT NULL;\"")
+print(f"\n[d) duplicate_of 非空的 article 数: {out.strip()}")
+
+# e) Source state: see whether a failing source (reuters) keeps being retried
+out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT slug, last_status, consecutive_failures, fetch_interval_min FROM sources ORDER BY id;\"")
+print(f"\n[e) 源状态(reuters 失败后 interval 翻倍,看是不是还在重试):")
+print(out)
+c.close()
diff --git a/scripts/_logs.py b/scripts/_logs.py
new file mode 100644
index 0000000..c691d46
--- /dev/null
+++ b/scripts/_logs.py
@@ -0,0 +1,12 @@
+import os, paramiko
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+
+# Pull the last 80 lines of the API container log with exec_command;
+# the 2>&1 redirect merges stderr into stdout, so only stdout is read.
+si, so, se = c.exec_command("docker logs --tail=80 news-aggregator-api-1 2>&1", timeout=20)
+out = so.read().decode("utf-8", "replace")
+print(out)
+c.close()
diff --git a/scripts/_show_detail.py b/scripts/_show_detail.py
new file mode 100644
index 0000000..dd9b8b7
--- /dev/null
+++ b/scripts/_show_detail.py
@@ -0,0 +1,42 @@
+"""Show the full content the API returns for article 175177."""
+import os, paramiko, json
+PW = os.environ["REMOTE_PASS"]
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): trusts unknown host keys
+c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
+def run(cmd, t=15):  # Run cmd on the server over SSH, echo its stdout, and return it (stderr is read but not shown).
+    si, so, se = c.exec_command(cmd, timeout=t)
+    out = so.read().decode("utf-8", "replace")
+    err = se.read().decode("utf-8", "replace")
+    rc = so.channel.recv_exit_status()
+    if out: print(out, end="")
+    return out
+
+# 1) Fetch the detail JSON  (NOTE(review): the owner password is hard-coded; load it from the environment instead)
+print("--- /api/v1/articles/175177 详情 ---")
+out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
+token = json.loads(out)["access_token"]
+out = run("curl -s -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/175177")
+det = json.loads(out)
+print(json.dumps(det, ensure_ascii=False, indent=2))
+
+# 2) Try extracting the full Al Jazeera article text with trafilatura
+print("\n\n--- 试 trafilatura 抓 Ronaldo 全文 ---")
+script = '''
+import asyncio, httpx, trafilatura
+async def main():
+    url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity"
+    async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c:
+        r = await c.get(url, headers={"User-Agent": "Mozilla/5.0"})
+    print("status:", r.status_code, "len:", len(r.text))
+    extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True)
+    print("extracted len:", len(extracted or ""))
+    print("---")
+    print((extracted or "")[:1000])
+asyncio.run(main())
+'''
+import base64
+b64 = base64.b64encode(script.encode()).decode()
+run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'")
+run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -30", t=30)
+c.close()
diff --git a/scripts/_t_direct.py b/scripts/_t_direct.py
new file mode 100644
index 0000000..f873691
--- /dev/null
+++ b/scripts/_t_direct.py
@@ -0,0 +1,16 @@
+import asyncio
+from app.services.translation.service import service
+from app.redis_client import get_redis
+async def main():
+    r = get_redis(); await r.ping()
+    print('before:', await r.get('translation:month:202606') or 0, flush=True)
+    res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
+    print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)
+    print('after 1:', await r.get('translation:month:202606') or 0, flush=True)
+    res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')
+    print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)
+    print('after 2:', await r.get('translation:month:202606') or 0, flush=True)
+    res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
+    print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)
+    print('after 3:', await r.get('translation:month:202606') or 0, flush=True)
+asyncio.run(main())
diff --git a/scripts/_tscript.py b/scripts/_tscript.py
new file mode 100644
index 0000000..867046c
--- /dev/null
+++ b/scripts/_tscript.py
@@ -0,0 +1,16 @@
+import asyncio
+from app.services.translation.service import service
+from app.redis_client import get_redis
+async def main():
+    r = get_redis(); await r.ping()
+    print('before:', await r.get('translation:month:202606') or 0, flush=True)
+    res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
+    print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)
+    print('after 1:', await r.get('translation:month:202606') or 0, flush=True)
+    res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')
+    print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)
+    print('after 2:', await r.get('translation:month:202606') or 0, flush=True)
+    res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
+    print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)
+    print('after 3:', await r.get('translation:month:202606') or 0, flush=True)
+asyncio.run(main())
\ No newline at end of file