fix: articles.py get_article 链式 await coroutine 报错(.first())

This commit is contained in:
Mavis
2026-06-08 00:19:03 +08:00
parent cc02d39d29
commit e79cfaa5f7
13 changed files with 598 additions and 8 deletions

View File

@@ -95,7 +95,8 @@ async def list_articles(
stmt = stmt.order_by(desc(Article.published_at), desc(Article.id)).limit(limit + 1)
rows = (await session.execute(stmt)).all()
result = await session.execute(stmt)
rows = result.all()
has_more = len(rows) > limit
rows = rows[:limit]
@@ -140,14 +141,12 @@ async def get_article(
user: User = Depends(get_current_user),
session: AsyncSession = Depends(get_session),
):
art = (
await session.execute(
select(Article, Source)
.join(Source, Source.id == Article.source_id)
.where(Article.id == article_id)
)
.first()
result = await session.execute(
select(Article, Source)
.join(Source, Source.id == Article.source_id)
.where(Article.id == article_id)
)
art = result.first()
if not art:
raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found")
article, source = art

52
scripts/_check_body.py Normal file
View File

@@ -0,0 +1,52 @@
"""Inspect the body fields of the Ronaldo article."""
import os, paramiko
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=15):
    """Run ``cmd`` over the open SSH connection and return its stdout text.

    Stdout is echoed to the local stdout. Stderr used to be read and then
    dropped, which hid remote failures; it is now forwarded to the local
    stderr with an ``[err]`` prefix, matching the other helper scripts.

    Args:
        cmd: Shell command line to execute on the remote host.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    import sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Block until the remote command has exited; the status itself is not used.
    stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    return out
# 1) Query the body-length columns and translation metadata of article id 175177.
print("--- 文章 body 字段(可能是空)---")
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, title, length(body_html) as html_len, length(body_text) as text_len, length(body_zh_text) as zh_len, lang_src, translation_status, url FROM articles WHERE id = 175177;\"")
# 2) Body lengths of the 5 most recently fetched articles with source_id = 3 (labelled aljazeera in the header below).
print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 3 ORDER BY fetched_at DESC LIMIT 5;\"")
# 3) The same query for source_id = 2 (labelled BBC in the header below).
print("\n--- 抽 3 篇 BBC 看 body ---")
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 2 ORDER BY fetched_at DESC LIMIT 5;\"")
# 4) Fetch the Al Jazeera RSS feed and report whether its first 3 entries carry "content" / "summary".
print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")
# Source of a throwaway script that is executed inside the worker container below.
script = b'''
import asyncio, feedparser, httpx
async def main():
async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
f = feedparser.parse(r.text)
for e in f.entries[:3]:
print("---")
print("title:", e.title)
print("link:", e.link)
print("has content:", bool(e.get("content")))
if e.get("content"):
print("content[0] keys:", list(e["content"][0].keys()))
print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])
print("has summary:", bool(e.get("summary")))
if e.get("summary"):
print("summary[:200]:", e["summary"][:200])
asyncio.run(main())
'''
import base64
# Base64-encode the script so it passes through the nested shell quoting untouched.
b64 = base64.b64encode(script).decode()
# Decode the script into /app/_t.py inside the worker container, then run it and show the last 40 output lines.
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30)
c.close()

55
scripts/_direct2.py Normal file
View File

@@ -0,0 +1,55 @@
"""Reset the usage counter, then call the translation service directly to test the usage pipeline.
Implementation: use paramiko to write a script to a temporary file in the container, then run it with docker exec."""
import os, paramiko, base64, json
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=60):
    """Execute ``cmd`` over the open SSH connection and return its stdout text.

    Stdout is echoed as-is. Stderr is forwarded to the local stderr stream
    unless it is empty or contains the substring "Warning" anywhere.

    Args:
        cmd: Shell command line to run on the remote host.
        t: Timeout in seconds passed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    import sys

    _stdin, remote_out, remote_err = c.exec_command(cmd, timeout=t)
    out = remote_out.read().decode("utf-8", "replace")
    err_text = remote_err.read().decode("utf-8", "replace")
    # Block until the remote command has exited; the status itself is not used.
    remote_out.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err_text and "Warning" not in err_text:
        print(err_text, end="", file=sys.stderr)
    return out
# Read the Redis password from the server-side .env file.
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
# Delete the monthly usage counter so the run below starts from zero.
run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
print("--- usage 重置 0 ---")
# Probe script: translate two fresh strings, repeat the first one, and print
# the Redis usage counter before and after every call.
script = (
    "import asyncio\n"
    "from app.services.translation.service import service\n"
    "from app.redis_client import get_redis\n"
    "async def main():\n"
    " r = get_redis(); await r.ping()\n"
    " print('before:', await r.get('translation:month:202606') or 0, flush=True)\n"
    " res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n"
    " print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)\n"
    " print('after 1:', await r.get('translation:month:202606') or 0, flush=True)\n"
    " res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')\n"
    " print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)\n"
    " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)\n"
    " res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n"
    " print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)\n"
    " print('after 3:', await r.get('translation:month:202606') or 0, flush=True)\n"
    "asyncio.run(main())\n"
)
# The previous version wrote the script to a local D:/ path and then asked the
# REMOTE host to `docker cp` that path, which does not exist there. Ship the
# script base64-encoded inside the command instead, so no file has to exist on
# either the workstation or the server.
script_b64 = base64.b64encode(script.encode("utf-8")).decode("ascii")
run("docker exec news-aggregator-worker-1 sh -c 'echo " + script_b64 + " | base64 -d > /app/_td.py'")
print("--- 跑 ---")
run("docker exec -w /app news-aggregator-worker-1 python /app/_td.py 2>&1 | tail -15", t=30)
# Log in and fetch /me/usage to compare the API view with the Redis counter.
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
print("\n--- /me/usage ---")
print(" ", u)
c.close()

57
scripts/_direct3.py Normal file
View File

@@ -0,0 +1,57 @@
"""Reset the usage counter, then call the translation service directly to test the usage pipeline — the script is passed via docker exec -i."""
import os, paramiko
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=60):
    """Run a shell command on the remote host and hand back what it printed.

    The remote stdout is mirrored locally. The remote stderr is mirrored to
    the local stderr only when it is non-empty and does not contain the
    substring "Warning".

    Args:
        cmd: Command line to execute remotely.
        t: Timeout in seconds given to ``exec_command``.

    Returns:
        Decoded stdout (UTF-8, invalid bytes replaced).
    """
    import sys

    channels = c.exec_command(cmd, timeout=t)
    stdout_file, stderr_file = channels[1], channels[2]
    out = stdout_file.read().decode("utf-8", "replace")
    captured_err = stderr_file.read().decode("utf-8", "replace")
    # Wait for the command to exit before reporting anything.
    stdout_file.channel.recv_exit_status()
    if out:
        print(out, end="")
    suppress = (not captured_err) or ("Warning" in captured_err)
    if not suppress:
        print(captured_err, end="", file=sys.stderr)
    return out
# `json` is used for the API responses below, but the file's import line only
# brings in os and paramiko; import it here so the script does not NameError.
import json

# Read the Redis password from the server-side .env file.
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
# Delete the monthly usage counter so the run below starts from zero.
run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
print("--- usage 重置 0 ---")
# Probe script fed to the container over stdin: translate two fresh strings,
# repeat the first one, and print the usage counter around every call.
script = '''import asyncio
from app.services.translation.service import service
from app.redis_client import get_redis
async def main():
    r = get_redis(); await r.ping()
    print("before:", await r.get("translation:month:202606") or 0, flush=True)
    res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
    print(" call 1: engine=", res1.engine, "chars=", res1.chars, "text=", res1.text[:40], flush=True)
    print("after 1:", await r.get("translation:month:202606") or 0, flush=True)
    res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh")
    print(" call 2: engine=", res2.engine, "chars=", res2.chars, flush=True)
    print("after 2:", await r.get("translation:month:202606") or 0, flush=True)
    res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
    print(" call 3 (cache): cached=", res3.cached, "engine=", res3.engine, flush=True)
    print("after 3:", await r.get("translation:month:202606") or 0, flush=True)
asyncio.run(main())
'''
# The earlier `run("docker exec -i ... cat > /app/_t.py")` attempt was removed:
# run() never supplies stdin, so `cat` blocked until the timeout fired and the
# script aborted before reaching the working variant below.
si, so, se = c.exec_command("docker exec -i -w /app news-aggregator-worker-1 sh -c 'cat > /app/_t.py && python /app/_t.py'", timeout=30)
# sendall() lives on the channel, not on the stdin file object.
si.channel.sendall(script.encode("utf-8"))
si.channel.shutdown_write()  # close stdin so docker/cat see EOF
out = so.read().decode("utf-8", "replace")
err = se.read().decode("utf-8", "replace")
print(f"--- 跑 ---\n{out}")
if err and "Warning" not in err:
    print("err:", err)
# Log in and fetch /me/usage to compare the API view with the Redis counter.
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
print("\n--- /me/usage ---")
print(" ", u)
c.close()

66
scripts/_direct4.py Normal file
View File

@@ -0,0 +1,66 @@
"""The simplest approach: write the script content into the container, then run it with docker exec."""
import os, paramiko
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=60):
    """Execute ``cmd`` remotely, echo its output, and return the stdout text.

    Stderr is echoed to the local stderr unless it is empty or the substring
    "Warning" appears anywhere in it.

    Args:
        cmd: Remote shell command line.
        t: Timeout in seconds forwarded to ``exec_command``.

    Returns:
        Stdout decoded as UTF-8 with invalid bytes replaced.
    """
    import sys

    _unused_stdin, stdout_stream, stderr_stream = c.exec_command(cmd, timeout=t)
    out = stdout_stream.read().decode("utf-8", "replace")
    problems = stderr_stream.read().decode("utf-8", "replace")
    # Block until the remote process has finished.
    stdout_stream.channel.recv_exit_status()
    if out:
        print(out, end="")
    if problems:
        if "Warning" not in problems:
            print(problems, end="", file=sys.stderr)
    return out
import json

# Read the Redis password from the server-side .env file.
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
# 1) Reset the monthly usage counter.
run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
print("--- usage 重置 0 ---")
# 2) Build the probe script. The dead `... if False else ...` expression that
#    used to wrap the "after 2" line has been dropped; only its live branch remains.
script_lines = [
    "import asyncio",
    "from app.services.translation.service import service",
    "from app.redis_client import get_redis",
    "async def main():",
    " r = get_redis(); await r.ping()",
    " print('before:', await r.get('translation:month:202606') or 0, flush=True)",
    " res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')",
    " print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)",
    " print('after 1:', await r.get('translation:month:202606') or 0, flush=True)",
    " res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')",
    " print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)",
    " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)",
    " res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')",
    " print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)",
    " print('after 3:', await r.get('translation:month:202606') or 0, flush=True)",
    "asyncio.run(main())",
]
script = "\n".join(script_lines)
# Stream the script straight into /tmp/_t.py on the server. The old detour
# through a hard-coded local D:/ file added nothing and broke on any other machine.
si, so, se = c.exec_command("cat > /tmp/_t.py", timeout=10)
si.write(script.encode())
si.channel.shutdown_write()  # signal EOF so `cat` terminates
so.read()
print("--- script 写到 /tmp/_t.py ---")
# Copy the script from the server into the worker container and run it.
run("docker cp /tmp/_t.py news-aggregator-worker-1:/app/_t.py")
print("--- 跑 ---")
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -15", t=30)
# Log in and fetch /me/usage to compare the API view with the Redis counter.
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
print("\n--- /me/usage ---")
print(" ", u)
c.close()

54
scripts/_direct_test.py Normal file
View File

@@ -0,0 +1,54 @@
import os, paramiko, base64, json
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=60):
    """Run ``cmd`` over the open SSH connection and return its stdout text.

    Stdout is echoed locally; any stderr output is echoed to the local stderr
    with an ``[err]`` prefix. The stray quote that followed ``.stderr`` in the
    stderr print (a SyntaxError that stopped the file from loading) is removed.

    Args:
        cmd: Shell command line to execute on the remote host.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    import sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Block until the remote command has exited; the status itself is not used.
    stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    return out
# Read the Redis password from the server-side .env file.
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
# Reset the monthly usage counter.
run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
print("--- usage 重置 0 ---")
# Call service.translate inside the worker to confirm the usage pipeline.
# The script contains non-ASCII comments, which a bytes literal (b'''...''')
# rejects at compile time, so it is written as a str and encoded explicitly.
script_b64 = base64.b64encode('''
import asyncio, sys
from app.services.translation.service import service
from app.redis_client import get_redis
async def main():
    r = get_redis()
    await r.ping()
    print(f"before: {await r.get('translation:month:202606') or 0}", flush=True)
    # 1) 全新字符串 -> 走 tencent
    res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
    print(f" call 1: engine={res1.engine} chars={res1.chars} text={res1.text[:40]!r}", flush=True)
    print(f"after 1: {await r.get('translation:month:202606') or 0}", flush=True)
    # 2) 另一段
    res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh")
    print(f" call 2: engine={res2.engine} chars={res2.chars}", flush=True)
    print(f"after 2: {await r.get('translation:month:202606') or 0}", flush=True)
    # 3) 重复 1 的文本 -> 走 cache
    res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
    print(f" call 3 (cache): cached={res3.cached} engine={res3.engine}", flush=True)
    print(f"after 3: {await r.get('translation:month:202606') or 0}", flush=True)
asyncio.run(main())
'''.encode("utf-8")).decode()
# Decode the script into /app/_tt2.py inside the worker container and run it.
run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt2.py'")
print("--- worker 跑 ---")
run("docker exec -w /app news-aggregator-worker-1 python /app/_tt2.py 2>&1 | tail -15", t=30)
# Log in and fetch /me/usage to compare the API view with the Redis counter.
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
print(f"\n--- /me/usage ---\n {u}")
c.close()

73
scripts/_final4.py Normal file
View File

@@ -0,0 +1,73 @@
import os, paramiko, base64, json
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=120):
    """Execute ``cmd`` on the remote host and return the text it wrote to stdout.

    Stdout is mirrored locally; non-empty stderr is mirrored to the local
    stderr, prefixed with ``[err]``.

    Args:
        cmd: Remote shell command line.
        t: Timeout in seconds forwarded to ``exec_command``.

    Returns:
        Stdout decoded as UTF-8 with invalid bytes replaced.
    """
    import sys

    _stdin, remote_stdout, remote_stderr = c.exec_command(cmd, timeout=t)
    out = remote_stdout.read().decode("utf-8", "replace")
    err_text = remote_stderr.read().decode("utf-8", "replace")
    # Wait for the remote command to exit; the status is not inspected.
    remote_stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err_text:
        print("[err]", err_text, end="", file=sys.stderr)
    return out
# Read the Redis password from the server-side .env file.
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
# 1) Pull the latest code on the server
print("--- pull ---")
run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")
# 2) Rebuild and recreate the worker and api containers
print("--- 重建 ---")
run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker api 2>&1 | tail -8", t=120)
import time
# Pause for 8 seconds after recreating the containers before continuing.
time.sleep(8)
# 3) Reset usage to 0 by deleting the monthly counter key
run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
print("--- usage reset to 0 ---")
# 4) Flip 5 articles (lowest ids with status 'ok') back to 'pending' to trigger translation
print("--- 触发翻译(5 篇)---")
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending' WHERE id IN (SELECT id FROM articles WHERE translation_status = 'ok' ORDER BY id LIMIT 5);\" 2>&1 | tail -2")
# 5) Run the worker pipeline to re-translate up to 10 pending articles
script_b64 = base64.b64encode(b'''
import asyncio
from app.workers.pipeline import translate_article
from app.database import AsyncSessionLocal
from app.models.article import Article
from sqlalchemy import select
async def main():
async with AsyncSessionLocal() as s:
rows = (await s.execute(select(Article.id).where(Article.translation_status == 'pending').limit(10))).all()
ids = [r[0] for r in rows]
print(f"translating {len(ids)} pending")
for aid in ids:
await translate_article(aid)
asyncio.run(main())
''').decode()
# Decode the script into /app/_tt.py inside the worker container, then run it.
run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'")
run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -10", t=120)
# 6) Read the usage counter straight from Redis
print("\n--- redis usage ---")
out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning")
print(f" usage: {out.strip()}")
# 7) /me/usage as reported by the API
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
print(f"--- /me/usage ---\n {u}")
# 8) Container status
print("\n--- docker ps ---")
run("docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}' 2>&1 | tail -10")
# 9) Article counts per translation_status after the run
print("\n--- 翻译统计 ---")
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")
c.close()

67
scripts/_final5.py Normal file
View File

@@ -0,0 +1,67 @@
import os, paramiko, base64, json, time
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=120):
    """Run a remote shell command and return its stdout as text.

    Whatever the command prints to stdout is echoed locally. If it printed
    anything to stderr, that text is echoed to the local stderr after an
    ``[err]`` marker.

    Args:
        cmd: Command line to execute on the remote host.
        t: Timeout in seconds given to ``exec_command``.

    Returns:
        Decoded stdout (UTF-8, invalid bytes replaced).
    """
    import sys

    streams = c.exec_command(cmd, timeout=t)
    stdout_file, stderr_file = streams[1], streams[2]
    out = stdout_file.read().decode("utf-8", "replace")
    failure_text = stderr_file.read().decode("utf-8", "replace")
    # Block until the command has exited before echoing anything.
    stdout_file.channel.recv_exit_status()
    if out:
        print(out, end="")
    if failure_text:
        print("[err]", failure_text, end="", file=sys.stderr)
    return out
# Read the Redis password from the server-side .env file.
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
# Force-reset: flip 3 articles (lowest ids with status 'ok') back to 'pending'
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status='pending' WHERE id IN (SELECT id FROM articles WHERE translation_status='ok' ORDER BY id LIMIT 3);\" 2>&1 | tail -2")
# Wait 3 seconds
time.sleep(3)
# Count the pending articles
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE translation_status='pending';\"")
print(f"pending articles: {out.strip()}")
# Reset usage by deleting the monthly counter key
run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
# Run the worker to re-translate up to 5 pending articles; per-article errors are printed and skipped.
# NOTE(review): the `for r in rows: r[0]` line inside the script evaluates an expression and discards it — looks like leftover debugging.
script_b64 = base64.b64encode(b'''
import asyncio
from app.workers.pipeline import translate_article
from app.database import AsyncSessionLocal
from app.models.article import Article
from sqlalchemy import select
async def main():
async with AsyncSessionLocal() as s:
rows = (await s.execute(select(Article).where(Article.translation_status=='pending').limit(5))).all()
for r in rows: r[0]
ids = [r[0].id for r in rows]
print(f"translating {len(ids)}")
for aid in ids:
try:
await translate_article(aid)
except Exception as e:
print(f" err on {aid}: {e}")
print("done")
asyncio.run(main())
''').decode()
# Decode the script into /app/_tt.py inside the worker container, then run it.
run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'")
run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -20", t=180)
# Read the usage counter straight from Redis
out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning")
print(f"\n--- redis usage: {out.strip()}")
# /me/usage as reported by the API
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
print(f"--- /me/usage: {u}")
# Article counts per translation_status after the run
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")
c.close()

81
scripts/_http_check.py Normal file
View File

@@ -0,0 +1,81 @@
"""Check the de-duplication logic and run live HTTP tests."""
import os, paramiko, json
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=15):
    """Run ``cmd`` over the open SSH connection and return its stdout text.

    Stdout is echoed to the local stdout. Stderr used to be read and then
    dropped, so failures such as curl errors went unnoticed; it is now
    forwarded to the local stderr with an ``[err]`` prefix.

    Args:
        cmd: Shell command line to execute on the remote host.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    import sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Block until the remote command has exited; the status itself is not used.
    stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    return out
# ========== 1. Live HTTP checks ==========
print("=" * 60)
print("1. HTTP 实测")
print("=" * 60)
# Home page (original author's note: Caddy forwards it to the frontend); the body is saved to /tmp/idx.html on the server
out = run("curl -sS -o /tmp/idx.html -w 'status=%{http_code} size=%{size_download} type=%{content_type}\\n' http://207.57.129.228/")
print(f"\n[GET /]")
print(f" -> {out.strip()}")
# Show the first 200 bytes of the saved home page
out = run("head -c 200 /tmp/idx.html")
print(f" body[0:200]: {out}")
# /api/v1/healthz
out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/healthz")
print(f"\n[GET /api/v1/healthz]")
print(f" -> {out.strip()}")
# /api/v1/articles without a token (expected to return 401 per the original note)
# NOTE(review): the URL contains `?` and is not quoted for the remote shell — confirm the shell does not glob it.
out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/articles?limit=3")
print(f"\n[GET /api/v1/articles 无 token]")
print(f" -> {out.strip()[:300]}")
# Log in
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -sS -X POST http://207.57.129.228/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
data = json.loads(out)
# Falls back to an empty token when the response has no access_token key.
token = data.get("access_token", "")
print(f"\n[POST /api/v1/auth/login]")
print(f" -> token: {token[:40]}...")
# /api/v1/articles with the token
out = run("curl -sS -w '\nstatus=%{http_code}\n' -H 'Authorization: Bearer " + token + "' 'http://207.57.129.228/api/v1/articles?limit=2'")
print(f"\n[GET /api/v1/articles?limit=2 带 token]")
print(f" -> {out.strip()[:500]}")
# Static asset check (favicon)
out = run("curl -sS -o /dev/null -w 'status=%{http_code} type=%{content_type}\\n' http://207.57.129.228/favicon.svg")
print(f"\n[GET /favicon.svg]")
print(f" -> {out.strip()}")
# ========== 2. De-duplication audit ==========
print("\n" + "=" * 60)
print("2. 去重审计")
print("=" * 60)
# a) Number of rows sharing a url_hash (original note: should be 0 because of a UNIQUE constraint)
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) - count(DISTINCT url_hash) FROM articles;\"")
print(f"\n[a) 重复 url_hash 数量(应为 0): {out.strip()}")
# b) Raw URLs that occur more than once (original note: url_hash may already be normalized, so check the raw url)
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT url, count(*) FROM articles GROUP BY url HAVING count(*) > 1 LIMIT 5;\"")
print(f"\n[b) 重复 URL(可能含 utm_* 差异):")
print(f" {out if out.strip() else ' (无)'}")
# c) Titles whose first 60 characters are identical, with the set of source ids — possible reposts
print("\n[c] 标题相似度去重检查(前 50 字符完全相同):")
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT LEFT(title, 60), count(*), array_agg(DISTINCT source_id) FROM articles GROUP BY LEFT(title, 60) HAVING count(*) > 1 ORDER BY count(*) DESC LIMIT 5;\"")
print(f" {out if out.strip() else ' (无)'}")
# d) How many articles have duplicate_of set
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE duplicate_of IS NOT NULL;\"")
print(f"\n[d) duplicate_of 非空的 article 数: {out.strip()}")
# e) Per-source fetch state: last status, consecutive failures and fetch interval
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT slug, last_status, consecutive_failures, fetch_interval_min FROM sources ORDER BY id;\"")
print(f"\n[e) 源状态(reuters 失败后 interval 翻倍,看是不是还在重试):")
print(out)
c.close()

12
scripts/_logs.py Normal file
View File

@@ -0,0 +1,12 @@
import os, paramiko
# The SSH password comes from the REMOTE_PASS environment variable.
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(
    "207.57.129.228",
    port=19717,
    username="root",
    password=PW,
    timeout=15,
    allow_agent=False,
    look_for_keys=False,
)
# Fetch the last 80 log lines of the API container (stderr merged into stdout) via exec_command.
import time
_stdin, log_stream, _stderr = c.exec_command("docker logs --tail=80 news-aggregator-api-1 2>&1", timeout=20)
raw_log = log_stream.read()
out = raw_log.decode("utf-8", "replace")
print(out)
c.close()

42
scripts/_show_detail.py Normal file
View File

@@ -0,0 +1,42 @@
"""Show the full content the API returns for article 175177."""
import os, paramiko, json
# The SSH password comes from the REMOTE_PASS environment variable (KeyError if it is unset).
PW = os.environ["REMOTE_PASS"]
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification — confirm this is acceptable here.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Password-only login: the SSH agent and local key files are explicitly disabled.
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
def run(cmd, t=15):
    """Run ``cmd`` over the open SSH connection and return its stdout text.

    Stdout is echoed to the local stdout. Stderr used to be read and then
    dropped, which hid remote failures; it is now forwarded to the local
    stderr with an ``[err]`` prefix.

    Args:
        cmd: Shell command line to execute on the remote host.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    import sys

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    # Block until the remote command has exited; the status itself is not used.
    stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    return out
# 1) Fetch the detail JSON: log in, then GET /api/v1/articles/175177 with the bearer token
print("--- /api/v1/articles/175177 详情 ---")
# NOTE(review): the login credentials are hard-coded in this command — consider reading them from the environment.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
out = run("curl -s -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/175177")
det = json.loads(out)
# Pretty-print the response, keeping non-ASCII characters readable.
print(json.dumps(det, ensure_ascii=False, indent=2))
# 2) Try extracting the full Al Jazeera article text with trafilatura
print("\n\n--- 试 trafilatura 抓 Ronaldo 全文 ---")
# Source of a throwaway script that is executed inside the worker container below.
script = '''
import asyncio, httpx, trafilatura
async def main():
url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity"
async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c:
r = await c.get(url, headers={"User-Agent": "Mozilla/5.0"})
print("status:", r.status_code, "len:", len(r.text))
extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True)
print("extracted len:", len(extracted or ""))
print("---")
print((extracted or "")[:1000])
asyncio.run(main())
'''
import base64
# Base64-encode the script so it passes through the nested shell quoting untouched.
b64 = base64.b64encode(script.encode()).decode()
# Decode the script into /app/_tr.py inside the worker container, then run it and show the last 30 output lines.
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'")
run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -30", t=30)
c.close()

16
scripts/_t_direct.py Normal file
View File

@@ -0,0 +1,16 @@
import asyncio
from app.services.translation.service import service
from app.redis_client import get_redis
async def main():
    """Exercise the translation service three times and print the usage counter around each call.

    The first and third calls translate the same text; the second translates
    a different one. The Redis key 'translation:month:202606' is printed
    before the first call and after every call (0 when the key is unset).
    """
    usage_key = 'translation:month:202606'
    redis = get_redis()
    await redis.ping()

    async def current_usage():
        # Same "value or 0" fallback as reading the key inline.
        return await redis.get(usage_key) or 0

    print('before:', await current_usage(), flush=True)

    first = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
    print(' call 1: engine=', first.engine, 'chars=', first.chars, 'text=', first.text[:40], flush=True)
    print('after 1:', await current_usage(), flush=True)

    second = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')
    print(' call 2: engine=', second.engine, 'chars=', second.chars, flush=True)
    print('after 2:', await current_usage(), flush=True)

    repeat = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
    print(' call 3 (cache): cached=', repeat.cached, 'engine=', repeat.engine, flush=True)
    print('after 3:', await current_usage(), flush=True)


asyncio.run(main())

16
scripts/_tscript.py Normal file
View File

@@ -0,0 +1,16 @@
import asyncio
from app.services.translation.service import service
from app.redis_client import get_redis
async def main():
    """Translate three strings via the service and report the monthly usage counter.

    Prints the Redis value of 'translation:month:202606' (or 0 when unset)
    before the first call and after each of the three calls. The third call
    repeats the first call's text and prints the result's ``cached`` flag.
    """
    counter_key = 'translation:month:202606'
    client = get_redis()
    await client.ping()

    usage = await client.get(counter_key) or 0
    print('before:', usage, flush=True)

    result_one = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
    print(' call 1: engine=', result_one.engine, 'chars=', result_one.chars, 'text=', result_one.text[:40], flush=True)
    usage = await client.get(counter_key) or 0
    print('after 1:', usage, flush=True)

    result_two = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')
    print(' call 2: engine=', result_two.engine, 'chars=', result_two.chars, flush=True)
    usage = await client.get(counter_key) or 0
    print('after 2:', usage, flush=True)

    result_three = await service.translate('Breaking news from Reuters today.', source='en', target='zh')
    print(' call 3 (cache): cached=', result_three.cached, 'engine=', result_three.engine, flush=True)
    usage = await client.get(counter_key) or 0
    print('after 3:', usage, flush=True)


asyncio.run(main())