fix: articles.py get_article 链式 await coroutine 报错(.first())
This commit is contained in:
@@ -95,7 +95,8 @@ async def list_articles(
|
||||
|
||||
stmt = stmt.order_by(desc(Article.published_at), desc(Article.id)).limit(limit + 1)
|
||||
|
||||
rows = (await session.execute(stmt)).all()
|
||||
result = await session.execute(stmt)
|
||||
rows = result.all()
|
||||
has_more = len(rows) > limit
|
||||
rows = rows[:limit]
|
||||
|
||||
@@ -140,14 +141,12 @@ async def get_article(
|
||||
user: User = Depends(get_current_user),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
art = (
|
||||
await session.execute(
|
||||
select(Article, Source)
|
||||
.join(Source, Source.id == Article.source_id)
|
||||
.where(Article.id == article_id)
|
||||
)
|
||||
.first()
|
||||
result = await session.execute(
|
||||
select(Article, Source)
|
||||
.join(Source, Source.id == Article.source_id)
|
||||
.where(Article.id == article_id)
|
||||
)
|
||||
art = result.first()
|
||||
if not art:
|
||||
raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found")
|
||||
article, source = art
|
||||
|
||||
52
scripts/_check_body.py
Normal file
52
scripts/_check_body.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""查 Ronaldo 那篇文章的 body 字段。"""
|
||||
import os, paramiko
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=15):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
        Stdout is also echoed to the local stdout.

    Remote stderr and a non-zero exit status are reported on the local
    stderr instead of being dropped, so a failing command is visible.
    """
    import sys  # local import keeps this block self-contained

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    if rc != 0:
        # The command text is deliberately not echoed: it may carry secrets.
        print(f"[exit status {rc}]", file=sys.stderr)
    return out
|
||||
|
||||
# 1) 看 body 字段
|
||||
print("--- 文章 body 字段(可能是空)---")
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, title, length(body_html) as html_len, length(body_text) as text_len, length(body_zh_text) as zh_len, lang_src, translation_status, url FROM articles WHERE id = 175177;\"")
|
||||
|
||||
# 2) 看 3 篇典型 aljazeera 文章
|
||||
print("\n--- 抽 3 篇 aljazeera 看 body 长度分布 ---")
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 3 ORDER BY fetched_at DESC LIMIT 5;\"")
|
||||
|
||||
# 3) 抽 BBC(可能是最丰富的)
|
||||
print("\n--- 抽 3 篇 BBC 看 body ---")
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT id, LEFT(title, 50) title, length(body_html) html, length(body_text) txt, length(body_zh_text) zh FROM articles WHERE source_id = 2 ORDER BY fetched_at DESC LIMIT 5;\"")
|
||||
|
||||
# 4) 拉 RSS 源看看,Al Jazeera 到底有没有内容
|
||||
print("\n--- 拉 Al Jazeera RSS 原始内容看 ---")
|
||||
script = b'''
|
||||
import asyncio, feedparser, httpx
|
||||
async def main():
|
||||
async with httpx.AsyncClient(follow_redirects=True, timeout=15) as c:
|
||||
r = await c.get("https://www.aljazeera.com/xml/rss/all.xml")
|
||||
f = feedparser.parse(r.text)
|
||||
for e in f.entries[:3]:
|
||||
print("---")
|
||||
print("title:", e.title)
|
||||
print("link:", e.link)
|
||||
print("has content:", bool(e.get("content")))
|
||||
if e.get("content"):
|
||||
print("content[0] keys:", list(e["content"][0].keys()))
|
||||
print("content[0].value[:200]:", (e["content"][0].get("value") or "")[:200])
|
||||
print("has summary:", bool(e.get("summary")))
|
||||
if e.get("summary"):
|
||||
print("summary[:200]:", e["summary"][:200])
|
||||
asyncio.run(main())
|
||||
'''
|
||||
import base64
|
||||
b64 = base64.b64encode(script).decode()
|
||||
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_t.py'")
|
||||
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -40", t=30)
|
||||
c.close()
|
||||
55
scripts/_direct2.py
Normal file
55
scripts/_direct2.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""重置 + 直接调 service 测 usage 链路。
|
||||
|
||||
实现:用 paramiko 写脚本到容器临时文件,然后 docker exec 跑。"""
|
||||
import os, paramiko, base64, json
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=60):
    """Execute *cmd* remotely via the module-level SSH client ``c``.

    Stdout is echoed locally and returned as text. Stderr is forwarded to the
    local stderr unless it contains the substring "Warning".
    """
    _, remote_out, remote_err = c.exec_command(cmd, timeout=t)
    captured = remote_out.read().decode("utf-8", "replace")
    complaints = remote_err.read().decode("utf-8", "replace")
    # Wait for the remote command to report its exit status; the value is unused.
    remote_out.channel.recv_exit_status()

    if captured:
        print(captured, end="")

    worth_showing = bool(complaints) and "Warning" not in complaints
    if worth_showing:
        print(complaints, end="", file=__import__("sys").stderr)
    return captured
|
||||
|
||||
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
|
||||
run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
|
||||
print("--- usage 重置 0 ---")
|
||||
|
||||
# 在本机写脚本,scp 到容器(不行,容器是 worker 容器,用 docker cp)
|
||||
script = (
|
||||
"import asyncio\n"
|
||||
"from app.services.translation.service import service\n"
|
||||
"from app.redis_client import get_redis\n"
|
||||
"async def main():\n"
|
||||
" r = get_redis(); await r.ping()\n"
|
||||
" print('before:', await r.get('translation:month:202606') or 0, flush=True)\n"
|
||||
" res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n"
|
||||
" print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)\n"
|
||||
" print('after 1:', await r.get('translation:month:202606') or 0, flush=True)\n"
|
||||
" res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')\n"
|
||||
" print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)\n"
|
||||
" print('after 2:', await r.get('translation:month:202606') or 0, flush=True)\n"
|
||||
" res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')\n"
|
||||
" print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)\n"
|
||||
" print('after 3:', await r.get('translation:month:202606') or 0, flush=True)\n"
|
||||
"asyncio.run(main())\n"
|
||||
)
|
||||
local_path = "D:/selftools/diary-news/scripts/_t_direct.py"
with open(local_path, "w", encoding="utf-8") as f:
    f.write(script)

# run() executes its command on the remote host, so `docker cp` cannot read
# local_path (a path on this machine). Stream the script to the server over
# the SSH channel's stdin first, then copy it from there into the container.
remote_path = "/tmp/_t_direct.py"
si, so, se = c.exec_command("cat > " + remote_path, timeout=10)
si.write(script.encode("utf-8"))
si.channel.shutdown_write()  # send EOF so the remote `cat` can finish
so.read()  # returns once the remote side closes stdout, i.e. cat is done

# Copy from the server's filesystem into the worker container.
run("docker cp " + remote_path + " news-aggregator-worker-1:/app/_td.py")
print("--- 跑 ---")
run("docker exec -w /app news-aggregator-worker-1 python /app/_td.py 2>&1 | tail -15", t=30)
|
||||
|
||||
# /me/usage
|
||||
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
token = json.loads(out)["access_token"]
|
||||
u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
|
||||
print("\n--- /me/usage ---")
|
||||
print(" ", u)
|
||||
c.close()
|
||||
57
scripts/_direct3.py
Normal file
57
scripts/_direct3.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""重置 + 直接调 service 测 usage 链路 — 用 docker exec -i 传脚本。"""
|
||||
import os, paramiko
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=60):
    """Run a shell command on the server behind the global SSH client ``c``.

    The decoded stdout is printed (when non-empty) and returned. Decoded
    stderr is printed to the local stderr only when it is non-empty and does
    not contain the text "Warning".
    """
    def _text(stream):
        # Decode leniently so odd bytes in remote output never raise.
        return stream.read().decode("utf-8", "replace")

    handles = c.exec_command(cmd, timeout=t)
    stdout_handle, stderr_handle = handles[1], handles[2]
    out = _text(stdout_handle)
    err = _text(stderr_handle)
    stdout_handle.channel.recv_exit_status()  # status is fetched but not used

    if out:
        print(out, end="")
    if not err or "Warning" in err:
        return out
    print(err, end="", file=__import__("sys").stderr)
    return out
|
||||
|
||||
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
|
||||
run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
|
||||
print("--- usage 重置 0 ---")
|
||||
|
||||
# 用 stdin 喂脚本
|
||||
script = '''import asyncio
|
||||
from app.services.translation.service import service
|
||||
from app.redis_client import get_redis
|
||||
async def main():
|
||||
r = get_redis(); await r.ping()
|
||||
print("before:", await r.get("translation:month:202606") or 0, flush=True)
|
||||
res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
|
||||
print(" call 1: engine=", res1.engine, "chars=", res1.chars, "text=", res1.text[:40], flush=True)
|
||||
print("after 1:", await r.get("translation:month:202606") or 0, flush=True)
|
||||
res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh")
|
||||
print(" call 2: engine=", res2.engine, "chars=", res2.chars, flush=True)
|
||||
print("after 2:", await r.get("translation:month:202606") or 0, flush=True)
|
||||
res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
|
||||
print(" call 3 (cache): cached=", res3.cached, "engine=", res3.engine, flush=True)
|
||||
print("after 3:", await r.get("translation:month:202606") or 0, flush=True)
|
||||
asyncio.run(main())
|
||||
'''
|
||||
# Write the script into the worker container's /app directory and run it.
# `docker exec -i` reads the script body from stdin, so the command has to be
# started with exec_command directly to obtain a writable stdin handle. A
# plain run() of `cat > file` never receives EOF and just blocks until the
# channel timeout, so that attempt has been removed.
si, so, se = c.exec_command("docker exec -i -w /app news-aggregator-worker-1 sh -c 'cat > /app/_t.py && python /app/_t.py'", timeout=30)
# sendall lives on the underlying channel, not on the stdin file wrapper.
si.channel.sendall(script.encode("utf-8"))
si.channel.shutdown_write()  # close stdin so docker/cat see end of input
out = so.read().decode("utf-8", "replace")
err = se.read().decode("utf-8", "replace")
print(f"--- 跑 ---\n{out}")
if err and "Warning" not in err: print("err:", err)
|
||||
|
||||
# /me/usage
# json parses the two API responses below; this script's import line does not
# include it, so it is imported here to avoid a NameError.
import json

# Log in to obtain a bearer token, then query the usage endpoint with it.
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
token = json.loads(out)["access_token"]
u = json.loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
print("\n--- /me/usage ---")
print(" ", u)
c.close()
|
||||
66
scripts/_direct4.py
Normal file
66
scripts/_direct4.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""最简单的方式:把脚本内容写到容器内,再 docker exec 跑。"""
|
||||
import os, paramiko
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=60):
    """Send *cmd* to the remote host over the shared SSH client ``c``.

    Returns the command's stdout as a string, echoing it locally on the way.
    Stderr is relayed to the local stderr, except when it is empty or
    mentions "Warning".
    """
    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    decoded = [pipe.read().decode("utf-8", "replace") for pipe in (stdout, stderr)]
    out, err = decoded
    stdout.channel.recv_exit_status()  # collected for completeness, unused

    if out:
        print(out, end="")
    if err:
        if "Warning" not in err:
            print(err, end="", file=__import__("sys").stderr)
    return out
|
||||
|
||||
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
|
||||
|
||||
# 1) 重置
|
||||
run("docker exec news-aggregator-redis-1 redis-cli -a '" + rpw + "' DEL translation:month:202606 2>/dev/null")
|
||||
print("--- usage 重置 0 ---")
|
||||
|
||||
# 2) 把脚本写到 server 本地 /tmp(用 heredoc 一次性写完)
|
||||
script_lines = [
|
||||
"import asyncio",
|
||||
"from app.services.translation.service import service",
|
||||
"from app.redis_client import get_redis",
|
||||
"async def main():",
|
||||
" r = get_redis(); await r.ping()",
|
||||
" print('before:', await r.get('translation:month:202606') or 0, flush=True)",
|
||||
" res1 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')",
|
||||
" print(' call 1: engine=', res1.engine, 'chars=', res1.chars, 'text=', res1.text[:40], flush=True)",
|
||||
" print('after 1:', await r.get('translation:month:202606') or 0, flush=True)",
|
||||
" res2 = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')",
|
||||
" print(' call 2: engine=', res2.engine, 'chars=', res2.chars, flush=True)",
|
||||
" print('after 2:', await r.get('translation:month:2026') or 0, flush=True)" if False else " print('after 2:', await r.get('translation:month:202606') or 0, flush=True)",
|
||||
" res3 = await service.translate('Breaking news from Reuters today.', source='en', target='zh')",
|
||||
" print(' call 3 (cache): cached=', res3.cached, 'engine=', res3.engine, flush=True)",
|
||||
" print('after 3:', await r.get('translation:month:202606') or 0, flush=True)",
|
||||
"asyncio.run(main())",
|
||||
]
|
||||
script = "\n".join(script_lines)
|
||||
# 写到 server /tmp
|
||||
local = "D:/selftools/diary-news/scripts/_tscript.py"
|
||||
with open(local, "w", encoding="utf-8") as f:
|
||||
f.write(script)
|
||||
|
||||
# 复制到 server
|
||||
si, so, se = c.exec_command("cat > /tmp/_t.py", timeout=10)
|
||||
with open(local, "r", encoding="utf-8") as f:
|
||||
si.write(f.read().encode())
|
||||
si.channel.shutdown_write()
|
||||
so.read()
|
||||
print("--- script 写到 /tmp/_t.py ---")
|
||||
|
||||
# 复制到 worker 容器
|
||||
run("docker cp /tmp/_t.py news-aggregator-worker-1:/app/_t.py")
|
||||
print("--- 跑 ---")
|
||||
run("docker exec -w /app news-aggregator-worker-1 python /app/_t.py 2>&1 | tail -15", t=30)
|
||||
|
||||
# /me/usage
|
||||
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
token = __import__("json").loads(out)["access_token"]
|
||||
u = __import__("json").loads(run("curl -s -H 'Authorization: Bearer " + token + "' 'http://localhost/api/v1/me/usage'"))
|
||||
print("\n--- /me/usage ---")
|
||||
print(" ", u)
|
||||
c.close()
|
||||
54
scripts/_direct_test.py
Normal file
54
scripts/_direct_test.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import os, paramiko, base64, json
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=60):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
        Stdout is echoed locally; any stderr is printed to the local stderr
        with an "[err]" prefix.
    """
    import sys  # local import keeps this block self-contained

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    stdout.channel.recv_exit_status()  # fetch the exit status; value unused
    if out:
        print(out, end="")
    if err:
        # The original line ended in a stray double quote after ``.stderr``,
        # which made the whole script fail to parse.
        print("[err]", err, end="", file=sys.stderr)
    return out
|
||||
|
||||
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
|
||||
|
||||
# 重置
|
||||
run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
|
||||
print("--- usage 重置 0 ---")
|
||||
|
||||
# 在 worker 进程内直接调 service.translate 两次(确认链路)
|
||||
script_b64 = base64.b64encode(b'''
|
||||
import asyncio, sys
|
||||
from app.services.translation.service import service
|
||||
from app.redis_client import get_redis
|
||||
|
||||
async def main():
|
||||
r = get_redis()
|
||||
await r.ping()
|
||||
print(f"before: {await r.get('translation:month:202606') or 0}", flush=True)
|
||||
# 1) 全新字符串 -> 走 tencent
|
||||
res1 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
|
||||
print(f" call 1: engine={res1.engine} chars={res1.chars} text={res1.text[:40]!r}", flush=True)
|
||||
print(f"after 1: {await r.get('translation:month:202606') or 0}", flush=True)
|
||||
# 2) 另一段
|
||||
res2 = await service.translate("The market fell sharply after the announcement.", source="en", target="zh")
|
||||
print(f" call 2: engine={res2.engine} chars={res2.chars}", flush=True)
|
||||
print(f"after 2: {await r.get('translation:month:202606') or 0}", flush=True)
|
||||
# 3) 重复 1 的文本 -> 走 cache
|
||||
res3 = await service.translate("Breaking news from Reuters today.", source="en", target="zh")
|
||||
print(f" call 3 (cache): cached={res3.cached} engine={res3.engine}", flush=True)
|
||||
print(f"after 3: {await r.get('translation:month:202606') or 0}", flush=True)
|
||||
asyncio.run(main())
|
||||
''').decode()
|
||||
run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt2.py'")
|
||||
print("--- worker 跑 ---")
|
||||
run("docker exec -w /app news-aggregator-worker-1 python /app/_tt2.py 2>&1 | tail -15", t=30)
|
||||
|
||||
# /me/usage
|
||||
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
token = json.loads(out)["access_token"]
|
||||
u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
|
||||
print(f"\n--- /me/usage ---\n {u}")
|
||||
c.close()
|
||||
73
scripts/_final4.py
Normal file
73
scripts/_final4.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import os, paramiko, base64, json
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=120):
    """Execute *cmd* on the server using the global SSH client ``c``.

    Prints the decoded stdout (if any) and returns it. Any decoded stderr is
    printed to the local stderr, prefixed with "[err]".
    """
    channels = c.exec_command(cmd, timeout=t)
    # channels is (stdin, stdout, stderr); stdout is drained before stderr.
    stdout_text, stderr_text = (
        stream.read().decode("utf-8", "replace") for stream in channels[1:]
    )
    channels[1].channel.recv_exit_status()  # exit status retrieved, not used

    if stdout_text:
        print(stdout_text, end="")
    if stderr_text:
        print("[err]", stderr_text, end="", file=__import__("sys").stderr)
    return stdout_text
|
||||
|
||||
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
|
||||
|
||||
# 1) 服务器 pull
|
||||
print("--- pull ---")
|
||||
run("cd /srv/news && sudo -u news git pull --rebase 2>&1 | tail -3")
|
||||
|
||||
# 2) 重建 worker + api
|
||||
print("--- 重建 ---")
|
||||
run("cd /srv/news && docker compose up -d --force-recreate --no-deps --build worker api 2>&1 | tail -8", t=120)
|
||||
import time
|
||||
time.sleep(8)
|
||||
|
||||
# 3) 重置 usage = 0
|
||||
run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
|
||||
print("--- usage reset to 0 ---")
|
||||
|
||||
# 4) 把 5 篇文章重置为 pending 触发翻译
|
||||
print("--- 触发翻译(5 篇)---")
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status = 'pending' WHERE id IN (SELECT id FROM articles WHERE translation_status = 'ok' ORDER BY id LIMIT 5);\" 2>&1 | tail -2")
|
||||
|
||||
# 5) 跑 worker pipeline 重译
|
||||
script_b64 = base64.b64encode(b'''
|
||||
import asyncio
|
||||
from app.workers.pipeline import translate_article
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.article import Article
|
||||
from sqlalchemy import select
|
||||
|
||||
async def main():
|
||||
async with AsyncSessionLocal() as s:
|
||||
rows = (await s.execute(select(Article.id).where(Article.translation_status == 'pending').limit(10))).all()
|
||||
ids = [r[0] for r in rows]
|
||||
print(f"translating {len(ids)} pending")
|
||||
for aid in ids:
|
||||
await translate_article(aid)
|
||||
asyncio.run(main())
|
||||
''').decode()
|
||||
run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'")
|
||||
run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -10", t=120)
|
||||
|
||||
# 6) 看 usage
|
||||
print("\n--- redis usage ---")
|
||||
out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning")
|
||||
print(f" usage: {out.strip()}")
|
||||
|
||||
# 7) /me/usage
|
||||
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
token = json.loads(out)["access_token"]
|
||||
u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
|
||||
print(f"--- /me/usage ---\n {u}")
|
||||
|
||||
# 8) 容器状态
|
||||
print("\n--- docker ps ---")
|
||||
run("docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}' 2>&1 | tail -10")
|
||||
|
||||
# 9) 翻译后统计
|
||||
print("\n--- 翻译统计 ---")
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")
|
||||
c.close()
|
||||
67
scripts/_final5.py
Normal file
67
scripts/_final5.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import os, paramiko, base64, json, time
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=120):
    """Run one remote shell command via the shared SSH client ``c``.

    The command's stdout is decoded, echoed and returned; its stderr, when
    non-empty, goes to the local stderr with an "[err]" tag in front.
    """
    def _drain(pipe):
        # Read everything and decode without raising on bad bytes.
        return pipe.read().decode("utf-8", "replace")

    _unused_stdin, remote_out, remote_err = c.exec_command(cmd, timeout=t)
    out = _drain(remote_out)
    err = _drain(remote_err)
    remote_out.channel.recv_exit_status()  # result intentionally ignored

    if out:
        print(out, end="")
    if err:
        error_sink = __import__("sys").stderr
        print("[err]", err, end="", file=error_sink)
    return out
|
||||
|
||||
rpw = run("grep ^REDIS_PASSWORD /srv/news/.env | cut -d= -f2").strip()
|
||||
|
||||
# 强制重置
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"UPDATE articles SET translation_status='pending' WHERE id IN (SELECT id FROM articles WHERE translation_status='ok' ORDER BY id LIMIT 3);\" 2>&1 | tail -2")
|
||||
|
||||
# 等
|
||||
time.sleep(3)
|
||||
|
||||
# 查 pending
|
||||
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE translation_status='pending';\"")
|
||||
print(f"pending articles: {out.strip()}")
|
||||
|
||||
# 重置 usage
|
||||
run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' DEL 'translation:month:202606' 2>&1 | grep -v Warning")
|
||||
|
||||
# 跑 worker 重译
|
||||
script_b64 = base64.b64encode(b'''
|
||||
import asyncio
|
||||
from app.workers.pipeline import translate_article
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.article import Article
|
||||
from sqlalchemy import select
|
||||
|
||||
async def main():
|
||||
async with AsyncSessionLocal() as s:
|
||||
rows = (await s.execute(select(Article).where(Article.translation_status=='pending').limit(5))).all()
|
||||
for r in rows: r[0]
|
||||
ids = [r[0].id for r in rows]
|
||||
print(f"translating {len(ids)}")
|
||||
for aid in ids:
|
||||
try:
|
||||
await translate_article(aid)
|
||||
except Exception as e:
|
||||
print(f" err on {aid}: {e}")
|
||||
print("done")
|
||||
asyncio.run(main())
|
||||
''').decode()
|
||||
run(f"docker exec news-aggregator-worker-1 sh -c 'echo {script_b64} | base64 -d > /app/_tt.py'")
|
||||
run("docker exec -w /app news-aggregator-worker-1 python /app/_tt.py 2>&1 | tail -20", t=180)
|
||||
|
||||
# 看 usage
|
||||
out = run(f"docker exec news-aggregator-redis-1 redis-cli -a '{rpw}' GET 'translation:month:202606' 2>&1 | grep -v Warning")
|
||||
print(f"\n--- redis usage: {out.strip()}")
|
||||
|
||||
# /me/usage
|
||||
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
token = json.loads(out)["access_token"]
|
||||
u = json.loads(run(f"curl -s -H 'Authorization: Bearer {token}' 'http://localhost/api/v1/me/usage'"))
|
||||
print(f"--- /me/usage: {u}")
|
||||
|
||||
# 翻译后统计
|
||||
run("docker exec news-aggregator-postgres-1 psql -U news -d news -c \"SELECT translation_status, count(*) FROM articles GROUP BY 1 ORDER BY 1;\"")
|
||||
c.close()
|
||||
81
scripts/_http_check.py
Normal file
81
scripts/_http_check.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""检查去重逻辑 + 启动 HTTP 实测。"""
|
||||
import os, paramiko, json
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=15):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
        Stdout is also echoed to the local stdout.

    Remote stderr and a non-zero exit status are written to the local stderr
    rather than discarded, so a failed check is not mistaken for empty output.
    """
    import sys  # local import keeps this block self-contained

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    if rc != 0:
        # Do not echo cmd here: some commands embed a bearer token.
        print(f"[exit status {rc}]", file=sys.stderr)
    return out
|
||||
|
||||
# ========== 1. 启动 HTTP 看看 ==========
|
||||
print("=" * 60)
|
||||
print("1. HTTP 实测")
|
||||
print("=" * 60)
|
||||
|
||||
# 首页 (Caddy 转发到 frontend)
|
||||
out = run("curl -sS -o /tmp/idx.html -w 'status=%{http_code} size=%{size_download} type=%{content_type}\\n' http://207.57.129.228/")
|
||||
print(f"\n[GET /]")
|
||||
print(f" -> {out.strip()}")
|
||||
out = run("head -c 200 /tmp/idx.html")
|
||||
print(f" body[0:200]: {out}")
|
||||
|
||||
# /api/v1/healthz
|
||||
out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/healthz")
|
||||
print(f"\n[GET /api/v1/healthz]")
|
||||
print(f" -> {out.strip()}")
|
||||
|
||||
# /api/v1/articles (没 token 应该 401)
|
||||
out = run("curl -sS -w '\\nstatus=%{http_code}\\n' http://207.57.129.228/api/v1/articles?limit=3")
|
||||
print(f"\n[GET /api/v1/articles 无 token]")
|
||||
print(f" -> {out.strip()[:300]}")
|
||||
|
||||
# 登录
|
||||
out = run("curl -sS -X POST http://207.57.129.228/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
data = json.loads(out)
|
||||
token = data.get("access_token", "")
|
||||
print(f"\n[POST /api/v1/auth/login]")
|
||||
print(f" -> token: {token[:40]}...")
|
||||
|
||||
# /api/v1/articles 带 token
|
||||
out = run("curl -sS -w '\nstatus=%{http_code}\n' -H 'Authorization: Bearer " + token + "' 'http://207.57.129.228/api/v1/articles?limit=2'")
|
||||
print(f"\n[GET /api/v1/articles?limit=2 带 token]")
|
||||
print(f" -> {out.strip()[:500]}")
|
||||
|
||||
# 测静态资源(favicon)
|
||||
out = run("curl -sS -o /dev/null -w 'status=%{http_code} type=%{content_type}\\n' http://207.57.129.228/favicon.svg")
|
||||
print(f"\n[GET /favicon.svg]")
|
||||
print(f" -> {out.strip()}")
|
||||
|
||||
# ========== 2. 去重审计 ==========
|
||||
print("\n" + "=" * 60)
|
||||
print("2. 去重审计")
|
||||
print("=" * 60)
|
||||
|
||||
# a) 同一 url_hash 重复数(应该是 0,UNIQUE 约束)
|
||||
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) - count(DISTINCT url_hash) FROM articles;\"")
|
||||
print(f"\n[a) 重复 url_hash 数量(应为 0): {out.strip()}")
|
||||
|
||||
# b) 同一 url 重复数(可能 url_hash 已经 normalize 过,检查原始 url)
|
||||
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT url, count(*) FROM articles GROUP BY url HAVING count(*) > 1 LIMIT 5;\"")
|
||||
print(f"\n[b) 重复 URL(可能含 utm_* 差异):")
|
||||
print(f" {out if out.strip() else ' (无)'}")
|
||||
|
||||
# c) 同源 / 同标题 / 同一天的,看是不是转载
|
||||
print("\n[c] 标题相似度去重检查(前 50 字符完全相同):")
|
||||
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT LEFT(title, 60), count(*), array_agg(DISTINCT source_id) FROM articles GROUP BY LEFT(title, 60) HAVING count(*) > 1 ORDER BY count(*) DESC LIMIT 5;\"")
|
||||
print(f" {out if out.strip() else ' (无)'}")
|
||||
|
||||
# d) duplicate_of 字段使用情况
|
||||
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT count(*) FROM articles WHERE duplicate_of IS NOT NULL;\"")
|
||||
print(f"\n[d) duplicate_of 非空的 article 数: {out.strip()}")
|
||||
|
||||
# e) 抓取日志:reuters 失败时是不是会反复重试
|
||||
out = run("docker exec news-aggregator-postgres-1 psql -U news -d news -tA -c \"SELECT slug, last_status, consecutive_failures, fetch_interval_min FROM sources ORDER BY id;\"")
|
||||
print(f"\n[e) 源状态(reuters 失败后 interval 翻倍,看是不是还在重试):")
|
||||
print(out)
|
||||
c.close()
|
||||
12
scripts/_logs.py
Normal file
12
scripts/_logs.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import os, paramiko
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
|
||||
# Fetch the last 80 lines of the API container log via exec_command (invoke_shell is not used)
|
||||
import time
|
||||
si, so, se = c.exec_command("docker logs --tail=80 news-aggregator-api-1 2>&1", timeout=20)
|
||||
out = so.read().decode("utf-8", "replace")
|
||||
print(out)
|
||||
c.close()
|
||||
42
scripts/_show_detail.py
Normal file
42
scripts/_show_detail.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""直接看 API 返回的 article 175177 的完整内容。"""
|
||||
import os, paramiko, json
|
||||
PW = os.environ["REMOTE_PASS"]
|
||||
c = paramiko.SSHClient()
|
||||
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
c.connect("207.57.129.228", port=19717, username="root", password=PW, timeout=15, allow_agent=False, look_for_keys=False)
|
||||
def run(cmd, t=15):
    """Run *cmd* on the remote host through the module-level SSH client ``c``.

    Args:
        cmd: Shell command line to execute remotely.
        t: Timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).
        Stdout is also echoed to the local stdout.

    Remote stderr and a non-zero exit status are surfaced on the local stderr
    instead of being read and thrown away.
    """
    import sys  # local import keeps this block self-contained

    _stdin, stdout, stderr = c.exec_command(cmd, timeout=t)
    out = stdout.read().decode("utf-8", "replace")
    err = stderr.read().decode("utf-8", "replace")
    rc = stdout.channel.recv_exit_status()
    if out:
        print(out, end="")
    if err:
        print("[err]", err, end="", file=sys.stderr)
    if rc != 0:
        # Do not echo cmd here: some commands embed a bearer token.
        print(f"[exit status {rc}]", file=sys.stderr)
    return out
|
||||
|
||||
# 1) 拉详细 JSON
|
||||
print("--- /api/v1/articles/175177 详情 ---")
|
||||
out = run("curl -s -X POST http://localhost/api/v1/auth/login -H 'Content-Type: application/json' -d '{\"username\":\"owner\",\"password\":\"Owner2026!\"}'")
|
||||
token = json.loads(out)["access_token"]
|
||||
out = run("curl -s -H 'Authorization: Bearer " + token + "' http://localhost/api/v1/articles/175177")
|
||||
det = json.loads(out)
|
||||
print(json.dumps(det, ensure_ascii=False, indent=2))
|
||||
|
||||
# 2) 试 trafilatura 抓 Al Jazeera 全文
|
||||
print("\n\n--- 试 trafilatura 抓 Ronaldo 全文 ---")
|
||||
script = '''
|
||||
import asyncio, httpx, trafilatura
|
||||
async def main():
|
||||
url = "https://www.aljazeera.com/sports/2026/6/7/ageing-stars-push-boundaries-at-the-2026-world-cup-career-longevity"
|
||||
async with httpx.AsyncClient(follow_redirects=True, timeout=20) as c:
|
||||
r = await c.get(url, headers={"User-Agent": "Mozilla/5.0"})
|
||||
print("status:", r.status_code, "len:", len(r.text))
|
||||
extracted = trafilatura.extract(r.text, include_comments=False, include_tables=False, favor_recall=True)
|
||||
print("extracted len:", len(extracted or ""))
|
||||
print("---")
|
||||
print((extracted or "")[:1000])
|
||||
asyncio.run(main())
|
||||
'''
|
||||
import base64
|
||||
b64 = base64.b64encode(script.encode()).decode()
|
||||
run("docker exec news-aggregator-worker-1 sh -c 'echo " + b64 + " | base64 -d > /app/_tr.py'")
|
||||
run("docker exec -w /app news-aggregator-worker-1 python /app/_tr.py 2>&1 | tail -30", t=30)
|
||||
c.close()
|
||||
16
scripts/_t_direct.py
Normal file
16
scripts/_t_direct.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import asyncio
|
||||
from app.services.translation.service import service
|
||||
from app.redis_client import get_redis
|
||||
async def main():
    """Call the translation service three times and print the usage counter.

    The Redis key 'translation:month:202606' is read before the first call
    and after every call; a falsy value (such as a missing key) prints as 0.
    The third call repeats the first text and prints the result's ``cached``
    flag.
    """
    usage_key = 'translation:month:202606'
    repeated_text = 'Breaking news from Reuters today.'

    redis = get_redis()
    await redis.ping()

    async def show_usage(label):
        # Print the current counter value under the given label.
        print(label, await redis.get(usage_key) or 0, flush=True)

    await show_usage('before:')

    first = await service.translate(repeated_text, source='en', target='zh')
    print(' call 1: engine=', first.engine, 'chars=', first.chars, 'text=', first.text[:40], flush=True)
    await show_usage('after 1:')

    second = await service.translate('The market fell sharply after the announcement.', source='en', target='zh')
    print(' call 2: engine=', second.engine, 'chars=', second.chars, flush=True)
    await show_usage('after 2:')

    third = await service.translate(repeated_text, source='en', target='zh')
    print(' call 3 (cache): cached=', third.cached, 'engine=', third.engine, flush=True)
    await show_usage('after 3:')
|
||||
asyncio.run(main())
|
||||
16
scripts/_tscript.py
Normal file
16
scripts/_tscript.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import asyncio
|
||||
from app.services.translation.service import service
|
||||
from app.redis_client import get_redis
|
||||
async def main():
    """Translate three sample sentences, reporting Redis usage between calls.

    Reads the key 'translation:month:202606' before the first call and after
    each call, printing 0 when the stored value is falsy. The last call
    reuses the first sentence and prints the result's ``cached`` flag.
    """
    counter_key = 'translation:month:202606'
    client = get_redis()
    await client.ping()

    async def current_usage():
        # A falsy stored value (e.g. no key yet) is shown as 0.
        return await client.get(counter_key) or 0

    print('before:', await current_usage(), flush=True)

    result_one = await service.translate(
        'Breaking news from Reuters today.', source='en', target='zh'
    )
    print(
        ' call 1: engine=', result_one.engine,
        'chars=', result_one.chars,
        'text=', result_one.text[:40],
        flush=True,
    )
    print('after 1:', await current_usage(), flush=True)

    result_two = await service.translate(
        'The market fell sharply after the announcement.', source='en', target='zh'
    )
    print(' call 2: engine=', result_two.engine, 'chars=', result_two.chars, flush=True)
    print('after 2:', await current_usage(), flush=True)

    result_three = await service.translate(
        'Breaking news from Reuters today.', source='en', target='zh'
    )
    print(
        ' call 3 (cache): cached=', result_three.cached,
        'engine=', result_three.engine,
        flush=True,
    )
    print('after 3:', await current_usage(), flush=True)
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user