diff --git a/backend/app/services/llm/enrichment.py b/backend/app/services/llm/enrichment.py
index da4faa8..9d80ec7 100644
--- a/backend/app/services/llm/enrichment.py
+++ b/backend/app/services/llm/enrichment.py
@@ -404,18 +404,29 @@ async def enrichment_loop() -> None:
     while True:
         try:
             async with AsyncSessionLocal() as session:
-                # 已翻译完成 + 4 个状态中至少有一个是 pending
-                # 关键:不能按 translated_at 升序 — 老文章已 enrich,新文章 translated_at=NULL(被排到最后)
-                # 改为按 id 升序(新文章 id 大),循环里再过滤 status
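+                # Select only the articles that still need enrichment: translated, and at least one LLM status is NULL or != "ok" (n/a, pending, failed, ...).
+                # (Do not go back to "ORDER BY id ASC + in-memory filtering": already-enriched articles can have smaller ids and fill the LIMIT,
+                #  so enrichment_loop never reaches the n/a articles with larger ids behind them — this actually happened in production.)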
                 rows = (
                     await session.execute(
                         select(Article)
                         .where(
                             Article.translation_status == "ok",
                             Article.title_zh.is_not(None),
+                            # 任一 LLM 状态不是 ok(包括 NULL)
+                            (
+                                (Article.classify_status.is_(None))
+                                | (Article.classify_status != "ok")
+                                | (Article.format_status.is_(None))
+                                | (Article.format_status != "ok")
+                                | (Article.commentary_status.is_(None))
+                                | (Article.commentary_status != "ok")
+                                | (Article.image_ai_status.is_(None))
+                                | (Article.image_ai_status != "ok")
+                            ),
                         )
                         .order_by(Article.id.asc())
-                        .limit(ENRICHMENT_BATCH_SIZE * 20)  # 多取一些找需要 enrich 的
+                        .limit(ENRICHMENT_BATCH_SIZE * 5)  # 比 batch 略多
                     )
                 ).scalars()
                 candidates = list(rows)
diff --git a/docs/android/app-debug.apk b/docs/android/app-debug.apk
new file mode 100644
index 0000000..c1a0320
Binary files /dev/null and b/docs/android/app-debug.apk differ
diff --git a/scripts/append_mem2.py b/scripts/append_mem2.py
new file mode 100644
index 0000000..5cefe35
--- /dev/null
+++ b/scripts/append_mem2.py
@@ -0,0 +1,39 @@
+p = r'C:\Users\Administrator\.mavis\agents\mavis\memory\MEMORY.md'  # memory file that this script patches in place
+with open(p, encoding='utf-8') as f:
+    s = f.read()  # whole file as text; the replacement further down operates on this string
+
+old = '''**已犯**:diary-news healthcheck.py 用了 `f"...%{{http_code}}..."` 写法,3 处全部 NameError,在 detail 解析时 m 还会变 None 引发二次 AttributeError。修法:3 处改普通字符串拼接 + rsplit 拿 status_part。'''
+
+new = '''**已犯**:diary-news healthcheck.py 用了 `f"...%{{http_code}}..."` 写法,3 处全部 NameError,在 detail 解析时 m 还会变 None 引发二次 AttributeError。修法:3 处改普通字符串拼接 + rsplit 拿 status_part。
+
+### global + lambda 闭包 + 条件赋值的隐藏 NameError (2026-06-11)
+Type: pitfall
+
+**坑**:模块顶层 GROUPS 字典里的 `lambda r: check_xxx(r, GLOBAL_VAR)`,GLOBAL_VAR 在 main() 里用 `global X` 声明,只在某个 if 分支里赋值。
+
+**症状**:lambda 调用时 `name 'X' is not defined`,但代码里**所有** global 块都正确声明了。
+
+**根因**:`global X` 只是声明"当前作用域的 X 指向模块 dict",**不**会自动在模块 dict 里创建键。如果 if 分支没走到,模块 dict 里压根没 X 键,lambda 闭包查找时 NameError。
+
+**关键诊断**:`AUTH_TOKEN in module_dict` → False(在 main 跑过后,在没传参的 else 分支里)
+
+**修法**:`global X` 后**立即无条件** `X = ""`(或合理的默认值),保证键存在;再在 if 分支里覆盖。
+
+```python
+def main():
+    global AUTH_TOKEN
+    AUTH_TOKEN = ""  # ← 必须,即使后续不登录也要先写空串
+    if login_success:
+        AUTH_TOKEN = get_token()
+    # else 分支走不到时,lambda 仍能读到 ""
+```
+
+**和 f-string 那个坑的对比**:那次是 f-string 求值时机问题(import 期就报);这次是 lambda deferred 求值 + 模块 dict 缺键,跑 main() 之后才暴露。**两类都要靠"无条件初始化"防御**。'''
+
+if old in s and new not in s:  # `new` starts with `old`, so a re-run must also check that the section is not there yet
+    s = s.replace(old, new, 1)
+    with open(p, 'w', encoding='utf-8') as f:
+        f.write(s)
+    print('appended')
+else:  # anchor paragraph missing, or the section was already appended by an earlier run
+    print('old not found or already appended, skipping')
diff --git a/scripts/check_enrich_logs.py b/scripts/check_enrich_logs.py
new file mode 100644
index 0000000..bcece44
--- /dev/null
+++ b/scripts/check_enrich_logs.py
@@ -0,0 +1,38 @@
+"""Inspect what the enrichment worker is actually doing, based on its docker compose logs."""
+import os, paramiko
+
+c = paramiko.SSHClient()
+# NOTE(review): AutoAddPolicy accepts an unknown host key without verification.
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+
+def run(label, cmd, timeout=20):
+    """Run `cmd` on the remote host and print its stdout, stderr and exit code under a `label` banner."""
+    print(f"\n=== {label} ===")
+    si, so, se = c.exec_command(cmd, timeout=timeout)
+    out = so.read().decode(errors="replace")
+    err = se.read().decode(errors="replace")
+    rc = so.channel.recv_exit_status()
+    print(out.rstrip())
+    # Show stderr and the exit code too, so a failed command is not mistaken for "no matching log lines".
+    if err.strip(): print(f"[stderr] {err.rstrip()}")
+    print(f"-> rc={rc}")
+
+
+try:
+    # 1) Every worker log line mentioning enrich / llm_settings / llm.enabled (first 60 matches)
+    run("1) enrich 关键字全部", "bash -lc 'cd /srv/news && docker compose logs worker 2>&1 | grep -iE \"enrich|llm_settings|llm.enabled\" | head -60'")
+
+    # 2) enrich lines from the last 30 minutes
+    run("2) enrich 最近 30min", "bash -lc 'cd /srv/news && docker compose logs --since 30m worker 2>&1 | grep -iE \"enrich\" | head -40'")
+
+    # 3) asyncio tasks
+    # NOTE(review): `python -c` starts a fresh interpreter inside the container, so all_tasks() can only list
+    # that snippet's own task — it does not show the tasks of the already-running worker process.
+    run("3) 当前 asyncio tasks", "bash -lc 'cd /srv/news && docker compose exec -T worker python -c \"\nimport asyncio\nasync def m():\n    for t in asyncio.all_tasks():\n        if not t.done(): print(t.get_name(), t.done())\nasyncio.run(m())\n\"'")
+finally:
+    # Always release the SSH connection, even when a command raises (for example on a read timeout).
+    c.close()
diff --git a/scripts/check_errors.py b/scripts/check_errors.py
new file mode 100644
index 0000000..53e95dd
--- /dev/null
+++ b/scripts/check_errors.py
@@ -0,0 +1,11 @@
+"""Show every ERROR/WARNING/Traceback line plus the enrich_article logs emitted after 12:35."""
+import os, paramiko, datetime
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+# Use --since 24h (wide enough to cover everything after 12:35); httpx lines are filtered out and output is capped at 60 lines
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose logs --since 24h worker 2>&1 | grep -iE \"ERROR|WARNING|Traceback|exception|enrich_article|llm|enabled|skip|disabled\" | grep -v httpx | head -60'", timeout=20)
+print(so.read().decode(errors="replace"))
+c.close()
diff --git a/scripts/check_flow.py b/scripts/check_flow.py
new file mode 100644
index 0000000..a3fbc3c
--- /dev/null
+++ b/scripts/check_flow.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""SSH 上服务器,快速检查:
+  1. .env 里的 AGNES_API_KEY 是否已配(不要再打印 key 值)
+  2. worker 进程是否在跑、enrichment_loop 任务是否在跑
+  3. worker 日志最近 200 行是否出现 enrich_article / classify / commentary / format 等关键字
+  4. 翻译/enrich 各自最后处理时间
+  5. enrichment_loop 配置(LLM enable 状态)
+"""
+from __future__ import annotations
+import sys
+import os
+import paramiko
+
+HOST = os.environ.get("REMOTE_HOST", "207.57.129.228")  # connection settings; each one can be overridden through the environment
+PORT = int(os.environ.get("REMOTE_PORT", "19717"))
+USER = os.environ.get("REMOTE_USER", "root")
+PASS = os.environ.get("REMOTE_PASS", "")  # no built-in password: it must come from the environment
+
+if not PASS:  # stop early instead of attempting a login with an empty password
+    print("ERROR: REMOTE_PASS not set", file=sys.stderr)
+    sys.exit(1)
+
+def run(c, cmd, timeout=30, label=""):
+    """Run `cmd` through SSH client `c` (with a pty), echo the command, its output and exit code, and return stdout."""
+    if label: print(f"\n=== {label} ===")
+    print(f"$ {cmd}")
+    _, stdout, stderr = c.exec_command(cmd, timeout=timeout, get_pty=True)
+    out, err = (stream.read().decode(errors="replace") for stream in (stdout, stderr))  # stdout is drained first, then stderr
+    rc = stdout.channel.recv_exit_status()
+    if out.strip(): print(out.rstrip())
+    if err.strip(): print(f"[stderr] {err.rstrip()}")
+    print(f"-> rc={rc}")
+    return out
+
+print(f"连 {USER}@{HOST}:{PORT} ...")
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect(HOST, port=PORT, username=USER, password=PASS,
+          timeout=30, banner_timeout=30, auth_timeout=30,
+          allow_agent=False, look_for_keys=False)  # password auth only: no SSH agent, no local key files
+print("✓ 连上\n")
+
+# 1) AGNES_API_KEY status: report only its length and never print any part of the value (everything after the first "=" counts as the value)
+run(c, "cd /srv/news && "
+       "grep -E '^AGNES_(API_KEY|BASE_URL|CHAT_MODEL|IMAGE_MODEL)=' .env | "
+       "awk -F= 'BEGIN{FS=\"=\"} { "
+       "  if ($1==\"AGNES_API_KEY\") { k=substr($0,index($0,\"=\")+1); gsub(/\"/,\"\",k); "
+       "    printf \"  %s = (length=%d)\\n\", $1, length(k) "
+       "  } else { print \"  \" $0 }"
+       "}'", label="1) .env 中 Agnes 相关配置")
+
+# 2) Worker process + enrichment_loop status (compose service state, python processes in the container, /proc/1/status)
+run(c, "cd /srv/news && "
+       "echo '--- docker compose ps ---' && "
+       "docker compose ps worker && "
+       "echo '--- worker 容器内进程 ---' && "
+       "docker compose exec -T worker sh -c 'ps -ef | grep -E \"python|app.workers\" | grep -v grep' && "
+       "echo '--- enrichment 任务在 asyncio 队列 ---' && "
+       "docker compose exec -T worker sh -c 'cat /proc/1/status 2>/dev/null | head -3' ",
+       label="2) Worker 进程 + enrichment_loop 状态")
+
+# 3) LLM enable state (admin_llm_settings table); .env is sourced first so psql gets POSTGRES_USER / POSTGRES_DB
+run(c, "cd /srv/news && set -a && . ./.env && set +a && "
+       "docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -t -A -F $'\\t' -c "
+       "\"SELECT key, value, updated_at FROM admin_llm_settings ORDER BY key;\" 2>&1 | head -10",
+       label="3) admin_llm_settings(LLM enable 状态)")
+
+# 4) Worker logs: fetches the most recent 200 lines and shows the last 80 of them
+run(c, "cd /srv/news && "
+       "docker compose logs --no-color --tail=200 worker 2>&1 | tail -80",
+       label="4) worker 日志(最近 200 行)")
+
+# 5) Recent translation / enrichment activity; the *_last_5min counters all filter on translated_at, plus pending/n/a backlog counts
+run(c, "cd /srv/news && set -a && . ./.env && set +a && "
+       "docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -t -A -F $'\\t' -c \""
+       "SELECT 'translated_last_5min=' || count(*) FROM articles WHERE translated_at > now() - interval '5 minute';"
+       "SELECT 'classified_last_5min=' || count(*) FROM articles WHERE classify_status='ok' AND translated_at > now() - interval '5 minute';"
+       "SELECT 'format_last_5min=' || count(*) FROM articles WHERE format_status='ok' AND translated_at > now() - interval '5 minute';"
+       "SELECT 'commentary_last_5min=' || count(*) FROM articles WHERE commentary_status='ok' AND translated_at > now() - interval '5 minute';"
+       "SELECT 'image_ai_last_5min=' || count(*) FROM articles WHERE image_ai_status='ok' AND translated_at > now() - interval '5 minute';"
+       "SELECT 'pending_classify=' || count(*) FROM articles WHERE classify_status IN ('pending','n/a') AND translation_status='ok';"
+       "SELECT 'pending_format=' || count(*) FROM articles WHERE format_status IN ('pending','n/a') AND translation_status='ok';"
+       "\"",
+       label="5) 最近 5 分钟 LLM 步骤活跃度")
+
+c.close()
+print("\n✓ 检查完成")
diff --git a/scripts/check_llm_state.py b/scripts/check_llm_state.py
new file mode 100644
index 0000000..9c7e4ee
--- /dev/null
+++ b/scripts/check_llm_state.py
@@ -0,0 +1,72 @@
+"""SSH 上去看 llm_settings + enrichment_loop 状态(v2: 不用复杂 quote)"""
+import os
+import sys
+import paramiko
+
+HOST = "207.57.129.228"  # connection target is fixed in this script; only the password comes from the environment
+PORT = 19717
+USER = "root"
+PASS = os.environ.get("REMOTE_PASS", "")
+
+if not PASS:  # stop early instead of attempting a login with an empty password
+    print("REMOTE_PASS not set", file=sys.stderr)
+    sys.exit(1)
+
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect(HOST, port=PORT, username=USER, password=PASS,
+          timeout=30, allow_agent=False, look_for_keys=False)
+print("✓ 连上\n")
+
+
+def run(label, cmd, timeout=30):
+    """Run `cmd` via the module-level client `c`; print a banner, the command (cut to 200 chars), its output and exit code."""
+    print(f"\n=== {label} ===")
+    print("$ " + (cmd if len(cmd) <= 200 else cmd[:200] + "..."))
+    try:
+        _, stdout, stderr = c.exec_command(cmd, timeout=timeout)
+        out, err = (stream.read().decode(errors="replace") for stream in (stdout, stderr))
+        rc = stdout.channel.recv_exit_status()
+    except Exception as e:  # best-effort diagnostics: report the failure and return without output
+        print(f"[exception] {type(e).__name__}: {e}")
+        return
+    if out.strip(): print(out.rstrip())
+    if err.strip(): print(f"[stderr] {err.rstrip()}")
+    print(f"-> rc={rc}")
+
+
+# 1) Write a shell script to the remote host and run it with `bash file.sh` (this sidesteps nested shell quoting).
+#    All SQL and checks are collected in /tmp/check_llm.sh.
+shell_script = r"""#!/bin/bash
+set -a
+. /srv/news/.env
+set +a
+cd /srv/news
+echo "--- llm_settings ---"
+docker compose exec -T postgres psql -U "$POSTGRES_USER" -d "$POSTGRES_DB" \
+  -c "SELECT id, enabled, chat_model, image_model, interval_sec, updated_at FROM llm_settings;"
+echo "--- worker asyncio tasks ---"
+docker compose exec -T worker python <<'PYEOF'
+import asyncio
+async def main():
+    for t in asyncio.all_tasks():
+        if not t.done():
+            print(f'name={t.get_name()!r} done={t.done()}')
+asyncio.run(main())
+PYEOF
+echo "--- worker 日志(enrich 关键字) ---"
+docker compose logs --tail=500 worker 2>&1 | grep -iE "enrich|llm_settings|enrichment_loop|Traceback" | head -40
+"""
+
+# Write the script to the remote host through a quoted heredoc (no expansion happens while writing)
+si, so, se = c.exec_command("cat > /tmp/check_llm.sh <<'EOFCAT'\n" + shell_script + "\nEOFCAT", timeout=10)
+so.read()
+se.read()
+so.channel.recv_exit_status()  # waits for completion; the exit status itself is not checked
+print("✓ shell 脚本已写入 /tmp/check_llm.sh\n")
+
+# Run it
+run("llm_settings + asyncio + 日志", "bash /tmp/check_llm.sh", timeout=60)
+
+c.close()
+print("\n✓ 完成")
diff --git a/scripts/check_worker_signal.py b/scripts/check_worker_signal.py
new file mode 100644
index 0000000..81642f1
--- /dev/null
+++ b/scripts/check_worker_signal.py
@@ -0,0 +1,29 @@
+"""Search the worker logs for shutdown / signal / cancel keywords."""
+import os, paramiko
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+# 1) shutdown / stop / signal / cancel keywords (first 20 matches)
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose logs worker 2>&1 | grep -iE \"shutdown|stop|signal|cancel|stopping\" | head -20'", timeout=20)
+print("=== 1) shutdown/stop/signal/cancel 关键字 ===")
+print(so.read().decode(errors="replace"))
+
+# 2) Log lines matching news.llm.enrichment or enrich (first 30 matches)
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose logs worker 2>&1 | grep -E \"news.llm.enrichment|enrich\" | head -30'", timeout=20)
+print("\n=== 2) news.llm.enrichment 全部日志 ===")
+print(so.read().decode(errors="replace"))
+
+# 3) Log lines matching news.worker (first 20 matches)
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose logs worker 2>&1 | grep -E \"news.worker\" | head -20'", timeout=20)
+print("\n=== 3) news.worker 全部日志 ===")
+print(so.read().decode(errors="replace"))
+
+# 4) Container process state: thread ids of PID 1 and the head of /proc/1/status
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose exec -T worker sh -c \"ls /proc/1/task/ | head -20; echo ---; cat /proc/1/status | head -5\"'", timeout=20)
+print("\n=== 4) 容器进程状态 ===")
+print(so.read().decode(errors="replace"))
+
+c.close()
diff --git a/scripts/check_worker_startup.py b/scripts/check_worker_startup.py
new file mode 100644
index 0000000..ebb25ba
--- /dev/null
+++ b/scripts/check_worker_startup.py
@@ -0,0 +1,34 @@
+"""看 worker 启动日志 + Traceback 完整内容"""
+import os, paramiko
+
+PASS = os.environ.get("REMOTE_PASS", "")  # NOTE(review): an unset REMOTE_PASS becomes "" and is passed to connect() as-is
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect("207.57.129.228", port=19717, username="root", password=PASS,
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+
+def run(label, cmd, timeout=30):
+    """Run `cmd` on the remote host via the module-level client `c`; print a banner, stdout, then any stderr."""
+    print(f"\n=== {label} ===")
+    _, stdout, stderr = c.exec_command(cmd, timeout=timeout)
+    out, err = (stream.read().decode(errors="replace") for stream in (stdout, stderr))  # stdout first, then stderr
+    print(out.rstrip())
+    if err.strip():
+        print(f"[stderr] {err.rstrip()}")
+
+
+# 1) The first 60 log lines of the worker, i.e. its startup output (the important part)
+run("1) worker 启动日志(前 60 行)", "bash -lc 'cd /srv/news && docker compose logs worker 2>&1 | head -60'")
+
+# 2) Tracebacks with 25 lines of trailing context each (capped at 100 lines in total)
+run("2) Traceback 完整内容", "bash -lc 'cd /srv/news && docker compose logs worker 2>&1 | grep -A 25 Traceback | head -100'")
+
+# 3) Container start time, restart count and state
+run("3) container 启动时间 + restart count",
+    "docker inspect news-aggregator-worker-1 --format '{{.State.StartedAt}} restarts={{.RestartCount}} status={{.State.Status}}'")
+
+# 4) Current time on the server, for comparison with the log timestamps
+run("4) 服务器当前时间", "date '+%Y-%m-%d %H:%M:%S'")
+
+c.close()
diff --git a/scripts/deploy_enrich_fix.py b/scripts/deploy_enrich_fix.py
new file mode 100644
index 0000000..77f585a
--- /dev/null
+++ b/scripts/deploy_enrich_fix.py
@@ -0,0 +1,68 @@
+"""Copy the patched enrichment.py to the server, rebuild the worker image and restart the worker."""
+import os, paramiko
+import base64
+import sys
+
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+
+def run(label, cmd, timeout=180):
+    """Run `cmd` on the remote host, print stdout/stderr/exit code under a `label` banner and return the exit code."""
+    print(f"\n=== {label} ===")
+    si, so, se = c.exec_command(cmd, timeout=timeout)
+    out = so.read().decode(errors="replace")
+    err = se.read().decode(errors="replace")
+    rc = so.channel.recv_exit_status()
+    if out.strip(): print(out.rstrip())
+    if err.strip(): print(f"[stderr] {err.rstrip()}")
+    print(f"-> rc={rc}")
+    return rc
+
+
+def must(label, cmd, timeout=180):
+    """Run a deployment step through run() and abort the script with exit status 1 if the step fails."""
+    rc = run(label, cmd, timeout=timeout)
+    if rc != 0:
+        # Stop here: carrying on would rebuild/restart the worker from a half-applied change and still report success.
+        c.close()
+        print(f"deploy aborted: {label!r} failed with rc={rc}", file=sys.stderr)
+        sys.exit(1)
+
+
+# 1) Upload the locally patched enrichment.py as enrichment.py.new (base64 keeps the content clear of shell quoting)
+local = r"D:\selftools\diary-news\backend\app\services\llm\enrichment.py"
+with open(local, encoding="utf-8") as f:
+    content = f.read()
+b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
+must("1) 复制 enrichment.py.new", f"bash -lc 'mkdir -p /srv/news/backend/app/services/llm && echo {b64} | base64 -d > /srv/news/backend/app/services/llm/enrichment.py.new && wc -l /srv/news/backend/app/services/llm/enrichment.py.new'", timeout=30)
+
+# 2) Back up the current file with a timestamp suffix, then move the upload into place
+must("2) 备份 + 覆盖", "bash -lc 'cd /srv/news/backend/app/services/llm && cp -f enrichment.py enrichment.py.bak.$(date +%Y%m%d_%H%M%S) && mv enrichment.py.new enrichment.py && ls -la enrichment.py*'", timeout=10)
+
+# 3) Rebuild the worker image
+must("3) docker compose build worker(增量,会很快)", "cd /srv/news && docker compose build worker", timeout=180)
+
+# 4) Restart the worker only
+must("4) docker compose up -d worker(只重启 worker)", "cd /srv/news && docker compose up -d worker", timeout=60)
+
+# 5) Give the worker a moment to start
+run("5) 等 5 秒", "sleep 5 && date '+%H:%M:%S'")
+
+# 6) enrichment_loop startup logs (informational: grep exits non-zero when nothing matches, so this must not abort)
+run("6) enrichment_loop 启动日志", "cd /srv/news && docker compose logs --tail=20 worker 2>&1 | grep -iE 'enrich|started|enabled'")
+
+# 7) Wait 30 seconds to see whether enrichment starts
+run("7) 等 30 秒", "sleep 30 && date '+%H:%M:%S'")
+
+# 8) Look for enrich_article / classify / commentary log lines
+run("8) enrich_article 日志", "cd /srv/news && docker compose logs --tail=200 worker 2>&1 | grep -E 'enrich_article|classify|commentary' | head -20")
+
+# 9) Current classify_status distribution, to see whether the n/a count is shrinking
+run("9) 当前 n/a 数量", "cd /srv/news && docker compose exec -T postgres psql -U news -d news -c \"SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;\"")
+
+c.close()
+print("\n🎉 修复完成")
diff --git a/scripts/diag_enrich_inplace.py b/scripts/diag_enrich_inplace.py
new file mode 100644
index 0000000..3ba8ee7
--- /dev/null
+++ b/scripts/diag_enrich_inplace.py
@@ -0,0 +1,64 @@
+"""Write a diagnostic script into the worker container and run it there."""
+import os, paramiko, base64
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+py = r"""import asyncio, sys
+sys.path.insert(0, '/app')
+from sqlalchemy import select
+from app.database import AsyncSessionLocal
+from app.models.article import Article
+
+async def main():
+    async with AsyncSessionLocal() as session:
+        rows = (await session.execute(
+            select(Article)
+            .where(Article.translation_status == "ok", Article.title_zh.is_not(None))
+            .order_by(Article.id.asc())
+            .limit(160)
+        )).scalars()
+        candidates = list(rows)
+        print(f"candidates={len(candidates)}")
+        todo = []
+        for a in candidates:
+            statuses = [a.format_status or "pending",
+                        a.classify_status or "pending",
+                        a.image_ai_status or "pending",
+                        a.commentary_status or "pending"]
+            if any(s in ("pending","failed","n/a") for s in statuses):
+                todo.append(a.id)
+            if len(todo) >= 8: break
+        print(f"todo={len(todo)} ids={todo[:5]}")
+        if candidates:
+            a = candidates[0]
+            print(f"first: id={a.id} tr={a.translation_status} fmt={a.format_status} cls={a.classify_status} img={a.image_ai_status} cmt={a.commentary_status}")
+
+asyncio.run(main())
+"""
+
+b64 = base64.b64encode(py.encode("utf-8")).decode("ascii")  # base64 keeps the script clear of shell quoting
+
+# Step 1: write the file on the host first (so it does not depend on files persisting inside docker exec)
+si, so, se = c.exec_command(f"bash -lc 'echo {b64} | base64 -d > /srv/news/diag.py && ls -la /srv/news/diag.py && cat /srv/news/diag.py | head -3'", timeout=15)
+print("=== step 1: write file ===")
+print(so.read().decode(errors="replace"))
+
+# Step 2: look for it from inside the container (assumes /srv/news might be mounted into the worker, in which case it would appear under /app or /)
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose exec -T worker sh -c \"ls /tmp/diag.py 2>/dev/null; ls /app/diag.py 2>/dev/null; ls /diag.py 2>/dev/null; find / -name diag.py 2>/dev/null | head -5\"'", timeout=30)
+print("=== step 2: find diag.py in container ===")
+print(so.read().decode(errors="replace"))
+
+# Step 3: stream the file into the container through docker compose exec stdin
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose exec -T worker sh -c \"cat > /tmp/diag.py\" < diag.py && docker compose exec -T worker ls -la /tmp/diag.py'", timeout=30)
+print("=== step 3: copy file into container ===")
+print(so.read().decode(errors="replace"))
+
+# Step 4: run it and show the last 20 lines of output
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose exec -T worker python /tmp/diag.py 2>&1 | tail -20'", timeout=30)
+print("=== step 4: run ===")
+print(so.read().decode(errors="replace"))
+
+c.close()
diff --git a/scripts/diag_enrich_v2.py b/scripts/diag_enrich_v2.py
new file mode 100644
index 0000000..c9baf8c
--- /dev/null
+++ b/scripts/diag_enrich_v2.py
@@ -0,0 +1,62 @@
+"""Re-run the enrichment_loop query on the live system and inspect the resulting order."""
+import os, paramiko, base64
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+py = r"""import asyncio, sys
+sys.path.insert(0, '/app')
+from sqlalchemy import select
+from app.database import AsyncSessionLocal
+from app.models.article import Article
+
+async def main():
+    async with AsyncSessionLocal() as session:
+        # 跟 enrichment_loop.py:410 一样的查询
+        rows = (await session.execute(
+            select(Article)
+            .where(Article.translation_status == "ok", Article.title_zh.is_not(None))
+            .order_by(Article.id.asc())
+            .limit(160)
+        )).scalars()
+        candidates = list(rows)
+        # 状态分布
+        cls_dist = {}
+        for a in candidates:
+            s = a.classify_status or "NULL"
+            cls_dist[s] = cls_dist.get(s, 0) + 1
+        print(f"candidates={len(candidates)}")
+        print(f"classify 分布: {cls_dist}")
+        # 头 5 篇的 id + status
+        for a in candidates[:5]:
+            print(f"  id={a.id} cls={a.classify_status} fmt={a.format_status} cmt={a.commentary_status} img={a.image_ai_status}")
+        # 尾 5 篇
+        print("--- last 5 ---")
+        for a in candidates[-5:]:
+            print(f"  id={a.id} cls={a.classify_status} fmt={a.format_status} cmt={a.commentary_status} img={a.image_ai_status}")
+        # todo 计算
+        todo = []
+        for a in candidates:
+            statuses = [a.format_status or "pending",
+                        a.classify_status or "pending",
+                        a.image_ai_status or "pending",
+                        a.commentary_status or "pending"]
+            if any(s in ("pending","failed","n/a") for s in statuses):
+                todo.append(a.id)
+            if len(todo) >= 8: break
+        print(f"todo={len(todo)} ids={todo}")
+
+asyncio.run(main())
+"""
+
+b64 = base64.b64encode(py.encode("utf-8")).decode("ascii")  # base64 keeps the script clear of shell quoting
+si, so, se = c.exec_command(f"bash -lc 'echo {b64} | base64 -d > /srv/news/diag.py'", timeout=10)
+so.read()  # drain stdout so the write has finished before the next command
+
+# Copy the script into the worker container with docker cp, run it, and show the first 30 lines of output
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && CID=$(docker compose ps -q worker) && docker cp diag.py $CID:/tmp/diag.py && docker compose exec -T worker python /tmp/diag.py 2>&1 | head -30'", timeout=30)
+print(so.read().decode(errors="replace"))
+
+c.close()
diff --git a/scripts/diag_env.py b/scripts/diag_env.py
new file mode 100644
index 0000000..fb7758b
--- /dev/null
+++ b/scripts/diag_env.py
@@ -0,0 +1,24 @@
+"""Check the POSTGRES_USER setting in the server's .env file."""
+import os, paramiko
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+# Show the POSTGRES_* lines of .env (NOTE(review): the pattern also matches POSTGRES_PASSWORD, so that line is printed too)
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && grep -E \"^POSTGRES_(USER|DB|PASSWORD)=\" .env'", timeout=10)
+print("=== .env ===")
+print(so.read().decode(errors="replace"))
+
+# Source .env with auto-export and echo the resulting variables
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && set -a && . ./.env && set +a && echo \"USER=$POSTGRES_USER DB=$POSTGRES_DB\"'", timeout=10)
+print("=== set -a 之后 ===")
+print(so.read().decode(errors="replace"))
+
+# Simplest variant: hard-coded user/db and PGPASSWORD passed with -e; expects /tmp/diag.sql to exist in the postgres container already
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && docker compose exec -T -e PGPASSWORD=news postgres psql -U news -d news -f /tmp/diag.sql'", timeout=15)
+print("=== 硬编码 USER/DB ===")
+print(so.read().decode(errors="replace"))
+
+c.close()
diff --git a/scripts/diag_status_dist.py b/scripts/diag_status_dist.py
new file mode 100644
index 0000000..746c733
--- /dev/null
+++ b/scripts/diag_status_dist.py
@@ -0,0 +1,27 @@
+"""Work out what pending_classify=648 really means by dumping the status distributions (this version uses single quotes)."""
+import os, paramiko
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+cmds = [
+    ("1) llm_settings", "SELECT id, enabled, chat_model, image_model, interval_sec, updated_at FROM llm_settings;"),
+    ("2) translation_status", "SELECT translation_status, count(*) FROM articles GROUP BY translation_status ORDER BY count(*) DESC;"),
+    ("3) classify_status", "SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;"),
+    ("4) format_status", "SELECT format_status, count(*) FROM articles GROUP BY format_status ORDER BY count(*) DESC;"),
+    ("5) commentary_status", "SELECT commentary_status, count(*) FROM articles GROUP BY commentary_status ORDER BY count(*) DESC;"),
+    ("6) image_ai_status", "SELECT image_ai_status, count(*) FROM articles GROUP BY image_ai_status ORDER BY count(*) DESC;"),
+    ("7) 已 enrich 比例(translation=ok AND 4 status 全 ok)", "SELECT count(*) AS fully_enriched FROM articles WHERE translation_status='ok' AND classify_status='ok' AND format_status='ok' AND image_ai_status='ok' AND commentary_status='ok';"),
+    ("8) 翻译 ok 但 classify 是 n/a", "SELECT count(*) FROM articles WHERE translation_status='ok' AND classify_status='n/a';"),
+]
+for label, sql in cmds:
+    quoted_sql = sql.replace("'", "'\\''")  # SQL literals such as 'ok' / 'n/a' would otherwise end the single-quoted bash -lc '...' argument and lose their quotes
+    cmd = f"bash -lc 'cd /srv/news && set -a && . ./.env && set +a && docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -c \"{quoted_sql}\"'"  # set -a; . ./.env; set +a exports the .env variables
+    si, so, se = c.exec_command(cmd, timeout=20)
+    out = so.read().decode(errors="replace")
+    print(f"=== {label} ===")
+    print(out.rstrip())
+    print()
+c.close()
diff --git a/scripts/diag_status_more.py b/scripts/diag_status_more.py
new file mode 100644
index 0000000..029ec8e
--- /dev/null
+++ b/scripts/diag_status_more.py
@@ -0,0 +1,13 @@
+"""Copy diag.sql into the postgres container with docker cp and run it through psql."""
+import os, paramiko
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+# docker cp the file in, then run it; .env is sourced first because $POSTGRES_USER / $POSTGRES_DB are expanded by the remote shell, not by docker compose
+si, so, se = c.exec_command("bash -lc 'cd /srv/news && set -a && . ./.env && set +a && docker cp diag.sql $(docker compose ps -q postgres):/tmp/diag.sql && docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -f /tmp/diag.sql'", timeout=30)
+print(so.read().decode(errors="replace"))
+
+c.close()
diff --git a/scripts/diag_step.py b/scripts/diag_step.py
new file mode 100644
index 0000000..3a7c241
--- /dev/null
+++ b/scripts/diag_step.py
@@ -0,0 +1,25 @@
+"""Step-by-step debugging: fix the environment used for the psql call."""
+import os, paramiko
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): unknown host keys are accepted without verification
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+
+def run(label, cmd, timeout=30):
+    """Run `cmd` on the remote host via the module-level client `c`; print a banner, non-empty stdout/stderr and the exit code."""
+    print(f"\n=== {label} ===")
+    _, stdout, stderr = c.exec_command(cmd, timeout=timeout)
+    out, err = (stream.read().decode(errors="replace") for stream in (stdout, stderr))  # stdout first, then stderr
+    rc = stdout.channel.recv_exit_status()
+    if out.strip(): print(out.rstrip())
+    if err.strip(): print(f"[stderr] {err.rstrip()}")
+    print(f"-> rc={rc}")
+
+
+# Run psql through docker compose exec; .env is sourced first because $POSTGRES_USER / $POSTGRES_DB are expanded by the remote shell before docker compose runs
+run("step 4 fix: 用 docker compose exec",
+    "bash -lc 'cd /srv/news && set -a && . ./.env && set +a && docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -f /tmp/diag.sql'")
+
+c.close()
diff --git a/scripts/fix_enrich_loop.py b/scripts/fix_enrich_loop.py
new file mode 100644
index 0000000..29407d8
--- /dev/null
+++ b/scripts/fix_enrich_loop.py
@@ -0,0 +1,266 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""一键修复 diary-news 的 enrichment_loop bug。
+
+真因:`services/llm/enrichment.py:405-419` 的查询用
+`order by id ASC + limit 160 + 内存过滤 status`,但
+- 162 篇最早的文章已经被 enrich 完(4 status 全 ok)
+- 662 篇 n/a 的文章 id > 388354,在 160 limit 之外
+- 每次 while True 循环都看到这 162 篇已 ok,filter 命中 0 → todo=0 → continue 死循环
+
+修法:把 where 条件改成"任一 LLM 状态 != 'ok'",精准定位待 enrich 的文章。
+
+用法:
+  $env:REMOTE_PASS = '<root 密码>'
+  python scripts/fix_enrich_loop.py [--host ...] [--port ...] [--user ...] [--compose-dir ...] [--wait 120]
+
+退出码:
+  0 = 修复 + enrich 跑起来(有 n/a → ok 的变化)
+  1 = 修复但 enrich 未观察到跑(可能是 0 候选 / LLM 调不通)
+  2 = 部署失败
+"""
+from __future__ import annotations
+
+import argparse
+import base64
+import os
+import sys
+import time
+
+import paramiko
+
+# ============== Local enrichment.py: read whole and uploaded verbatim (the fix only touches where/limit) ==============
+# Resolved relative to this script (<repo>/scripts/..) instead of one machine's drive; override via ENRICHMENT_PY_LOCAL.
+ENRICHMENT_PY_LOCAL = os.environ.get("ENRICHMENT_PY_LOCAL") or os.path.normpath(os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "..", "backend", "app", "services", "llm", "enrichment.py"))
+
+# ============== Configuration ==============
+DEFAULT_HOST = "207.57.129.228"
+DEFAULT_PORT = 19717
+DEFAULT_USER = "root"
+DEFAULT_COMPOSE = "/srv/news"
+DEFAULT_WAIT_SEC = 120  # seconds to wait after deploying before checking whether enrich is running
+
+
+def ssh_exec(c: paramiko.SSHClient, cmd: str, timeout: int = 300) -> tuple[int, str, str]:
+    """Run `cmd` on the remote host with a pty requested; return (exit status, stdout text, stderr text)."""
+    _stdin, stdout, stderr = c.exec_command(cmd, timeout=timeout, get_pty=True)
+    # Both streams are read to the end (stdout first) before the exit status is collected;
+    # undecodable bytes are replaced rather than raising.
+    texts = [stream.read().decode(errors="replace") for stream in (stdout, stderr)]
+    return stdout.channel.recv_exit_status(), texts[0], texts[1]
+
+
+def put_file(c: paramiko.SSHClient, remote_path: str, content_bytes: bytes) -> None:
+    """Write `content_bytes` to `remote_path` (parent dirs created); the payload travels base64-encoded in the command."""
+    import shlex  # local import keeps this fix self-contained
+    # Quote the path and the whole script so spaces/metacharacters are not re-parsed by the remote shell.
+    q = shlex.quote(remote_path)
+    b64 = base64.b64encode(content_bytes).decode("ascii")
+    script = f'mkdir -p "$(dirname {q})" && echo {b64} | base64 -d > {q} && wc -l {q}'
+    rc, out, err = ssh_exec(c, f"bash -lc {shlex.quote(script)}", timeout=60)
+    if rc != 0:
+        # ssh_exec requests a pty, so remote error text usually arrives on stdout: print both streams.
+        print(f"[stderr] {(out + err).rstrip()}")
+        raise RuntimeError(f"put_file 失败 rc={rc}")
+    print(f"  ✓ 写入 {remote_path}  {out.strip()}")
+
+
+def put_text_via_file(c: paramiko.SSHClient, remote_path: str, text: str) -> None:
+    """Encode `text` as UTF-8 and pass the bytes to put_file, which ships them base64-encoded."""
+    put_file(c, remote_path, content_bytes=text.encode("utf-8"))
+
+
+def get_text(c: paramiko.SSHClient, remote_path: str) -> str | None:
+    """Return the remote file's text, or None when it is missing, unreadable or blank."""
+    import shlex  # local import keeps this fix self-contained
+    # Deliberately no pty (unlike ssh_exec): a pty rewrites "\n" as "\r\n", so the text never equalled the local file.
+    _stdin, stdout, _stderr = c.exec_command(f"cat -- {shlex.quote(remote_path)}", timeout=30)
+    out = stdout.read().decode("utf-8", errors="replace")
+    return out if stdout.channel.recv_exit_status() == 0 and out.strip() else None
+
+
+# ============== 主流程 ==============
+def main() -> int:
+    """Upload the local enrichment.py, rebuild and restart the worker, then check that enrich runs (returns 0/1/2)."""
+    import re  # local: the module only imports re under __main__, so an imported main() would hit NameError
+    ap = argparse.ArgumentParser(description="一键修复 enrichment_loop bug")
+    ap.add_argument("--host", default=os.environ.get("REMOTE_HOST", DEFAULT_HOST))
+    ap.add_argument("--port", type=int, default=int(os.environ.get("REMOTE_PORT", DEFAULT_PORT)))
+    ap.add_argument("--user", default=os.environ.get("REMOTE_USER", DEFAULT_USER))
+    ap.add_argument("--password", default=os.environ.get("REMOTE_PASS", ""))
+    ap.add_argument("--compose-dir", default=os.environ.get("COMPOSE_DIR", DEFAULT_COMPOSE))
+    ap.add_argument("--wait", type=int, default=DEFAULT_WAIT_SEC,
+                    help="部署后等多少秒再检查 enrich 跑了多少(默认 120)")
+    ap.add_argument("--no-build", action="store_true",
+                    help="跳过 docker build(只重启容器,代码改动不会被采纳)")
+    ap.add_argument("--no-restart", action="store_true",
+                    help="跳过 docker up -d(只复制代码 + build)")
+    ap.add_argument("--dry-run", action="store_true",
+                    help="只比对文件,不改不重启")
+    ap.add_argument("--force-recreate", action="store_true",
+                    help="服务器文件不存在时,直接创建新文件(不要求存在)")
+    args = ap.parse_args()
+
+    if not args.password:
+        print("ERROR: 需要 REMOTE_PASS 环境变量或 --password", file=sys.stderr)
+        return 2
+
+    print(f"==== 目标: {args.user}@{args.host}:{args.port} ====")
+    print(f"==== compose: {args.compose_dir} ====")
+    print(f"==== 修复前等待: {args.wait}s ====\n")
+
+    # 0) Read the locally fixed enrichment.py
+    if not os.path.exists(ENRICHMENT_PY_LOCAL):
+        print(f"ERROR: 本地文件不存在 {ENRICHMENT_PY_LOCAL}", file=sys.stderr)
+        return 2
+    with open(ENRICHMENT_PY_LOCAL, encoding="utf-8") as f:
+        local_content = f.read()
+    print(f"[1/6] 本地 enrichment.py {len(local_content)} 字符,{local_content.count(chr(10))} 行")
+
+    # 1) Open the SSH connection
+    print(f"\n[2/6] 连接 SSH ...")
+    c = paramiko.SSHClient()
+    c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    try:
+        c.connect(args.host, port=args.port, username=args.user, password=args.password,
+                  timeout=30, banner_timeout=30, auth_timeout=30,
+                  allow_agent=False, look_for_keys=False)
+    except Exception as e:
+        print(f"  ✗ SSH 连接失败: {e}")
+        return 2
+    print(f"  ✓ SSH 连接成功")
+
+    remote_py = f"{args.compose_dir}/backend/app/services/llm/enrichment.py"
+
+    # 2) Fetch the server's copy and compare it with the local one
+    print(f"\n[3/6] 比对服务器版 enrichment.py ...")
+    remote_content = get_text(c, remote_py)
+    if remote_content is None:
+        # Do not bail out immediately: first list llm/ (a .bak backup may still be there)
+        print(f"  ! 服务器文件不存在: {remote_py}")
+        llm_dir = f"{args.compose_dir}/backend/app/services/llm/"
+        rc, ls_out, _ = ssh_exec(c, f"bash -lc 'ls -la {llm_dir} 2>&1'", timeout=15)
+        print(f"\n  --- {llm_dir} 目录内容 ---")
+        print(ls_out.rstrip())
+        print(f"\n  解读:")
+        print(f"    - 如果有 enrichment.py.bak.* 备份在 → 上次部署成功过,文件被外部操作清掉")
+        print(f"    - 如果连 .bak 都没有 → llm/ 目录可能被整体删了,需要重新创建")
+        print(f"    - 如果是 ENOENT 整个目录 → /srv/news/backend/app 目录有问题")
+        print(f"\n  建议 SSH 上去确认:")
+        print(f"    ssh root@{args.host} -p {args.port}")
+        print(f"    ls -la {args.compose_dir}/backend/app/services/llm/")
+        print(f"    ls -la {args.compose_dir}/  # 看 backend/ 是否在")
+        print(f"\n  想要我重写脚本继续往下走(把本地版创建为新文件),加 --force-recreate 即可:")
+        print(f"    python {sys.argv[0]} --force-recreate")
+        if args.dry_run or not args.force_recreate:  # a dry run must never create the file
+            c.close()
+            return 2
+        print(f"\n  --force-recreate 启用,直接创建新文件(不备份)")
+        put_text_via_file(c, remote_py, local_content)
+        print(f"  ✓ 已在服务器创建 {remote_py}")
+
+    elif remote_content == local_content:
+        print(f"  ✓ 服务器已是最新版本(无需重传)")
+        if args.dry_run:  # --dry-run only compares files; it used to fall through to build + restart
+            c.close()
+            return 0
+    else:
+        if args.dry_run:
+            print(f"  ! 服务器与本地不一致(差异 {len(remote_content)-len(local_content)} 字节),--dry-run 跳过覆盖")
+            c.close()
+            return 0
+        # Back up the server copy under a timestamped name
+        ts = time.strftime("%Y%m%d_%H%M%S")
+        bak = f"{remote_py}.bak.{ts}"
+        rc, _, _ = ssh_exec(c, f"bash -lc 'cp -f {remote_py} {bak} && echo {bak}'", timeout=15)
+        if rc != 0:
+            print(f"  ✗ 备份失败"); c.close(); return 2
+        print(f"  ✓ 备份到 {bak}")
+        # Overwrite with the local version
+        put_text_via_file(c, remote_py, local_content)
+        print(f"  ✓ 覆盖服务器 enrichment.py")
+
+    # 3) Rebuild the worker image
+    if args.no_build:
+        print(f"\n[4/6] --no-build,跳过 docker build")
+    else:
+        print(f"\n[4/6] docker compose build worker ...")
+        rc, out, err = ssh_exec(c, f"cd {args.compose_dir} && docker compose build worker", timeout=600)
+        if rc != 0:
+            print(f"  ✗ build 失败 rc={rc}")
+            print((out + err)[-2000:])
+            c.close()
+            return 2
+        # Print only the last 3 lines of the build output
+        tail = "\n".join((out + err).strip().splitlines()[-3:])
+        print(f"  ✓ build 成功(尾行): {tail[:300]}")
+
+    # 4) Restart the worker
+    if args.no_restart:
+        print(f"\n[5/6] --no-restart,跳过 up -d")
+    else:
+        print(f"\n[5/6] docker compose up -d worker ...")
+        rc, out, err = ssh_exec(c, f"cd {args.compose_dir} && docker compose up -d worker", timeout=120)
+        if rc != 0:
+            print(f"  ✗ 重启失败 rc={rc}")
+            print((out + err)[-2000:])
+            c.close()
+            return 2
+        # Look for the "Container ... Started" line of the worker
+        started = [l.strip() for l in (out + err).splitlines() if "Started" in l and "worker" in l]
+        print(f"  ✓ {started[-1] if started else 'restarted'}")
+
+    # 5) Wait, then look at the enrich state
+    print(f"\n[6/6] 等 {args.wait}s 让 worker 起来 + enrichment_loop 跑几批 ...")
+    time.sleep(10)
+    # Check the worker log for the enrichment_loop start marker
+    rc, out, _ = ssh_exec(c, f"cd {args.compose_dir} && docker compose logs --tail=30 worker 2>&1 | grep -iE 'enrich|started' | head -10", timeout=20)
+    if "enrichment_loop started" in out:
+        print("  ✓ enrichment_loop 已启动")
+    else:
+        print(f"  ! enrichment_loop 启动标志未找到,日志:")
+        for line in out.splitlines()[-5:]:
+            print(f"      {line}")
+
+    print(f"\n  等待 {args.wait}s 让 enrich 真跑起来 ...")
+    time.sleep(args.wait)
+
+    # 6) Inspect the enrich_article log lines and the n/a count
+    rc, log_out, _ = ssh_exec(c, f"cd {args.compose_dir} && docker compose logs --tail=500 worker 2>&1 | grep -E 'enrich_article' | head -10", timeout=20)
+    enrich_count = len([l for l in log_out.splitlines() if "enrich_article" in l])
+    print(f"\n  === enrich_article 日志: {enrich_count} 条 ===")
+    for line in log_out.splitlines()[:5]:
+        print(f"      {line}")
+
+    # Current classify_status distribution (includes the n/a count)
+    rc, sql_out, _ = ssh_exec(c, f"cd {args.compose_dir} && docker compose exec -T postgres psql -U news -d news -c \"SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;\"", timeout=30)
+    print(f"\n  === 当前 classify_status 分布 ===")
+    print(sql_out.rstrip())
+
+    # Verdict
+    rc_ok = 0
+    if enrich_count == 0:
+        print(f"\n  ⚠ enrich_article 日志 0 条 — enrich 任务可能没在跑")
+        print(f"  排查:")
+        print(f"    docker compose logs worker 2>&1 | grep -E 'enrich|ERROR' | tail -20")
+        rc_ok = 1
+    else:
+        # Compare the n/a count with the hard-coded pre-fix baseline of 663
+        n_a_match = re.search(r"n/a\s*\|\s*(\d+)", sql_out)
+        if n_a_match:
+            n_a = int(n_a_match.group(1))
+            if n_a < 663:
+                print(f"\n  ✓ n/a 数从 663 降到 {n_a} — 修复成功,enrich 在跑")
+            else:
+                print(f"\n  ⚠ n/a 数 {n_a} 没变(还在 663+),但有 enrich 日志 — 看具体错")
+                rc_ok = 1
+
+    c.close()
+    print(f"\n==== 结束 (rc={rc_ok}) ====")
+    return rc_ok
+
+
+if __name__ == "__main__":
+    import re  # NOTE(review): main() calls re.search via this global, which is only bound when run as a script
+    sys.exit(main())
diff --git a/scripts/healthcheck.py b/scripts/healthcheck.py
new file mode 100644
index 0000000..5ac7643
--- /dev/null
+++ b/scripts/healthcheck.py
@@ -0,0 +1,1288 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""diary-news 服务器健康检查 checklist。
+
+可在本机跑(SSH 远端)或在服务器上直接跑(用 --local)。
+走 docker compose 的 6 个服务:postgres / redis / api / worker / caddy / frontend,
+外加主机层面的端口/磁盘/内存/日志。
+
+依赖:
+  pip install paramiko
+
+用法(Windows PowerShell):
+  $env:REMOTE_PASS = '你的root密码'
+  python scripts/healthcheck.py
+  python scripts/healthcheck.py --local            # 在服务器上直接跑
+  python scripts/healthcheck.py --host 1.2.3.4 --port 22 --user news
+  python scripts/healthcheck.py --only docker,disk # 只跑指定组
+  python scripts/healthcheck.py --json out.json    # 导出结构化结果
+
+环境变量(可覆盖默认值):
+  REMOTE_HOST     207.57.129.228
+  REMOTE_PORT     19717
+  REMOTE_USER     root
+  REMOTE_PASS     (SSH 必填; --local 不需要)
+  COMPOSE_DIR     /srv/news
+  API_BASE_URL    http://127.0.0.1/api/v1/healthz  # API health endpoint (default goes through Caddy on port 80)
+"""
+from __future__ import annotations
+
+import argparse
+import base64
+import json
+import os
+import re
+import sys
+import time
+from dataclasses import dataclass, field, asdict
+from typing import Callable, Optional
+
+# 可选依赖:只在远程模式下需要
+try:
+    import paramiko  # type: ignore
+except ImportError:
+    paramiko = None  # --local 模式不强制
+
+
+# ============== Configuration ==============
+DEFAULT_HOST = "207.57.129.228"
+DEFAULT_PORT = 19717
+DEFAULT_USER = "root"
+DEFAULT_COMPOSE = "/srv/news"
+# Goes through Caddy on port 80, which reverse-proxies to api:8000.
+DEFAULT_API_BASE = "http://127.0.0.1/api/v1/healthz"
+SSH_TIMEOUT = 30
+
+# The 6 services declared in docker-compose.yml.
+EXPECTED_SERVICES = "postgres redis api worker caddy frontend".split()
+
+# Key ports (by default only the public-facing 80 is checked; add others as needed).
+# "http" is the public port of Caddy / Frontend.
+KEY_PORTS = dict(http=80)
+
+
+# ============== 数据结构 ==============
+@dataclass
+class Check:
+    name: str               # label printed for this check
+    group: str              # group tag, e.g. "docker", "host", "network", "app"
+    ok: bool                # True when the check passed
+    summary: str            # one-line result printed next to the name
+    detail: str = ""        # longer output, printed only when the check failed
+    elapsed_ms: int = 0     # duration in milliseconds, filled in by the @timed decorator
+    severity: str = "info"  # info / warn / error
+    command: str = ""       # the command that was run (makes a failure easy to reproduce)
+
+
+@dataclass
+class Report:
+    """Collects check results for one target and echoes each to the console as it is added."""
+    target: str
+    started_at: str
+    finished_at: str = ""
+    checks: list = field(default_factory=list)  # asdict() snapshots of every added Check
+
+    def add(self, c: Check, verbose: bool = False) -> None:
+        """Store `c` as a dict and print its one-line result, plus command/detail when it failed."""
+        self.checks.append(asdict(c))
+        mark = "✓" if c.ok else "✗"
+        tag = f" [{c.severity.upper()}]" if c.severity != "info" else ""
+        print(f"  {mark}{tag} {c.name}: {c.summary}  ({c.elapsed_ms}ms)")
+        if c.ok:
+            return
+        if c.command:
+            print(f"      $ {c.command}")
+        if not c.detail:
+            return
+        rows = c.detail.splitlines()
+        # Errors and --verbose show the full detail; other failures are capped at 12 lines.
+        full = c.severity == "error" or verbose
+        for row in (rows or ["(no detail)"]) if full else rows[:12]:
+            print(f"      {row}")
+        if not full and len(rows) > 12:
+            print(f"      ... (共 {len(rows)} 行,用 --verbose 看完整)")
+
+    def summary(self) -> tuple[int, int, int]:
+        """Return the counts (passed, failed, failed with severity "error")."""
+        failed = [entry for entry in self.checks if not entry["ok"]]
+        errors = [entry for entry in failed if entry["severity"] == "error"]
+        return len(self.checks) - len(failed), len(failed), len(errors)
+
+
+# ============== 远程执行抽象 ==============
+class Remote:
+    """Uniform command runner: paramiko SSH for a remote host, or the local shell when `local` is true."""
+    def __init__(self, local: bool, host: str = "", port: int = 22, user: str = "root", password: str = ""):
+        self.local = local
+        self.client: Optional[paramiko.SSHClient] = None
+        if local:
+            return
+        if paramiko is None:
+            print("ERROR: paramiko 未安装,远程模式需要 `pip install paramiko`", file=sys.stderr)
+            sys.exit(2)
+        pw = password or os.environ.get("REMOTE_PASS", "")
+        if not pw:
+            print("ERROR: 请先设置环境变量 REMOTE_PASS,或加 --password xxx", file=sys.stderr)
+            sys.exit(2)
+        c = paramiko.SSHClient()
+        c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        c.connect(host, port=port, username=user, password=pw,
+                  timeout=SSH_TIMEOUT, banner_timeout=SSH_TIMEOUT, auth_timeout=SSH_TIMEOUT,
+                  allow_agent=False, look_for_keys=False)
+        self.client = c
+
+    def run(self, cmd: str, timeout: int = 60) -> tuple[int, str, str]:
+        """Run `cmd` and return (rc, stdout, stderr) as text; a local timeout yields rc 124."""
+        if self.local:
+            import subprocess
+            try:
+                p = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
+                return p.returncode, p.stdout, p.stderr
+            except subprocess.TimeoutExpired as e:
+                # Output captured before a timeout is bytes even with text=True (or None); always hand back str.
+                partial = e.stdout.decode(errors="replace") if isinstance(e.stdout, bytes) else (e.stdout or "")
+                return 124, partial, f"timeout after {timeout}s"
+        assert self.client is not None
+        _si, so, se = self.client.exec_command(cmd, timeout=timeout, get_pty=True)
+        out = so.read().decode(errors="replace")
+        err = se.read().decode(errors="replace")
+        rc = so.channel.recv_exit_status()
+        return rc, out, err
+
+    def close(self) -> None:
+        if self.client is not None:
+            self.client.close()
+
+
+# ============== 检查项 ==============
+def timed(fn: Callable) -> Callable:  # decorator: stores the call's duration (ms) in the result's elapsed_ms
+    def wrapper(*args, **kwargs):
+        t0 = time.perf_counter()  # monotonic, so wall-clock adjustments cannot skew or negate the reading
+        c = fn(*args, **kwargs)
+        c.elapsed_ms = int((time.perf_counter() - t0) * 1000)
+        return c
+    return wrapper
+
+
+@timed
+def check_compose_ps(remote: Remote, compose_dir: str) -> Check:
+    """1.1 docker compose ps: every expected service must be running and not reported unhealthy."""
+    cmd = f"cd {compose_dir} && docker compose ps --format '{{{{.Service}}}}|{{{{.State}}}}|{{{{.Status}}}}'"
+    rc, out, err = remote.run(cmd, timeout=30)
+    running, unhealthy, missing = set(), set(), set(EXPECTED_SERVICES)
+    detail_lines = []
+    for line in out.splitlines():
+        parts = line.strip().split("|")
+        if len(parts) < 3:
+            continue
+        svc, state, status = parts[0], parts[1], parts[2]
+        missing.discard(svc)
+        detail_lines.append(f"  {svc:10s} {state:12s} {status}")
+        # State stays "running" while a healthcheck fails; only Status says "(unhealthy)", so inspect both.
+        bad_status = "exit" in status.lower() or "unhealthy" in status.lower()
+        if state.lower() in ("running", "healthy") and not bad_status:
+            running.add(svc)
+        else:
+            unhealthy.add(svc)
+    ok = not missing and not unhealthy and len(running) == len(EXPECTED_SERVICES)
+    summary = (
+        f"{len(running)}/{len(EXPECTED_SERVICES)} running"
+        if ok
+        else f"missing={sorted(missing) or '-'} unhealthy={sorted(unhealthy) or '-'}"
+    )
+    sev = "error" if missing else ("warn" if unhealthy else "info")
+    # When no row could be parsed (e.g. docker itself failed), show the raw output instead of an empty detail.
+    return Check("docker compose ps", "docker", ok, summary, "\n".join(detail_lines) or (out + err).strip(), command=cmd, severity=sev)
+
+
+@timed
+def check_container_logs(remote: Remote, compose_dir: str) -> Check:
+    """1.2 Scan the last 200 worker/api log lines for error-looking entries (first 20 matches kept)."""
+    title = "近 200 行 worker/api 日志无 ERROR"
+    pipeline = " | ".join([
+        f"cd {compose_dir} && docker compose logs --tail=200 --no-color worker api 2>&1",
+        "grep -E -i 'traceback|error|exception|critical'",
+        "head -20",
+    ])
+    _rc, stdout, _err = remote.run(pipeline, timeout=30)
+    hits = stdout.strip()
+    if hits:
+        # Any match fails the check with severity warn; the matching lines become the detail.
+        suspicious = sum(1 for row in hits.splitlines() if row.strip())
+        return Check(title, "docker", False, f"{suspicious} 行可疑", hits, severity="warn")
+    return Check(title, "docker", True, "clean", severity="info")
+
+
+@timed
+def check_disk(remote: Remote) -> Check:
+    """1.3 Disk space: flag /, /var and anything under /srv once usage reaches 85%."""
+    # Anchor on the first column (mount point); the old '/$|/var$' anchored at the line end, where df prints Use%.
+    cmd = "df -h --output=target,size,used,avail,pcent 2>/dev/null | grep -E '^(/|/var|/srv[^[:space:]]*)[[:space:]]'"
+    rc, out, err = remote.run(cmd)
+    out = out.strip()
+    high = []
+    for line in out.splitlines():
+        m = re.search(r"(\d+)%", line)
+        if m and int(m.group(1)) >= 85:
+            high.append(line.strip())
+    return Check("磁盘空间", "docker", not high, "ok" if not high else f"高占用: {'; '.join(high)}", out, command=cmd, severity="warn" if high else "info")
+
+
+def _parse_size_to_mb(token: str) -> float:
+    """Convert a `free -h` style size ('1.9Gi', '806Mi', '512Ki', '2GB', '1Ti', '64B', '1024') to MiB.
+
+    Units are treated as binary (K = 1024 bytes); a bare number is taken as KiB; unparseable input gives 0.0.
+    """
+    m = re.match(r"^\s*(\d+(?:\.\d+)?)\s*([KMGT]?)(i?B?)\s*$", token)
+    if not m:
+        return 0.0
+    val = float(m.group(1))
+    prefix = m.group(2)
+    if not prefix:
+        # "64B" is plain bytes; a bare number keeps the historical KiB default.
+        return val / (1024 * 1024) if m.group(3) == "B" else val / 1024
+    # The old suffix tests missed "GB"/"MB"/"KB" (they fell through to the KiB default) and rejected "T*".
+    return val * {"K": 1 / 1024, "M": 1.0, "G": 1024.0, "T": 1024.0 * 1024}[prefix]
+
+
+@timed
+def check_memory(remote: Remote) -> Check:
+    """1.4 Memory: warn when the Mem row of `free -h` shows used/total above 90%."""
+    _rc, stdout, _ = remote.run("free -h | head -3")
+    table = stdout.strip()
+    pct, high = 0.0, False
+    # Mem row layout: ['Mem:', total, used, free, shared, buff/cache, available]
+    mem_rows = (row.split() for row in table.splitlines() if row.startswith("Mem"))
+    for cells in mem_rows:
+        if len(cells) < 7:
+            continue
+        total_mb = _parse_size_to_mb(cells[1])
+        used_mb = _parse_size_to_mb(cells[2])
+        if total_mb <= 0:
+            # Unparseable or zero total: no percentage can be computed.
+            continue
+        pct = used_mb / total_mb * 100
+        high = high or pct > 90
+    if high:
+        return Check("内存使用", "host", False, f">90% used ({pct:.1f}%)", table, severity="warn")
+    return Check("内存使用", "host", True, "ok", table, severity="info")
+
+
+@timed
+def check_ports(remote: Remote) -> Check:
+    """1.5 Key ports are listening (only 80 by default).
+
+    Takes the Local Address column (4th field, e.g. 0.0.0.0:80, *:443, [::]:80) of the LISTEN rows of
+    `ss -tln`. -H is avoided (header handling differs across distros), and so is plain -l (unix sockets).
+    """
+    cmd = "ss -tln 2>/dev/null | awk 'tolower($1) ~ /listen/ {print $4}' | sort -u"
+    rc, out, _ = remote.run(cmd)
+    # Parse line by line after strip(): remote output comes through a pty and ends lines with
+    # "\r\n", and a whole-text multiline `$` never matches in front of the "\r", which made
+    # every port look missing in remote mode.
+    found = (re.search(r":(\d+)$", line.strip()) for line in out.splitlines())
+    listening = {int(m.group(1)) for m in found if m}
+    need = set(KEY_PORTS.values())
+    missing = sorted(need - listening)
+    ok = not missing
+    label = "/".join(str(p) for p in sorted(need))  # sorted: set order would make the label unstable
+    return Check(f"关键端口 {label} 监听", "network", ok,
+                 "ok" if ok else f"缺失 {missing}",
+                 f"监听中: {sorted(listening)}\n# raw ss output:\n{out.strip()}",
+                 command=cmd, severity="warn" if not ok else "info")
+
+
+@timed
+def check_docker_system(remote: Remote) -> Check:
+    """1.6 docker system df: warn when a row reports more than 5 GB (volumes / images / build cache)."""
+    rc, out, _ = remote.run("docker system df 2>&1")
+    out = out.strip()
+    bloated = False
+    for line in out.splitlines():
+        # Accept whole numbers too ("12GB"); the old pattern required a decimal point and skipped
+        # them. Only the first GB/TB figure on each row is inspected; any TB figure counts as bloated.
+        m = re.search(r"(\d+(?:\.\d+)?)\s*([GT])B", line)
+        if m and (m.group(2) == "T" or float(m.group(1)) > 5):
+            bloated = True
+    return Check("docker system df", "docker", not bloated,
+                 "ok" if not bloated else "有 >5GB 的大件",
+                 out, severity="warn" if bloated else "info")
+
+
+@timed
+def check_api_health(remote: Remote, api_base: str) -> Check:
+    """1.7 API health endpoint.
+
+    `api_base` may be given in two forms:
+      - a full URL that already ends in /healthz or /health → used as is
+      - a base URL such as 'http://127.0.0.1:8000' → /api/v1/healthz is appended
+    Any HTTP status in [200, 400) counts as healthy; anything else is an error.
+    """
+    trimmed = api_base.rstrip("/")
+    url = trimmed if trimmed.endswith(("/healthz", "/health")) else f"{trimmed}/api/v1/healthz"
+    # curl stores the body in /tmp/hc_body and prints status + total time; the first
+    # 400 bytes of the body are echoed afterwards.
+    probe = f"curl -sS -m 5 -o /tmp/hc_body -w 'http=%{{http_code}} t=%{{time_total}}\\n' '{url}'; "
+    show_body = "echo '--- body ---'; head -c 400 /tmp/hc_body 2>/dev/null; echo"
+    cmd = probe + show_body
+    _rc, out, _ = remote.run(cmd)
+    status = re.search(r"http=(\d+)", out)
+    code = int(status.group(1)) if status else 0
+    if 200 <= code < 400:
+        return Check(f"API {url}", "app", True, f"http={code} (✓ ok)", out.strip(),
+                     command=cmd, severity="info")
+    return Check(f"API {url}", "app", False, f"http={code} (✗ failed)", out.strip(),
+                 command=cmd, severity="error")
+
+
+@timed
+def check_db_counts(remote: Remote, compose_dir: str) -> Check:
+    """1.8 Row counts of articles / sources (credentials from .env); >50 untranslated in 24h fails, >200 or a failed query is an error."""
+    cmd = (
+        f"cd {compose_dir} && "
+        "set -a; . ./.env; set +a; "
+        "docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -t -A -c "
+        "\"SELECT 'articles='||count(*) FROM articles;"
+        "SELECT 'sources='||count(*) FROM sources;"
+        "SELECT 'translated='||count(*) FROM articles WHERE title_zh IS NOT NULL;"
+        "SELECT 'untranslated_24h='||count(*) FROM articles "
+        "  WHERE published_at > now() - interval '24 hour' AND title_zh IS NULL;\" 2>&1"
+    )
+    rc, out, _ = remote.run(cmd, timeout=30)
+    untrans_m = re.search(r"untranslated_24h=(\d+)", out)
+    untrans_24h = int(untrans_m.group(1)) if untrans_m else -1
+    # A failed or unparseable query must not pass as healthy (-1 used to satisfy "<= 50").
+    broken = rc != 0 or untrans_m is None
+    ok = not broken and untrans_24h <= 50
+    sev = "error" if broken or untrans_24h > 200 else ("warn" if untrans_24h > 50 else "info")
+    # splitlines() also drops the "\r" a pty adds, which would otherwise garble the one-line summary.
+    return Check("DB 行数 articles/sources", "app", ok, " | ".join(out.strip().splitlines()), command=cmd, severity=sev)
+
+
+@timed
+def check_llm_workflow(remote: Remote, compose_dir: str) -> Check:
+    """1.13 LLM workflow completion: status distribution of the 5 steps + the last-24h slice.
+    Steps (in the order of enrichment.py:294):
+      1. translate   translation_status   (translation_loop)
+      2. classify    classify_status      (enrichment step 1)
+      3. format      format_status        (enrichment step 2, produces body_zh_formatted)
+      4. image       image_ai_status      (enrichment step 3, produces image_ai_url)
+      5. commentary  commentary_status    (enrichment step 4, produces commentary)
+    Criteria as implemented below:
+      - global translation failed+partial share ≥ 5% → warn, ≥ 20% → error
+      - among articles translated successfully in the last 24h:
+          no such articles   → not ok, severity info (the workflow cannot be assessed)
+          no issue found     → ok
+          any step's failed share ≥ 20%  → error
+    "n/a" and "pending" rows are only counted and reported, never flagged:
+      - all n/a is the normal picture when LLM enrichment is off (per the original author's note)
+      - n/a + pending together means enrichment is on but has not finished yet (same note)
+    """
+    # One query: global distribution of the 5 statuses + the 4 LLM statuses among articles translated in the last 24h
+    sql = r"""
+SELECT 'tr_glob' AS k, translation_status AS st, count(*)::int AS n
+  FROM articles GROUP BY translation_status
+UNION ALL
+SELECT 'cl_glob', classify_status, count(*)::int FROM articles GROUP BY classify_status
+UNION ALL
+SELECT 'fm_glob', format_status,   count(*)::int FROM articles GROUP BY format_status
+UNION ALL
+SELECT 'im_glob', image_ai_status, count(*)::int FROM articles GROUP BY image_ai_status
+UNION ALL
+SELECT 'co_glob', commentary_status, count(*)::int FROM articles GROUP BY commentary_status
+UNION ALL
+-- 24h 内翻译成功(translation_status=ok)的文章里,4 个 LLM 状态分布
+SELECT 'cl_24h', classify_status,   count(*)::int FROM articles
+  WHERE translation_status='ok' AND translated_at > now()-interval '24 hour'
+  GROUP BY classify_status
+UNION ALL
+SELECT 'fm_24h', format_status,     count(*)::int FROM articles
+  WHERE translation_status='ok' AND translated_at > now()-interval '24 hour'
+  GROUP BY format_status
+UNION ALL
+SELECT 'im_24h', image_ai_status,   count(*)::int FROM articles
+  WHERE translation_status='ok' AND translated_at > now()-interval '24 hour'
+  GROUP BY image_ai_status
+UNION ALL
+SELECT 'co_24h', commentary_status, count(*)::int FROM articles
+  WHERE translation_status='ok' AND translated_at > now()-interval '24 hour'
+  GROUP BY commentary_status;
+"""
+    cmd = (
+        f"cd {compose_dir} && "
+        "set -a; . ./.env; set +a; "
+        "docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -t -A -F $'\\t' -c \""
+        + sql.replace(chr(34), chr(92) + chr(34))
+        + "\" 2>&1"
+    )
+    rc, out, _ = remote.run(cmd, timeout=30)
+
+    # Parse: tab-separated, 3 columns (k, st, n)
+    glob: dict[str, dict[str, int]] = {}  # glob['tr_glob'] = {'ok': 100, 'failed': 5, ...}
+    for line in out.splitlines():
+        line = line.strip()
+        if line.count("\t") < 2:
+            continue
+        k, st, n_s = line.split("\t", 2)
+        try:
+            n = int(n_s)
+        except ValueError:
+            continue
+        glob.setdefault(k, {})[st] = n
+
+    if not glob:
+        return Check(
+            "LLM 工作流落实度(翻译/分类/排版/插图/评论)", "app", False,
+            "查询无结果(SQL 失败?)",
+            detail=out[:600],
+            command=cmd,
+            severity="error",
+        )
+
+    # === 1) Global translation health ===
+    tr = glob.get("tr_glob", {})
+    tr_total = sum(tr.values())
+    tr_failed = tr.get("failed", 0) + tr.get("partial", 0)
+    tr_failed_pct = (tr_failed / tr_total * 100) if tr_total else 0.0
+    tr_ok = tr.get("ok", 0)
+
+    # === 2) Completion of the 4 LLM statuses among articles translated in the last 24h ===
+    #    Sample size = sum of all cl_24h values (the other 3 queries select the same articles)
+    llm_24h_total = sum(glob.get("cl_24h", {}).values())
+    llm_summary: list[str] = []
+    llm_issues: list[str] = []
+    for prefix, name in [("cl_24h", "分类"), ("fm_24h", "排版"),
+                         ("im_24h", "插图"), ("co_24h", "评论")]:
+        d = glob.get(prefix, {})
+        ok = d.get("ok", 0)
+        failed = d.get("failed", 0)
+        pending = d.get("pending", 0)
+        na = d.get("n/a", 0)
+        if llm_24h_total == 0:
+            llm_summary.append(f"{name}: 无 24h 翻译样本")
+            continue
+        ok_pct = ok / llm_24h_total * 100
+        fail_pct = failed / llm_24h_total * 100
+        llm_summary.append(
+            f"{name}: ok={ok} failed={failed} pending={pending} n/a={na}  ({ok_pct:.0f}% ok)"
+        )
+        if fail_pct >= 20:
+            llm_issues.append(f"{name} 24h 失败率 {fail_pct:.0f}% (≥20%)")
+
+    # === 3) Global LLM status distribution (for the overall picture) ===
+    glob_parts: list[str] = []
+    for prefix, name in [("cl_glob", "分类"), ("fm_glob", "排版"),
+                         ("im_glob", "插图"), ("co_glob", "评论")]:
+        d = glob.get(prefix, {})
+        if d:
+            parts = ",".join(f"{k}={v}" for k, v in sorted(d.items(), key=lambda x: -x[1])[:3])
+            glob_parts.append(f"{name} {parts}")
+
+    # === 4) Overall verdict ===
+    issues: list[str] = []
+    if tr_failed_pct >= 20:
+        issues.append(f"翻译失败率 {tr_failed_pct:.0f}% ≥20%")
+    elif tr_failed_pct >= 5:
+        issues.append(f"翻译失败率 {tr_failed_pct:.0f}% ≥5%")
+    issues.extend(llm_issues)
+
+    if llm_24h_total == 0:
+        # No successfully translated article in the last 24h: nothing to assess, so severity stays info
+        sev = "info"
+        summary = f"24h 内无翻译成功样本(无法评估 LLM 工作流)"
+    else:
+        sev = "error" if any("≥20%" in i and "失败" in i for i in issues) else (
+            "warn" if issues else "info"
+        )
+        summary = f"翻译 ok={tr_ok}/{tr_total} ({100 - tr_failed_pct:.0f}%) | " + " · ".join(llm_summary)
+        if issues:
+            summary += " · " + "; ".join(issues[:2])
+
+    detail_lines = [
+        f"翻译全局(全量): " + ", ".join(f"{k}={v}" for k, v in sorted(tr.items(), key=lambda x: -x[1])),
+        f"翻译失败率: {tr_failed_pct:.1f}%",
+        f"24h 已翻译文章样本: {llm_24h_total} 篇",
+    ] + llm_summary + [
+        "",
+        "全局 LLM 状态(全量,取 top3):",
+    ] + [f"  {p}" for p in glob_parts]
+    if issues:
+        detail_lines.append("")
+        detail_lines.append("⚠ 问题: " + "; ".join(issues))
+
+    ok = not issues and llm_24h_total > 0
+    return Check(
+        "LLM 工作流落实度(翻译/分类/排版/插图/评论)", "app", ok, summary,
+        detail="\n".join(detail_lines),
+        command="psql: 5 个 status 字段 × 全局/24h 分布",
+        severity=sev,
+    )
+
+
+@timed
+def check_translation_sample(remote: Remote, compose_dir: str, sample_n: int = 3) -> Check:
+    """1.9 Spot-check the translation of N random articles from the last 24h (default 3).
+
+    Sampling: published_at > now()-24h AND title_zh IS NOT NULL
+              AND translation_status IN ('ok','partial'), random order, LIMIT sample_n.
+    Per-article criteria:
+      - title_zh is non-empty
+      - the body_zh_text preview is non-empty
+      - title_zh != title (identical titles are the typical "not translated" fallback)
+      - title_zh is at least 2 characters long
+    Overall verdict:
+      - psql exits non-zero: error (the query itself failed)
+      - no usable rows:      info  (nothing sampled)
+      - every article passes: info, ok=True
+      - no article passes:    error
+      - anything in between:  warn
+
+    Args:
+        remote: command runner; ``run`` returns ``(rc, stdout, stderr)``.
+        compose_dir: directory holding docker-compose and the ``.env`` file.
+        sample_n: number of articles to sample.
+    """
+    def _as_int(text: str) -> int:
+        """Parse an integer column; empty or malformed text counts as 0."""
+        try:
+            return int(text or 0)
+        except ValueError:
+            return 0
+
+    check_name = f"翻译抽查({sample_n}篇/24h)"
+    # One query for all samples; psql prints the columns tab-separated.
+    sql = (
+        "SELECT id, "
+        "  coalesce(source_id::text,'?') AS src, "
+        "  title, "
+        "  title_zh, "
+        "  coalesce(substring(body_zh_text, 1, 200), '') AS body_zh_preview, "
+        "  translation_status, "
+        "  translation_engine, "
+        "  coalesce(to_char(translated_at, 'YYYY-MM-DD HH24:MI'), '-') AS tat, "
+        "  coalesce(lang_src,'-') AS lang, "
+        "  coalesce(char_length(title),0)  AS tlen, "
+        "  coalesce(char_length(title_zh),0) AS zlen, "
+        "  coalesce(char_length(body_zh_text),0) AS blen "
+        "FROM articles "
+        "WHERE published_at > now() - interval '24 hour' "
+        "  AND title_zh IS NOT NULL "
+        "  AND translation_status IN ('ok','partial') "
+        "ORDER BY random() "
+        # int() guarantees only a number can reach the SQL text.
+        f"LIMIT {int(sample_n)};"
+    )
+    # Header line echoed before the data so the raw output is self-describing.
+    header = "id\tsrc\ttitle\ttitle_zh\tbody_zh_preview\tstatus\tengine\ttranslated_at\tlang\ttlen\tzlen\tblen"
+    # The SQL is embedded in a double-quoted shell argument, so escape its double quotes.
+    escaped_sql = sql.replace('"', '\\"')
+    cmd = (
+        f"cd {compose_dir} && "
+        "set -a; . ./.env; set +a; "
+        f"echo '{header}'; "
+        "docker compose exec -T postgres psql -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" "
+        f"-t -A -F $'\\t' -c \"{escaped_sql}\" 2>&1"
+    )
+    rc, out, _ = remote.run(cmd, timeout=30)
+
+    # A failed query must not be mistaken for "no articles translated yet".
+    if rc != 0:
+        return Check(
+            check_name, "app", False,
+            f"psql 查询失败 (rc={rc})",
+            detail=f"# raw output:\n{out.strip()[:500]}",
+            severity="error",
+            command=cmd,
+        )
+
+    # Keep only real data rows: skip the echoed header and anything (NOTICE lines,
+    # error text) that does not carry all 12 columns the unpacking below needs.
+    rows = [
+        line.split("\t")
+        for line in out.splitlines()
+        if line.strip() and not line.startswith("id\t") and line.count("\t") >= 11
+    ]
+
+    if not rows:
+        # Zero candidates: nothing translated in the last 24h (fresh start / little data).
+        return Check(
+            check_name, "app", True,
+            "无样本(24h 内暂无已翻译文章)",
+            detail=f"# raw output:\n{out.strip()[:500]}",
+            severity="info",
+            command=cmd,
+        )
+
+    # Grade each article.
+    verdicts: list[tuple[bool, list[str]]] = []  # (passed, failure reasons)
+    report_lines: list[str] = []
+    for cols in rows:
+        (aid, src, title, title_zh, body_zh_pv, status,
+         engine, tat, lang, tlen, zlen, blen) = cols[:12]
+        tlen_i, zlen_i, blen_i = _as_int(tlen), _as_int(zlen), _as_int(blen)
+
+        reasons: list[str] = []
+        if not title_zh.strip():
+            reasons.append("title_zh 空")
+        if not body_zh_pv.strip():
+            reasons.append("body_zh_text 空")
+        if title_zh.strip() and title.strip() and title_zh.strip() == title.strip():
+            reasons.append("title_zh == title(未翻译)")
+        if zlen_i < 2:
+            reasons.append(f"title_zh 长度={zlen_i}")
+        is_ok = not reasons
+        verdicts.append((is_ok, reasons))
+
+        # Human-readable detail: original title / translated title / lengths / status.
+        t_disp = (title[:50] + "…") if len(title) > 50 else title
+        z_disp = (title_zh[:50] + "…") if len(title_zh) > 50 else title_zh
+        line = (f"#{aid} src={src} lang={lang} status={status} "
+                f"len: 原 {tlen_i} → 译 {zlen_i} (body_zh {blen_i}) "
+                f"engine={engine} at={tat}")
+        if is_ok:
+            line = "✓ " + line + f"\n      原: {t_disp}\n      译: {z_disp}"
+        else:
+            line = "✗ " + line + f"\n      原因: {'; '.join(reasons)}\n      原: {t_disp}\n      译: {z_disp}"
+        report_lines.append(line)
+
+    passed = sum(1 for ok, _ in verdicts if ok)
+    total = len(verdicts)
+    if passed == total:
+        sev, summary = "info", f"{passed}/{total} 通过"
+    elif passed == 0:
+        sev, summary = "error", f"0/{total} 通过 ⚠ 翻译管线可能挂了"
+    else:
+        sev = "warn"
+        summary = f"{passed}/{total} 通过(部分文章翻译异常)"
+
+    return Check(
+        check_name, "app", passed == total, summary,
+        detail="\n".join(report_lines),
+        command=cmd, severity=sev,
+    )
+
+
+@timed
+def check_redis(remote: Remote, compose_dir: str) -> Check:
+    """1.9 Redis liveness (PING) plus the memory figures from ``INFO memory``.
+
+    The check passes when the combined output contains ``PONG``.
+    """
+    redis_cli = "docker compose exec -T redis redis-cli -a \"$REDIS_PASSWORD\" --no-auth-warning "
+    steps = [
+        # Load .env with auto-export so $REDIS_PASSWORD is available below.
+        f"cd {compose_dir} && set -a",
+        ". ./.env",
+        "set +a",
+        redis_cli + "ping 2>&1",
+        redis_cli + "info memory 2>&1 | grep -E 'used_memory_human|used_memory_peak_human|maxmemory_human'",
+    ]
+    _, output, _ = remote.run("; ".join(steps), timeout=20)
+    one_line = output.strip().replace("\n", " | ")
+    if "PONG" in output:
+        return Check("Redis", "app", True, one_line, severity="info")
+    return Check("Redis", "app", False, one_line, severity="error")
+
+
+@timed
+def check_homepage(remote: Remote, api_base: str, auth_token: str = "") -> Check:
+    """1.10 Home page SPA + feed API + mobile readiness.
+
+    The frontend is a Vue SPA, so index.html is only a shell. Checked here:
+      1) GET / returns HTML with a viewport meta tag, the #app mount point,
+         a JS entry reference and lang="zh-CN".
+      2) GET /api/v1/articles?page=1&page_size=10 returns JSON with ``items``
+         and ``total``; items that carry ``title_zh`` are counted as translated.
+      3) The first stylesheet linked from index.html is fetched and the
+         occurrences of ``max-width: 768px`` / ``max-width: 480px`` are counted.
+    A 401 received without a token is reported as info rather than a failure.
+
+    Args:
+        remote: command runner; ``run`` returns ``(rc, stdout, stderr)``.
+        api_base: API base URL; a trailing ``/api/v1/healthz`` is stripped.
+        auth_token: optional bearer token for the feed endpoint.
+    """
+    def _last_int(chunk: str) -> int:
+        """Return the integer on the last non-empty line of ``chunk`` (0 if none)."""
+        lines = chunk.strip().splitlines()
+        try:
+            return int(lines[-1]) if lines else 0
+        except ValueError:
+            return 0
+
+    # 1) Fetch the home page HTML.
+    _, html, _ = remote.run("curl -sS -m 5 http://127.0.0.1/", timeout=10)
+    has_viewport = "name=\"viewport\"" in html or "name='viewport'" in html
+    has_app_div = 'id="app"' in html
+    # "main.ts" also matches the dev-server path "/src/main.ts".
+    has_js = "main.ts" in html or "/assets/index-" in html
+    has_lang_zh = 'lang="zh-CN"' in html or "lang='zh-CN'" in html
+
+    # 2) Fetch the article list (the endpoint requires auth).
+    api_url = f"{api_base.rstrip('/').removesuffix('/api/v1/healthz')}/api/v1/articles?page=1&page_size=10"
+    auth_header = ""
+    if auth_token:
+        # base64 keeps the token free of shell metacharacters. The header must be
+        # double-quoted: inside single quotes the shell would not run $(...) and
+        # curl would send the literal text instead of the token.
+        tok_b64 = base64.b64encode(auth_token.encode("utf-8")).decode("ascii")
+        auth_header = f' -H "Authorization: Bearer $(echo {tok_b64} | base64 -d)"'
+    _, body, _ = remote.run(
+        "curl -sS -m 8 '" + api_url + "'" + auth_header +
+        " -w '\\n---HTTP=%{http_code} TIME=%{time_total}---\\n' 2>&1",
+        timeout=15,
+    )
+    items: list = []
+    api_code = 0
+    total = 0
+    api_err = ""
+    data = None
+    marker = "\n---HTTP="
+    json_part = body
+    if marker in body:
+        # rsplit consumes the marker, so the trailer starts directly with the digits.
+        json_part, status_part = body.rsplit(marker, 1)
+        m = re.match(r"(\d+)", status_part)
+        api_code = int(m.group(1)) if m else 0
+    try:
+        data = json.loads(json_part)
+        items = data.get("items") or []
+        total = int(data.get("total") or 0)
+    except (ValueError, TypeError, AttributeError) as e:
+        api_err = f"{type(e).__name__}: {e}"
+        data = None
+
+    # 3) Mobile breakpoints, counted on the server side so nothing gets truncated.
+    css_href = ""
+    m = re.search(r'<link[^>]+rel="stylesheet"[^>]+href="([^"]+)"', html)
+    if m:
+        css_href = m.group(1)
+    mobile_768 = mobile_480 = 0
+    if css_href:
+        # `grep -o ... | wc -l` counts every occurrence; `grep -c` would count
+        # matching lines only, which is at most 1 for a minified stylesheet.
+        cmd_css = (
+            "CSS=$(curl -sS -m 8 'http://127.0.0.1" + css_href + "'); "
+            "printf '%s' \"$CSS\" | grep -o -E 'max-width:[[:space:]]*768px' | wc -l; "
+            "echo ---480---; "
+            "printf '%s' \"$CSS\" | grep -o -E 'max-width:[[:space:]]*480px' | wc -l"
+        )
+        _, css_out, _ = remote.run(cmd_css, timeout=15)
+        # Output shape: "<count>\n---480---\n<count>".
+        parts = css_out.split("---480---")
+        mobile_768 = _last_int(parts[0])
+        mobile_480 = _last_int(parts[1]) if len(parts) > 1 else 0
+
+    # === Aggregate ===
+    issues: list[str] = []
+    if not has_viewport:
+        issues.append("首页 HTML 缺 viewport meta(移动端不友好)")
+    if not has_app_div:
+        issues.append("首页 HTML 缺 #app 挂载点")
+    if not has_js:
+        issues.append("首页 HTML 没引 JS bundle")
+    if not has_lang_zh:
+        issues.append("首页 HTML lang 不是 zh-CN")
+
+    # Feed API status: a 401 without a token is expected; a 401 with a token is a failure.
+    need_auth_msg = ""
+    if api_code == 401 and not auth_token:
+        need_auth_msg = "Feed API 401(端点需登录)— 用 --auth-user / --auth-pass 传 owner 凭据"
+    elif api_code != 200:
+        issues.append(f"Feed API 返回 {api_code} (非 200)")
+    if api_err:
+        issues.append(f"Feed API 解析失败: {api_err}")
+    if data is not None and not items and api_code == 200:
+        issues.append(f"Feed API 返回 items 为空 (total={total})")
+
+    # Sample of the first translated items for the detail output.
+    sample = [
+        {
+            "id":        it.get("id"),
+            "title":     (it.get("title") or "")[:60],
+            "title_zh":  (it.get("title_zh") or "")[:60],
+            "status":    it.get("translation_status"),
+            "engine":    it.get("translation_engine"),
+        }
+        for it in items[:3]
+    ]
+    has_zh = sum(1 for it in items if it.get("title_zh"))
+
+    summary_parts = [
+        f"html: {'✓' if has_viewport and has_app_div and has_js else '✗'}",
+        f"feed: {len(items)}/{total} (有译文 {has_zh})" if api_code == 200
+            else f"feed: http={api_code}",
+        f"mobile-css: {mobile_768}×768 + {mobile_480}×480" if css_href
+            else "mobile-css: (无 CSS 链接)",
+    ]
+    summary = " · ".join(summary_parts)
+    if need_auth_msg:
+        summary += " · " + need_auth_msg
+    elif issues:
+        summary += " · " + "; ".join(issues[:2])
+
+    # Verdict: all HTML markers present, plus either usable feed data or the
+    # expected "401 without token" situation (downgraded to info).
+    html_ok = has_viewport and has_app_div and has_js and has_lang_zh
+    if need_auth_msg:
+        ok = html_ok
+        sev = "info"
+    else:
+        ok = html_ok and not issues
+        if api_code not in (0, 200):
+            sev = "error"
+        else:
+            sev = "warn" if issues else "info"
+
+    detail_lines = [
+        f"首页 HTML: viewport={has_viewport} #app={has_app_div} js={has_js} lang-zh={has_lang_zh}",
+        f"Feed API: http={api_code} items={len(items)} total={total} 译过={has_zh}",
+    ]
+    if css_href:
+        detail_lines.append(f"CSS: {css_href}  mobile: 768px={mobile_768} 处, 480px={mobile_480} 处")
+    if sample:
+        detail_lines.append("首屏抽样:")
+        for s in sample:
+            detail_lines.append(
+                f"  #{s['id']} {s['title']!r} → {s['title_zh']!r} "
+                f"[{s['status']}/{s['engine']}]"
+            )
+    if need_auth_msg:
+        detail_lines.append("提示: " + need_auth_msg)
+    if issues:
+        detail_lines.append("问题: " + "; ".join(issues))
+
+    return Check(
+        "首页 SPA + Feed API + 移动端", "app", ok, summary,
+        detail="\n".join(detail_lines),
+        command=f"GET /; GET {api_url}; GET {css_href or '(no css)'}",
+        severity=sev,
+    )
+
+
+@timed
+def check_article_detail(remote: Remote, api_base: str, auth_token: str = "") -> Check:
+    """1.11 Article detail: take the first article of the list endpoint and inspect it.
+
+    Steps and criteria:
+      - GET /api/v1/articles?page=1&page_size=1 to obtain an article id
+      - GET /api/v1/articles/{id} must return 200 with a JSON object
+      - title_zh present and different from title
+      - at least one of body_zh_text / body_zh_formatted / body_zh_html present
+      - when body_zh_formatted exists it must contain class="article-body"
+    A 401 on the list call without a token is reported as info with a hint to
+    pass --auth-user / --auth-pass.
+
+    Args:
+        remote: command runner; ``run`` returns ``(rc, stdout, stderr)``.
+        api_base: API base URL; a trailing ``/api/v1/healthz`` is stripped.
+        auth_token: optional bearer token.
+    """
+    marker = "\n---HTTP="
+
+    def _split_status(raw: str) -> tuple[str, int]:
+        """Split curl output into (body, http_code); the code is 0 without a trailer."""
+        if marker not in raw:
+            return raw, 0
+        payload, trailer = raw.rsplit(marker, 1)
+        # rsplit consumed the marker, so the trailer starts directly with the digits.
+        found = re.match(r"(\d+)", trailer)
+        return payload, int(found.group(1)) if found else 0
+
+    base = api_base.rstrip("/").removesuffix("/api/v1/healthz")
+    list_url = f"{base}/api/v1/articles?page=1&page_size=1"
+    auth_h = ""
+    if auth_token:
+        # Double quotes are required so the shell actually runs $(...); inside
+        # single quotes curl would receive the literal text instead of the token.
+        tok_b64 = base64.b64encode(auth_token.encode("utf-8")).decode("ascii")
+        auth_h = f' -H "Authorization: Bearer $(echo {tok_b64} | base64 -d)"'
+    status_trailer = " -w '\\n---HTTP=%{http_code}---\\n' 2>&1"
+
+    rc, list_body, _ = remote.run(
+        "curl -sS -m 8 '" + list_url + "'" + auth_h + status_trailer,
+        timeout=10,
+    )
+    article_id = None
+    list_code = 0
+    if rc == 0 and list_body:
+        json_part, list_code = _split_status(list_body)
+        try:
+            data = json.loads(json_part)
+            if data.get("items"):
+                article_id = data["items"][0]["id"]
+        except (ValueError, AttributeError, KeyError, IndexError, TypeError):
+            # Best effort: an unusable list reply is reported as "no sample" below.
+            pass
+    if list_code == 401 and not auth_token:
+        return Check(
+            "详情页 API + 译文 CSS", "app", True,
+            "需 owner token(用 --auth-user / --auth-pass)",
+            detail=f"# raw list response:\n{list_body[:300]}",
+            command=f"GET {list_url} (no token)",
+            severity="info",
+        )
+    if not article_id:
+        return Check(
+            "详情页 API + 译文 CSS", "app", False,
+            f"无可用文章样本(列表 http={list_code}, items=0?)",
+            detail=list_body[:500],
+            command=list_url,
+            severity="warn",
+        )
+
+    # Fetch the detail record.
+    detail_url = f"{base}/api/v1/articles/{article_id}"
+    _, body2, _ = remote.run(
+        "curl -sS -m 8 '" + detail_url + "'" + auth_h + status_trailer,
+        timeout=10,
+    )
+    json_part, api_code = _split_status(body2)
+    article = {}
+    parse_err = ""
+    try:
+        article = json.loads(json_part)
+    except ValueError as e:
+        parse_err = f"{type(e).__name__}: {e}"
+
+    # Anything but a non-empty JSON object with HTTP 200 is a hard failure.
+    if api_code != 200 or not isinstance(article, dict) or not article:
+        return Check(
+            f"详情页 API #{article_id} + 译文 CSS", "app", False,
+            f"http={api_code} parse_err={parse_err or '-'}",
+            detail=body2[:500],
+            command=detail_url,
+            severity="error",
+        )
+
+    # Criteria.
+    title = article.get("title") or ""
+    title_zh = article.get("title_zh") or ""
+    body_zh_text = article.get("body_zh_text") or ""
+    body_zh_formatted = article.get("body_zh_formatted") or ""
+    body_zh_html = article.get("body_zh_html") or ""
+    fmt_status = article.get("format_status") or "n/a"
+    tr_status = article.get("translation_status") or "-"
+    tr_engine = article.get("translation_engine") or "-"
+
+    issues: list[str] = []
+    if not title_zh:
+        issues.append("缺 title_zh(无译文)")
+    if not (body_zh_text or body_zh_formatted or body_zh_html):
+        issues.append("缺 body_zh_text/formatted/html(译文全空)")
+    if title_zh and title and title_zh.strip() == title.strip():
+        issues.append("title_zh == title(未翻译)")
+
+    has_css_container = (
+        'class="article-body"' in body_zh_formatted
+        or "class='article-body'" in body_zh_formatted
+    )
+    if has_css_container:
+        css_info = "✓ 排版版带 .article-body 容器"
+    elif body_zh_formatted:
+        css_info = "✗ 排版版缺 .article-body 容器(译文没套 CSS)"
+        issues.append("排版版 body_zh_formatted 缺 .article-body CSS 容器")
+    else:
+        css_info = "— 无排版版(用原始译文展示)"
+
+    summary = (
+        f"#{article_id} {tr_status}/{tr_engine} fmt={fmt_status} "
+        f"译字 {len(title_zh)}/{len(body_zh_text)}; CSS {css_info}"
+    )
+    if issues:
+        summary += " · " + "; ".join(issues[:2])
+
+    detail_lines = [
+        f"原标题: {title[:80]!r}",
+        f"译标题: {title_zh[:80]!r}",
+        f"body_zh_text 长度: {len(body_zh_text)}",
+        f"body_zh_formatted 长度: {len(body_zh_formatted)}  status={fmt_status}",
+        f"body_zh_html 长度: {len(body_zh_html)}",
+        f"CSS 容器(.article-body): {'有' if has_css_container else '无'}",
+    ]
+    # First 300 characters of the formatted body, shown via repr.
+    if body_zh_formatted:
+        detail_lines.append(f"body_zh_formatted 前 300: {body_zh_formatted[:300]!r}")
+
+    # api_code is known to be 200 here, so only the issues decide the verdict.
+    return Check(
+        f"详情页 API #{article_id} + 译文 CSS", "app", not issues, summary,
+        detail="\n".join(detail_lines),
+        command=detail_url,
+        severity="warn" if issues else "info",
+    )
+
+
+@timed
+def check_agnes_llm(remote: Remote, compose_dir: str) -> Check:
+    """1.12 Agnes LLM health: send one real chat/completions request.
+
+    - Reads AGNES_API_KEY / AGNES_BASE_URL / AGNES_CHAT_MODEL from ``.env``.
+    - No key (or a ``your_`` placeholder) -> info, skipped.
+    - Otherwise sends a minimal request (max_tokens=8) and expects HTTP 200
+      with a non-empty ``choices[0].message.content``.
+
+    The key and payload travel base64-encoded inside the command text and are
+    decoded by the remote shell, so the command sent over the wire does not
+    contain the plaintext key.
+    NOTE(review): after expansion the decoded key is still an argument of the
+    curl process on the remote host.
+
+    Args:
+        remote: command runner; ``run`` returns ``(rc, stdout, stderr)``.
+        compose_dir: directory holding the ``.env`` file.
+    """
+    # 1) Read the three settings from .env.
+    _, env_out, _ = remote.run(
+        f"cd {compose_dir} 2>/dev/null && "
+        "grep -E '^(AGNES_API_KEY|AGNES_BASE_URL|AGNES_CHAT_MODEL)=' .env 2>/dev/null"
+    )
+    settings = {"AGNES_API_KEY": "", "AGNES_BASE_URL": "", "AGNES_CHAT_MODEL": ""}
+    for line in env_out.splitlines():
+        m = re.match(r"^(AGNES_API_KEY|AGNES_BASE_URL|AGNES_CHAT_MODEL)=(.+)$", line)
+        if m:
+            settings[m.group(1)] = m.group(2).strip().strip('"').strip("'")
+    api_key = settings["AGNES_API_KEY"]
+
+    if not api_key or api_key.startswith("your_"):
+        return Check(
+            "Agnes LLM 联通", "app", True,
+            "未配 AGNES_API_KEY(LLM 增强模块关闭),跳过",
+            detail=env_out.strip()[:300],
+            severity="info",
+        )
+
+    base_url = settings["AGNES_BASE_URL"] or "https://apihub.agnes-ai.com/v1"
+    model = settings["AGNES_CHAT_MODEL"] or "agnes-2.0-flash"
+    chat_url = f"{base_url.rstrip('/')}/chat/completions"
+
+    # 2) base64-encode key and payload; the remote shell decodes them.
+    key_b64 = base64.b64encode(api_key.encode("utf-8")).decode("ascii")
+    payload_obj = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": "You are a ping bot. Reply with a single word."},
+            {"role": "user",   "content": "ping"},
+        ],
+        "max_tokens": 8,
+        "temperature": 0,
+    }
+    payload_b64 = base64.b64encode(
+        json.dumps(payload_obj, ensure_ascii=False).encode("utf-8")
+    ).decode("ascii")
+
+    body_marker = "--- body ---"
+    cmd = (
+        f"KEY_B64={key_b64}; "
+        f"PAYLOAD_B64={payload_b64}; "
+        "BODY=$(echo \"$PAYLOAD_B64\" | base64 -d); "
+        # Private temp file instead of a fixed /tmp path; removed at the end.
+        "RESP=$(mktemp); "
+        "curl -sS -m 25 -o \"$RESP\" -w 'http=%{http_code} t=%{time_total}\\n' "
+        "-H \"Authorization: Bearer $(echo $KEY_B64 | base64 -d)\" "
+        "-H 'Content-Type: application/json' "
+        f"-d \"$BODY\" '{chat_url}'; "
+        # The cap is generous on purpose: a truncated body is not valid JSON.
+        f"echo '{body_marker}'; head -c 8192 \"$RESP\" 2>/dev/null; echo; rm -f \"$RESP\""
+    )
+    _, out, _ = remote.run(cmd, timeout=40)
+
+    # Parse the -w trailer and the response body.
+    status_part, _, body_str = out.partition(body_marker)
+    m = re.search(r"http=(\d+)\s+t=([\d.]+)", status_part)
+    code = int(m.group(1)) if m else 0
+    elapsed = float(m.group(2)) if m else 0
+    body_str = body_str.strip()
+
+    if code != 200:
+        return Check(
+            "Agnes LLM chat 调用", "app", False,
+            f"http={code} t={elapsed:.1f}s",
+            detail=out[:600],
+            command=f"POST {chat_url}  (auth via base64-decoded key, not echoed)",
+            severity="error",
+        )
+
+    # Look for reply text; a missing or null field counts as "no text".
+    try:
+        resp = json.loads(body_str)
+        first_choice = (resp.get("choices") or [{}])[0]
+        text = (first_choice.get("message") or {}).get("content") or ""
+    except (ValueError, AttributeError, IndexError, TypeError, KeyError):
+        text = ""
+
+    ok = bool(text)
+    summary = f"http={code} t={elapsed:.1f}s model={model} reply={text[:30]!r}"
+    return Check(
+        "Agnes LLM chat 调用", "app", ok, summary,
+        detail=f"# model: {model}\n# base_url: {base_url}\n# raw:\n{out[:800]}",
+        command=f"POST {chat_url}",
+        severity="info" if ok else "warn",
+    )
+
+
+@timed
+def check_caddy(remote: Remote) -> Check:
+    """1.10 Caddy reverse proxy: the root path on port 80 must answer with a 2xx/3xx status."""
+    probe = "curl -sS -m 5 -o /dev/null -w 'http=%{http_code} t=%{time_total}\\n' http://127.0.0.1/"
+    _, raw, _ = remote.run(probe)
+    found = re.search(r"http=(\d+)", raw)
+    # No status in the output is treated as code 0, i.e. a failure.
+    status = 0 if found is None else int(found.group(1))
+    if 200 <= status < 400:
+        return Check("Caddy http://127.0.0.1/", "app", True, raw.strip(), severity="info")
+    return Check("Caddy http://127.0.0.1/", "app", False, raw.strip(), severity="error")
+
+
+@timed
+def check_frontend(remote: Remote) -> Check:
+    """1.11 Frontend: GET / on port 80 should answer 2xx/3xx and look like HTML.
+
+    Two requests run in one command: the first prints status, time and content
+    type, the second prints the first three lines of the page.
+    """
+    status_probe = (
+        "curl -sS -m 5 -o /dev/null -w 'http=%{http_code} t=%{time_total} ct=%{content_type}\\n' http://127.0.0.1/"
+    )
+    head_probe = "curl -sS -m 5 http://127.0.0.1/ | head -3"
+    _, raw, _ = remote.run(status_probe + "; " + head_probe)
+
+    found = re.search(r"http=(\d+)", raw)
+    status = int(found.group(1)) if found else 0
+    lowered = raw.lower()
+    looks_like_html = "html" in lowered or "<!doctype" in lowered
+    healthy = 200 <= status < 400 and looks_like_html
+
+    first_line = raw.splitlines()[0] if raw else ""
+    return Check("Frontend 首页", "app", healthy, first_line,
+                 raw, severity="info" if healthy else "warn")
+
+
+@timed
+def check_tls_cert(remote: Remote) -> Check:
+    """1.12 HTTPS certificate: checked only when DOMAIN is set in ``.env``.
+
+    Reads DOMAIN from ``.env`` under the module-level COMPOSE_DIR, fetches the
+    certificate served on port 443 and evaluates its ``notAfter`` date:
+      - DOMAIN missing:                 info, skipped
+      - certificate not retrievable:    warn
+      - already expired:                error
+      - fewer than 14 days remaining:   warn
+      - otherwise:                      info
+    If the date cannot be parsed, the raw ``notAfter`` value is reported as info.
+    """
+    import shlex
+    from datetime import datetime, timezone
+
+    # Read DOMAIN from .env first; without it there is nothing to check.
+    _, env_out, _ = remote.run(
+        f"cd {COMPOSE_DIR} 2>/dev/null && "
+        "grep -E '^DOMAIN=' .env 2>/dev/null | head -1"
+    )
+    domain = ""
+    for line in env_out.splitlines():
+        m = re.match(r"^DOMAIN=(.+)$", line.strip())
+        if m:
+            domain = m.group(1).strip().strip('"').strip("'")
+            break
+    if not domain:
+        return Check("HTTPS 证书(域名)", "app", True,
+                     "未配 DOMAIN,跳过(走 IP 模式)", severity="info")
+
+    # Quote the value: it comes from a file and ends up in a shell command.
+    host = shlex.quote(domain)
+    cmd2 = (
+        f"echo | openssl s_client -servername {host} -connect {host}:443 2>/dev/null "
+        "| openssl x509 -noout -dates 2>&1"
+    )
+    _, out2, _ = remote.run(cmd2, timeout=15)
+    check_name = f"HTTPS 证书 {domain}"
+    m = re.search(r"notAfter=(.+)", out2)
+    if not m:
+        return Check(check_name, "app", False,
+                     "无法获取证书(可能 443 未开)", out2, severity="warn")
+
+    not_after = m.group(1).strip()
+    summary = f"notAfter={not_after}"
+    try:
+        # openssl prints e.g. "Jun  1 12:00:00 2025 GMT".
+        expires = datetime.strptime(not_after, "%b %d %H:%M:%S %Y %Z").replace(tzinfo=timezone.utc)
+    except ValueError:
+        # Unknown date format: keep the plain report instead of guessing.
+        return Check(check_name, "app", True, summary, severity="info")
+
+    days_left = (expires - datetime.now(timezone.utc)).days
+    summary += f" (剩余 {days_left} 天)"
+    if days_left < 0:
+        return Check(check_name, "app", False, summary + " 证书已过期", out2, severity="error")
+    if days_left < 14:
+        return Check(check_name, "app", False, summary + " 即将过期", out2, severity="warn")
+    return Check(check_name, "app", True, summary, severity="info")
+
+
+@timed
+def check_docker_logs_size(remote: Remote, compose_dir: str) -> Check:
+    """1.13 Container log build-up.
+
+    Lists the five largest ``*-json.log`` files under
+    /var/lib/docker/containers and flags every file larger than 200 MB.
+
+    Args:
+        remote: command runner; ``run`` returns ``(rc, stdout, stderr)``.
+        compose_dir: directory holding the docker-compose project.
+    """
+    cmd = (
+        f"cd {compose_dir} && "
+        "docker compose logs --no-color --tail=0 2>&1 >/dev/null; "
+        "du -sh /var/lib/docker/containers/*/*-json.log 2>/dev/null | sort -h | tail -5"
+    )
+    _, out, _ = remote.run(cmd, timeout=20)
+
+    # Multipliers that turn a `du -h` figure into megabytes.
+    to_mb = {"": 1 / (1024 * 1024), "K": 1 / 1024, "M": 1, "G": 1024, "T": 1024 * 1024}
+    big = []
+    for line in out.splitlines():
+        entry = line.strip()
+        # `du -h` prints decimals for small figures ("1.5G"), so accept a
+        # fractional part (with either decimal separator).
+        m = re.match(r"(\d+(?:[.,]\d+)?)([KMGT]?)\s+", entry)
+        if not m:
+            continue
+        size_mb = float(m.group(1).replace(",", ".")) * to_mb[m.group(2)]
+        if size_mb > 200:
+            big.append(entry)
+    return Check("容器日志大小", "docker", not big,
+                 "ok" if not big else f"大日志: {'; '.join(big)}",
+                 out, severity="warn" if big else "info")
+
+
+# ============== Main flow ==============
+# Check registry: group name -> ordered list of (label, runner) pairs.
+# Each runner takes the Remote and returns a Check. The lambdas read
+# COMPOSE_DIR / API_BASE / SAMPLE_N / AUTH_TOKEN when they are called, so
+# main() can assign those globals after this table has been built.
+GROUPS: dict[str, list[tuple[str, Callable]]] = {
+    "docker": [
+        ("docker compose ps",          lambda r: check_compose_ps(r, COMPOSE_DIR)),
+        ("近 200 行 worker/api 日志",  lambda r: check_container_logs(r, COMPOSE_DIR)),
+        ("docker system df",           lambda r: check_docker_system(r)),
+        ("容器日志大小",               lambda r: check_docker_logs_size(r, COMPOSE_DIR)),
+    ],
+    "host": [
+        ("磁盘空间",   lambda r: check_disk(r)),
+        ("内存使用",   lambda r: check_memory(r)),
+    ],
+    "network": [
+        ("关键端口监听", lambda r: check_ports(r)),
+    ],
+    "app": [
+        ("API 健康",            lambda r: check_api_health(r, API_BASE)),
+        ("Redis ping",          lambda r: check_redis(r, COMPOSE_DIR)),
+        ("DB 行数",             lambda r: check_db_counts(r, COMPOSE_DIR)),
+        ("LLM 工作流落实度",    lambda r: check_llm_workflow(r, COMPOSE_DIR)),
+        ("翻译抽查",            lambda r: check_translation_sample(r, COMPOSE_DIR, SAMPLE_N)),
+        ("Caddy 反代",          lambda r: check_caddy(r)),
+        ("Frontend 首页",       lambda r: check_frontend(r)),
+        ("首页 SPA + Feed API", lambda r: check_homepage(r, API_BASE, AUTH_TOKEN)),
+        ("详情页 + 译文 CSS",   lambda r: check_article_detail(r, API_BASE, AUTH_TOKEN)),
+        ("Agnes LLM 调用",      lambda r: check_agnes_llm(r, COMPOSE_DIR)),
+        ("HTTPS 证书",          lambda r: check_tls_cert(r)),
+    ],
+}
+
+
+def _login_owner(remote: Remote, api_base: str, username: str, password: str) -> tuple[str, str]:
+    """Log in through /api/v1/auth/login and return ``(token, http_code)``.
+
+    ``token`` is "" when the login fails or the reply cannot be parsed;
+    ``http_code`` is "?" when curl printed no status.
+    """
+    login_url = f"{api_base.rstrip('/').removesuffix('/api/v1/healthz')}/api/v1/auth/login"
+    # Build the JSON here so quotes, backslashes and % in the credentials are
+    # escaped correctly, then ship it base64-encoded and feed it to curl on
+    # stdin: the credentials never appear in curl's argument list.
+    payload = json.dumps({"username": username, "password": password})
+    body_b64 = base64.b64encode(payload.encode("utf-8")).decode("ascii")
+    marker = "--- login response ---"
+    login_cmd = (
+        f"BODY_B64={body_b64}; RESP=$(mktemp); "
+        "echo \"$BODY_B64\" | base64 -d | "
+        "curl -sS -m 8 -o \"$RESP\" -w 'http=%{http_code}\\n' "
+        f"-H 'Content-Type: application/json' --data-binary @- '{login_url}'; "
+        # Print the whole reply: a truncated reply is not valid JSON.
+        f"echo '{marker}'; cat \"$RESP\" 2>/dev/null; echo; rm -f \"$RESP\""
+    )
+    _, out, _ = remote.run(login_cmd, timeout=15)
+    status_part, _, body_part = out.partition(marker)
+    m = re.search(r"http=(\d+)", status_part)
+    http_code = m.group(1) if m else "?"
+    if http_code != "200":
+        return "", http_code
+    try:
+        resp = json.loads(body_part.strip())
+        token = resp.get("access_token") or resp.get("accessToken") or resp.get("token") or ""
+    except (ValueError, AttributeError) as e:
+        print(f"  ⚠ auth: 解析响应失败 {e}")
+        return "", http_code
+    return str(token), http_code
+
+
+def main() -> int:
+    """Parse the command line, run the selected check groups and print a summary.
+
+    Returns:
+        Process exit code: 0 when everything passed, 1 when only warn-level
+        checks failed, 2 when at least one error-level check failed.
+    """
+    ap = argparse.ArgumentParser(
+        description="diary-news 服务器健康检查",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="示例:\n"
+               "  python healthcheck.py                    # 跑全部\n"
+               "  python healthcheck.py --only docker,app  # 只跑 docker 和 app 组\n"
+               "  python healthcheck.py --local --compose-dir .  # 服务器本地跑\n"
+               "  python healthcheck.py --json report.json # 导出结构化报告\n",
+    )
+    ap.add_argument("--local", action="store_true", help="在服务器本地跑,不走 SSH")
+    ap.add_argument("--host", default=os.environ.get("REMOTE_HOST", DEFAULT_HOST))
+    ap.add_argument("--port", type=int, default=int(os.environ.get("REMOTE_PORT", DEFAULT_PORT)))
+    ap.add_argument("--user", default=os.environ.get("REMOTE_USER", DEFAULT_USER))
+    ap.add_argument("--password", default=os.environ.get("REMOTE_PASS", ""))
+    ap.add_argument("--compose-dir", default=os.environ.get("COMPOSE_DIR", DEFAULT_COMPOSE))
+    ap.add_argument("--api-base", default=os.environ.get("API_BASE_URL", DEFAULT_API_BASE))
+    ap.add_argument("--only", help="逗号分隔的组名: docker,host,network,app")
+    ap.add_argument("--skip", help="逗号分隔的组名,跳过")
+    ap.add_argument("--json", dest="json_out", help="把结果写到 JSON 文件")
+    ap.add_argument("--quiet", action="store_true", help="只输出汇总")
+    ap.add_argument("--verbose", "-v", action="store_true",
+                    help="显示失败项的完整原始输出(默认 warn 截断 12 行)")
+    ap.add_argument("--sample", type=int, default=3,
+                    help="翻译抽查的文章数(默认 3 篇,24h 内已翻译的随机样本)")
+    ap.add_argument("--auth-user", default=os.environ.get("OWNER_USER", "owner"),
+                    help="owner 用户名(用于获取 JWT token,调 /api/v1/auth/login)")
+    ap.add_argument("--auth-pass", default=os.environ.get("OWNER_PASS", ""),
+                    help="owner 密码(env: OWNER_PASS)。如不传,API 端点会降级为 info(不污染汇总)")
+    ap.add_argument("--skip-auth", action="store_true",
+                    help="明确跳过 auth token,等价于不传 --auth-pass")
+    args = ap.parse_args()
+
+    # The lambdas in GROUPS read these module globals when they are called.
+    global COMPOSE_DIR, API_BASE, SAMPLE_N, AUTH_TOKEN
+    COMPOSE_DIR = args.compose_dir
+    API_BASE = args.api_base
+    SAMPLE_N = max(1, min(args.sample, 20))  # clamp to 1..20 to guard against huge values
+    # Always initialised, even when login is skipped, so the lambdas never hit a NameError.
+    AUTH_TOKEN = ""
+
+    only = set((args.only or "").split(",")) - {""}
+    skip = set((args.skip or "").split(",")) - {""}
+    # A mistyped group would otherwise run nothing and still report "all passed".
+    unknown = (only | skip) - set(GROUPS)
+    if unknown:
+        print(f"  ⚠ 未知的组名(已忽略): {', '.join(sorted(unknown))}", file=sys.stderr)
+
+    target = "local" if args.local else f"{args.user}@{args.host}:{args.port}"
+    print("==== diary-news 健康检查 ====")
+    print(f"目标:  {target}")
+    print(f"目录:  {COMPOSE_DIR}")
+    print(f"时间:  {time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
+    print()
+
+    remote = Remote(local=args.local, host=args.host, port=args.port,
+                    user=args.user, password=args.password)
+    report = Report(target=target, started_at=time.strftime("%Y-%m-%dT%H:%M:%S%z"))
+
+    try:
+        # ===== Obtain the owner token (optional) =====
+        # Done inside the try block so the remote is closed even if login raises.
+        if not args.skip_auth and args.auth_pass:
+            AUTH_TOKEN, code_str = _login_owner(remote, API_BASE, args.auth_user, args.auth_pass)
+            if AUTH_TOKEN:
+                print(f"  ✓ auth: 已登录 owner='{args.auth_user}', token 长度 {len(AUTH_TOKEN)}")
+            else:
+                print(f"  ⚠ auth: 登录失败 http={code_str}, API 检查项将无 token(降级 info)")
+        else:
+            print("  · auth: 未传 --auth-pass(API 检查项将降级为 info 提示)")
+
+        for group, fns in GROUPS.items():
+            if only and group not in only:
+                continue
+            if skip and group in skip:
+                continue
+            print(f"--- [{group}] ---")
+            for name, fn in fns:
+                try:
+                    c = fn(remote)
+                    if not args.quiet:
+                        report.add(c, verbose=args.verbose)
+                    else:
+                        report.checks.append(asdict(c))
+                except Exception as e:
+                    # Top-level boundary: one crashing check must not abort the
+                    # run; it is recorded as an error-level result instead.
+                    err_c = Check(name, group, False, f"异常: {e}",
+                                  detail=f"type={type(e).__name__}\n{type(e).__doc__ or ''}",
+                                  severity="error")
+                    if not args.quiet:
+                        report.add(err_c, verbose=args.verbose)
+                    else:
+                        report.checks.append(asdict(err_c))
+            print()
+    finally:
+        remote.close()
+
+    report.finished_at = time.strftime("%Y-%m-%dT%H:%M:%S%z")
+    ok, bad, err = report.summary()
+    print("==== 汇总 ====")
+    print(f"  合计 {len(report.checks)} 项 · 通过 {ok} · 失败 {bad} · 严重错误 {err}")
+    if err > 0:
+        print(f"  ✗ 存在 {err} 个 error 级问题,建议立即排查")
+        code = 2
+    elif bad > 0:
+        print(f"  ⚠ 存在 {bad} 个 warn 级问题,建议看一下")
+        code = 1
+    else:
+        print("  ✓ 全部通过")
+        code = 0
+
+    if args.json_out:
+        with open(args.json_out, "w", encoding="utf-8") as f:
+            json.dump(asdict(report), f, ensure_ascii=False, indent=2)
+        print(f"  报告已写入: {args.json_out}")
+
+    return code
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/push_agnes_key.py b/scripts/push_agnes_key.py
new file mode 100644
index 0000000..8c5c537
--- /dev/null
+++ b/scripts/push_agnes_key.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""安全推送新 AGNES_API_KEY 到服务器的 .env,然后重启 worker。
+- 旧 key(已暴露过的)会被拒绝使用
+- 新 key 通过 base64 中转,SSH 进程列表和 bash history 都看不到明文
+- 写完后立即验证:重启 worker + check_agnes_llm ping 一次
+"""
+from __future__ import annotations
+import base64
+import os
+import sys
+import paramiko
+
+# === Old key(s) that were pasted earlier and are already exposed (foolproofing: refuse to use them again) ===
+LEAKED_KEYS = {
+    "sk-F4XwNlhgZbODf1XT9QcWd5ObLsoKIa9v8xUWkNlRRyjwITaC",
+    # Hard blacklist: never pushed, even if already rotated. NOTE(review): the literal now lives in git history — confirm it is revoked.
+}
+
+HOST = os.environ.get("REMOTE_HOST", "207.57.129.228")  # SSH host; override with REMOTE_HOST
+PORT = int(os.environ.get("REMOTE_PORT", "19717"))  # SSH port; override with REMOTE_PORT
+USER = os.environ.get("REMOTE_USER", "root")  # SSH login user; override with REMOTE_USER
+PASS = os.environ.get("REMOTE_PASS", "")  # SSH password; main() refuses to run when it is empty
+COMPOSE_DIR = os.environ.get("COMPOSE_DIR", "/srv/news")  # remote directory main() cd's into (.env + docker compose)
+NEW_KEY = os.environ.get("NEW_AGNES_KEY", "")  # the key to push; main() refuses to run when it is empty
+
+def die(msg: str, code: int = 1) -> None:  # Report msg on stderr, then end the process with status `code`.
+    print("✗", msg, file=sys.stderr)
+    raise SystemExit(code)
+
+def ssh_exec(c: paramiko.SSHClient, cmd: str, timeout: int = 30) -> tuple[int, str, str]:
+    """Run ``cmd`` over SSH with a PTY and return ``(exit_status, stdout_text, stderr_text)``."""
+    _stdin, stdout, stderr = c.exec_command(cmd, timeout=timeout, get_pty=True)
+    # stdout is drained first, then stderr. NOTE(review): under a PTY, remote stderr normally shows up in stdout.
+    texts = [stream.read().decode(errors="replace") for stream in (stdout, stderr)]
+    return (stdout.channel.recv_exit_status(), *texts)
+
+def main() -> int:
+    """Push NEW_KEY into the remote .env, restart the worker and probe it; returns 0 (failures exit via die())."""
+    # 1) Pre-flight checks; the format check requires BOTH the "sk-" prefix and a length of at least 20.
+    if not PASS:
+        die("需要 REMOTE_PASS 环境变量")
+    if not NEW_KEY:
+        die("需要 NEW_AGNES_KEY 环境变量(去 Agnes 控制台重新生成的新 key)")
+    if NEW_KEY in LEAKED_KEYS:
+        die("拒绝:你输入的是已暴露的旧 key。请去 Agnes 控制台撤销 + 重新生成新 key。")
+    if not (NEW_KEY.startswith("sk-") and len(NEW_KEY) >= 20):
+        die(f"NEW_AGNES_KEY 格式可疑(前缀={NEW_KEY[:6]!r},长度={len(NEW_KEY)}),拒绝推送")
+    # '#', '&', '\' and line breaks are special in the sed replacement of step 5; '"' would break the .env quoting.
+    if any(ch in NEW_KEY for ch in '#&\\"\r\n'):
+        die("NEW_AGNES_KEY 含有 # & \\ \" 或换行符,无法安全写入 .env,拒绝推送")
+
+    # 2) Dry run: show only the key's length and a 4-character prefix, never the full value.
+    print(f"准备推送:新 key 长度={len(NEW_KEY)},前缀={NEW_KEY[:4]}***")
+
+    # 3) SSH with password auth only. NOTE(review): AutoAddPolicy accepts unknown host keys — consider pinning.
+    print(f"连 SSH: {USER}@{HOST}:{PORT} ...")
+    c = paramiko.SSHClient()
+    c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    c.connect(HOST, port=PORT, username=USER, password=PASS,
+              timeout=30, banner_timeout=30, auth_timeout=30,
+              allow_agent=False, look_for_keys=False)
+    print("✓ SSH 连接成功")
+
+    # 4) Back up .env first and abort if that fails (ssh_exec uses a PTY, so error text usually lands in `out`).
+    rc, out, err = ssh_exec(c, f"cd {COMPOSE_DIR} && cp -f .env .env.bak.$(date +%Y%m%d_%H%M%S) && ls -la .env*")
+    if rc != 0:
+        die(f"备份 .env 失败 rc={rc} err={(err or out).strip()[:200]}")
+    print("✓ .env 已备份(输出在下方):")
+    for line in out.strip().splitlines():
+        if ".env" in line:
+            print(f"   {line}")
+
+    # 5) Base64-encode the new key locally; the remote shell decodes it while rewriting the AGNES_API_KEY line.
+    key_b64 = base64.b64encode(NEW_KEY.encode("utf-8")).decode("ascii")
+    cmd = (
+        f"cd {COMPOSE_DIR} && "
+        f"export NEW_KEY_B64='{key_b64}' && "
+        # grep -q fails when the line is missing (sed -i exits 0 on no match); '#' as sed delimiter keeps '/' harmless.
+        "grep -q '^AGNES_API_KEY=' .env && "
+        "sed -i.bak2 -E 's#^AGNES_API_KEY=.*#AGNES_API_KEY=\"'\"$(echo $NEW_KEY_B64 | base64 -d)\"'\"#' .env && "
+        "echo '--- 修改后的 AGNES_API_KEY 行(隐藏中间部分)---' && "
+        "grep '^AGNES_API_KEY=' .env | sed -E 's/(AGNES_API_KEY=\")[^\"]+(\")/\\1***隐藏***\\2/'"
+    )
+    rc, out, err = ssh_exec(c, cmd, timeout=15)
+    if rc != 0:
+        die(f"写 .env 失败 rc={rc}(.env 里可能没有 AGNES_API_KEY= 行)err={(err or out).strip()[:200]}")
+    print("✓ .env 已更新")
+    for line in out.strip().splitlines():
+        if "AGNES_API_KEY" in line:
+            print(f"   {line}")
+
+    # 6) Restart the worker so the new key takes effect.
+    print("重启 worker 容器...")
+    rc, out, err = ssh_exec(c, f"cd {COMPOSE_DIR} && docker compose restart worker", timeout=60)
+    if rc != 0:
+        die(f"重启 worker 失败 rc={rc} err={(err or out)[:200]}")
+    print(f"✓ {out.strip().splitlines()[-1] if out.strip() else 'restarted'}")
+
+    # 7) Give the worker 5 s to come up, then ask LlmClient inside the container whether it is configured.
+    print("等 worker 起来(5s)...")
+    ssh_exec(c, "sleep 5", timeout=10)
+    print("验证 Agnes LLM ping...")
+    rc, out, _ = ssh_exec(c, f"cd {COMPOSE_DIR} && set -a && . ./.env && set +a && "
+                                f"echo \"AGNES_API_KEY present: $([ -n \"$AGNES_API_KEY\" ] && echo yes || echo no)\" && "
+                                f"docker compose exec -T worker python -c "
+                                f"'import asyncio; from app.services.llm.client import LlmClient; "
+                                f"c = LlmClient(); print(\"configured:\", c.is_configured())' 2>&1 | tail -5",
+                            timeout=20)
+    for line in out.strip().splitlines():
+        print(f"   {line}")
+
+    c.close()
+    print()
+    print("🎉 完成。已就绪的事情:")
+    print("   1) 旧 key 拒绝推送,你用的是新 key")
+    print("   2) .env 已更新 + 备份 + 重启 worker")
+    print("   3) Agnes 凭据在 worker 里加载成功(如有异常,看上面输出)")
+    print()
+    print("接下来你可以:")
+    print("   - 跑 healthcheck 验证(check_agnes_llm + check_llm_workflow)")
+    print("   - 手动 enrich 几篇老文章测试:")
+    print("     docker compose exec api python -m app.scripts.re_enrich --limit 3")
+    print("   - 等几小时,看 worker 自动跑批,LLM 工作流状态从 n/a → ok")
+    return 0
+
+if __name__ == "__main__":  # run only when executed as a script, not when imported
+    sys.exit(main())  # main() returns 0 on success; failures leave earlier through die()
\ No newline at end of file
diff --git a/scripts/push_to_gitea.py b/scripts/push_to_gitea.py
new file mode 100644
index 0000000..a73e791
--- /dev/null
+++ b/scripts/push_to_gitea.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""一键推送到 Gitea 远程。
+
+流程(每步都先预览,要你确认才进下一步):
+  1. git status + diff --stat  → 让你看要推哪些文件
+  2. 检查 .env / secrets 是否会进 git(check-ignore + 内容嗅探)
+  3. 检查 APK / 大文件
+  4. git add <你指定的文件>  → 不 add . / 全量,精准 add
+  5. git status 再次确认
+  6. git commit -m <你给的 message>  → 没 message 就退出
+  7. git push origin main  → 推
+
+退出码:
+  0 = 成功推到 origin/main
+  1 = 你取消
+  2 = 推送失败(网络 / 认证 / 冲突)
+"""
+from __future__ import annotations
+import argparse
+import os
+import subprocess
+import sys
+
+REPO_DIR = r"D:\selftools\diary-news"  # working directory for every command issued through run()
+REMOTE = "origin"  # remote name used by main() for fetch and push
+BRANCH = "main"  # the only branch main() agrees to push
+
+
+def run(cmd: str, check: bool = True, capture: bool = True) -> subprocess.CompletedProcess:
+    """Run a shell command inside REPO_DIR; raises CalledProcessError on a non-zero exit when ``check`` is true."""
+    text_opts = {"text": True, "encoding": "utf-8", "errors": "replace"}
+    shell_opts = {"shell": True, "cwd": REPO_DIR, "check": check, "capture_output": capture}
+    completed = subprocess.run(cmd, **shell_opts, **text_opts)
+    return completed
+
+
+def confirm(prompt: str, default_yes: bool = False) -> bool:
+    """Ask a yes/no question on stdin; an empty reply returns ``default_yes``."""
+    hint = "[y/N]" if not default_yes else "[Y/n]"
+    reply = input(f"{prompt} {hint}: ").strip().lower()
+    # Only "y"/"yes" (case-insensitive) count as consent; any other non-empty reply means no.
+    return reply in {"y", "yes"} if reply else default_yes
+
+
+def main() -> int:
+    """Interactively stage, commit and push the repository at REPO_DIR.
+
+    Returns:
+        0 on success (or after committing when pushing is disabled), 1 when the user
+        cancels, an APK is blocked or nothing can be added, 2 when a git step fails.
+    """
+    ap = argparse.ArgumentParser(description="推送 diary-news 到 Gitea")
+    ap.add_argument("--message", "-m", help="commit message(必填,不然 dry-run 后停)")
+    ap.add_argument("--files", nargs="*", help="要 add 的文件路径(默认 add 所有 untracked + modified)")
+    ap.add_argument("--do-push", action="store_true", default=True,
+                    help="默认会真推;传 --no-do-push 只 commit 不 push")
+    ap.add_argument("--no-do-push", dest="do_push", action="store_false")
+    ap.add_argument("--allow-apk", action="store_true", help="允许推 APK(默认会拦)")
+    args = ap.parse_args()
+
+    def git(*argv: str) -> subprocess.CompletedProcess:
+        """Run git with an argv list (no shell) so paths/messages containing quotes are passed verbatim."""
+        return subprocess.run(["git", *argv], cwd=REPO_DIR, capture_output=True,
+                              text=True, encoding="utf-8", errors="replace")
+
+    print(f"==== 仓库: {REPO_DIR} ====")
+    print(f"==== 远程: {REMOTE}/{BRANCH} ====\n")
+
+    # 0) Must be on BRANCH and must not be behind the remote.
+    r = run("git rev-parse --abbrev-ref HEAD")
+    if r.stdout.strip() != BRANCH:
+        print(f"✗ 当前在 {r.stdout.strip()},不在 {BRANCH} 分支,拒绝推")
+        return 2
+    print(f"✓ 在 {BRANCH} 分支")
+
+    # check=False: the exit status is inspected here instead of letting run() raise CalledProcessError.
+    r = run("git fetch --quiet " + REMOTE, check=False)
+    if r.returncode != 0:
+        print(f"✗ fetch 失败: {r.stderr}"); return 2
+
+    behind = run(f"git log HEAD..{REMOTE}/{BRANCH} --oneline", capture=True).stdout.strip()
+    if behind:
+        print(f"✗ 本地落后于 {REMOTE}/{BRANCH},先 pull:")
+        print(behind)
+        return 2
+    print(f"✓ 本地与 {REMOTE}/{BRANCH} 同步")
+
+    # 1) Current state.
+    print(f"\n==== 1) git status ====")
+    print(run("git status --short").stdout.rstrip() or "(无变更)")
+
+    # 2) Large-file sniffing (> 5 MB): sizes are taken in Python from `git status --porcelain`
+    #    (avoids shell-pipe exit codes, xargs on empty input and Windows paths containing spaces).
+    print(f"\n==== 2) 大文件嗅探(> 5MB) ====")
+    status = run("git status --porcelain", check=False).stdout
+    # Each line is "XY path" ("XY old -> new" for renames); drop the status column and surrounding quotes.
+    entries = [ln.split(maxsplit=1)[1].strip().strip('"')
+               for ln in status.splitlines() if len(ln.split()) >= 2]
+    big = []  # [(size_mb, path), ...]
+    for entry in entries:
+        path = entry.split(" -> ")[-1].strip('"')  # for a rename, size the new path
+        try:
+            # git prints repo-relative paths, so resolve against REPO_DIR rather than the caller's cwd.
+            size_mb = os.path.getsize(os.path.join(REPO_DIR, path)) / 1024 / 1024
+        except OSError:  # deleted or unreadable entry
+            continue
+        if size_mb > 5:
+            big.append((size_mb, path))
+    big.sort(key=lambda x: -x[0])
+    if big:
+        for size_mb, path in big:
+            print(f"  ⚠ {size_mb:.1f} MB  {path}")
+    else:
+        print("  ✓ 无 > 5MB 文件")
+
+    if big and not args.allow_apk:
+        apk = [p for _, p in big if p.lower().endswith(".apk")]
+        if apk:
+            print(f"\n  ✗ 检测到 APK,默认拒绝推送(传 --allow-apk 强制推)")
+            print(f"  建议:把 '{apk[0]}' 加进 .gitignore,或上传到 release page")
+            return 1
+
+    # 3) Sensitive-file sniffing by name (.env / secrets / .key / .pem); this only warns, it does not block.
+    print(f"\n==== 3) 敏感文件嗅探 ====")
+    keywords = (".env", "secret", "password", ".key", ".pem", "credentials")
+    sensitive = [p for p in entries if any(k in p.lower() for k in keywords)]
+    if sensitive:
+        for p in sensitive:
+            print(f"  ⚠ {p}")
+    else:
+        print("  ✓ 未发现敏感文件(.env / secrets / key / pem)")
+
+    # 4) Pick the files to stage.
+    print(f"\n==== 4) 选择要 add 的文件 ====")
+    if args.files:
+        files = args.files
+    else:
+        # Default: everything modified or untracked that is not ignored.
+        r = run("git ls-files --modified --others --exclude-standard")
+        files = [f for f in r.stdout.splitlines() if f.strip()]
+    if not files:
+        print("  (无文件可 add)"); return 1
+    print(f"  共 {len(files)} 个:")
+    for f in files:
+        size_note = ""
+        if f.lower().endswith(".apk"):
+            size_note = "  ← APK!"
+        print(f"    {f}{size_note}")
+
+    if not confirm("确认 add 上面这些?"):
+        print("已取消"); return 1
+
+    # 5) git add, one path at a time, without going through a shell.
+    for f in files:
+        r = git("add", "--", f)
+        if r.returncode != 0:
+            print(f"  ✗ git add {f} 失败: {r.stderr}"); return 2
+    print(f"  ✓ 已 add {len(files)} 个文件")
+
+    # 6) Show the status again now that files are staged.
+    print(f"\n==== 5) git status(已 staged) ====")
+    print(run("git status --short").stdout.rstrip())
+
+    # 7) Diff stat of what is about to be committed.
+    print(f"\n==== 6) diff --stat(将 commit 的内容) ====")
+    print(run("git diff --cached --stat").stdout.rstrip())
+
+    # 8) Commit; the message goes to git as a single argv item, so quotes in it are safe.
+    if not args.message:
+        msg = input("commit message (直接回车用 'chore: ...'): ").strip()
+        if not msg:
+            msg = "chore: push via push_to_gitea.py"
+    else:
+        msg = args.message
+    print(f"\n==== 7) git commit -m \"{msg}\" ====")
+    r = git("commit", "-m", msg)
+    if r.returncode != 0:
+        # Possibly an empty commit, or a hook rejected it.
+        print(f"  ! commit 退出码 {r.returncode}")
+        print(r.stdout)
+        print(r.stderr)
+        return 2
+    print(f"  ✓ commit 成功")
+    print(r.stdout.rstrip())
+
+    # 9) Push; argparse stores both --do-push and --no-do-push under `do_push`.
+    if not args.do_push:
+        print(f"\n==== 8) --no-do-push 跳过,只 commit ====")
+        print("  下次手动:git push origin main")
+        return 0
+
+    print(f"\n==== 8) git push {REMOTE} {BRANCH} ====")
+    if not confirm(f"确认推 {REMOTE}/{BRANCH}?"):
+        print("已取消,commit 已留下但未推"); return 1
+    # check=False so a rejected push is reported below with exit code 2 instead of raising.
+    r = run(f"git push {REMOTE} {BRANCH}", check=False)
+    if r.returncode != 0:
+        print(f"  ✗ push 失败: {r.stderr}"); return 2
+    print(f"  ✓ push 成功")
+    print(r.stdout.rstrip())
+
+    print(f"\n🎉 完成!新 commit 已推 {REMOTE}/{BRANCH}")
+    return 0
+
+
+if __name__ == "__main__":  # run only when executed as a script, not when imported
+    sys.exit(main())  # exit status: 0 pushed/committed, 1 cancelled, 2 git failure
diff --git a/scripts/test_auth.py b/scripts/test_auth.py
new file mode 100644
index 0000000..5c84cc1
--- /dev/null
+++ b/scripts/test_auth.py
@@ -0,0 +1,33 @@
+"""模拟:跳过 SSH,直接调 GROUPS 的 lambda"""
+import sys
+sys.path.insert(0, r'D:\selftools\diary-news\scripts')  # make the scripts directory importable (hard-coded local path)
+
+# Bypass SSH: have Remote.local run directly on this machine and accept arbitrary arguments
+import healthcheck as hc
+
+# Key step: reproduce the side effects that running main would have had
+hc.COMPOSE_DIR = "/srv/news"
+hc.API_BASE = "http://127.0.0.1/api/v1/healthz"
+hc.SAMPLE_N = 3
+
+# Assign AUTH_TOKEN directly to see whether the lambdas can pick it up
+hc.AUTH_TOKEN = "fake-token-123"
+
+# Stand-in for Remote so that the check functions never send real requests
+class FakeRemote:
+    def run(self, cmd, timeout=10):
+        """Return a canned ``(rc, stdout, stderr)``: an article-list payload for list curls, otherwise "ok"."""
+        is_article_list = all(tok in cmd for tok in ("curl", "articles")) and "id=" not in cmd
+        canned = '{"items":[{"id":542,"title":"x","title_zh":"X","translation_status":"ok","translation_engine":"tencent"}],"total":1,"total_pages":1}\n---HTTP=200---\n' if is_article_list else "ok"
+        return 0, canned, ""
+
+remote = FakeRemote()
+
+# Call directly the two entries of GROUPS['app'] that read AUTH_TOKEN
+for name, fn in hc.GROUPS['app']:
+    if "Feed API" in name or "详情页" in name:  # checks are selected by substrings of their names
+        try:
+            c = fn(remote)
+            print(f"{name}: ok ok={c.ok} summary={c.summary}")
+        except Exception as e:  # print the exception type and message instead of aborting the loop
+            print(f"{name}: EXC {type(e).__name__}: {e}")
diff --git a/scripts/test_curl_401.py b/scripts/test_curl_401.py
new file mode 100644
index 0000000..0948c66
--- /dev/null
+++ b/scripts/test_curl_401.py
@@ -0,0 +1,13 @@
+"""repro: curl 401 时的 stdout 形态"""
+import subprocess
+# Goes to localhost, on a port that does not exist, so curl itself is expected to fail.
+WRITE_OUT = "\n---HTTP=%{http_code} TIME=%{time_total}---\n"
+curl_argv = ["curl", "-sS", "-m", "8", "-w", WRITE_OUT, "http://127.0.0.1:9999/nonexistent"]
+r = subprocess.run(curl_argv, capture_output=True, text=True, timeout=10)
+
+# Dump the exit code plus truncated stdout/stderr so the raw shape of the output is visible.
+print("=== 不存在的端口(预期:curl 报错,http=000)===")
+for label, value in (("rc:", r.returncode),
+                     ("stdout:", repr(r.stdout[:300])),
+                     ("stderr:", repr(r.stderr[:200]))):
+    print(label, value)
diff --git a/scripts/test_global_repro.py b/scripts/test_global_repro.py
new file mode 100644
index 0000000..daf9d60
--- /dev/null
+++ b/scripts/test_global_repro.py
@@ -0,0 +1,14 @@
+"""exact repro: 跟脚本结构一致"""
+GROUPS = {
+    "x": [("t", lambda r: (r, AUTH_TOKEN))]  # AUTH_TOKEN is a module global, resolved only when the lambda is called
+}
+
+def main():  # defines the global first, then calls every lambda registered in GROUPS
+    global AUTH_TOKEN
+    AUTH_TOKEN = "hello"  # bound at module scope because of the `global` statement above
+    for g, fns in GROUPS.items():
+        for name, fn in fns:
+            print(name, "->", fn("R"))  # each lambda returns (its argument, AUTH_TOKEN)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/verify_enrich.py b/scripts/verify_enrich.py
new file mode 100644
index 0000000..6f91ddd
--- /dev/null
+++ b/scripts/verify_enrich.py
@@ -0,0 +1,39 @@
+"""Wait 15 s, connect, and check whether enrichment has started working; re-check after another 60 s."""
+import os, paramiko, time
+time.sleep(15)  # wait a little so the worker has had time to run
+c = paramiko.SSHClient()  # module-level client shared by run() below
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): accepts unknown host keys — consider pinning
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],  # raises KeyError when REMOTE_PASS is not set
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+
+def run(label, cmd, timeout=30):
+    """Print a ``=== label ===`` banner, run ``cmd`` over the shared SSH client ``c`` and echo its output."""
+    print(f"\n=== {label} ===")
+    try:
+        _stdin, stdout, stderr = c.exec_command(cmd, timeout=timeout)
+        out_text, err_text = (s.read().decode(errors="replace") for s in (stdout, stderr))
+        print(out_text.rstrip())
+        if err_text.strip(): print(f"[stderr] {err_text.rstrip()}")
+    except Exception as exc:  # best effort: report the failure so the remaining steps still run
+        print(f"[exc] {type(exc).__name__}: {exc}")
+
+
+# 1) Did enrichment_loop start? (last 50 worker log lines, filtered for enrich/started)
+run("1) enrichment_loop 启动", "bash -lc 'cd /srv/news && docker compose logs --tail=50 worker 2>&1 | grep -iE \"enrich|started\" | head -20'")
+
+# 2) enrich_article log lines (the key signal)
+run("2) enrich_article 日志", "bash -lc 'cd /srv/news && docker compose logs --tail=200 worker 2>&1 | grep -E \"enrich_article|classify|format ok|commentary ok\" | head -20'")
+
+# 3) Current article counts grouped by classify_status (shows how many are still n/a)
+run("3) 当前 n/a 数", "bash -lc 'cd /srv/news && docker compose exec -T postgres psql -U news -d news -c \"SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;\"'")
+
+# 4) Look again one minute later (same query as step 3)
+time.sleep(60)
+run("4) 1 分钟后 n/a 数", "bash -lc 'cd /srv/news && docker compose exec -T postgres psql -U news -d news -c \"SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;\"'")
+
+# 5) enrich_article log lines after that minute
+run("5) 1 分钟后 enrich_article 日志", "bash -lc 'cd /srv/news && docker compose logs --tail=100 worker 2>&1 | grep enrich_article | tail -20'")
+
+c.close()
diff --git a/scripts/verify_worker.py b/scripts/verify_worker.py
new file mode 100644
index 0000000..00a0f53
--- /dev/null
+++ b/scripts/verify_worker.py
@@ -0,0 +1,34 @@
+"""看完整 worker 启动 + 状态"""
+import os, paramiko
+c = paramiko.SSHClient()  # module-level client shared by run() below
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # NOTE(review): accepts unknown host keys — consider pinning
+c.connect("207.57.129.228", port=19717, username="root",
+          password=os.environ["REMOTE_PASS"],  # raises KeyError when REMOTE_PASS is not set
+          timeout=30, allow_agent=False, look_for_keys=False)
+
+
+def run(label, cmd, timeout=30):
+    """Print a ``=== label ===`` banner, run ``cmd`` over the shared SSH client ``c`` and echo its output."""
+    print(f"\n=== {label} ===")
+    try:
+        _stdin, stdout, stderr = c.exec_command(cmd, timeout=timeout)
+        out_text, err_text = (s.read().decode(errors="replace") for s in (stdout, stderr))
+        print(out_text.rstrip())
+        if err_text.strip(): print(f"[stderr] {err_text.rstrip()}")
+    except Exception as exc:  # best effort: report the failure so the remaining steps still run
+        print(f"[exc] {type(exc).__name__}: {exc}")
+
+
+# 1) Worker startup log lines (INFO/ERROR/WARNING, httpx noise filtered out)
+run("1) worker 启动 INFO 日志", "bash -lc 'cd /srv/news && docker compose logs --tail=80 worker 2>&1 | grep -E \"INFO|ERROR|WARNING\" | grep -v httpx | head -30'")
+
+# 2) Is the worker running?
+run("2) worker ps", "bash -lc 'cd /srv/news && docker compose ps worker'")
+
+# 3) Confirm the enrichment.py on the server (is this the edited version?): prints the head of .new, if any, then of the live file
+run("3) 服务器 enrichment.py 头部", "bash -lc 'head -10 /srv/news/backend/app/services/llm/enrichment.py.new 2>/dev/null; echo ---; head -10 /srv/news/backend/app/services/llm/enrichment.py'")
+
+# 4) enrichment.py inside the container: the command prints lines 405-435 (the label says 410-425) to check for the new version
+run("4) 容器内 enrichment.py 410-425 行", "bash -lc 'cd /srv/news && docker compose exec -T worker sed -n \"405,435p\" /app/app/services/llm/enrichment.py'")
+
+c.close()