(说明 LLM 排版版带了 CSS 容器) + - 移动端: meta viewport(首页的) + 详情页路由 /article/{id} + 401 视为"端点需 auth"— 提示用户加 --auth-user / --auth-pass,不污染汇总。 + """ + base = api_base.rstrip("/").removesuffix("/api/v1/healthz") + list_url = f"{base}/api/v1/articles?page=1&page_size=1" + tok_b64 = base64.b64encode(auth_token.encode("utf-8")).decode("ascii") if auth_token else "" + auth_h = f" -H 'Authorization: Bearer $(echo {tok_b64} | base64 -d)'" if tok_b64 else "" + rc, list_body, _ = remote.run( + "curl -sS -m 8 '" + list_url + "'" + auth_h + " -w '\\n---HTTP=%{http_code}---\\n' 2>&1", + timeout=10, + ) + article_id = None + list_code = 0 + if rc == 0 and list_body: + try: + marker = "\n---HTTP=" + if marker in list_body: + json_part, status_part = list_body.rsplit(marker, 1) + m = re.search(r"HTTP=(\d+)", status_part) + list_code = int(m.group(1)) if m else 0 + else: + json_part = list_body + data = json.loads(json_part) + if data.get("items"): + article_id = data["items"][0]["id"] + except Exception: + pass + if list_code == 401 and not auth_token: + return Check( + "详情页 API + 译文 CSS", "app", True, + "需 owner token(用 --auth-user / --auth-pass)", + detail=f"# raw list response:\n{list_body[:300]}", + command=f"GET {list_url} (no token)", + severity="info", + ) + if not article_id: + return Check( + "详情页 API + 译文 CSS", "app", False, + f"无可用文章样本(列表 http={list_code}, items=0?)", + detail=list_body[:500], + command=list_url, + severity="warn", + ) + + # 拉详情 + detail_url = f"{base}/api/v1/articles/{article_id}" + rc2, body2, _ = remote.run( + "curl -sS -m 8 '" + detail_url + "'" + auth_h + " -w '\\n---HTTP=%{http_code}---\\n' 2>&1", + timeout=10, + ) + api_code = 0 + article = {} + parse_err = "" + try: + marker = "\n---HTTP=" + if marker in body2: + json_part, status_part = body2.rsplit(marker, 1) + else: + json_part, status_part = body2, "" + m = re.search(r"HTTP=(\d+)", status_part) + api_code = int(m.group(1)) if m else 0 + article = json.loads(json_part) + except Exception as e: + 
parse_err = f"{type(e).__name__}: {e}" + + if api_code != 200 or not article: + return Check( + f"详情页 API #{article_id} + 译文 CSS", "app", False, + f"http={api_code} parse_err={parse_err or '-'}", + detail=body2[:500], + command=detail_url, + severity="error", + ) + + # 判据 + title = article.get("title") or "" + title_zh = article.get("title_zh") or "" + body_zh_text = article.get("body_zh_text") or "" + body_zh_formatted = article.get("body_zh_formatted") or "" + body_zh_html = article.get("body_zh_html") or "" + fmt_status = article.get("format_status") or "n/a" + tr_status = article.get("translation_status") or "-" + tr_engine = article.get("translation_engine") or "-" + + issues: list[str] = [] + if not title_zh: issues.append("缺 title_zh(无译文)") + if not (body_zh_text or body_zh_formatted or body_zh_html): + issues.append("缺 body_zh_text/formatted/html(译文全空)") + if title_zh and title and title_zh.strip() == title.strip(): + issues.append("title_zh == title(未翻译)") + + has_css_container = ( + 'class="article-body"' in body_zh_formatted + or "class='article-body'" in body_zh_formatted + ) + css_info = "✓ 排版版带 .article-body 容器" if has_css_container else ( + "✗ 排版版缺 .article-body 容器(译文没套 CSS)" + if body_zh_formatted + else "— 无排版版(用原始译文展示)" + ) + + if not has_css_container and body_zh_formatted: + issues.append("排版版 body_zh_formatted 缺 .article-body CSS 容器") + + summary = ( + f"#{article_id} {tr_status}/{tr_engine} fmt={fmt_status} " + f"译字 {len(title_zh)}/{len(body_zh_text)}; CSS {css_info}" + ) + if issues: + summary += " · " + "; ".join(issues[:2]) + + detail_lines = [ + f"原标题: {title[:80]!r}", + f"译标题: {title_zh[:80]!r}", + f"body_zh_text 长度: {len(body_zh_text)}", + f"body_zh_formatted 长度: {len(body_zh_formatted)} status={fmt_status}", + f"body_zh_html 长度: {len(body_zh_html)}", + f"CSS 容器(.article-body): {'有' if has_css_container else '无'}", + ] + # 抽 body_zh_formatted 前 300 字符(可能 < 字符被转义了) + if body_zh_formatted: + detail_lines.append(f"body_zh_formatted 前 300: 
@timed
def check_agnes_llm(remote: Remote, compose_dir: str) -> Check:
    """1.12 Agnes LLM health: issue one real chat/completions request.

    Steps:
      1. Read AGNES_API_KEY / AGNES_BASE_URL / AGNES_CHAT_MODEL from the
         ``.env`` file inside *compose_dir* on the target.
      2. If the key is missing or still a ``your_...`` placeholder, return a
         passing ``info`` check (the LLM enrichment module is optional).
      3. Otherwise POST a minimal prompt (``max_tokens=8``) and require
         HTTP 200 plus a non-empty ``choices[0].message.content``.

    The key and the JSON payload travel base64-encoded and are decoded by the
    remote shell, so the plain-text key never appears in the command string.

    Args:
        remote: Command runner; ``remote.run(cmd, timeout=...)`` must return
            ``(rc, stdout, stderr)``.
        compose_dir: Directory on the target that holds ``.env``.

    Returns:
        A ``Check`` in group ``app``: severity ``error`` when the HTTP status
        is not 200, ``warn`` when the status is 200 but no reply text could be
        extracted, ``info`` otherwise.
    """
    # Upper bound on how much of the response is echoed back for parsing.
    # The previous 400-byte cut truncated ordinary completions mid-JSON, so
    # json.loads() failed and a healthy endpoint was reported as "warn".
    max_body_bytes = 65536
    body_marker = "--- body ---"

    # 1) Read the three settings from .env (the last occurrence of a key wins).
    _rc, env_out, _ = remote.run(
        f"cd {compose_dir} 2>/dev/null && "
        "grep -E '^(AGNES_API_KEY|AGNES_BASE_URL|AGNES_CHAT_MODEL)=' .env 2>/dev/null"
    )
    settings = {}
    for line in env_out.splitlines():
        m = re.match(r"^(AGNES_API_KEY|AGNES_BASE_URL|AGNES_CHAT_MODEL)=(.+)$", line)
        if m:
            # Drop surrounding whitespace and one layer of quotes.
            settings[m.group(1)] = m.group(2).strip().strip('"').strip("'")
    api_key = settings.get("AGNES_API_KEY", "")

    if not api_key or api_key.startswith("your_"):
        return Check(
            "Agnes LLM 联通", "app", True,
            "未配 AGNES_API_KEY(LLM 增强模块关闭),跳过",
            detail=env_out.strip()[:300],
            severity="info",
        )

    base_url = settings.get("AGNES_BASE_URL") or "https://apihub.agnes-ai.com/v1"
    model = settings.get("AGNES_CHAT_MODEL") or "agnes-2.0-flash"
    chat_url = f"{base_url.rstrip('/')}/chat/completions"

    # 2) base64-encode key + payload; the remote shell decodes them.
    key_b64 = base64.b64encode(api_key.encode("utf-8")).decode("ascii")
    payload_obj = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a ping bot. Reply with a single word."},
            {"role": "user", "content": "ping"},
        ],
        "max_tokens": 8,
        "temperature": 0,
    }
    payload_b64 = base64.b64encode(
        json.dumps(payload_obj, ensure_ascii=False).encode("utf-8")
    ).decode("ascii")

    cmd = (
        f"KEY_B64={key_b64}; "
        f"PAYLOAD_B64={payload_b64}; "
        "BODY=$(echo \"$PAYLOAD_B64\" | base64 -d); "
        # Private temp file instead of a fixed, shared /tmp path; removed below.
        "RESP=$(mktemp 2>/dev/null || echo /tmp/agnes_resp.$$); "
        "curl -sS -m 25 -o \"$RESP\" -w 'http=%{http_code} t=%{time_total}\\n' "
        "-H \"Authorization: Bearer $(echo $KEY_B64 | base64 -d)\" "
        "-H 'Content-Type: application/json' "
        f"-d \"$BODY\" '{chat_url}'; "
        f"echo '{body_marker}'; head -c {max_body_bytes} \"$RESP\" 2>/dev/null; echo; "
        "rm -f \"$RESP\""
    )
    _rc2, out, _ = remote.run(cmd, timeout=40)

    # 3) Parse the curl status line and the echoed response body.
    m = re.search(r"http=(\d+)\s+t=([\d.]+)", out)
    code = int(m.group(1)) if m else 0
    elapsed = float(m.group(2)) if m else 0
    _head, _sep, tail = out.partition(body_marker)
    body_str = tail.strip()

    if code != 200:
        return Check(
            "Agnes LLM chat 调用", "app", False,
            f"http={code} t={elapsed:.1f}s",
            detail=out[:600],
            command=f"POST {chat_url} (auth via base64-decoded key, not echoed)",
            severity="error",
        )

    # Extract the reply text; any malformed shape counts as "no reply".
    text = ""
    try:
        resp = json.loads(body_str)
        first_choice = (resp.get("choices") or [{}])[0]
        content = (first_choice.get("message") or {}).get("content")
        if content:
            # ``content`` may legally be null or non-string; never slice None.
            text = content if isinstance(content, str) else str(content)
    except (ValueError, AttributeError, IndexError, KeyError, TypeError):
        text = ""

    ok = bool(text)
    summary = f"http={code} t={elapsed:.1f}s model={model} reply={text[:30]!r}"
    return Check(
        "Agnes LLM chat 调用", "app", ok, summary,
        detail=f"# model: {model}\n# base_url: {base_url}\n# raw:\n{out[:800]}",
        command=f"POST {chat_url}",
        severity="info" if ok else "warn",
    )
@timed
def check_docker_logs_size(remote: Remote, compose_dir: str) -> Check:
    """1.13 Container log backlog: flag json-file logs larger than 200 MB.

    Runs ``du -sh`` over ``/var/lib/docker/containers/*/*-json.log`` on the
    target, keeps the five largest entries and parses the human-readable
    sizes that ``du -h`` prints.

    Args:
        remote: Command runner; ``remote.run(cmd, timeout=...)`` must return
            ``(rc, stdout, stderr)``.
        compose_dir: Compose project directory to ``cd`` into first.

    Returns:
        A ``Check`` in group ``docker``: ``warn`` listing the oversized log
        files, or ``info`` with summary ``ok`` when none exceed the limit.
    """
    limit_mb = 200
    # `du -h` uses binary prefixes; a bare number means bytes.
    unit_to_mb = {
        "": 1 / (1024 * 1024),
        "K": 1 / 1024,
        "M": 1,
        "G": 1024,
        "T": 1024 * 1024,
    }
    # Sizes below 10 units are printed with one decimal ("1.5G", "9.8M").
    # The old integer-only pattern skipped those lines, so logs between 1G
    # and 10G were never reported. Accept "." or "," as decimal separator.
    size_re = re.compile(r"(\d+(?:[.,]\d+)?)([KMGT]?)\s+")

    cmd = (
        f"cd {compose_dir} && "
        "docker compose logs --no-color --tail=0 2>&1 >/dev/null; "
        "du -sh /var/lib/docker/containers/*/*-json.log 2>/dev/null | sort -h | tail -5"
    )
    _rc, out, _ = remote.run(cmd, timeout=20)

    big = []
    for raw_line in out.splitlines():
        line = raw_line.strip()
        m = size_re.match(line)
        if not m:
            continue
        size_mb = float(m.group(1).replace(",", ".")) * unit_to_mb[m.group(2)]
        if size_mb > limit_mb:
            big.append(line)

    return Check("容器日志大小", "docker", not big,
                 "ok" if not big else f"大日志: {'; '.join(big)}",
                 out, severity="warn" if big else "info")
# 服务器本地跑\n" + " python healthcheck.py --json report.json # 导出结构化报告\n", + ) + ap.add_argument("--local", action="store_true", help="在服务器本地跑,不走 SSH") + ap.add_argument("--host", default=os.environ.get("REMOTE_HOST", DEFAULT_HOST)) + ap.add_argument("--port", type=int, default=int(os.environ.get("REMOTE_PORT", DEFAULT_PORT))) + ap.add_argument("--user", default=os.environ.get("REMOTE_USER", DEFAULT_USER)) + ap.add_argument("--password", default=os.environ.get("REMOTE_PASS", "")) + ap.add_argument("--compose-dir", default=os.environ.get("COMPOSE_DIR", DEFAULT_COMPOSE)) + ap.add_argument("--api-base", default=os.environ.get("API_BASE_URL", DEFAULT_API_BASE)) + ap.add_argument("--only", help="逗号分隔的组名: docker,host,network,app") + ap.add_argument("--skip", help="逗号分隔的组名,跳过") + ap.add_argument("--json", dest="json_out", help="把结果写到 JSON 文件") + ap.add_argument("--quiet", action="store_true", help="只输出汇总") + ap.add_argument("--verbose", "-v", action="store_true", + help="显示失败项的完整原始输出(默认 warn 截断 12 行)") + ap.add_argument("--sample", type=int, default=3, + help="翻译抽查的文章数(默认 3 篇,24h 内已翻译的随机样本)") + ap.add_argument("--auth-user", default=os.environ.get("OWNER_USER", "owner"), + help="owner 用户名(用于获取 JWT token,调 /api/v1/auth/login)") + ap.add_argument("--auth-pass", default=os.environ.get("OWNER_PASS", ""), + help="owner 密码(env: OWNER_PASS)。如不传,API 端点会降级为 info(不污染汇总)") + ap.add_argument("--skip-auth", action="store_true", + help="明确跳过 auth token,等价于不传 --auth-pass") + args = ap.parse_args() + + global COMPOSE_DIR, API_BASE, SAMPLE_N, AUTH_TOKEN + COMPOSE_DIR = args.compose_dir + API_BASE = args.api_base + SAMPLE_N = max(1, min(args.sample, 20)) # 1..20 封顶,避免误传爆 1000 + # 提前在 main 函数顶部声明,稍后赋值后,GROUPS 里的 lambda 能读到 + + only = set((args.only or "").split(",")) - {""} + skip = set((args.skip or "").split(",")) - {""} + + target = "local" if args.local else f"{args.user}@{args.host}:{args.port}" + print(f"==== diary-news 健康检查 ====") + print(f"目标: {target}") + print(f"目录: {COMPOSE_DIR}") + 
print(f"时间: {time.strftime('%Y-%m-%d %H:%M:%S %Z')}") + print() + + remote = Remote(local=args.local, host=args.host, port=args.port, + user=args.user, password=args.password) + report = Report(target=target, started_at=time.strftime("%Y-%m-%dT%H:%M:%S%z")) + + # ===== 拿 owner token(可选)===== + # 必须无条件初始化:即使跳过了 login,AUTH_TOKEN 也要在模块 dict 里, + # 否则 GROUPS 里的 lambda 闭包查找时会 NameError。 + global AUTH_TOKEN + AUTH_TOKEN = "" + if not args.skip_auth and args.auth_pass: + # base64 编码密码再传,避免出现在 process list + pw_b64 = base64.b64encode(args.auth_pass.encode("utf-8")).decode("ascii") + login_url = f"{API_BASE.rstrip('/').removesuffix('/api/v1/healthz')}/api/v1/auth/login" + login_cmd = ( + f"PW_B64={pw_b64}; " + "BODY=$(printf '{\"username\":\"%s\",\"password\":\"'\"$(echo $PW_B64 | base64 -d)\"'\"}' \"" + + args.auth_user + "\"); " + "curl -sS -m 8 -o /tmp/login_resp -w 'http=%{http_code}\\n' " + f"-H 'Content-Type: application/json' -d \"$BODY\" '{login_url}'; " + "echo '--- token (jwt header only) ---'; " + "head -c 200 /tmp/login_resp 2>/dev/null; echo" + ) + rc, out, _ = remote.run(login_cmd, timeout=15) + m = re.search(r"http=(\d+)", out) + if m and m.group(1) == "200": + try: + body_str = out.rsplit("--- token (jwt header only) ---", 1)[-1].strip() + body_str = body_str.rstrip("---").strip() + resp = json.loads(body_str) + AUTH_TOKEN = resp.get("access_token") or resp.get("accessToken") or resp.get("token") or "" + except Exception as e: + print(f" ⚠ auth: 解析响应失败 {e}") + if AUTH_TOKEN: + print(f" ✓ auth: 已登录 owner='{args.auth_user}', token 长度 {len(AUTH_TOKEN)}") + else: + code_str = m.group(1) if m else "?" 
+ print(f" ⚠ auth: 登录失败 http={code_str}, API 检查项将无 token(降级 info)") + else: + print(" · auth: 未传 --auth-pass(API 检查项将降级为 info 提示)") + + try: + for group, fns in GROUPS.items(): + if only and group not in only: continue + if skip and group in skip: continue + print(f"--- [{group}] ---") + for name, fn in fns: + try: + c = fn(remote) + if not args.quiet: + report.add(c, verbose=args.verbose) + else: + report.checks.append(asdict(c)) + except Exception as e: + err_c = Check(name, group, False, f"异常: {e}", + detail=f"type={type(e).__name__}\n{type(e).__doc__ or ''}", + severity="error") + if not args.quiet: + report.add(err_c, verbose=args.verbose) + else: + report.checks.append(asdict(err_c)) + print() + finally: + remote.close() + + report.finished_at = time.strftime("%Y-%m-%dT%H:%M:%S%z") + ok, bad, err = report.summary() + print(f"==== 汇总 ====") + print(f" 合计 {len(report.checks)} 项 · 通过 {ok} · 失败 {bad} · 严重错误 {err}") + if err > 0: + print(f" ✗ 存在 {err} 个 error 级问题,建议立即排查") + code = 2 + elif bad > 0: + print(f" ⚠ 存在 {bad} 个 warn 级问题,建议看一下") + code = 1 + else: + print(f" ✓ 全部通过") + code = 0 + + if args.json_out: + with open(args.json_out, "w", encoding="utf-8") as f: + json.dump(asdict(report), f, ensure_ascii=False, indent=2) + print(f" 报告已写入: {args.json_out}") + + return code + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/push_agnes_key.py b/scripts/push_agnes_key.py new file mode 100644 index 0000000..8c5c537 --- /dev/null +++ b/scripts/push_agnes_key.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""安全推送新 AGNES_API_KEY 到服务器的 .env,然后重启 worker。 +- 旧 key(已暴露过的)会被拒绝使用 +- 新 key 通过 base64 中转,SSH 进程列表和 bash history 都看不到明文 +- 写完后立即验证:重启 worker + check_agnes_llm ping 一次 +""" +from __future__ import annotations +import base64 +import os +import sys +import paramiko + +# === 你之前贴过的、已经暴露的旧 key(防呆:拒绝再次使用)=== +LEAKED_KEYS = { + "sk-F4XwNlhgZbODf1XT9QcWd5ObLsoKIa9v8xUWkNlRRyjwITaC", + # 如果你已经轮换过,旧 key 就作废了;但这个常量是"硬性黑名单",永远不推送 +} 
def main() -> int:
    """Push a newly generated AGNES_API_KEY into the server's .env and restart the worker.

    Flow:
      1. Validate the environment (REMOTE_PASS, NEW_AGNES_KEY) and refuse
         keys listed in LEAKED_KEYS or keys with a suspicious format.
      2. Connect over SSH with password authentication.
      3. Back up ``.env`` with a timestamp suffix.
      4. Replace the ``AGNES_API_KEY=`` line via ``sed``; the key is sent
         base64-encoded and decoded by the remote shell.
      5. Restart the ``worker`` container and print whether the key is
         visible to the worker's LLM client.

    Returns:
        0 on success. Every failed step aborts through ``die()``, which
        raises ``SystemExit``; the SSH connection is closed in all cases.
    """
    # 1) Preconditions
    if not PASS:
        die("需要 REMOTE_PASS 环境变量")
    if not NEW_KEY:
        die("需要 NEW_AGNES_KEY 环境变量(去 Agnes 控制台重新生成的新 key)")
    if NEW_KEY in LEAKED_KEYS:
        die("拒绝:你输入的是已暴露的旧 key。请去 Agnes 控制台撤销 + 重新生成新 key。")
    # Both conditions must hold. The previous `or` accepted a bare "sk-" as
    # well as any 20+ character string without the expected prefix.
    if not (NEW_KEY.startswith("sk-") and len(NEW_KEY) >= 20):
        die(f"NEW_AGNES_KEY 格式可疑(前缀={NEW_KEY[:6]!r},长度={len(NEW_KEY)}),拒绝推送")
    # The key is spliced into a `sed s#...#...#` replacement and written
    # inside double quotes in .env; these characters would corrupt either.
    unsafe = sorted({ch for ch in NEW_KEY if ch.isspace() or ch in "#&\\\"'$`"})
    if unsafe:
        die(f"NEW_AGNES_KEY 含有会破坏 sed / .env 引号的字符 {unsafe!r},拒绝推送")

    # 2) Dry-run echo: length and a short prefix only, never the key itself.
    print(f"准备推送:新 key 长度={len(NEW_KEY)},前缀={NEW_KEY[:4]}***")

    # 3) SSH
    print(f"连 SSH: {USER}@{HOST}:{PORT} ...")
    c = paramiko.SSHClient()
    # NOTE(security): AutoAddPolicy trusts any host key on first contact, so
    # the password is exposed to a man-in-the-middle. Kept for compatibility;
    # consider load_system_host_keys() with RejectPolicy.
    c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        c.connect(HOST, port=PORT, username=USER, password=PASS,
                  timeout=30, banner_timeout=30, auth_timeout=30,
                  allow_agent=False, look_for_keys=False)
        print("✓ SSH 连接成功")

        # 4) Back up the current .env; never edit without a backup in place.
        rc, out, _ = ssh_exec(
            c,
            f"cd {COMPOSE_DIR} && cp -f .env .env.bak.$(date +%Y%m%d_%H%M%S) && ls -la .env*",
        )
        if rc != 0:
            die(f"备份 .env 失败 rc={rc}(目录 {COMPOSE_DIR} 或 .env 不存在?)")
        print("✓ .env 已备份(输出在下方):")
        for line in out.strip().splitlines():
            if ".env" in line:
                print(f"  {line}")

        # 5) base64-encode the key; the remote shell decodes it for sed.
        key_b64 = base64.b64encode(NEW_KEY.encode("utf-8")).decode("ascii")
        cmd = (
            f"cd {COMPOSE_DIR} && "
            f"export NEW_KEY_B64='{key_b64}' && "
            # `#` is the sed delimiter so a `/` in the key cannot interfere.
            "sed -i.bak2 -E 's#^AGNES_API_KEY=.*#AGNES_API_KEY=\"'\"$(echo $NEW_KEY_B64 | base64 -d)\"'\"#' .env && "
            "echo '--- 修改后的 AGNES_API_KEY 行(隐藏中间部分)---' && "
            "grep '^AGNES_API_KEY=' .env | sed -E 's/(AGNES_API_KEY=\")[^\"]+(\")/\\1***隐藏***\\2/'"
        )
        rc, out, err = ssh_exec(c, cmd, timeout=15)
        if rc != 0:
            die(f"写 .env 失败 rc={rc} err={err}")
        # The pipeline's status is that of the trailing sed, so rc is 0 even
        # when .env has no AGNES_API_KEY= line and nothing was replaced.
        if not any(ln.strip().startswith("AGNES_API_KEY=") for ln in out.splitlines()):
            die(".env 里没有 AGNES_API_KEY= 这一行,sed 没有替换任何内容;请先手动加一行 AGNES_API_KEY= 再重跑")
        print("✓ .env 已更新")
        for line in out.strip().splitlines():
            if "AGNES_API_KEY" in line:
                print(f"  {line}")

        # 6) Restart the worker so it picks up the new key.
        print("重启 worker 容器...")
        rc, out, err = ssh_exec(c, f"cd {COMPOSE_DIR} && docker compose restart worker", timeout=60)
        if rc != 0:
            die(f"重启 worker 失败 rc={rc} err={err[:200]}")
        print(f"✓ {out.strip().splitlines()[-1] if out.strip() else 'restarted'}")

        # 7) Give the worker a moment, then check that it sees the credentials.
        print("等 worker 起来(5s)...")
        ssh_exec(c, "sleep 5", timeout=10)

        print("验证 Agnes LLM ping...")
        rc, out, _ = ssh_exec(
            c,
            f"cd {COMPOSE_DIR} && set -a && . ./.env && set +a && "
            "echo \"AGNES_API_KEY present: $([ -n \"$AGNES_API_KEY\" ] && echo yes || echo no)\" && "
            "docker compose exec -T worker python -c "
            "'import asyncio; from app.services.llm.client import LlmClient; "
            "c = LlmClient(); print(\"configured:\", c.is_configured())' 2>&1 | tail -5",
            timeout=20,
        )
        for line in out.strip().splitlines():
            print(f"  {line}")
    finally:
        # Also runs when die() raises SystemExit, so the session never leaks.
        c.close()

    print()
    print("🎉 完成。已就绪的事情:")
    print("  1) 旧 key 拒绝推送,你用的是新 key")
    print("  2) .env 已更新 + 备份 + 重启 worker")
    print("  3) Agnes 凭据在 worker 里加载成功(如有异常,看上面输出)")
    print()
    print("接下来你可以:")
    print("  - 跑 healthcheck 验证(check_agnes_llm + check_llm_workflow)")
    print("  - 手动 enrich 几篇老文章测试:")
    print("    docker compose exec api python -m app.scripts.re_enrich --limit 3")
    print("  - 等几小时,看 worker 自动跑批,LLM 工作流状态从 n/a → ok")
    return 0
def main() -> int:
    """Interactively stage, commit and push the repository to the Gitea remote.

    Steps: verify the branch and that the local copy is not behind the remote,
    show the working-tree status, warn about files over 5 MB (refusing APKs
    unless ``--allow-apk``), warn about paths that look sensitive, stage the
    selected files after confirmation, commit, then push after a second
    confirmation.

    Returns:
        0 when the commit was pushed (or committed with ``--no-do-push``),
        1 when the user cancelled or nothing could be staged,
        2 on a git failure (wrong branch, fetch / add / commit / push error).
    """
    ap = argparse.ArgumentParser(description="推送 diary-news 到 Gitea")
    ap.add_argument("--message", "-m", help="commit message(必填,不然 dry-run 后停)")
    ap.add_argument("--files", nargs="*", help="要 add 的文件路径(默认 add 所有 untracked + modified)")
    ap.add_argument("--do-push", action="store_true", default=True,
                    help="默认会真推;传 --no-do-push 只 commit 不 push")
    ap.add_argument("--no-do-push", dest="do_push", action="store_false")
    ap.add_argument("--allow-apk", action="store_true", help="允许推 APK(默认会拦)")
    args = ap.parse_args()

    def git(*argv):
        """Run ``git <argv>`` through run() with each argument quoted.

        ``check=False`` so callers can inspect ``returncode``. With run()'s
        default ``check=True`` a failing command raised CalledProcessError
        and none of the error branches below could ever execute.
        """
        parts = ("git",) + argv
        if os.name == "nt":
            line = subprocess.list2cmdline(parts)
        else:
            import shlex  # only needed off Windows
            line = " ".join(shlex.quote(p) for p in parts)
        return run(line, check=False)

    def changed_paths():
        """Return repo-relative paths listed by ``git status --porcelain``."""
        # core.quotepath=off keeps non-ASCII names readable instead of
        # octal-escaped, so the existence and keyword checks can match them.
        listing = git("-c", "core.quotepath=off", "status", "--porcelain").stdout
        paths = []
        for line in listing.splitlines():
            entry = line[3:].strip()  # porcelain v1: two status chars + space
            if not entry:
                continue
            if " -> " in entry:  # rename: "old -> new"; inspect the new path
                entry = entry.split(" -> ", 1)[1]
            paths.append(entry.strip().strip('"'))
        return paths

    print(f"==== 仓库: {REPO_DIR} ====")
    print(f"==== 远程: {REMOTE}/{BRANCH} ====\n")

    # 0) Right branch, and not behind the remote.
    current = git("rev-parse", "--abbrev-ref", "HEAD").stdout.strip()
    if current != BRANCH:
        print(f"✗ 当前在 {current},不在 {BRANCH} 分支,拒绝推")
        return 2
    print(f"✓ 在 {BRANCH} 分支")

    r = git("fetch", "--quiet", REMOTE)
    if r.returncode != 0:
        print(f"✗ fetch 失败: {r.stderr}")
        return 2

    behind = git("log", f"HEAD..{REMOTE}/{BRANCH}", "--oneline").stdout.strip()
    if behind:
        print(f"✗ 本地落后于 {REMOTE}/{BRANCH},先 pull:")
        print(behind)
        return 2
    print(f"✓ 本地与 {REMOTE}/{BRANCH} 同步")

    # 1) Current status
    print("\n==== 1) git status ====")
    print(git("status", "--short").stdout.rstrip() or "(无变更)")

    candidates = changed_paths()

    # 2) Large files (> 5 MB). Sizes are read relative to REPO_DIR, not the
    #    caller's working directory, so the script works from anywhere.
    print("\n==== 2) 大文件嗅探(> 5MB) ====")
    big = []  # [(size_mb, path), ...]
    for path in candidates:
        try:
            size_mb = os.path.getsize(os.path.join(REPO_DIR, path)) / 1024 / 1024
        except OSError:
            continue  # deleted file, directory entry, unreadable path
        if size_mb > 5:
            big.append((size_mb, path))
    big.sort(key=lambda item: -item[0])
    if big:
        for size_mb, path in big:
            print(f"  ⚠ {size_mb:.1f} MB  {path}")
    else:
        print("  ✓ 无 > 5MB 文件")

    if big and not args.allow_apk:
        apk = [p for _, p in big if p.lower().endswith(".apk")]
        if apk:
            print("\n  ✗ 检测到 APK,默认拒绝推送(传 --allow-apk 强制推)")
            print(f"    建议:把 '{apk[0]}' 加进 .gitignore,或上传到 release page")
            return 1

    # 3) Paths that look sensitive (.env / secrets / .key / .pem)
    print("\n==== 3) 敏感文件嗅探 ====")
    keywords = (".env", "secret", "password", ".key", ".pem", "credentials")
    sensitive = [p for p in candidates if any(k in p.lower() for k in keywords)]
    if sensitive:
        for p in sensitive:
            print(f"  ⚠ {p}")
    else:
        print("  ✓ 未发现敏感文件(.env / secrets / key / pem)")

    # 4) Choose files
    print("\n==== 4) 选择要 add 的文件 ====")
    if args.files:
        files = args.files
    else:
        # Default: every modified + untracked file that is not ignored.
        r = git("-c", "core.quotepath=off", "ls-files",
                "--modified", "--others", "--exclude-standard")
        files = [f for f in r.stdout.splitlines() if f.strip()]
    if not files:
        print("  (无文件可 add)")
        return 1
    print(f"  共 {len(files)} 个:")
    for f in files:
        size_note = "  ← APK!" if f.lower().endswith(".apk") else ""
        print(f"    {f}{size_note}")

    if not confirm("确认 add 上面这些?"):
        print("已取消")
        return 1

    # 5) git add, one path at a time so the failing file can be named
    for f in files:
        r = git("add", "--", f)
        if r.returncode != 0:
            print(f"  ✗ git add {f} 失败: {r.stderr}")
            return 2
    print(f"  ✓ 已 add {len(files)} 个文件")

    # 6) Staged status
    print("\n==== 5) git status(已 staged) ====")
    print(git("status", "--short").stdout.rstrip())

    # 7) What will be committed
    print("\n==== 6) diff --stat(将 commit 的内容) ====")
    print(git("diff", "--cached", "--stat").stdout.rstrip())

    # 8) Commit
    if not args.message:
        msg = input("commit message (直接回车用 'chore: ...'): ").strip()
        if not msg:
            msg = "chore: push via push_to_gitea.py"
    else:
        msg = args.message
    print(f"\n==== 7) git commit -m \"{msg}\" ====")
    r = git("commit", "-m", msg)
    if r.returncode != 0:
        # Nothing to commit, or a hook rejected it.
        print(f"  ! commit 退出码 {r.returncode}")
        print(r.stdout)
        print(r.stderr)
        return 2
    print("  ✓ commit 成功")
    print(r.stdout.rstrip())

    # 9) Push. The argparse dest is `do_push`; `args.push` does not exist and
    #    raised AttributeError right after a successful commit.
    if not args.do_push:
        print("\n==== 8) --no-do-push 跳过,只 commit ====")
        print("  下次手动:git push origin main")
        return 0

    print(f"\n==== 8) git push {REMOTE} {BRANCH} ====")
    if not confirm(f"确认推 {REMOTE}/{BRANCH}?"):
        print("已取消,commit 已留下但未推")
        return 1
    r = git("push", REMOTE, BRANCH)
    if r.returncode != 0:
        print(f"  ✗ push 失败: {r.stderr}")
        return 2
    print("  ✓ push 成功")
    print(r.stdout.rstrip())

    print(f"\n🎉 完成!新 commit 已推 {REMOTE}/{BRANCH}")
    return 0
= "http://127.0.0.1/api/v1/healthz" +hc.SAMPLE_N = 3 + +# 直接给 AUTH_TOKEN 赋值,看 lambda 能不能取到 +hc.AUTH_TOKEN = "fake-token-123" + +# mock Remote 让 check 函数不真发请求 +class FakeRemote: + def run(self, cmd, timeout=10): + # 返回一些可解析的内容 + if "curl" in cmd and "articles" in cmd and "id=" not in cmd: + return 0, '{"items":[{"id":542,"title":"x","title_zh":"X","translation_status":"ok","translation_engine":"tencent"}],"total":1,"total_pages":1}\n---HTTP=200---\n', "" + return 0, "ok", "" + +remote = FakeRemote() + +# 直接调 GROUPS['app'] 里那两个会读 AUTH_TOKEN 的 +for name, fn in hc.GROUPS['app']: + if "Feed API" in name or "详情页" in name: + try: + c = fn(remote) + print(f"{name}: ok ok={c.ok} summary={c.summary}") + except Exception as e: + print(f"{name}: EXC {type(e).__name__}: {e}") diff --git a/scripts/test_curl_401.py b/scripts/test_curl_401.py new file mode 100644 index 0000000..0948c66 --- /dev/null +++ b/scripts/test_curl_401.py @@ -0,0 +1,13 @@ +"""repro: curl 401 时的 stdout 形态""" +import subprocess +# 走 localhost +r = subprocess.run( + ["curl", "-sS", "-m", "8", + "-w", "\n---HTTP=%{http_code} TIME=%{time_total}---\n", + "http://127.0.0.1:9999/nonexistent"], # 不存在的端口 + capture_output=True, text=True, timeout=10 +) +print("=== 不存在的端口(预期:curl 报错,http=000)===") +print("rc:", r.returncode) +print("stdout:", repr(r.stdout[:300])) +print("stderr:", repr(r.stderr[:200])) diff --git a/scripts/test_global_repro.py b/scripts/test_global_repro.py new file mode 100644 index 0000000..daf9d60 --- /dev/null +++ b/scripts/test_global_repro.py @@ -0,0 +1,14 @@ +"""exact repro: 跟脚本结构一致""" +GROUPS = { + "x": [("t", lambda r: (r, AUTH_TOKEN))] # AUTH_TOKEN 在模块全局 +} + +def main(): + global AUTH_TOKEN + AUTH_TOKEN = "hello" + for g, fns in GROUPS.items(): + for name, fn in fns: + print(name, "->", fn("R")) + +if __name__ == "__main__": + main() diff --git a/scripts/verify_enrich.py b/scripts/verify_enrich.py new file mode 100644 index 0000000..6f91ddd --- /dev/null +++ b/scripts/verify_enrich.py @@ 
-0,0 +1,39 @@ +"""30 秒后再连,看 enrich 是否开始干活""" +import os, paramiko, time +time.sleep(15) # 等等让它跑一会 +c = paramiko.SSHClient() +c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) +c.connect("207.57.129.228", port=19717, username="root", + password=os.environ["REMOTE_PASS"], + timeout=30, allow_agent=False, look_for_keys=False) + + +def run(label, cmd, timeout=30): + print(f"\n=== {label} ===") + try: + si, so, se = c.exec_command(cmd, timeout=timeout) + out = so.read().decode(errors="replace") + err = se.read().decode(errors="replace") + print(out.rstrip()) + if err.strip(): print(f"[stderr] {err.rstrip()}") + except Exception as e: + print(f"[exc] {type(e).__name__}: {e}") + + +# 1) enrichment_loop 启动 +run("1) enrichment_loop 启动", "bash -lc 'cd /srv/news && docker compose logs --tail=50 worker 2>&1 | grep -iE \"enrich|started\" | head -20'") + +# 2) enrich_article 日志(关键) +run("2) enrich_article 日志", "bash -lc 'cd /srv/news && docker compose logs --tail=200 worker 2>&1 | grep -E \"enrich_article|classify|format ok|commentary ok\" | head -20'") + +# 3) 当前 n/a 数 +run("3) 当前 n/a 数", "bash -lc 'cd /srv/news && docker compose exec -T postgres psql -U news -d news -c \"SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;\"'") + +# 4) 1 分钟后再看一次 +time.sleep(60) +run("4) 1 分钟后 n/a 数", "bash -lc 'cd /srv/news && docker compose exec -T postgres psql -U news -d news -c \"SELECT classify_status, count(*) FROM articles GROUP BY classify_status ORDER BY count(*) DESC;\"'") + +# 5) enrich_article 日志 +run("5) 1 分钟后 enrich_article 日志", "bash -lc 'cd /srv/news && docker compose logs --tail=100 worker 2>&1 | grep enrich_article | tail -20'") + +c.close() diff --git a/scripts/verify_worker.py b/scripts/verify_worker.py new file mode 100644 index 0000000..00a0f53 --- /dev/null +++ b/scripts/verify_worker.py @@ -0,0 +1,34 @@ +"""看完整 worker 启动 + 状态""" +import os, paramiko +c = paramiko.SSHClient() 
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) +c.connect("207.57.129.228", port=19717, username="root", + password=os.environ["REMOTE_PASS"], + timeout=30, allow_agent=False, look_for_keys=False) + + +def run(label, cmd, timeout=30): + print(f"\n=== {label} ===") + try: + si, so, se = c.exec_command(cmd, timeout=timeout) + out = so.read().decode(errors="replace") + err = se.read().decode(errors="replace") + print(out.rstrip()) + if err.strip(): print(f"[stderr] {err.rstrip()}") + except Exception as e: + print(f"[exc] {type(e).__name__}: {e}") + + +# 1) worker 启动 INFO 日志 +run("1) worker 启动 INFO 日志", "bash -lc 'cd /srv/news && docker compose logs --tail=80 worker 2>&1 | grep -E \"INFO|ERROR|WARNING\" | grep -v httpx | head -30'") + +# 2) worker 是否在跑 +run("2) worker ps", "bash -lc 'cd /srv/news && docker compose ps worker'") + +# 3) enrichment_loop 文件确认(我看的是改完的版本吗?) +run("3) 服务器 enrichment.py 头部", "bash -lc 'head -10 /srv/news/backend/app/services/llm/enrichment.py.new 2>/dev/null; echo ---; head -10 /srv/news/backend/app/services/llm/enrichment.py'") + +# 4) 容器内 enrichment.py 第 410-425 行(看是不是新版本) +run("4) 容器内 enrichment.py 410-425 行", "bash -lc 'cd /srv/news && docker compose exec -T worker sed -n \"405,435p\" /app/app/services/llm/enrichment.py'") + +c.close()