feat(commentary): 双 provider 评论 — Angel(Agnes) + 美团大模型(LongCat)

- 新增 articles.commentary_meituan{,_status,_model,_error} 4 列 + commentary_engine
- LlmSetting 加 meituan_api_key/base_url/chat_model/interval_sec/enabled/commentary_prompt
- 新 app/services/llm/providers.py 工厂,支持多 provider 客户端
- enrichment 流程改为 commentary_angel + commentary_meituan 并行(asyncio.gather),
  任一 provider 失败不影响另一个
- enrichment_loop 状态判定:任一 provider 状态不是 ok 都视为待 enrich
- alembic 0004_dual_commentary 迁移
- 前端 Feed 卡片 + ArticleDetail 详情页各加一条'美团评论'卡
- AdminLlmSettings 加美团 provider 配置卡(独立 api_key 编辑器,不回显明文)
- LlmSettingOut.meituan_api_key_set (bool) 替代直接回传 key
- 默认 URL https://api.longcat.chat/openai/v1 / 默认模型 LongCat-2.0-Preview
This commit is contained in:
xiaji
2026-06-12 19:00:00 +08:00
parent 3ab6e4c7d0
commit bc36a1fc38
15 changed files with 2746 additions and 48 deletions

View File

@@ -1,10 +1,14 @@
"""LLM 智能增强服务(翻译后调)。
4 个独立任务(按顺序):
1. classify — 分类 + 黑名单 gate(命中则删文章,后 3 步跳过)
2. format — 排版译文(写入 body_zh_formatted,容器用 .article-body + 段落 .diary-para)
3. image — 生成插图(写入 image_ai_url,prompt 用正文第一段)
4. commentary — 写点评(写入 commentary)
5 个独立任务(按顺序):
1. classify — 分类 + 黑名单 gate(命中则删文章,后 4 步跳过)
2. format — 排版译文(写入 body_zh_formatted,容器用 .article-body + 段落 .diary-para)
3. image — 生成插图(写入 image_ai_url,prompt 用正文第一段)
4. commentary_angel — 写 Angel 评论(写入 commentary)
5. commentary_meituan — 写美团评论(写入 commentary_meituan)
双 provider 评论:Angel + 美团 大模型(LongCat) 并行,各自独立 try/except,
任一失败不影响另一个。commentary_engine 字段记录实际写入的 provider。
排版容器 CSS(固定,不再让用户改):
- 字体: system-ui 字体栈
@@ -39,6 +43,12 @@ from app.models.llm_setting import LlmSetting
from app.models.source import Source
from app.schemas.llm import get_default_prompts
from app.services.llm.client import LlmClient
from app.services.llm.providers import (
PROVIDER_ANGEL,
PROVIDER_COMMENTARY_DEFAULTS,
PROVIDER_MEITUAN,
is_provider_enabled,
)
logger = logging.getLogger("news.llm.enrichment")
@@ -144,6 +154,12 @@ async def get_setting() -> LlmSetting:
return row
# === 双 provider 评论 ===
# Angel: commentary / commentary_status(沿用旧字段,完全不动)
# 美团: commentary_meituan / commentary_meituan_status / commentary_meituan_model / commentary_meituan_error
# commentary_engine 记录实际写入的 provider:angel / meituan / "angel,meituan"
# === 单任务:format ===
async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClient) -> None:
template = setting.format_prompt or get_default_prompts()["format_prompt"]
@@ -270,9 +286,18 @@ def _first_paragraph(text: str, max_chars: int) -> str:
return ""
# === 单任务:commentary ===
async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None:
template = setting.commentary_prompt or get_default_prompts()["commentary_prompt"]
# === 单任务:commentary(provider 通用版)===
# provider=PROVIDER_ANGEL → 写入 commentary / commentary_status(老字段,完全不动)
# provider=PROVIDER_MEITUAN → 写入 commentary_meituan / commentary_meituan_status / commentary_meituan_model / commentary_meituan_error
def _default_commentary_prompt() -> str:
    """Return the ``commentary_prompt`` entry of ``get_default_prompts()``."""
    prompts = get_default_prompts()
    return prompts["commentary_prompt"]
async def _enrich_commentary_angel(
article: Article, setting: LlmSetting, client: LlmClient
) -> None:
"""Angel 评论 — 写入老字段(向后兼容)。"""
template = setting.commentary_prompt or _default_commentary_prompt()
prompt = _safe_format(
template,
{
@@ -280,23 +305,65 @@ async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmC
"body": (article.body_zh_text or "")[:3000],
},
)
defaults = PROVIDER_COMMENTARY_DEFAULTS[PROVIDER_ANGEL]
text = await client.chat(
system="你是资深新闻评论员。",
system=defaults["system"],
user=prompt,
temperature=0.6,
max_tokens=600,
temperature=defaults["temperature"],
max_tokens=defaults["max_tokens"],
)
article.commentary = text or None
article.commentary_status = "ok"
# 记录 provider(已存在的 "angel" / 追加为 "angel,meituan")
engines = set(filter(None, (article.commentary_engine or "").split(",")))
engines.add(PROVIDER_ANGEL)
article.commentary_engine = ",".join(sorted(engines))
async def _enrich_commentary_meituan(
    article: Article, setting: LlmSetting, client: LlmClient
) -> None:
    """Write the Meituan (LongCat) commentary onto ``article``.

    On success this sets ``commentary_meituan`` to the returned text (or
    ``None`` when it is empty), sets ``commentary_meituan_status`` to "ok",
    clears ``commentary_meituan_error``, records ``client.chat_model`` in
    ``commentary_meituan_model`` and adds the Meituan provider to the
    comma-separated ``commentary_engine`` value.

    On failure it sets ``commentary_meituan_status`` to "failed", stores the
    exception type and message (truncated to 1000 characters) in
    ``commentary_meituan_error``, clears ``commentary_meituan`` and re-raises.

    Args:
        article: The article being enriched; mutated in place.
        setting: LLM settings; ``meituan_commentary_prompt`` overrides the
            default commentary prompt when it is non-empty.
        client: Client used for the chat call.

    Raises:
        Exception: Whatever prompt construction or the chat call raised,
            after the failure has been recorded on ``article``.
    """
    try:
        # Prompt construction lives inside the try on purpose: a failure here
        # (bad template, missing title) must be recorded as "failed" just like
        # a failed chat call, because the caller relies on this function to
        # set the status itself.
        template = setting.meituan_commentary_prompt or _default_commentary_prompt()
        prompt = _safe_format(
            template,
            {
                "title": (article.title_zh or article.title)[:200],
                "body": (article.body_zh_text or "")[:3000],
            },
        )
        defaults = PROVIDER_COMMENTARY_DEFAULTS[PROVIDER_MEITUAN]
        text = await client.chat(
            system=defaults["system"],
            user=prompt,
            temperature=defaults["temperature"],
            max_tokens=defaults["max_tokens"],
        )
        article.commentary_meituan = text or None
        article.commentary_meituan_status = "ok"
        article.commentary_meituan_error = None
        article.commentary_meituan_model = client.chat_model
        # commentary_engine is a comma-separated, sorted list of provider names.
        engines = set(filter(None, (article.commentary_engine or "").split(",")))
        engines.add(PROVIDER_MEITUAN)
        article.commentary_engine = ",".join(sorted(engines))
    except Exception as e:
        # Record the failure on the Meituan-specific columns only, then let
        # the caller decide how to log and report it.
        article.commentary_meituan_status = "failed"
        article.commentary_meituan_error = f"{type(e).__name__}: {e}"[:1000]
        article.commentary_meituan = None
        raise
# === 总编排:enrich_article ===
async def enrich_article(article_id: int) -> dict[str, str]:
"""对单篇文章做 4 项 LLM 增强。
"""对单篇文章做 5 项 LLM 增强。
顺序:classify(黑名单 gate) → format → image → commentary
顺序:classify(黑名单 gate) → format → image → commentary(angel + meituan 并行)
- classify 命中 blocklist → 整篇文章 DELETE,后续任务直接 return
- 任一任务失败,只标 status 不影响其他任务
- 双 provider 评论:Angel 和美团 用 asyncio.gather 并行,任一失败不影响另一个
返回 {task: status} 字典(用于日志)。
"""
@@ -315,7 +382,10 @@ async def enrich_article(article_id: int) -> dict[str, str]:
setting = await get_setting()
if not setting.enabled:
logger.info("enrich_article: llm disabled, skip id=%s", article_id)
return {"format": "skipped", "classify": "skipped", "image": "skipped", "commentary": "skipped"}
return {
"format": "skipped", "classify": "skipped", "image": "skipped",
"commentary_angel": "skipped", "commentary_meituan": "skipped",
}
# 用配置生成 client(允许热改设置)
client = LlmClient(
@@ -324,6 +394,12 @@ async def enrich_article(article_id: int) -> dict[str, str]:
interval_sec=setting.interval_sec,
)
# 美团 provider client(可能为 None = 未配置)
meituan_client = None
if is_provider_enabled(PROVIDER_MEITUAN, setting):
from app.services.llm.providers import get_meituan_client
meituan_client = get_meituan_client(setting)
results: dict[str, str] = {}
async with AsyncSessionLocal() as session:
@@ -341,14 +417,17 @@ async def enrich_article(article_id: int) -> dict[str, str]:
if cats:
art.category = ",".join(cats)[:64] or None
if drop:
# 命中 blocklist → 删文章,后续 3 步全跳
# 命中 blocklist → 删文章,后续 4 步全跳
logger.info(
"enrich_article id=%s dropped (blocklist hit, cats=%s, blocklist=%s)",
article_id, cats, blocklist,
)
await session.delete(art)
await session.commit()
return {"classify": "dropped", "format": "skipped", "image": "skipped", "commentary": "skipped"}
return {
"classify": "dropped", "format": "skipped", "image": "skipped",
"commentary_angel": "skipped", "commentary_meituan": "skipped",
}
except Exception as e:
logger.exception("enrich classify failed for article %s: %s", article_id, e)
art.classify_status = "failed"
@@ -373,14 +452,32 @@ async def enrich_article(article_id: int) -> dict[str, str]:
art.image_ai_status = "failed"
results["image"] = f"failed:{type(e).__name__}"
# === 4) commentary ===
try:
await _enrich_commentary(art, setting, client)
results["commentary"] = "ok"
except Exception as e:
logger.exception("enrich commentary failed for article %s: %s", article_id, e)
art.commentary_status = "failed"
results["commentary"] = f"failed:{type(e).__name__}"
# === 4 + 5) commentary_angel + commentary_meituan 并行 ===
# 关键:每个 provider 独立的 try/except,任一失败不影响另一个
# gather 默认会把第一个异常直接向上抛出,所以用嵌套函数各自捕获异常、记录状态
async def _safe_angel() -> None:
    """Run the Angel commentary task and record its outcome in ``results``.

    Any ``Exception`` is logged and turned into a "failed" status, so it never
    propagates to the caller.
    """
    try:
        await _enrich_commentary_angel(art, setting, client)
    except Exception as exc:
        logger.exception("enrich commentary_angel failed for article %s: %s", article_id, exc)
        art.commentary_status = "failed"
        results["commentary_angel"] = f"failed:{type(exc).__name__}"
    else:
        results["commentary_angel"] = "ok"
async def _safe_meituan() -> None:
    """Run the Meituan commentary task and record its outcome in ``results``.

    Without a Meituan client the article is marked "n/a". Otherwise any
    ``Exception`` from the task is logged and reported as "failed" without
    propagating.
    """
    if meituan_client is not None:
        try:
            await _enrich_commentary_meituan(art, setting, meituan_client)
        except Exception as exc:
            logger.exception("enrich commentary_meituan failed for article %s: %s", article_id, exc)
            # _enrich_commentary_meituan marks the article "failed" itself when
            # the chat call fails, so only the result entry is written here.
            results["commentary_meituan"] = f"failed:{type(exc).__name__}"
        else:
            results["commentary_meituan"] = "ok"
        return

    # NOTE(review): the enrichment_loop filter lists "n/a" among the statuses
    # that still need enrichment, so these articles stay eligible while Meituan
    # is unconfigured - confirm that does not re-run the other tasks each pass.
    art.commentary_meituan_status = "n/a"
    results["commentary_meituan"] = "n/a"
await asyncio.gather(_safe_angel(), _safe_meituan())
await session.commit()
logger.info("enrich_article id=%s: %s", article_id, results)
@@ -423,6 +520,8 @@ async def enrichment_loop() -> None:
| (Article.commentary_status != "ok")
| (Article.image_ai_status.is_(None))
| (Article.image_ai_status != "ok")
| (Article.commentary_meituan_status.is_(None))
| (Article.commentary_meituan_status.in_(("n/a", "pending", "failed")))
),
)
.order_by(Article.id.asc())
@@ -431,7 +530,7 @@ async def enrichment_loop() -> None:
).scalars()
candidates = list(rows)
# 过滤:任一 *_status 是 pending
# 过滤:任一 *_status 是 pending(包括 NULL 和 n/a)
todo_ids: list[int] = []
for a in candidates:
statuses = [
@@ -439,6 +538,7 @@ async def enrichment_loop() -> None:
a.classify_status or "pending",
a.image_ai_status or "pending",
a.commentary_status or "pending",
a.commentary_meituan_status or "pending",
]
if any(s in ("pending", "failed", "n/a") for s in statuses):
todo_ids.append(a.id)
@@ -450,7 +550,7 @@ async def enrichment_loop() -> None:
continue
# 并发 enrich 多篇(LlmClient 内部 interval_sec 已经做了限速,这里只并发不限并发上限)
# 但为了不让 Agnes API 同时打太多,加一层并发上限
# 但为了不让 LLM API 同时打太多,加一层并发上限
sem = asyncio.Semaphore(3)
async def _run_one(aid: int) -> None:
async with sem:

View File

@@ -0,0 +1,102 @@
"""LLM provider 工厂。
历史:全站只用一个 LlmClient(单例)指 Agnes。
现在:支持多个 provider,各自独立 base_url / api_key / model / 节流。
- `get_angel_client(setting)` — Agnes 客户端(原 LlmClient 等价)
- `get_meituan_client(setting)` — 美团大模型客户端(OpenAI 兼容,LongCat)
设计:
- 工厂每次返回新实例(无状态;节流靠 client 内部 Semaphore 自带)
- Provider 不可用(api_key 空)= 返回 None
- `PROVIDER_COMMENTARY_DEFAULTS` 暴露 Angel / 美团 的 temperature / max_tokens / system 差异。
"""
from __future__ import annotations
import logging
from typing import Any
from app.models.llm_setting import LlmSetting
from app.services.llm.client import LlmClient
logger = logging.getLogger("news.llm.providers")
# === Provider name constants (shared by enrichment / frontend / logs) ===
PROVIDER_ANGEL = "angel"  # Agnes (the original LlmClient default endpoint)
PROVIDER_MEITUAN = "meituan"  # Meituan LLM (LongCat, OpenAI-compatible)
def get_angel_client(setting: LlmSetting) -> LlmClient:
    """Build the Agnes client from the models and interval stored in ``setting``.

    Only ``chat_model``, ``image_model`` and ``interval_sec`` are passed; every
    other constructor argument is left at the ``LlmClient`` default.
    """
    options = {
        "chat_model": setting.chat_model,
        "image_model": setting.image_model,
        "interval_sec": setting.interval_sec,
    }
    return LlmClient(**options)
def get_meituan_client(setting: LlmSetting) -> LlmClient | None:
    """Build a client for the Meituan LLM (LongCat), or None without an API key.

    Each value is read from the ``meituan_*`` attribute on ``setting`` first.
    A missing or falsy value falls back to the same-named attribute of the
    application config. The base URL, model and interval then fall back to
    hard-coded defaults, so an interval of 0 is treated as unset.
    """
    from app.config import settings as app_settings  # deferred to avoid an import cycle

    def _pick(field: str, absent: Any) -> Any:
        # The per-row setting wins; anything falsy defers to the app config.
        return getattr(setting, field, absent) or getattr(app_settings, field)

    api_key = _pick("meituan_api_key", "")
    base_url = _pick("meituan_base_url", "")
    model = _pick("meituan_chat_model", "")
    interval = _pick("meituan_interval_sec", None)

    if not api_key:
        return None

    client_kwargs = {
        "base_url": base_url or "https://api.longcat.chat/openai/v1",
        "api_key": api_key,
        "chat_model": model or "LongCat-2.0-Preview",
        "interval_sec": float(interval or 2.0),
    }
    return LlmClient(**client_kwargs)
def get_provider_client(provider: str, setting: LlmSetting) -> LlmClient | None:
    """Return the client for ``provider``, or None when it is not usable.

    The Angel client is returned only if its ``is_configured()`` is truthy;
    the Meituan result is passed through from ``get_meituan_client``.

    Raises:
        ValueError: If ``provider`` is not one of the known provider names.
    """
    if provider == PROVIDER_MEITUAN:
        return get_meituan_client(setting)
    if provider != PROVIDER_ANGEL:
        raise ValueError(f"unknown provider: {provider}")

    angel = get_angel_client(setting)
    if angel.is_configured():
        return angel
    return None
def is_provider_enabled(provider: str, setting: LlmSetting) -> bool:
    """Tell whether ``provider`` is switched on and has a usable client.

    Always False when ``setting.enabled`` is falsy or the provider name is
    unknown. Meituan additionally honours ``setting.meituan_enabled``, which
    counts as True when the attribute is absent.
    """
    if not setting.enabled:
        return False

    if provider == PROVIDER_ANGEL:
        switched_on = True
    elif provider == PROVIDER_MEITUAN:
        switched_on = bool(getattr(setting, "meituan_enabled", True))
    else:
        return False

    return switched_on and get_provider_client(provider, setting) is not None
# === Per-provider commentary parameters (temperature / max_tokens / system) ===
# Angel keeps the values its commentary call used before this table existed;
# Meituan sends no system prompt (system=None).
PROVIDER_COMMENTARY_DEFAULTS: dict[str, dict[str, Any]] = {
    PROVIDER_ANGEL: dict(
        temperature=0.6,
        max_tokens=600,
        system="你是资深新闻评论员。",
    ),
    PROVIDER_MEITUAN: dict(
        temperature=0.7,
        max_tokens=1000,
        system=None,
    ),
}