feat(llm): 排版容器固定CSS + 插图用正文第一段 + 适中尺寸

- enrichment._enrich_format:把排版好的段落包到带固定 CSS 的 <div class=article-body> 里
  (font: system-ui / 17px / line-height 1.7 / color #3e3e3e / p margin-bottom 1.5em);
  CSS 同时内联到 style 属性,前端 .article-body 全局类做兜底
- enrichment._enrich_image:prompt 改用 body_zh_text 的第一段(原为 title);
  新增 {body} 占位符,image_prompt_template 默认模板同步改写
- 插图尺寸写死为 768x512(适中);image_size 字段保留供用户手改,但生成插图时不再读取它
- 分类明确多标签(2-5 个),提示词加 {body} 变量,容错读 categories/tags 两种 key
- AdminLlmSettings.vue:placeholder / 变量说明同步更新
2026-06-08 20:53:21 +08:00
parent 82a92032bb
commit 380e8b124e
5 changed files with 109 additions and 22 deletions
--- a/backend/app/services/llm/enrichment.py
+++ b/backend/app/services/llm/enrichment.py
@@ -2,10 +2,17 @@

 4 个独立任务:
  1. format   — 排版译文(写入 body_zh_formatted)
-  2. classify — 分类(写入 category)
-  3. image    — 生成插图(写入 image_ai_url)
+  2. classify — 分类(写入 category,多标签)
+  3. image    — 生成插图(写入 image_ai_url,prompt 用正文第一段)
  4. commentary — 写点评(写入 commentary)

+排版容器 CSS(固定,不再让用户改):
+- 字体: system-ui 字体栈
+- 字号: 17px
+- 行高: 1.7
+- 颜色: #3e3e3e
+- 段落: margin-bottom 1.5em(自动空一行)
+
 设计:
 - 任务入口: enrich_article(article_id, settings_row)
 - 任务间互不影响:每个任务独立 try/except + 写 status
@@ -28,6 +35,25 @@ from app.services.llm.client import LlmClient

 logger = logging.getLogger("news.llm.enrichment")

+# === 排版容器固定 CSS(项目级固定,不再让用户改)===
+# 同时内联到 body_zh_formatted 的容器 div 的 style 属性上,
+# 保证分享/邮件/导出场景下样式不丢;前端全局 .article-body 类做兜底。
+ARTICLE_BODY_FONT_FAMILY = (
+    "system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, "
+    "'Helvetica Neue', sans-serif"
+)
+ARTICLE_BODY_FONT_SIZE = "17px"
+ARTICLE_BODY_LINE_HEIGHT = "1.7"
+ARTICLE_BODY_COLOR = "#3e3e3e"
+ARTICLE_BODY_P_MARGIN_BOTTOM = "1.5em"
+
+# === 插图默认尺寸(适中,不再用 1024x768)===
+# 写死到 enrichment 里,行为稳定:_enrich_image 始终把该值传给 generate_image。
+# setting.image_size 字段仍保留(用户可在 UI 改),但 _enrich_image 不再读取它,避免意外被改成很大。
+DEFAULT_IMAGE_SIZE = "768x512"
+DEFAULT_IMAGE_FIRST_PARA_CHARS = 400  # 提取第一段最多用这么多字
+DEFAULT_IMAGE_MAX_TAGS = 5  # 分类标签上限(多标签)
+

 # === 获取当前设置(行锁 + 缓存刷新)===
 async def get_setting() -> LlmSetting:
@@ -60,39 +86,81 @@ async def _enrich_format(article: Article, setting: LlmSetting, client: LlmClien
        temperature=0.3,
        max_tokens=2000,
    )
-    # 极简 HTML 包裹:按段切 + <p>
+    # 极简 HTML 包裹:按段切 + <p>,整体包到带固定 CSS 的 <div> 里
    parts = [f"<p>{p.strip()}</p>" for p in text.split("\n\n") if p.strip()]
-    article.body_zh_formatted = "\n".join(parts) or None
+    if not parts:
+        article.body_zh_formatted = None
+    else:
+        article.body_zh_formatted = _wrap_article_body("\n".join(parts))
    article.format_status = "ok"


+def _wrap_article_body(inner_html: str) -> str:
+    """把排版好的段落包到带固定 CSS 的 <div class='article-body'> 里。
+
+    CSS 同时内联到 style 属性(分享/导出样式不丢)+ class 名(前端全局类可覆盖)。
+    """
+    inline_style = (
+        f"font-family:{ARTICLE_BODY_FONT_FAMILY};"
+        f"font-size:{ARTICLE_BODY_FONT_SIZE};"
+        f"line-height:{ARTICLE_BODY_LINE_HEIGHT};"
+        f"color:{ARTICLE_BODY_COLOR};"
+    )
+    # 段落样式也内联,保证 v-html 渲染时一定生效
+    p_style = f"margin:0 0 {ARTICLE_BODY_P_MARGIN_BOTTOM} 0;"
+    inner_with_p_style = inner_html.replace("<p>", f'<p style="{p_style}">')
+    return f'<div class="article-body" style="{inline_style}">{inner_with_p_style}</div>'
+
+
 # === 单任务:classify ===
 async def _enrich_classify(article: Article, setting: LlmSetting, client: LlmClient) -> None:
    prompt = (setting.classify_prompt or get_default_prompts()["classify_prompt"]).format(
        title=(article.title_zh or article.title)[:200],
        summary=(article.summary_zh or "")[:400],
+        body=(article.body_zh_text or "")[:1500],
    )
    result = await client.classify_json(
        system="你是新闻分类助手,只返回 JSON。",
        user=prompt,
    )
-    cats = result.get("categories") or []
+    cats = result.get("categories") or result.get("tags") or []
    if isinstance(cats, list) and cats:
-        article.category = ",".join(str(c).strip() for c in cats[:3])[:32]
+        # 多标签(2-5 个),逗号分隔存到 category 字段(已有索引)
+        joined = ",".join(str(c).strip() for c in cats[:DEFAULT_IMAGE_MAX_TAGS] if str(c).strip())
+        article.category = joined[:64] or None
    article.classify_status = "ok"


 # === 单任务:image ===
 async def _enrich_image(article: Article, setting: LlmSetting, client: LlmClient) -> None:
    template = (setting.image_prompt_template or get_default_prompts()["image_prompt_template"])
-    # 默认用 title_zh(若有),否则用原文 title
+    # 用正文第一段作为 prompt(英文 prompt 走 title 仍可工作,所以 title 也带上作 fallback)
+    first_para = _first_paragraph(article.body_zh_text or "", max_chars=DEFAULT_IMAGE_FIRST_PARA_CHARS)
+    if not first_para:
+        first_para = (article.title_zh or article.title or "")[:200]
    title_for_prompt = (article.title_zh or article.title or "")[:200]
-    prompt = template.format(title=title_for_prompt)
-    url = await client.generate_image(prompt, size=setting.image_size)
+    # template 同时支持 {body} 和 {title} 两种占位符
+    try:
+        prompt = template.format(body=first_para, title=title_for_prompt)
+    except (KeyError, IndexError):
+        # 用户改坏了 template,fallback 用 {title} 模式
+        prompt = template.format(title=title_for_prompt)
+    url = await client.generate_image(prompt, size=DEFAULT_IMAGE_SIZE)
    article.image_ai_url = url
    article.image_ai_status = "ok"


+def _first_paragraph(text: str, max_chars: int) -> str:
+    """取正文第一段(按 \\n\\n 切)。如果首段超长就截断。"""
+    if not text:
+        return ""
+    for p in text.split("\n\n"):
+        p = p.strip()
+        if p:
+            return p[:max_chars]
+    return ""
+
+
 # === 单任务:commentary ===
 async def _enrich_commentary(article: Article, setting: LlmSetting, client: LlmClient) -> None:
    prompt = (setting.commentary_prompt or get_default_prompts()["commentary_prompt"]).format(