feat(db): 0003 migration + LlmSetting/Source 模型加 blocklist_tags

- 新增 alembic 0003: sources.blocklist_tags + llm_settings.blocklist_tags(JSONB)
- 两层配置:全局(llm_settings) + per-source(sources),合并去重后注入 classify prompt
- 默认空数组,不影响存量数据;admin API 在下个 commit 暴露编辑入口
This commit is contained in:
Mavis
2026-06-09 14:30:38 +08:00
parent 6da59da934
commit d0d1014505
3 changed files with 69 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
"""Sources / llm_settings 加 blocklist_tags(屏蔽分类标签)
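
Design:
- sources.blocklist_tags: per-source blocklist tags (apply to that source only).
- llm_settings.blocklist_tags: global blocklist tags (apply to every source).
- The two lists are merged and de-duplicated, then injected into the classify
  prompt; the LLM decides whether an article's category matches -> drop.

Notes:
- Both columns are NOT NULL with a server default of '[]'::jsonb, so they hold
  an empty array until someone edits them and existing rows are unaffected.
- admin_llm/admin.py will expose the editing entry point.
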
Revision ID: 0003
Revises: 0002
Create Date: 2026-06-09
"""
from __future__ import annotations
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects.postgresql import JSONB
revision: str = "0003"
down_revision: Union[str, None] = "0002"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the ``blocklist_tags`` JSONB column to ``sources`` and ``llm_settings``.

    Both columns are NOT NULL and carry a server-side default of an empty
    JSONB array (``'[]'::jsonb``).
    """
    # Per-source blocklist on ``sources`` first, then the global blocklist on
    # ``llm_settings``.
    for table_name in ("sources", "llm_settings"):
        # A separate Column object is built for each table, mirroring the two
        # independent column definitions this loop replaces.
        blocklist_column = sa.Column(
            "blocklist_tags",
            JSONB,
            nullable=False,
            server_default=sa.text("'[]'::jsonb"),
        )
        op.add_column(table_name, blocklist_column)
def downgrade() -> None:
    """Drop ``blocklist_tags`` from both tables, in the reverse order of ``upgrade``."""
    for table_name in ("llm_settings", "sources"):
        op.drop_column(table_name, "blocklist_tags")

View File

@@ -11,6 +11,7 @@ from __future__ import annotations
from datetime import datetime from datetime import datetime
from sqlalchemy import Boolean, DateTime, Integer, String, Text, func from sqlalchemy import Boolean, DateTime, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base from app.database import Base
@@ -28,6 +29,13 @@ class LlmSetting(Base):
commentary_prompt: Mapped[str | None] = mapped_column(Text) commentary_prompt: Mapped[str | None] = mapped_column(Text)
image_prompt_template: Mapped[str | None] = mapped_column(Text) image_prompt_template: Mapped[str | None] = mapped_column(Text)
# === 全局屏蔽分类标签(如 ["体育", "娱乐"])===
# 与 sources.blocklist_tags 合并去重后注入 classify prompt;
# 命中则删文章(drop)
blocklist_tags: Mapped[list[str]] = mapped_column(
JSONB, nullable=False, default=list, server_default="[]"
)
# === 插图参数 === # === 插图参数 ===
image_size: Mapped[str] = mapped_column(String(16), default="768x512", nullable=False) image_size: Mapped[str] = mapped_column(String(16), default="768x512", nullable=False)

View File

@@ -14,6 +14,7 @@ from sqlalchemy import (
Text, Text,
func, func,
) )
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base from app.database import Base
@@ -50,6 +51,10 @@ class Source(Base):
language_src: Mapped[str | None] = mapped_column(String(8)) language_src: Mapped[str | None] = mapped_column(String(8))
priority: Mapped[int] = mapped_column(Integer, default=50, nullable=False, index=True) priority: Mapped[int] = mapped_column(Integer, default=50, nullable=False, index=True)
headers_json: Mapped[dict | None] = mapped_column(JSON) headers_json: Mapped[dict | None] = mapped_column(JSON)
# 源级屏蔽分类标签(如 ["体育", "娱乐"]);与 llm_settings.blocklist_tags 合并后使用
blocklist_tags: Mapped[list[str]] = mapped_column(
JSONB, nullable=False, default=list, server_default="[]"
)
last_fetched_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) last_fetched_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
last_status: Mapped[str | None] = mapped_column(String(64)) last_status: Mapped[str | None] = mapped_column(String(64))
consecutive_failures: Mapped[int] = mapped_column(Integer, default=0, nullable=False) consecutive_failures: Mapped[int] = mapped_column(Integer, default=0, nullable=False)