feat(search): 智能搜索建议 - 固化候选词表 (search_keywords + search_title_suggestions)

后端:
- alembic 0009: 两张固化表 + GIN prefix_keys 索引 + articles trigger
- /api/v1/search/suggestions: 混合 A(高频词 ts_stat) + B(真实标题) + 冷启动 fallback
- worker 每日 03:00 + 启动时刷新 search_keywords
- 顺便填 commit 11 TODO: articles.title_zh_tsv + GIN 索引(未来 FTS 基础)

前端:
- NInput -> NAutoComplete + debounce 250ms
- 选标题 -> 跳详情;选关键词 -> 填入 + 触发搜索
- AbortController 防 race condition

性能: prefix_keys @> ARRAY[prefix] 走 GIN 索引,查询耗时为亚毫秒级,100 万行规模下依然稳定
This commit is contained in:
mavis
2026-06-15 18:26:35 +08:00
parent b674fb4b22
commit c3aa0f0cb6
13 changed files with 1028 additions and 7 deletions

View File

@@ -7,6 +7,8 @@ from app.models.article import Article # noqa: F401
from app.models.article_read import ArticleRead # noqa: F401
from app.models.bookmark import Bookmark # noqa: F401
from app.models.llm_setting import LlmSetting # noqa: F401
from app.models.search_keyword import SearchKeyword # noqa: F401
from app.models.search_title_suggestion import SearchTitleSuggestion # noqa: F401
from app.models.source import Source, SourceKind # noqa: F401
from app.models.subscription import Subscription # noqa: F401
from app.models.user import User, UserRole # noqa: F401
@@ -17,6 +19,8 @@ __all__ = [
"ArticleRead",
"Bookmark",
"LlmSetting",
"SearchKeyword",
"SearchTitleSuggestion",
"Source",
"SourceKind",
"Subscription",

View File

@@ -0,0 +1,45 @@
"""搜索建议候选词表(固化,worker 每日 ts_stat 刷新)。
- 数据源:articles.title_zh + body_zh_text + commentary + commentary_meituan
- 用途:/api/v1/search/suggestions 返回"高频词"建议(A 方案)
- 刷新:每日凌晨 worker 调 refresh_search_keywords() 全量重建
- 查询:prefix_keys @> ARRAY['<prefix>'] 走 GIN 索引(亚毫秒)
"""
from __future__ import annotations
from datetime import datetime
from sqlalchemy import BigInteger, DateTime, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class SearchKeyword(Base):
    """ORM mapping for the ``search_keywords`` table.

    Each row is one candidate keyword offered by the search-suggestion
    feature, together with a ranking weight and a precomputed array of
    prefixes used for prefix lookups.
    """

    __tablename__ = "search_keywords"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    keyword: Mapped[str] = mapped_column(Text, nullable=False)

    # Origin of the keyword. The original author lists the values
    # 'ts_stat' / 'title_extract' / 'manual'; nothing in this model
    # enforces that set.
    source: Mapped[str] = mapped_column(String(32), nullable=False)

    # Term frequency or article count; serves as the weight used for ordering.
    weight: Mapped[int] = mapped_column(Integer, default=0, nullable=False)

    # Precomputed prefix array. Author's example:
    # ['美','美联储','美联储宣'] for '美联储宣布...'
    # NOTE(review): how the prefixes are generated is not visible in this
    # file — confirm against the migration / refresh worker.
    prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False)

    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )

    # Intentionally empty: the database-level UNIQUE constraint is created
    # by alembic migration 0009. This attribute is only an ORM-side
    # placeholder and declares no constraints itself.
    __table_args__ = ()

    def __repr__(self) -> str:
        """Return a short debug representation of this keyword row."""
        keyword, source, weight = self.keyword, self.source, self.weight
        return f"<SearchKeyword {keyword!r} src={source} weight={weight}>"

View File

@@ -0,0 +1,43 @@
"""搜索建议 - 真实文章标题片段表(articles 写入 trigger 自动维护)。
- 数据源:articles.title_zh(优先)/ articles.title(短新闻回退)
- 用途:/api/v1/search/suggestions 返回"真实文章标题"建议(B 方案)
- 维护:PG trigger(articles INSERT/UPDATE OF title_zh/title/published_at 触发)
- 查询:prefix_keys @> ARRAY['<prefix>'] 走 GIN 索引,按 published_at DESC 排序
"""
from __future__ import annotations
from datetime import datetime
from sqlalchemy import BigInteger, DateTime, ForeignKey, String, func
from sqlalchemy.dialects.postgresql import ARRAY, TEXT
from sqlalchemy.orm import Mapped, mapped_column
from app.database import Base
class SearchTitleSuggestion(Base):
    """ORM mapping for the ``search_title_suggestions`` table.

    Each row links one article to a precomputed array of title prefixes so
    that real article titles can be offered as search suggestions. Rows are
    removed together with their article (``ON DELETE CASCADE``).
    """

    __tablename__ = "search_title_suggestions"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    article_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("articles.id", ondelete="CASCADE"), nullable=False
    )

    # Which text this row was built from:
    # 'zh' (title_zh) / 'src' (title, the fallback for short news items).
    title_lang: Mapped[str] = mapped_column(String(8), default="zh", nullable=False)

    # Precomputed prefix array (from the first character up to the full word).
    prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(TEXT), nullable=False)

    # Nullable: no nullable=False is set, matching the Optional annotation.
    published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )

    def __repr__(self) -> str:
        """Return a short debug representation of this suggestion row."""
        article_id, lang = self.article_id, self.title_lang
        return f"<SearchTitleSuggestion article_id={article_id} lang={lang}>"