feat(search): 智能搜索建议 - 固化候选词表 (search_keywords + search_title_suggestions)
后端:
- alembic 0009: 两张固化表 + GIN prefix_keys 索引 + articles trigger
- /api/v1/search/suggestions: 混合 A(高频词 ts_stat) + B(真实标题) + 冷启动 fallback
- worker 每日 03:00 + 启动时刷新 search_keywords
- 顺便填 commit 11 TODO: articles.title_zh_tsv + GIN 索引(未来 FTS 基础)

前端:
- NInput -> NAutoComplete + debounce 250ms
- 选标题 -> 跳详情;选关键词 -> 填入 + 触发搜索
- AbortController 防 race condition

性能: prefix_keys @> ARRAY[prefix] 走 GIN 亚毫秒,100 万行也稳
This commit is contained in:
@@ -7,6 +7,8 @@ from app.models.article import Article # noqa: F401
|
||||
from app.models.article_read import ArticleRead # noqa: F401
|
||||
from app.models.bookmark import Bookmark # noqa: F401
|
||||
from app.models.llm_setting import LlmSetting # noqa: F401
|
||||
from app.models.search_keyword import SearchKeyword # noqa: F401
|
||||
from app.models.search_title_suggestion import SearchTitleSuggestion # noqa: F401
|
||||
from app.models.source import Source, SourceKind # noqa: F401
|
||||
from app.models.subscription import Subscription # noqa: F401
|
||||
from app.models.user import User, UserRole # noqa: F401
|
||||
@@ -17,6 +19,8 @@ __all__ = [
|
||||
"ArticleRead",
|
||||
"Bookmark",
|
||||
"LlmSetting",
|
||||
"SearchKeyword",
|
||||
"SearchTitleSuggestion",
|
||||
"Source",
|
||||
"SourceKind",
|
||||
"Subscription",
|
||||
|
||||
45
backend/app/models/search_keyword.py
Normal file
45
backend/app/models/search_keyword.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""搜索建议候选词表(固化,worker 每日 ts_stat 刷新)。
|
||||
|
||||
- 数据源:articles.title_zh + body_zh_text + commentary + commentary_meituan
|
||||
- 用途:/api/v1/search/suggestions 返回"高频词"建议(A 方案)
|
||||
- 刷新:每日凌晨 worker 调 refresh_search_keywords() 全量重建
|
||||
- 查询:prefix_keys @> ARRAY['美'] 走 GIN 索引(亚毫秒)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, Integer, String, Text, func
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class SearchKeyword(Base):
    """ORM mapping for one row of the ``search_keywords`` table.

    Each row holds a candidate keyword together with the origin of that
    keyword, a ranking weight, and a precomputed array of prefixes that is
    matched against what the user has typed so far.
    """

    __tablename__ = "search_keywords"

    # Intentionally empty: the database-level UNIQUE constraint is created by
    # the alembic 0009 migration rather than being declared on the ORM side.
    __table_args__ = ()

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    keyword: Mapped[str] = mapped_column(Text, nullable=False)

    # Origin of the keyword: ts_stat / title_extract / manual.
    source: Mapped[str] = mapped_column(String(32), nullable=False)

    # Term frequency or article count; used as the ranking weight when sorting.
    weight: Mapped[int] = mapped_column(Integer, default=0, nullable=False)

    # Precomputed prefix array,
    # e.g. ['美','美联储','美联储宣'] for '美联储宣布...'.
    prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False)

    # Both timestamps are timezone-aware and filled in by the database.
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    def __repr__(self) -> str:
        """Return a short debug representation: keyword, source and weight."""
        return f"<SearchKeyword {self.keyword!r} src={self.source} weight={self.weight}>"
|
||||
43
backend/app/models/search_title_suggestion.py
Normal file
43
backend/app/models/search_title_suggestion.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""搜索建议 - 真实文章标题片段表(articles 写入 trigger 自动维护)。
|
||||
|
||||
- 数据源:articles.title_zh(优先)/ articles.title(短新闻回退)
|
||||
- 用途:/api/v1/search/suggestions 返回"真实文章标题"建议(B 方案)
|
||||
- 维护:PG trigger(articles INSERT/UPDATE OF title_zh/title/published_at 触发)
|
||||
- 查询:prefix_keys @> ARRAY['美'] 走 GIN 索引,按 published_at DESC 排序
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, String, func
|
||||
from sqlalchemy.dialects.postgresql import ARRAY, TEXT
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class SearchTitleSuggestion(Base):
    """ORM mapping for one row of the ``search_title_suggestions`` table.

    Each row ties an article (by foreign key) to a precomputed array of
    title prefixes, and carries the article's publication time alongside it.
    """

    __tablename__ = "search_title_suggestions"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    # Owning article; the row is removed together with the article (CASCADE).
    article_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("articles.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Which text this row was built from:
    # 'zh' (title_zh) / 'src' (title, the fallback for short news items).
    title_lang: Mapped[str] = mapped_column(
        String(8),
        default="zh",
        nullable=False,
    )

    # Precomputed prefix array (from the first character up to the full text).
    prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(TEXT), nullable=False)

    # Optional, timezone-aware publication time.
    published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # Timezone-aware creation time, filled in by the database.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    def __repr__(self) -> str:
        """Return a short debug representation: article id and title language."""
        return f"<SearchTitleSuggestion article_id={self.article_id} lang={self.title_lang}>"
|
||||
Reference in New Issue
Block a user