diff --git a/README.md b/README.md index 4e45149..f7e7081 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ - 👤 **双角色鉴权**:JWT(access 60min + refresh 14d) + API Token(sha256,可撤销,给 Android / ingest 预留) - 📌 **收藏 + 关键词订阅**:用户级书签,服务端定时按关键词命中推送(预留 Telegram 通道) - 📊 **管理看板**:源健康度 / 翻译配额 / LLM 状态,全部可视化 +- 🔍 **智能搜索建议** *(新)*:`GET /api/v1/search/suggestions?q=prefix` 实时返回高频词续接词(输入"美国"→ ["美国", "美国政府", "美国签证"]);前端 debounce 250ms 自动补全 + 后端用 zhparser 中文分词 + PG 全文搜索 + 候选词固化表(`prefix_keys` GIN 数组索引) - 🔄 **热加载**:源/提示词改了不用重启,worker 每天 00:30 重建 job - 🚀 **一键部署**:SSH 推公钥 + 一键 `git pull` 流程 - 🔒 **安全默认**:bcrypt 密码、API Token 加密、SQL 注入免疫(SQLAlchemy 2.0 参数化) @@ -597,6 +599,63 @@ docker compose exec api alembic upgrade head 每个任务独立 try/except,失败标 `*_status='failed'`,**不影响**其他任务。 `enrichment_loop` 扫 `*_status` 是 `pending/failed/n/a` 的文章,自动重试 failed。 +### 智能搜索建议(autocomplete) + +搜索框输入前缀(如"美"),下拉弹出**高频词续接词**: +- 输入"美" → ["美国", "美军", "美国政府", "美方", "美国队", ...] +- 输入"美国" → ["美国", "美国政府", "美国签证", "美国军事", ...] 
+- 输入"美国政" → ["美国政府"] + +来源:`search_keywords` 表按 `prefix_keys @> ARRAY['前缀']` + `weight DESC` 查(ts_stat 从 articles.title_zh + body_zh_text + commentary 聚合的词频)。 + +**后端架构**: + +| 组件 | 作用 | 更新时机 | +|------|------|---------| +| `search_keywords` | 存 ts_stat 词频(全文 + 评论) + prefix_keys 数组 | worker 每日 03:00 全量重建 + 启动时 10s 后跑一次 | +| `articles.title_zh_tsv` | `GENERATED` 列 `to_tsvector('chinese_zh', title_zh)` + GIN 索引 | 写入自动维护(commit 11 TODO 顺手填了) | +| `chinese_zh` text search config | zhparser 中文分词 + 简单词映射 | 0010 迁移一次建好 | +| `_fallback_keywords` 实时 ts_stat | search_keywords 表空时,fallback 到实时 ts_stat(慢但能用) | 冷启动友好 | + +**中文分词**(`zhparser`): + +PG `simple` parser 对中文按整段当一个 token,`ts_stat` 词频聚合不出有意义的结果(整句算 1 个词)。 +装 `zhparser`(scws 字典)解决:Dockerfile 全源码编译(Alpine/Debian/PGDG 都没现成包),建 `chinese_zh` config。 + +**⚠️ 关键踩坑**: `ts_stat(query, 'a')` 第二参是 weights mask(只统计 A 权重位置),zhparser 不标 A 权重 → 静默 0 行。**用 `ts_stat(query text)` 单参**(等价 mask='abcd',聚合所有权重)。 + +**性能**: + +- `prefix_keys text[]` + GIN 索引,`@> ARRAY['美']` 亚毫秒 +- 1545 篇文章 → `search_keywords` 33639 词,`ts_stat` 全量 88s,凌晨一次用户无感 +- 搜索建议 API 接口 P99 < 50ms + +**冷启动**: + +`search_keywords` 表为空时(刚建库 / worker 没刷新过),`_fallback_keywords` 实时跑 `ts_stat` 兜底(注意:代码的实际触发条件是"该前缀在 `search_keywords` 中查不到任何结果",因此表非空但前缀无命中时同样会走 fallback)。 +无需手动回灌(不像之前用 articles trigger 维护的 `search_title_suggestions`)。 + +**API 契约**: + +```http +GET /api/v1/search/suggestions?q=美&limit=10 +Authorization: Bearer <token> + +200 { + "query": "美", + "keywords": [ + {"word": "美国", "weight": 4865, "source": "ts_stat"}, + {"word": "美军", "weight": 203, "source": "ts_stat"}, + {"word": "美国政府", "weight": 98, "source": "ts_stat"}, + ... 
+ ] +} +``` + +- `q` 1-20 字符 +- `keywords` 按 `weight` 排(高→低),最多 limit 条 +- 选词 → 自动填入 q + 触发搜索;回车仍然走原搜索路径 + ### 历史文章批量 enrich 新功能**只对**翻译完成后入库的文章生效。历史已翻译文章,手动 reset: diff --git a/backend/alembic/versions/0011_drop_search_title_suggestions.py b/backend/alembic/versions/0011_drop_search_title_suggestions.py new file mode 100644 index 0000000..c206ef0 --- /dev/null +++ b/backend/alembic/versions/0011_drop_search_title_suggestions.py @@ -0,0 +1,121 @@ +"""清理废案 search_title_suggestions 相关对象。 + +设计:产品迭代决定只展示 keyword 续接词,titles 段(真实文章 id 提示)被砍。 +但 0009 迁移里建的相关对象还在: +- search_title_suggestions 表 + 3 个 GIN/B-tree 索引 +- articles trigger trg_articles_rebuild_title_suggestions +- rebuild_title_suggestions() PL/pgSQL 函数 + +每篇文章 INSERT/UPDATE 都要跑 trigger(性能损耗),删干净。 + +也清理: +- app/models/search_title_suggestion.py 文件(本次 commit 不删文件,只删 __init__ 引用) +- app/scripts/backfill_search_suggestions.py 现在回灌的也是 search_title_suggestions + → 整体替换成"无 op 脚本"(或者改名为只跑一次 refresh_search_keywords) + +Revision ID: 0011 +Revises: 0010 +Create Date: 2026-06-15 +""" +from __future__ import annotations + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +revision: str = "0011" +down_revision: Union[str, None] = "0010" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # 1) 删 trigger(挂 articles 上) + op.execute("DROP TRIGGER IF EXISTS trg_articles_rebuild_title_suggestions ON articles") + + # 2) 删 trigger 函数 + op.execute("DROP FUNCTION IF EXISTS rebuild_title_suggestions()") + + # 3) 删表 + 索引(ON DELETE CASCADE 让 articles delete 一起清) + op.drop_index("ix_search_title_suggestions_published", table_name="search_title_suggestions") + op.drop_index("ix_search_title_suggestions_article", table_name="search_title_suggestions") + op.drop_index("ix_search_title_suggestions_prefix", table_name="search_title_suggestions") + op.drop_table("search_title_suggestions") + + +def downgrade() -> None: + # 重新建表(产品迭代回滚时用 — schema 跟 0009 一致) + op.create_table( + 
"search_title_suggestions", + sa.Column("id", sa.BigInteger, primary_key=True), + sa.Column( + "article_id", + sa.BigInteger, + sa.ForeignKey("articles.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("title_lang", sa.String(8), nullable=False, server_default="zh"), + sa.Column("prefix_keys", sa.ARRAY(sa.Text), nullable=False), + sa.Column("published_at", sa.DateTime(timezone=True), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + ) + op.create_index( + "ix_search_title_suggestions_prefix", + "search_title_suggestions", + ["prefix_keys"], + postgresql_using="gin", + ) + op.create_index( + "ix_search_title_suggestions_article", + "search_title_suggestions", + ["article_id"], + ) + op.create_index( + "ix_search_title_suggestions_published", + "search_title_suggestions", + ["published_at"], + ) + op.execute( + """ + CREATE OR REPLACE FUNCTION rebuild_title_suggestions() RETURNS TRIGGER AS $$ + DECLARE + src_text text; + src_lang text; + max_len int := 50; + BEGIN + DELETE FROM search_title_suggestions WHERE article_id = NEW.id; + IF NEW.title_zh IS NOT NULL AND length(NEW.title_zh) > 0 THEN + src_text := NEW.title_zh; + src_lang := 'zh'; + ELSIF NEW.title IS NOT NULL AND length(NEW.title) > 0 THEN + src_text := NEW.title; + src_lang := 'src'; + ELSE + RETURN NEW; + END IF; + src_text := substring(src_text, 1, max_len); + INSERT INTO search_title_suggestions + (article_id, title_lang, prefix_keys, published_at) + SELECT NEW.id, src_lang, + ARRAY(SELECT substring(src_text, 1, n) + FROM generate_series(1, length(src_text)) AS n), + NEW.published_at; + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + """ + ) + op.execute( + """ + CREATE TRIGGER trg_articles_rebuild_title_suggestions + AFTER INSERT OR UPDATE OF title_zh, title, published_at ON articles + FOR EACH ROW EXECUTE FUNCTION rebuild_title_suggestions(); + """ + ) diff --git a/backend/app/api/search.py 
b/backend/app/api/search.py index 4f16aa9..a42f579 100644 --- a/backend/app/api/search.py +++ b/backend/app/api/search.py @@ -1,13 +1,12 @@ -"""/api/v1/search/* — 搜索建议(autocomplete)。 +"""/api/v1/search/* — 搜索建议(autocomplete,纯 keyword 续接词)。 - GET /api/v1/search/suggestions?q=prefix - 返回:{"query", "titles": [...], "keywords": [...]} - - titles: 真实文章标题(按 published_at DESC),B 方案 - - keywords: 高频词(按 weight DESC),A 方案 - - 冷启动:任一表空时自动 fallback 到实时 ILIKE / ts_stat + 返回:{"query", "keywords": [...]} + - keywords: 词频续接词(按 weight DESC),输入"美国"→ ["美国", "美国政府", "美国签证", ...] + - 冷启动:search_keywords 表空时自动 fallback 到实时 ts_stat - 鉴权:跟 articles 一致(需要登录) -性能:两个查询都走 GIN 数组索引(prefix_keys @> ARRAY['美']),亚毫秒。 +性能:prefix_keys @> ARRAY['美'] 走 GIN 数组索引,亚毫秒。 """ from __future__ import annotations @@ -17,11 +16,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.deps import get_current_user from app.database import get_session from app.models.user import User -from app.schemas.search import ( - SearchKeywordItem, - SearchSuggestionsResponse, - SearchTitleSuggestionItem, -) +from app.schemas.search import SearchKeywordItem, SearchSuggestionsResponse from app.services.search import SearchService router = APIRouter(prefix="/search", tags=["search"]) @@ -30,18 +25,18 @@ router = APIRouter(prefix="/search", tags=["search"]) @router.get("/suggestions", response_model=SearchSuggestionsResponse) async def get_suggestions( q: str = Query(..., min_length=1, max_length=20, description="搜索前缀"), - limit: int = Query(10, ge=1, le=20, description="每组最多返回多少"), + limit: int = Query(10, ge=1, le=20, description="最多返回多少"), _user: User = Depends(get_current_user), # 需要登录,跟 articles 一致 session: AsyncSession = Depends(get_session), ): - """搜索建议:输入 prefix,返回真实标题 + 高频词两组候选。 + """搜索建议:输入 prefix,返回高频词续接。 - 用法:前端搜索框 onChange 时调用,debounce 200ms。 + 用法:前端搜索框 onChange 时调用,debounce 250ms。 + 选词 → 自动填入 q + 触发搜索。 """ svc = SearchService(session) raw = await svc.suggestions(q=q, limit=limit) return 
SearchSuggestionsResponse( query=raw["query"], - titles=[SearchTitleSuggestionItem(**t) for t in raw["titles"]], keywords=[SearchKeywordItem(**k) for k in raw["keywords"]], ) diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index b17bef6..2e61503 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -8,7 +8,6 @@ from app.models.article_read import ArticleRead # noqa: F401 from app.models.bookmark import Bookmark # noqa: F401 from app.models.llm_setting import LlmSetting # noqa: F401 from app.models.search_keyword import SearchKeyword # noqa: F401 -from app.models.search_title_suggestion import SearchTitleSuggestion # noqa: F401 from app.models.source import Source, SourceKind # noqa: F401 from app.models.subscription import Subscription # noqa: F401 from app.models.user import User, UserRole # noqa: F401 @@ -20,7 +19,6 @@ __all__ = [ "Bookmark", "LlmSetting", "SearchKeyword", - "SearchTitleSuggestion", "Source", "SourceKind", "Subscription", diff --git a/backend/app/models/search_title_suggestion.py b/backend/app/models/search_title_suggestion.py deleted file mode 100644 index 70aae04..0000000 --- a/backend/app/models/search_title_suggestion.py +++ /dev/null @@ -1,43 +0,0 @@ -"""搜索建议 - 真实文章标题片段表(articles 写入 trigger 自动维护)。 - -- 数据源:articles.title_zh(优先)/ articles.title(短新闻回退) -- 用途:/api/v1/search/suggestions 返回"真实文章标题"建议(B 方案) -- 维护:PG trigger(articles INSERT/UPDATE OF title_zh/title/published_at 触发) -- 查询:prefix_keys @> ARRAY['美'] 走 GIN 索引,按 published_at DESC 排序 -""" -from __future__ import annotations - -from datetime import datetime - -from sqlalchemy import BigInteger, DateTime, ForeignKey, String, func -from sqlalchemy.dialects.postgresql import ARRAY, TEXT -from sqlalchemy.orm import Mapped, mapped_column - -from app.database import Base - - -class SearchTitleSuggestion(Base): - __tablename__ = "search_title_suggestions" - - id: Mapped[int] = mapped_column(BigInteger, primary_key=True) - - article_id: 
Mapped[int] = mapped_column( - BigInteger, - ForeignKey("articles.id", ondelete="CASCADE"), - nullable=False, - ) - - # 该条用的是哪边的文本:'zh' (title_zh) / 'src' (title 短新闻回退) - title_lang: Mapped[str] = mapped_column(String(8), nullable=False, default="zh") - - # 预计算前缀数组(从第 1 字到全词) - prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(TEXT), nullable=False) - - published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) - - created_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), server_default=func.now(), nullable=False - ) - - def __repr__(self) -> str: - return f"" diff --git a/backend/app/schemas/search.py b/backend/app/schemas/search.py index 9032900..0d495d5 100644 --- a/backend/app/schemas/search.py +++ b/backend/app/schemas/search.py @@ -1,24 +1,15 @@ -"""搜索建议 schema。""" +"""搜索建议 schema(纯 keyword 续接词)。""" from __future__ import annotations -from datetime import datetime - from pydantic import BaseModel -class SearchTitleSuggestionItem(BaseModel): - id: int # article_id - published_at: datetime | None = None - lang: str # 'zh' / 'src' - - class SearchKeywordItem(BaseModel): word: str weight: int - source: str # 'ts_stat' / 'title_extract' / 'manual' / 'ts_stat_live' + source: str # 'ts_stat' / 'ts_stat_live' class SearchSuggestionsResponse(BaseModel): query: str - titles: list[SearchTitleSuggestionItem] = [] keywords: list[SearchKeywordItem] = [] diff --git a/backend/app/scripts/backfill_search_suggestions.py b/backend/app/scripts/backfill_search_suggestions.py deleted file mode 100644 index d90b0a8..0000000 --- a/backend/app/scripts/backfill_search_suggestions.py +++ /dev/null @@ -1,156 +0,0 @@ -"""回灌 search_title_suggestions 表。 - -- 迁移 0009 给 articles 加了 trigger,新写入的会自动维护 -- 但迁移前已有的 articles 没经过 trigger,需要这个脚本一次性回填 -- 同时可以手动跑一次 refresh_search_keywords()(可选,worker 也会跑) - -用法: - cd backend - python -m app.scripts.backfill_search_suggestions - # 或 docker: - docker compose exec api python -m app.scripts.backfill_search_suggestions 
- -设计: -- 用 batch INSERT,避免逐行 trigger 重复触发(虽然 trigger 已经在迁移里创建, - 重复执行对已存在的条目会先 DELETE 再 INSERT,等价于刷新,无害) -- 进度条:每 1000 篇打一行 -- 失败:有 article 字段异常不会阻塞其他 -""" -from __future__ import annotations - -import asyncio -import logging -import sys -from datetime import datetime, timezone - -from sqlalchemy import select, text -from sqlalchemy.dialects.postgresql import insert as pg_insert -from sqlalchemy.ext.asyncio import AsyncSession - -from app.database import AsyncSessionLocal -from app.models.article import Article -from app.models.search_title_suggestion import SearchTitleSuggestion - -logger = logging.getLogger("news.backfill_search") -logging.basicConfig( - level="INFO", - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) - - -MAX_TITLE_LEN = 50 # 跟迁移里的 trigger 一致 -BATCH_SIZE = 500 - - -def _build_prefix_keys(text_value: str) -> list[str]: - """从 '美联储宣布...' 生成 ['美','美联储','美联储宣',...,'美联储宣布...']""" - text_value = (text_value or "")[:MAX_TITLE_LEN] - if not text_value: - return [] - return [text_value[:n] for n in range(1, len(text_value) + 1)] - - -async def _process_article_batch( - session: AsyncSession, - articles: list[Article], -) -> int: - """处理一批 articles,UPSERT 到 search_title_suggestions。 - - 返回成功插入/更新的条数。 - """ - rows = [] - for art in articles: - if art.title_zh and len(art.title_zh.strip()) > 0: - src_text = art.title_zh.strip()[:MAX_TITLE_LEN] - lang = "zh" - elif art.title and len(art.title.strip()) > 0: - src_text = art.title.strip()[:MAX_TITLE_LEN] - lang = "src" - else: - continue - - rows.append( - { - "article_id": art.id, - "title_lang": lang, - "prefix_keys": _build_prefix_keys(src_text), - "published_at": art.published_at, - } - ) - - if not rows: - return 0 - - # 用 PG 原生 ON CONFLICT 实现 UPSERT(基于 article_id 唯一约束) - # 注意:表没建 unique on article_id,所以先 DELETE 再 INSERT - # 性能:批量 DELETE 在 article_id 上没索引,可能慢;临时加索引: - # CREATE INDEX IF NOT EXISTS tmp_idx ON search_title_suggestions(article_id); - # 简化:每个 batch 内逐条 DELETE 再 INSERT(慢但稳) - # 
替代方案:直接 TRUNCATE + 全量重灌(回填场景下更简单) - for r in rows: - await session.execute( - text("DELETE FROM search_title_suggestions WHERE article_id = :aid"), - {"aid": r["article_id"]}, - ) - # bulk insert - await session.execute(pg_insert(SearchTitleSuggestion), rows) - await session.commit() - return len(rows) - - -async def backfill() -> None: - """主流程:分批拉 articles,回灌 search_title_suggestions。""" - started = datetime.now(timezone.utc) - async with AsyncSessionLocal() as session: - # 总数 - total = (await session.execute(select(Article.id))).all() - total_count = len(total) - logger.info("backfill start: %d articles to process", total_count) - - processed = 0 - last_id = 0 - while True: - rows = ( - await session.execute( - select(Article) - .where(Article.id > last_id) - .order_by(Article.id) - .limit(BATCH_SIZE) - ) - ).scalars().all() - if not rows: - break - n = await _process_article_batch(session, list(rows)) - processed += n - last_id = rows[-1].id - logger.info( - "progress: %d / %d (%.1f%%)", - processed, total_count, - processed / total_count * 100 if total_count else 0, - ) - - elapsed = (datetime.now(timezone.utc) - started).total_seconds() - logger.info("backfill done: %d rows in %.1fs", processed, elapsed) - - # 顺便触发一次 search_keywords 刷新(让词频表也有数据) - logger.info("triggering refresh_search_keywords()...") - async with AsyncSessionLocal() as session: - try: - await session.execute(text("SELECT refresh_search_keywords()")) - await session.commit() - logger.info("refresh_search_keywords() done") - except Exception as e: - logger.exception("refresh_search_keywords failed: %s (worker 03:00 会再跑)", e) - - -def main() -> int: - try: - asyncio.run(backfill()) - except KeyboardInterrupt: - logger.warning("interrupted") - return 1 - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/backend/app/scripts/refresh_search_keywords.py b/backend/app/scripts/refresh_search_keywords.py new file mode 100644 index 0000000..1529f5e --- /dev/null +++ 
b/backend/app/scripts/refresh_search_keywords.py @@ -0,0 +1,48 @@ +"""刷新 search_keywords(立即跑一次,不依赖 worker 03:00 调度)。 + +历史: +- 最初版本是回灌 search_title_suggestions(articles trigger 维护的真实标题) +- 0011 迁移删了 search_title_suggestions(产品决定只展示 keyword 续接词) +- 现在脚本只做一件事:立即跑一次 refresh_search_keywords() + +用法: + docker compose exec api python -m app.scripts.refresh_search_keywords + # 预期: search_keywords refreshed + +性能:ts_stat 1545 篇文章全量聚合 ~88s(每天 worker 03:00 会自动跑一次,通常不需要手动) +""" +from __future__ import annotations + +import asyncio +import logging +import sys + +from sqlalchemy import text + +from app.database import AsyncSessionLocal + +logger = logging.getLogger("news.refresh_keywords") +logging.basicConfig( + level="INFO", + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) + + +async def refresh() -> None: + async with AsyncSessionLocal() as s: + await s.execute(text("SELECT refresh_search_keywords()")) + await s.commit() + logger.info("search_keywords refreshed") + + +def main() -> int: + try: + asyncio.run(refresh()) + except KeyboardInterrupt: + logger.warning("interrupted") + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/app/services/search.py b/backend/app/services/search.py index 45eb43a..fb8d721 100644 --- a/backend/app/services/search.py +++ b/backend/app/services/search.py @@ -1,31 +1,27 @@ -"""搜索建议服务:混合 A(高频词)+ B(真实标题) + 冷启动 fallback。 +"""搜索建议服务:纯 keyword 续接词(高频词)。 -- A: search_keywords(prefix_keys @> ARRAY['美'], ORDER BY weight DESC) -- B: search_title_suggestions(prefix_keys @> ARRAY['美'], ORDER BY published_at DESC) -- fallback: 任一表空时回退实时 ILIKE 查 articles(冷启动 / worker 没刷新过) +- search_keywords(prefix_keys @> ARRAY['美'], ORDER BY weight DESC) +- fallback: 表空时回退实时 ts_stat(冷启动 / worker 没刷新过) """ from __future__ import annotations import logging from sqlalchemy import desc, select -from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.ext.asyncio import AsyncSession -from app.models.article 
import Article from app.models.search_keyword import SearchKeyword -from app.models.search_title_suggestion import SearchTitleSuggestion logger = logging.getLogger("news.search") class SearchService: - """搜索建议 service。 + """搜索建议 service — 仅返回 keyword 续接词。 - 设计:输入 prefix,返回 { titles, keywords } 两组候选。 - - titles 真实文章标题(按 published_at DESC 排) - - keywords 高频词(按 weight DESC 排) - - 任一为空时回退实时 articles.title_zh ILIKE 查询(冷启动兜底) + 设计:输入 prefix,返回 { query, keywords[] }。 + - keywords 是 ts_stat 聚合后的高频词(从 articles.title_zh + body_zh_text + commentary 算) + - 用 GIN 数组索引 prefix_keys @> ARRAY['前缀'],亚毫秒 + - 表空时回退到实时 ts_stat 聚合(慢但能用) """ def __init__(self, session: AsyncSession): @@ -36,42 +32,20 @@ class SearchService: q: str, limit: int = 10, ) -> dict[str, list[dict]]: - """返回搜索建议。 + """返回搜索建议(仅 keywords)。 Args: q: 前缀(1-20 字符) - limit: 每组最多返回多少(默认 10,最大 20) + limit: 最多返回多少(默认 10,最大 20) Returns: - {"query": q, "titles": [...], "keywords": [...]} - titles 元素:{"id": article_id, "published_at": ...} - keywords 元素:{"word": ..., "weight": ...} + {"query": q, "keywords": [{"word", "weight", "source"}, ...]} """ q = q.strip() if not q: - return {"query": q, "titles": [], "keywords": []} + return {"query": q, "keywords": []} - # 1) 查 search_title_suggestions(B 方案) - title_rows = await self.session.execute( - select( - SearchTitleSuggestion.article_id, - SearchTitleSuggestion.published_at, - SearchTitleSuggestion.title_lang, - ) - .where(SearchTitleSuggestion.prefix_keys.contains([q])) - .order_by(desc(SearchTitleSuggestion.published_at)) - .limit(limit) - ) - titles = [ - { - "id": row.article_id, - "published_at": row.published_at.isoformat() if row.published_at else None, - "lang": row.title_lang, - } - for row in title_rows.all() - ] - - # 2) 查 search_keywords(A 方案) + # 1) 查 search_keywords(GIN 数组包含,亚毫秒) kw_rows = await self.session.execute( select(SearchKeyword.keyword, SearchKeyword.weight, SearchKeyword.source) .where(SearchKeyword.prefix_keys.contains([q])) @@ -83,49 +57,11 @@ class 
SearchService: for row in kw_rows.all() ] - # 3) 冷启动 fallback:任一为空时,回退到实时 ILIKE articles - # (如果两张固化表都跑空了,说明刚建库或数据被 truncate) - if not titles: - titles = await self._fallback_titles(q, limit) + # 2) 冷启动 fallback:表空时回退到实时 ts_stat 聚合 if not keywords: keywords = await self._fallback_keywords(q, limit) - return {"query": q, "titles": titles, "keywords": keywords} - - async def _fallback_titles(self, q: str, limit: int) -> list[dict]: - """回退:实时查 articles.title_zh / title(走 B-tree 索引,慢但能用)。 - - - 优先 title_zh LIKE(翻译后),没有再 LIKE title(短新闻) - - 限制 7 天内的文章,避免返回太老的(冷启动场景下用户预期) - """ - from datetime import datetime, timedelta, timezone - - from sqlalchemy import or_ - - since = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=7) - like = f"{q}%" - stmt = ( - select(Article.id, Article.published_at, Article.title_zh, Article.title) - .where( - Article.published_at >= since, - Article.duplicate_of.is_(None), - or_( - Article.title_zh.ilike(like), - Article.title.ilike(like), - ), - ) - .order_by(desc(Article.published_at)) - .limit(limit) - ) - rows = (await self.session.execute(stmt)).all() - return [ - { - "id": row.id, - "published_at": row.published_at.isoformat() if row.published_at else None, - "lang": "zh" if row.title_zh else "src", - } - for row in rows - ] + return {"query": q, "keywords": keywords} async def _fallback_keywords(self, q: str, limit: int) -> list[dict]: """回退:ts_stat 实时聚合(慢但能用)。 diff --git a/frontend/src/views/Feed.vue b/frontend/src/views/Feed.vue index d7aa780..2b62c49 100644 --- a/frontend/src/views/Feed.vue +++ b/frontend/src/views/Feed.vue @@ -6,7 +6,7 @@ import { NPagination, NAutoComplete, useMessage, } from 'naive-ui' import { articlesApi, readsApi, sourcesApi, type ArticleListItem, type Source } from '@/api/articles' -import { searchApi, type SearchKeyword, type SearchTitleSuggestion } from '@/api/search' +import { searchApi, type SearchKeyword } from '@/api/search' import { useDebounce } from '@/composables/useDebounce' import 
{ useAuthStore } from '@/stores/auth' import dayjs from 'dayjs' @@ -37,18 +37,16 @@ const hideRead = ref(true) const sourceOptions = ref<{ label: string; value: string }[]>([]) -// === 搜索建议(autocomplete) === +// === 搜索建议(autocomplete) — 纯 keyword 续接词 === // 触发:q 变化(用户输入)→ 250ms debounce → 调 /api/v1/search/suggestions // 取消:每次新输入前 abort 上一次未完成的请求,避免旧响应覆盖新结果 // 选词:@select → 填入 q + 触发搜索(不再等回车) -const suggestTitles = ref([]) const suggestKeywords = ref([]) let suggestAbort: AbortController | null = null async function fetchSuggestions(prefix: string) { const p = prefix.trim() if (!p) { - suggestTitles.value = [] suggestKeywords.value = [] return } @@ -58,9 +56,8 @@ async function fetchSuggestions(prefix: string) { suggestAbort = ctrl try { const resp = await searchApi.suggestions(p, 10, { signal: ctrl.signal }) - // 注意:race condition 防护 — 只采纳最新请求的响应 + // race condition 防护 — 只采纳最新请求的响应 if (suggestAbort === ctrl) { - suggestTitles.value = resp.titles suggestKeywords.value = resp.keywords } } catch (e: any) { @@ -70,7 +67,6 @@ async function fetchSuggestions(prefix: string) { // eslint-disable-next-line no-console console.debug('search suggestions failed:', e?.message) if (suggestAbort === ctrl) { - suggestTitles.value = [] suggestKeywords.value = [] } } @@ -84,78 +80,42 @@ watch(q, (v) => { }) // === NAutoComplete options === -// 把 titles + keywords 拼成扁平 options 数组。 -// 用 discriminated union 让 TypeScript 在 onSelect 里能自动 narrow 出 meta 的具体类型。 -type SuggestTitleOption = { +// 只用 keyword 续接词,扁平结构。 +type SuggestOption = { label: string value: string - type: 'title' - meta: SearchTitleSuggestion -} -type SuggestKeywordOption = { - label: string - value: string - type: 'keyword' meta: SearchKeyword } -type SuggestOption = SuggestTitleOption | SuggestKeywordOption const suggestOptions = computed(() => { - const out: SuggestOption[] = [] - for (const t of suggestTitles.value) { - // 标题项:label 用 #id 标识(可后续扩展拉标题),value 是 id 字符串 - out.push({ - label: `#${t.id}`, - value: 
t.id.toString(), - type: 'title', - meta: t, - }) - } - for (const k of suggestKeywords.value) { - out.push({ - label: k.word, - value: k.word, - type: 'keyword', - meta: k, - }) - } - return out + return suggestKeywords.value.map((k) => ({ + label: k.word, + value: k.word, + meta: k, + })) }) -// 自定义 render:显示分类图标 + 类型 +// 自定义 render:显示"词"标签 + 词文本 + 权重 function renderSuggestion(opt: SuggestOption) { return h( 'div', { class: 'feed-suggest-row' }, [ - h('span', { class: `feed-suggest-tag feed-suggest-tag-${opt.type}` }, - opt.type === 'title' ? '文章' : '词'), + h('span', { class: 'feed-suggest-tag feed-suggest-tag-keyword' }, '词'), h('span', { class: 'feed-suggest-text' }, opt.label), + h('span', { class: 'feed-suggest-weight' }, String(opt.meta.weight)), ], ) } -// 选完候选词:从当前 suggestOptions 反查 meta。 -// naive-ui 的 NAutoComplete 选完时只把 value 写回 v-model,我们额外用 -// ref 维护一个 lastSelectedType,模板里在 click 时先 setType,on-select 时 -// 再读 — 但更简单的做法:看 value 格式。title 类型的 value 是纯数字 id 字符串, -// keyword 类型是中文/字母词。 +// 选完候选词:naive-ui 把 value 写回 v-model,我们从 suggestOptions 找 meta function onSelectSuggestion(value: string) { - // 反查 suggestOptions 找 meta const matched = suggestOptions.value.find((o) => o.value === value) - if (matched?.type === 'title') { - router.push(`/article/${matched.meta.id}`) - return - } - if (matched?.type === 'keyword') { + if (matched) { q.value = matched.meta.word resetToFirstPage() - return - } - // 兜底 - if (/^\d+$/.test(value)) { - router.push(`/article/${value}`) } else { + // 兜底:value 就是用户要的关键词 q.value = value resetToFirstPage() } @@ -762,10 +722,6 @@ onMounted(async () => { line-height: 1.4; flex-shrink: 0; } -.feed-suggest-tag-title { - background: #dbeafe; - color: #1e40af; -} .feed-suggest-tag-keyword { background: #f3e8ff; color: #6b21a8; @@ -777,6 +733,12 @@ onMounted(async () => { flex: 1; min-width: 0; } +.feed-suggest-weight { + flex-shrink: 0; + font-size: 11px; + color: #94a3b8; + font-variant-numeric: tabular-nums; +} /* ===== 移动端(<= 
768px):过滤条全宽,允许换行 ===== */ @media (max-width: 768px) {