"""搜索建议服务:纯 keyword 续接词(高频词)。 - search_keywords(prefix_keys @> ARRAY['美'], ORDER BY weight DESC) - fallback: 表空时回退实时 ts_stat(冷启动 / worker 没刷新过) """ from __future__ import annotations import logging from sqlalchemy import desc, select from sqlalchemy.ext.asyncio import AsyncSession from app.models.search_keyword import SearchKeyword logger = logging.getLogger("news.search") class SearchService: """搜索建议 service — 仅返回 keyword 续接词。 设计:输入 prefix,返回 { query, keywords[] }。 - keywords 是 ts_stat 聚合后的高频词(从 articles.title_zh + body_zh_text + commentary 算) - 用 GIN 数组索引 prefix_keys @> ARRAY['前缀'],亚毫秒 - 表空时回退到实时 ts_stat 聚合(慢但能用) """ def __init__(self, session: AsyncSession): self.session = session async def suggestions( self, q: str, limit: int = 10, ) -> dict[str, list[dict]]: """返回搜索建议(仅 keywords)。 Args: q: 前缀(1-20 字符) limit: 最多返回多少(默认 10,最大 20) Returns: {"query": q, "keywords": [{"word", "weight", "source"}, ...]} """ q = q.strip() if not q: return {"query": q, "keywords": []} # 1) 查 search_keywords(GIN 数组包含,亚毫秒) kw_rows = await self.session.execute( select(SearchKeyword.keyword, SearchKeyword.weight, SearchKeyword.source) .where(SearchKeyword.prefix_keys.contains([q])) .order_by(desc(SearchKeyword.weight)) .limit(limit) ) keywords = [ {"word": row.keyword, "weight": row.weight, "source": row.source} for row in kw_rows.all() ] # 2) 冷启动 fallback:表空时回退到实时 ts_stat 聚合 if not keywords: keywords = await self._fallback_keywords(q, limit) return {"query": q, "keywords": keywords} async def _fallback_keywords(self, q: str, limit: int) -> list[dict]: """回退:ts_stat 实时聚合(慢但能用)。 - 从 articles.title_zh + body_zh_text 实时 to_tsvector(chinese_zh) - 适用:search_keywords 表空 + worker 没刷新过 - ts_stat(text) 单参 — 第二参 weights mask 不能传 'a'(zhparser 不标 A 权重会 0 行) """ from sqlalchemy import text sql = text( """ SELECT word, nentry::int AS weight FROM ts_stat( $$SELECT to_tsvector('chinese_zh', coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '') ) FROM articles WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL LIMIT 500$$ ) AS s WHERE word LIKE :prefix ORDER BY nentry DESC LIMIT :lim """ ) rows = ( await self.session.execute(sql, {"prefix": f"{q}%", "lim": limit}) ).all() return [{"word": r.word, "weight": r.weight, "source": "ts_stat_live"} for r in rows]