Review notes (text before the first "diff --git" line is ignored by git apply / patch):
* 0009 wraps the PL/pgSQL body in $$ ... $$, so the query string handed to ts_stat() must use a
  different dollar-quote tag; a nested $$ would terminate the function body early and make
  CREATE FUNCTION fail. The embedded query is quoted with $q$ ... $q$ in all three files.
* ts_stat() is called with one argument. A weight mask of 'a' returns 0 rows because
  to_tsvector() labels every lexeme with the default weight D unless setweight() is applied.
* Comment and docstring lines written by this patch are in English.
* NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed copy.
  Apply with `git apply --ignore-whitespace`, or re-check the context lines against the tree.
  The post-image blob ids on the "index" lines predate these edits.

diff --git a/backend/alembic/versions/0009_search_suggestions.py b/backend/alembic/versions/0009_search_suggestions.py
index 4433ce9..33e9822 100644
--- a/backend/alembic/versions/0009_search_suggestions.py
+++ b/backend/alembic/versions/0009_search_suggestions.py
@@ -218,23 +218,19 @@ def upgrade() -> None:
                 FROM generate_series(1, length(word)) AS n
             )
         FROM ts_stat(
-            'simple',
-            (
-                SELECT to_tsvector(
-                    'simple',
-                    coalesce(title_zh, '') || ' ' ||
-                    coalesce(body_zh_text, '') || ' ' ||
-                    coalesce(commentary, '') || ' ' ||
-                    coalesce(commentary_meituan, '')
-                )
-                FROM articles
-                WHERE title_zh IS NOT NULL
-                   OR body_zh_text IS NOT NULL
-                   OR commentary IS NOT NULL
-                   OR commentary_meituan IS NOT NULL
+            $q$SELECT to_tsvector('simple',
+                coalesce(title_zh, '') || ' ' ||
+                coalesce(body_zh_text, '') || ' ' ||
+                coalesce(commentary, '') || ' ' ||
+                coalesce(commentary_meituan, '')
             )
-        )
-        WHERE length(word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
+            FROM articles
+            WHERE title_zh IS NOT NULL
+               OR body_zh_text IS NOT NULL
+               OR commentary IS NOT NULL
+               OR commentary_meituan IS NOT NULL$q$
+        ) AS s
+        WHERE length(s.word) >= 2; -- drop single-character noise (Chinese punctuation / one-character stopwords)
     END;
     $$ LANGUAGE plpgsql;

diff --git a/backend/alembic/versions/0010_zhparser_chinese.py b/backend/alembic/versions/0010_zhparser_chinese.py
index 83c1b88..221e4fe 100644
--- a/backend/alembic/versions/0010_zhparser_chinese.py
+++ b/backend/alembic/versions/0010_zhparser_chinese.py
@@ -80,7 +80,8 @@ def upgrade() -> None:
         TRUNCATE search_keywords;

-        -- ts_stat(query text) 接受 SQL 字符串,内部执行并聚合词频
-        -- 'a' = 任意权重(A/B/C/D 四档,这里聚合所有)
+        -- ts_stat(sqlquery text) takes a SQL string, runs it and aggregates lexeme counts.
+        -- One-argument form: every weight is counted (same result as mask 'abcd').
+        -- Do NOT pass mask 'a': to_tsvector() output is all default weight D, so it yields 0 rows.
         INSERT INTO search_keywords (keyword, source, weight, prefix_keys)
         SELECT
             word,
@@ -91,8 +92,7 @@ def upgrade() -> None:
                 FROM generate_series(1, length(word)) AS n
             )
         FROM ts_stat(
-            $q$
-            SELECT to_tsvector('chinese_zh',
+            $q$SELECT to_tsvector('chinese_zh',
                 coalesce(title_zh, '') || ' ' ||
                 coalesce(body_zh_text, '') || ' ' ||
                 coalesce(commentary, '') || ' ' ||
@@ -102,10 +102,9 @@ def upgrade() -> None:
             WHERE title_zh IS NOT NULL
                OR body_zh_text IS NOT NULL
                OR commentary IS NOT NULL
-               OR commentary_meituan IS NOT NULL
-            $q$, 'a'
-        )
-        WHERE length(word) >= 2;
+               OR commentary_meituan IS NOT NULL$q$
+        ) AS s
+        WHERE length(s.word) >= 2;
     END;
     $func$ LANGUAGE plpgsql;
     """
@@ -135,8 +134,7 @@ def downgrade() -> None:
                 FROM generate_series(1, length(word)) AS n
             )
         FROM ts_stat(
-            $q$
-            SELECT to_tsvector('simple',
+            $q$SELECT to_tsvector('simple',
                 coalesce(title_zh, '') || ' ' ||
                 coalesce(body_zh_text, '') || ' ' ||
                 coalesce(commentary, '') || ' ' ||
@@ -146,10 +144,9 @@ def downgrade() -> None:
             WHERE title_zh IS NOT NULL
                OR body_zh_text IS NOT NULL
                OR commentary IS NOT NULL
-               OR commentary_meituan IS NOT NULL
-            $q$, 'a'
-        )
-        WHERE length(word) >= 2;
+               OR commentary_meituan IS NOT NULL$q$
+        ) AS s
+        WHERE length(s.word) >= 2;
     END;
     $func$ LANGUAGE plpgsql;
     """
diff --git a/backend/app/services/search.py b/backend/app/services/search.py
index 427e7cf..45eb43a 100644
--- a/backend/app/services/search.py
+++ b/backend/app/services/search.py
@@ -130,8 +130,9 @@ class SearchService:
     async def _fallback_keywords(self, q: str, limit: int) -> list[dict]:
-        """回退:ts_stat 实时聚合(慢但能用)。
+        """Fallback: aggregate keywords on the fly with ts_stat (slow but works).

-        - 从 articles.title_zh + body_zh_text 实时 to_tsvector
-        - 适用:search_keywords 表空 + ts_stat 之前的全量聚合
+        - Runs to_tsvector('chinese_zh') over title_zh + body_zh_text of at most 500 articles
+          (LIMIT without ORDER BY, so which rows are sampled is unspecified).
+        - Used when search_keywords is empty and the worker has not refreshed it yet.
+        - ts_stat takes one argument; mask 'a' gives 0 rows (lexemes carry default weight D).
         """
         from sqlalchemy import text

@@ -139,16 +140,13 @@ class SearchService:
             """
             SELECT word, nentry::int AS weight
             FROM ts_stat(
-                'simple',
-                (
-                    SELECT to_tsvector(
-                        'simple',
-                        coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
-                    )
-                    FROM articles
-                    WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
+                $q$SELECT to_tsvector('chinese_zh',
+                    coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
                 )
-            )
+                FROM articles
+                WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
+                LIMIT 500$q$
+            ) AS s
             WHERE word LIKE :prefix
             ORDER BY nentry DESC
             LIMIT :lim