fix(search): ts_stat 改单参(text),避免 'a' mask 静默 0 行

zhparser 分词出来的词位不带 A 权重(也没有显式的 B/C 标注,未标注的词位按 PostgreSQL 默认算 D 权重),传 'a' mask 给 ts_stat(text, weights) 会过滤掉所有词
但不报错,静默返回 0 行。改成 ts_stat(text) 单参:不按权重过滤,统计全部词位(对这里的 to_tsvector 结果等价于 mask='abcd')。

修:
- 0010 迁移里 refresh_search_keywords() 改用单参 ts_stat
- 0010 迁移 downgrade 部分同步修
- 0009 迁移 refresh_search_keywords() 同步修
- services/search.py _fallback_keywords 改用 chinese_zh + 单参 ts_stat,并给内层查询加 LIMIT 500(只聚合最多 500 篇文章;无 ORDER BY,取到哪些文章不固定)
This commit is contained in:
mavis
2026-06-15 19:19:19 +08:00
parent e85a27f69d
commit db4fd8699b
3 changed files with 31 additions and 40 deletions

View File

@@ -218,23 +218,19 @@ def upgrade() -> None:
FROM generate_series(1, length(word)) AS n
)
FROM ts_stat(
'simple',
(
SELECT to_tsvector(
'simple',
coalesce(title_zh, '') || ' ' ||
coalesce(body_zh_text, '') || ' ' ||
coalesce(commentary, '') || ' ' ||
coalesce(commentary_meituan, '')
)
FROM articles
WHERE title_zh IS NOT NULL
OR body_zh_text IS NOT NULL
OR commentary IS NOT NULL
OR commentary_meituan IS NOT NULL
$$SELECT to_tsvector('simple',
coalesce(title_zh, '') || ' ' ||
coalesce(body_zh_text, '') || ' ' ||
coalesce(commentary, '') || ' ' ||
coalesce(commentary_meituan, '')
)
)
WHERE length(word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
FROM articles
WHERE title_zh IS NOT NULL
OR body_zh_text IS NOT NULL
OR commentary IS NOT NULL
OR commentary_meituan IS NOT NULL$$
) AS s
WHERE length(s.word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
END;
$$ LANGUAGE plpgsql;

View File

@@ -80,7 +80,8 @@ def upgrade() -> None:
TRUNCATE search_keywords;
-- ts_stat(query text) 接受 SQL 字符串,内部执行并聚合词频
-- 'a' = 任意权重(A/B/C/D 四档,这里聚合所有)
-- 单参(等价 mask='abcd',聚合所有权重)
-- ⚠️ 不能传 'a' mask — zhparser 不标 A 权重,会 0 行
INSERT INTO search_keywords (keyword, source, weight, prefix_keys)
SELECT
word,
@@ -91,8 +92,7 @@ def upgrade() -> None:
FROM generate_series(1, length(word)) AS n
)
FROM ts_stat(
$q$
SELECT to_tsvector('chinese_zh',
$$SELECT to_tsvector('chinese_zh',
coalesce(title_zh, '') || ' ' ||
coalesce(body_zh_text, '') || ' ' ||
coalesce(commentary, '') || ' ' ||
@@ -102,10 +102,9 @@ def upgrade() -> None:
WHERE title_zh IS NOT NULL
OR body_zh_text IS NOT NULL
OR commentary IS NOT NULL
OR commentary_meituan IS NOT NULL
$q$, 'a'
)
WHERE length(word) >= 2;
OR commentary_meituan IS NOT NULL$$
) AS s
WHERE length(s.word) >= 2;
END;
$func$ LANGUAGE plpgsql;
"""
@@ -135,8 +134,7 @@ def downgrade() -> None:
FROM generate_series(1, length(word)) AS n
)
FROM ts_stat(
$q$
SELECT to_tsvector('simple',
$$SELECT to_tsvector('simple',
coalesce(title_zh, '') || ' ' ||
coalesce(body_zh_text, '') || ' ' ||
coalesce(commentary, '') || ' ' ||
@@ -146,10 +144,9 @@ def downgrade() -> None:
WHERE title_zh IS NOT NULL
OR body_zh_text IS NOT NULL
OR commentary IS NOT NULL
OR commentary_meituan IS NOT NULL
$q$, 'a'
)
WHERE length(word) >= 2;
OR commentary_meituan IS NOT NULL$$
) AS s
WHERE length(s.word) >= 2;
END;
$func$ LANGUAGE plpgsql;
"""

View File

@@ -130,8 +130,9 @@ class SearchService:
async def _fallback_keywords(self, q: str, limit: int) -> list[dict]:
"""回退:ts_stat 实时聚合(慢但能用)。
- 从 articles.title_zh + body_zh_text 实时 to_tsvector
- 适用:search_keywords 表空 + ts_stat 之前的全量聚合
- 从 articles.title_zh + body_zh_text 实时 to_tsvector(chinese_zh)
- 适用:search_keywords 表空 + worker 没刷新过
- ts_stat(text) 单参 — 第二参 weights mask 不能传 'a'(zhparser 不标 A 权重会 0 行)
"""
from sqlalchemy import text
@@ -139,16 +140,13 @@ class SearchService:
"""
SELECT word, nentry::int AS weight
FROM ts_stat(
'simple',
(
SELECT to_tsvector(
'simple',
coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
)
FROM articles
WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
$$SELECT to_tsvector('chinese_zh',
coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
)
)
FROM articles
WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
LIMIT 500$$
) AS s
WHERE word LIKE :prefix
ORDER BY nentry DESC
LIMIT :lim