fix(search): ts_stat 改单参(text),避免 'a' mask 静默 0 行
zhparser 分词后经 to_tsvector 得到的词位都是默认的 D 权重(没调 setweight 就不会有 A/B/C),所以把 'a' mask 传给 ts_stat(text, weights) 会过滤掉所有词,但不报错,静默返回 0 行。改成 ts_stat(text) 单参(效果等价 mask='abcd',聚合所有权重)。
修:
- 0010 迁移里 refresh_search_keywords() 改用单参 ts_stat
- 0010 迁移 downgrade 部分同步修
- 0009 迁移 refresh_search_keywords() 同步修
- services/search.py _fallback_keywords 改用 chinese_zh + 单参 ts_stat
This commit is contained in:
@@ -218,23 +218,19 @@ def upgrade() -> None:
|
||||
FROM generate_series(1, length(word)) AS n
|
||||
)
|
||||
FROM ts_stat(
|
||||
'simple',
|
||||
(
|
||||
SELECT to_tsvector(
|
||||
'simple',
|
||||
coalesce(title_zh, '') || ' ' ||
|
||||
coalesce(body_zh_text, '') || ' ' ||
|
||||
coalesce(commentary, '') || ' ' ||
|
||||
coalesce(commentary_meituan, '')
|
||||
)
|
||||
FROM articles
|
||||
WHERE title_zh IS NOT NULL
|
||||
OR body_zh_text IS NOT NULL
|
||||
OR commentary IS NOT NULL
|
||||
OR commentary_meituan IS NOT NULL
|
||||
$$SELECT to_tsvector('simple',
|
||||
coalesce(title_zh, '') || ' ' ||
|
||||
coalesce(body_zh_text, '') || ' ' ||
|
||||
coalesce(commentary, '') || ' ' ||
|
||||
coalesce(commentary_meituan, '')
|
||||
)
|
||||
)
|
||||
WHERE length(word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
|
||||
FROM articles
|
||||
WHERE title_zh IS NOT NULL
|
||||
OR body_zh_text IS NOT NULL
|
||||
OR commentary IS NOT NULL
|
||||
OR commentary_meituan IS NOT NULL$$
|
||||
) AS s
|
||||
WHERE length(s.word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
|
||||
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
@@ -80,7 +80,8 @@ def upgrade() -> None:
|
||||
TRUNCATE search_keywords;
|
||||
|
||||
-- ts_stat(query text) 接受 SQL 字符串,内部执行并聚合词频
|
||||
-- 'a' = 任意权重(A/B/C/D 四档,这里聚合所有)
|
||||
-- 单参(等价 mask='abcd',聚合所有权重)
|
||||
-- ⚠️ 不能传 'a' mask — zhparser 不标 A 权重,会 0 行
|
||||
INSERT INTO search_keywords (keyword, source, weight, prefix_keys)
|
||||
SELECT
|
||||
word,
|
||||
@@ -91,8 +92,7 @@ def upgrade() -> None:
|
||||
FROM generate_series(1, length(word)) AS n
|
||||
)
|
||||
FROM ts_stat(
|
||||
$q$
|
||||
SELECT to_tsvector('chinese_zh',
|
||||
$$SELECT to_tsvector('chinese_zh',
|
||||
coalesce(title_zh, '') || ' ' ||
|
||||
coalesce(body_zh_text, '') || ' ' ||
|
||||
coalesce(commentary, '') || ' ' ||
|
||||
@@ -102,10 +102,9 @@ def upgrade() -> None:
|
||||
WHERE title_zh IS NOT NULL
|
||||
OR body_zh_text IS NOT NULL
|
||||
OR commentary IS NOT NULL
|
||||
OR commentary_meituan IS NOT NULL
|
||||
$q$, 'a'
|
||||
)
|
||||
WHERE length(word) >= 2;
|
||||
OR commentary_meituan IS NOT NULL$$
|
||||
) AS s
|
||||
WHERE length(s.word) >= 2;
|
||||
END;
|
||||
$func$ LANGUAGE plpgsql;
|
||||
"""
|
||||
@@ -135,8 +134,7 @@ def downgrade() -> None:
|
||||
FROM generate_series(1, length(word)) AS n
|
||||
)
|
||||
FROM ts_stat(
|
||||
$q$
|
||||
SELECT to_tsvector('simple',
|
||||
$$SELECT to_tsvector('simple',
|
||||
coalesce(title_zh, '') || ' ' ||
|
||||
coalesce(body_zh_text, '') || ' ' ||
|
||||
coalesce(commentary, '') || ' ' ||
|
||||
@@ -146,10 +144,9 @@ def downgrade() -> None:
|
||||
WHERE title_zh IS NOT NULL
|
||||
OR body_zh_text IS NOT NULL
|
||||
OR commentary IS NOT NULL
|
||||
OR commentary_meituan IS NOT NULL
|
||||
$q$, 'a'
|
||||
)
|
||||
WHERE length(word) >= 2;
|
||||
OR commentary_meituan IS NOT NULL$$
|
||||
) AS s
|
||||
WHERE length(s.word) >= 2;
|
||||
END;
|
||||
$func$ LANGUAGE plpgsql;
|
||||
"""
|
||||
|
||||
@@ -130,8 +130,9 @@ class SearchService:
|
||||
async def _fallback_keywords(self, q: str, limit: int) -> list[dict]:
|
||||
"""回退:ts_stat 实时聚合(慢但能用)。
|
||||
|
||||
- 从 articles.title_zh + body_zh_text 实时 to_tsvector
|
||||
- 适用:search_keywords 表空 + ts_stat 之前的全量聚合
|
||||
- 从 articles.title_zh + body_zh_text 实时 to_tsvector(chinese_zh)
|
||||
- 适用:search_keywords 表空 + worker 没刷新过
|
||||
- ts_stat(text) 单参 — 第二参 weights mask 不能传 'a'(zhparser 不标 A 权重会 0 行)
|
||||
"""
|
||||
from sqlalchemy import text
|
||||
|
||||
@@ -139,16 +140,13 @@ class SearchService:
|
||||
"""
|
||||
SELECT word, nentry::int AS weight
|
||||
FROM ts_stat(
|
||||
'simple',
|
||||
(
|
||||
SELECT to_tsvector(
|
||||
'simple',
|
||||
coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
|
||||
)
|
||||
FROM articles
|
||||
WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
|
||||
$$SELECT to_tsvector('chinese_zh',
|
||||
coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
|
||||
)
|
||||
)
|
||||
FROM articles
|
||||
WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
|
||||
LIMIT 500$$
|
||||
) AS s
|
||||
WHERE word LIKE :prefix
|
||||
ORDER BY nentry DESC
|
||||
LIMIT :lim
|
||||
|
||||
Reference in New Issue
Block a user