fix(search): ts_stat 改单参(text),避免 'a' mask 静默 0 行
zhparser 生成的词位不带 A 权重(未显式标注的词位默认都是 D),传 'a' mask 给 ts_stat(text, weights) 会过滤掉所有词但不报错,静默 0 行。改成 ts_stat(text) 单参(等价 mask='abcd',聚合所有权重)。

修:
- 0010 迁移里 refresh_search_keywords() 改用单参 ts_stat
- 0010 迁移 downgrade 部分同步修
- 0009 迁移 refresh_search_keywords() 同步修
- services/search.py _fallback_keywords 改用 chinese_zh + 单参 ts_stat
This commit is contained in:
@@ -218,10 +218,7 @@ def upgrade() -> None:
|
|||||||
FROM generate_series(1, length(word)) AS n
|
FROM generate_series(1, length(word)) AS n
|
||||||
)
|
)
|
||||||
FROM ts_stat(
|
FROM ts_stat(
|
||||||
'simple',
|
$$SELECT to_tsvector('simple',
|
||||||
(
|
|
||||||
SELECT to_tsvector(
|
|
||||||
'simple',
|
|
||||||
coalesce(title_zh, '') || ' ' ||
|
coalesce(title_zh, '') || ' ' ||
|
||||||
coalesce(body_zh_text, '') || ' ' ||
|
coalesce(body_zh_text, '') || ' ' ||
|
||||||
coalesce(commentary, '') || ' ' ||
|
coalesce(commentary, '') || ' ' ||
|
||||||
@@ -231,10 +228,9 @@ def upgrade() -> None:
|
|||||||
WHERE title_zh IS NOT NULL
|
WHERE title_zh IS NOT NULL
|
||||||
OR body_zh_text IS NOT NULL
|
OR body_zh_text IS NOT NULL
|
||||||
OR commentary IS NOT NULL
|
OR commentary IS NOT NULL
|
||||||
OR commentary_meituan IS NOT NULL
|
OR commentary_meituan IS NOT NULL$$
|
||||||
)
|
) AS s
|
||||||
)
|
WHERE length(s.word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
|
||||||
WHERE length(word) >= 2; -- 过滤单字噪音(中文标点/单字停用词)
|
|
||||||
|
|
||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql;
|
$$ LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -80,7 +80,8 @@ def upgrade() -> None:
|
|||||||
TRUNCATE search_keywords;
|
TRUNCATE search_keywords;
|
||||||
|
|
||||||
-- ts_stat(query text) 接受 SQL 字符串,内部执行并聚合词频
|
-- ts_stat(query text) 接受 SQL 字符串,内部执行并聚合词频
|
||||||
-- 'a' = 任意权重(A/B/C/D 四档,这里聚合所有)
|
-- 单参(等价 mask='abcd',聚合所有权重)
|
||||||
|
-- ⚠️ 不能传 'a' mask — zhparser 不标 A 权重,会 0 行
|
||||||
INSERT INTO search_keywords (keyword, source, weight, prefix_keys)
|
INSERT INTO search_keywords (keyword, source, weight, prefix_keys)
|
||||||
SELECT
|
SELECT
|
||||||
word,
|
word,
|
||||||
@@ -91,8 +92,7 @@ def upgrade() -> None:
|
|||||||
FROM generate_series(1, length(word)) AS n
|
FROM generate_series(1, length(word)) AS n
|
||||||
)
|
)
|
||||||
FROM ts_stat(
|
FROM ts_stat(
|
||||||
$q$
|
$$SELECT to_tsvector('chinese_zh',
|
||||||
SELECT to_tsvector('chinese_zh',
|
|
||||||
coalesce(title_zh, '') || ' ' ||
|
coalesce(title_zh, '') || ' ' ||
|
||||||
coalesce(body_zh_text, '') || ' ' ||
|
coalesce(body_zh_text, '') || ' ' ||
|
||||||
coalesce(commentary, '') || ' ' ||
|
coalesce(commentary, '') || ' ' ||
|
||||||
@@ -102,10 +102,9 @@ def upgrade() -> None:
|
|||||||
WHERE title_zh IS NOT NULL
|
WHERE title_zh IS NOT NULL
|
||||||
OR body_zh_text IS NOT NULL
|
OR body_zh_text IS NOT NULL
|
||||||
OR commentary IS NOT NULL
|
OR commentary IS NOT NULL
|
||||||
OR commentary_meituan IS NOT NULL
|
OR commentary_meituan IS NOT NULL$$
|
||||||
$q$, 'a'
|
) AS s
|
||||||
)
|
WHERE length(s.word) >= 2;
|
||||||
WHERE length(word) >= 2;
|
|
||||||
END;
|
END;
|
||||||
$func$ LANGUAGE plpgsql;
|
$func$ LANGUAGE plpgsql;
|
||||||
"""
|
"""
|
||||||
@@ -135,8 +134,7 @@ def downgrade() -> None:
|
|||||||
FROM generate_series(1, length(word)) AS n
|
FROM generate_series(1, length(word)) AS n
|
||||||
)
|
)
|
||||||
FROM ts_stat(
|
FROM ts_stat(
|
||||||
$q$
|
$$SELECT to_tsvector('simple',
|
||||||
SELECT to_tsvector('simple',
|
|
||||||
coalesce(title_zh, '') || ' ' ||
|
coalesce(title_zh, '') || ' ' ||
|
||||||
coalesce(body_zh_text, '') || ' ' ||
|
coalesce(body_zh_text, '') || ' ' ||
|
||||||
coalesce(commentary, '') || ' ' ||
|
coalesce(commentary, '') || ' ' ||
|
||||||
@@ -146,10 +144,9 @@ def downgrade() -> None:
|
|||||||
WHERE title_zh IS NOT NULL
|
WHERE title_zh IS NOT NULL
|
||||||
OR body_zh_text IS NOT NULL
|
OR body_zh_text IS NOT NULL
|
||||||
OR commentary IS NOT NULL
|
OR commentary IS NOT NULL
|
||||||
OR commentary_meituan IS NOT NULL
|
OR commentary_meituan IS NOT NULL$$
|
||||||
$q$, 'a'
|
) AS s
|
||||||
)
|
WHERE length(s.word) >= 2;
|
||||||
WHERE length(word) >= 2;
|
|
||||||
END;
|
END;
|
||||||
$func$ LANGUAGE plpgsql;
|
$func$ LANGUAGE plpgsql;
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -130,8 +130,9 @@ class SearchService:
|
|||||||
async def _fallback_keywords(self, q: str, limit: int) -> list[dict]:
|
async def _fallback_keywords(self, q: str, limit: int) -> list[dict]:
|
||||||
"""回退:ts_stat 实时聚合(慢但能用)。
|
"""回退:ts_stat 实时聚合(慢但能用)。
|
||||||
|
|
||||||
- 从 articles.title_zh + body_zh_text 实时 to_tsvector
|
- 从 articles.title_zh + body_zh_text 实时 to_tsvector(chinese_zh)
|
||||||
- 适用:search_keywords 表空 + ts_stat 之前的全量聚合
|
- 适用:search_keywords 表空 + worker 没刷新过
|
||||||
|
- ts_stat(text) 单参 — 第二参 weights mask 不能传 'a'(zhparser 不标 A 权重会 0 行)
|
||||||
"""
|
"""
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
|
||||||
@@ -139,16 +140,13 @@ class SearchService:
|
|||||||
"""
|
"""
|
||||||
SELECT word, nentry::int AS weight
|
SELECT word, nentry::int AS weight
|
||||||
FROM ts_stat(
|
FROM ts_stat(
|
||||||
'simple',
|
$$SELECT to_tsvector('chinese_zh',
|
||||||
(
|
|
||||||
SELECT to_tsvector(
|
|
||||||
'simple',
|
|
||||||
coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
|
coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
|
||||||
)
|
)
|
||||||
FROM articles
|
FROM articles
|
||||||
WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
|
WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
|
||||||
)
|
LIMIT 500$$
|
||||||
)
|
) AS s
|
||||||
WHERE word LIKE :prefix
|
WHERE word LIKE :prefix
|
||||||
ORDER BY nentry DESC
|
ORDER BY nentry DESC
|
||||||
LIMIT :lim
|
LIMIT :lim
|
||||||
|
|||||||
Reference in New Issue
Block a user