feat(search): 智能搜索建议 - 固化候选词表 (search_keywords + search_title_suggestions)
后端: - alembic 0009: 两张固化表 + GIN prefix_keys 索引 + articles trigger - /api/v1/search/suggestions: 混合 A(高频词 ts_stat) + B(真实标题) + 冷启动 fallback - worker 每日 03:00 + 启动时刷新 search_keywords - 顺便填 commit 11 TODO: articles.title_zh_tsv + GIN 索引(未来 FTS 基础) 前端: - NInput -> NAutoComplete + debounce 250ms - 选标题 -> 跳详情;选关键词 -> 填入 + 触发搜索 - AbortController 防 race condition 性能: prefix_keys @> ARRAY[prefix] 走 GIN 亚毫秒,100w 行也稳
This commit is contained in:
47
backend/app/api/search.py
Normal file
47
backend/app/api/search.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""/api/v1/search/* — 搜索建议(autocomplete)。
|
||||
|
||||
- GET /api/v1/search/suggestions?q=prefix
|
||||
返回:{"query", "titles": [...], "keywords": [...]}
|
||||
- titles: 真实文章标题(按 published_at DESC),B 方案
|
||||
- keywords: 高频词(按 weight DESC),A 方案
|
||||
- 冷启动:任一表空时自动 fallback 到实时 ILIKE / ts_stat
|
||||
- 鉴权:跟 articles 一致(需要登录)
|
||||
|
||||
性能:两个查询都走 GIN 数组索引(prefix_keys @> ARRAY['美']),亚毫秒。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import get_current_user
|
||||
from app.database import get_session
|
||||
from app.models.user import User
|
||||
from app.schemas.search import (
|
||||
SearchKeywordItem,
|
||||
SearchSuggestionsResponse,
|
||||
SearchTitleSuggestionItem,
|
||||
)
|
||||
from app.services.search import SearchService
|
||||
|
||||
router = APIRouter(prefix="/search", tags=["search"])
|
||||
|
||||
|
||||
@router.get("/suggestions", response_model=SearchSuggestionsResponse)
async def get_suggestions(
    q: str = Query(..., min_length=1, max_length=20, description="搜索前缀"),
    limit: int = Query(10, ge=1, le=20, description="每组最多返回多少"),
    _user: User = Depends(get_current_user),  # login required, same as the articles API
    session: AsyncSession = Depends(get_session),
):
    """Return autocomplete candidates for a search prefix.

    Delegates to ``SearchService.suggestions`` and wraps the raw dicts it
    returns into the response schema: one group of real article titles and
    one group of high-frequency keywords, each capped at ``limit`` entries.

    Intended to be called from the front-end search box as the user types
    (debounced on the client side).
    """
    payload = await SearchService(session).suggestions(q=q, limit=limit)

    # Build the typed items up front so the response constructor stays flat.
    title_items = [SearchTitleSuggestionItem(**entry) for entry in payload["titles"]]
    keyword_items = [SearchKeywordItem(**entry) for entry in payload["keywords"]]

    return SearchSuggestionsResponse(
        query=payload["query"],
        titles=title_items,
        keywords=keyword_items,
    )
|
||||
@@ -16,7 +16,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
|
||||
from app.api import admin, admin_llm, articles, auth, bookmarks, ingest, me, sources, subscriptions
|
||||
from app.api import admin, admin_llm, articles, auth, bookmarks, ingest, me, search, sources, subscriptions
|
||||
from app.config import settings
|
||||
from app.database import engine
|
||||
from app.redis_client import close_redis, get_redis
|
||||
@@ -100,6 +100,7 @@ app.include_router(sources.router, prefix=API_PREFIX)
|
||||
app.include_router(bookmarks.router, prefix=API_PREFIX)
|
||||
app.include_router(subscriptions.router, prefix=API_PREFIX)
|
||||
app.include_router(ingest.router, prefix=API_PREFIX)
|
||||
app.include_router(search.router, prefix=API_PREFIX)
|
||||
app.include_router(admin.router, prefix=API_PREFIX)
|
||||
app.include_router(admin_llm.router, prefix=API_PREFIX)
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@ from app.models.article import Article # noqa: F401
|
||||
from app.models.article_read import ArticleRead # noqa: F401
|
||||
from app.models.bookmark import Bookmark # noqa: F401
|
||||
from app.models.llm_setting import LlmSetting # noqa: F401
|
||||
from app.models.search_keyword import SearchKeyword # noqa: F401
|
||||
from app.models.search_title_suggestion import SearchTitleSuggestion # noqa: F401
|
||||
from app.models.source import Source, SourceKind # noqa: F401
|
||||
from app.models.subscription import Subscription # noqa: F401
|
||||
from app.models.user import User, UserRole # noqa: F401
|
||||
@@ -17,6 +19,8 @@ __all__ = [
|
||||
"ArticleRead",
|
||||
"Bookmark",
|
||||
"LlmSetting",
|
||||
"SearchKeyword",
|
||||
"SearchTitleSuggestion",
|
||||
"Source",
|
||||
"SourceKind",
|
||||
"Subscription",
|
||||
|
||||
45
backend/app/models/search_keyword.py
Normal file
45
backend/app/models/search_keyword.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""搜索建议候选词表(固化,worker 每日 ts_stat 刷新)。
|
||||
|
||||
- 数据源:articles.title_zh + body_zh_text + commentary + commentary_meituan
|
||||
- 用途:/api/v1/search/suggestions 返回"高频词"建议(A 方案)
|
||||
- 刷新:每日凌晨 worker 调 refresh_search_keywords() 全量重建
|
||||
- 查询:prefix_keys @> ARRAY['美'] 走 GIN 索引(亚毫秒)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, Integer, String, Text, func
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class SearchKeyword(Base):
    """One candidate keyword for the "high-frequency words" suggestion group."""

    __tablename__ = "search_keywords"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    keyword: Mapped[str] = mapped_column(Text, nullable=False)

    # Origin of the keyword: ts_stat / title_extract / manual.
    source: Mapped[str] = mapped_column(String(32), nullable=False)

    # Term frequency or article count; used as the ranking weight.
    weight: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # Precomputed array of prefixes of the keyword, matched with the
    # array-contains operator (prefix_keys @> ARRAY[prefix]).
    prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False)

    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    # Intentionally empty: the database-level UNIQUE constraint is created by
    # the alembic 0009 migration, not declared on the ORM side.
    __table_args__ = ()

    def __repr__(self) -> str:
        return f"<SearchKeyword {self.keyword!r} src={self.source} weight={self.weight}>"
|
||||
43
backend/app/models/search_title_suggestion.py
Normal file
43
backend/app/models/search_title_suggestion.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""搜索建议 - 真实文章标题片段表(articles 写入 trigger 自动维护)。
|
||||
|
||||
- 数据源:articles.title_zh(优先)/ articles.title(短新闻回退)
|
||||
- 用途:/api/v1/search/suggestions 返回"真实文章标题"建议(B 方案)
|
||||
- 维护:PG trigger(articles INSERT/UPDATE OF title_zh/title/published_at 触发)
|
||||
- 查询:prefix_keys @> ARRAY['美'] 走 GIN 索引,按 published_at DESC 排序
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, String, func
|
||||
from sqlalchemy.dialects.postgresql import ARRAY, TEXT
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class SearchTitleSuggestion(Base):
    """Prefix index row for one article title (the "real titles" suggestion group)."""

    __tablename__ = "search_title_suggestions"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    # Owning article; rows disappear with the article (ON DELETE CASCADE).
    article_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("articles.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Which text the prefixes were built from:
    # 'zh' (articles.title_zh) or 'src' (articles.title, used as fallback).
    title_lang: Mapped[str] = mapped_column(String(8), nullable=False, default="zh")

    # Precomputed prefixes of the title, from the first character up to the
    # full (possibly truncated) title.
    prefix_keys: Mapped[list[str]] = mapped_column(ARRAY(TEXT), nullable=False)

    # Copied from the article so suggestions can be ordered newest-first.
    published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    def __repr__(self) -> str:
        return f"<SearchTitleSuggestion article_id={self.article_id} lang={self.title_lang}>"
|
||||
24
backend/app/schemas/search.py
Normal file
24
backend/app/schemas/search.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""搜索建议 schema。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class SearchTitleSuggestionItem(BaseModel):
    # One "real article title" suggestion.

    # The article's id (article_id), used by the client to open the detail page.
    id: int
    # Publication time of the article; may be absent.
    published_at: datetime | None = None
    # Which title the match came from: 'zh' or 'src'.
    lang: str
|
||||
|
||||
|
||||
class SearchKeywordItem(BaseModel):
    # One "high-frequency keyword" suggestion.

    # The suggested keyword text.
    word: str
    # Ranking weight (higher sorts first).
    weight: int
    # Where the keyword came from:
    # 'ts_stat' / 'title_extract' / 'manual' / 'ts_stat_live'.
    source: str
|
||||
|
||||
|
||||
class SearchSuggestionsResponse(BaseModel):
    # Response body of GET /search/suggestions.

    # The query the suggestions were computed for.
    query: str
    # Real article titles matching the prefix.
    titles: list[SearchTitleSuggestionItem] = []
    # High-frequency keywords matching the prefix.
    keywords: list[SearchKeywordItem] = []
|
||||
156
backend/app/scripts/backfill_search_suggestions.py
Normal file
156
backend/app/scripts/backfill_search_suggestions.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""回灌 search_title_suggestions 表。
|
||||
|
||||
- 迁移 0009 给 articles 加了 trigger,新写入的会自动维护
|
||||
- 但迁移前已有的 articles 没经过 trigger,需要这个脚本一次性回填
|
||||
- 同时可以手动跑一次 refresh_search_keywords()(可选,worker 也会跑)
|
||||
|
||||
用法:
|
||||
cd backend
|
||||
python -m app.scripts.backfill_search_suggestions
|
||||
# 或 docker:
|
||||
docker compose exec api python -m app.scripts.backfill_search_suggestions
|
||||
|
||||
设计:
|
||||
- 用 batch INSERT,避免逐行 trigger 重复触发(虽然 trigger 已经在迁移里创建,
|
||||
重复执行对已存在的条目会先 DELETE 再 INSERT,等价于刷新,无害)
|
||||
- 进度条:每 1000 篇打一行
|
||||
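- Failure: an exception in any batch aborts the run (nothing here catches per-article errors); batches already committed are kept, and re-running is safe because each article's rows are deleted before being re-inserted.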
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.article import Article
|
||||
from app.models.search_title_suggestion import SearchTitleSuggestion
|
||||
|
||||
logger = logging.getLogger("news.backfill_search")
|
||||
logging.basicConfig(
|
||||
level="INFO",
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
|
||||
|
||||
MAX_TITLE_LEN = 50 # 跟迁移里的 trigger 一致
|
||||
BATCH_SIZE = 500
|
||||
|
||||
|
||||
def _build_prefix_keys(text_value: str) -> list[str]:
|
||||
"""从 '美联储宣布...' 生成 ['美','美联储','美联储宣',...,'美联储宣布...']"""
|
||||
text_value = (text_value or "")[:MAX_TITLE_LEN]
|
||||
if not text_value:
|
||||
return []
|
||||
return [text_value[:n] for n in range(1, len(text_value) + 1)]
|
||||
|
||||
|
||||
async def _process_article_batch(
    session: AsyncSession,
    articles: list[Article],
) -> int:
    """Rebuild the search_title_suggestions rows for one batch of articles.

    For each article the translated title (``title_zh``) is preferred; the
    original ``title`` is used when the translated one is missing or blank.
    Articles with neither are skipped.

    Existing rows for the batch are removed with a single DELETE and the
    fresh rows are bulk-inserted, then the session is committed. This is a
    delete-then-insert refresh rather than an ON CONFLICT upsert.

    Args:
        session: Open async session; it is committed by this function.
        articles: Articles to (re)index.

    Returns:
        The number of suggestion rows written (articles without a usable
        title are not counted).
    """
    # Local import: `delete` is not among this module's top-level imports.
    from sqlalchemy import delete

    rows = []
    for art in articles:
        title_zh = (art.title_zh or "").strip()
        title_src = (art.title or "").strip()
        if title_zh:
            src_text, lang = title_zh, "zh"
        elif title_src:
            src_text, lang = title_src, "src"
        else:
            continue

        rows.append(
            {
                "article_id": art.id,
                "title_lang": lang,
                # _build_prefix_keys truncates to MAX_TITLE_LEN itself.
                "prefix_keys": _build_prefix_keys(src_text),
                "published_at": art.published_at,
            }
        )

    if not rows:
        return 0

    # One DELETE for the whole batch instead of one round-trip per article.
    article_ids = [row["article_id"] for row in rows]
    await session.execute(
        delete(SearchTitleSuggestion).where(
            SearchTitleSuggestion.article_id.in_(article_ids)
        ),
        # No SearchTitleSuggestion objects are loaded here, so skip the
        # in-session synchronization pass.
        execution_options={"synchronize_session": False},
    )
    # Bulk insert of the rebuilt rows.
    await session.execute(pg_insert(SearchTitleSuggestion), rows)
    await session.commit()
    return len(rows)
|
||||
|
||||
|
||||
async def backfill() -> None:
    """Backfill search_title_suggestions for every article, then refresh keywords.

    Articles are walked in ascending ``id`` order in pages of ``BATCH_SIZE``
    (keyset pagination on ``Article.id``); each page is handed to
    ``_process_article_batch``, which commits it. Afterwards the PostgreSQL
    function ``refresh_search_keywords()`` is invoked once; a failure there
    is logged and does not fail the backfill.
    """
    # Local import: `func` is not among this module's top-level imports.
    from sqlalchemy import func

    started = datetime.now(timezone.utc)
    async with AsyncSessionLocal() as session:
        # Count in the database instead of loading every id into memory.
        total_count = (
            await session.execute(select(func.count()).select_from(Article))
        ).scalar_one()
        logger.info("backfill start: %d articles to process", total_count)

        processed = 0  # suggestion rows written
        scanned = 0  # articles visited, including those without a title
        last_id = 0
        while True:
            batch = (
                await session.execute(
                    select(Article)
                    .where(Article.id > last_id)
                    .order_by(Article.id)
                    .limit(BATCH_SIZE)
                )
            ).scalars().all()
            if not batch:
                break

            # Read the cursor BEFORE the helper commits: if the session
            # expires instances on commit, touching `.id` afterwards would
            # need a lazy load, which is not possible from async code.
            last_id = batch[-1].id
            scanned += len(batch)

            processed += await _process_article_batch(session, list(batch))
            logger.info(
                "progress: %d / %d (%.1f%%)",
                scanned,
                total_count,
                scanned / total_count * 100 if total_count else 0,
            )

    elapsed = (datetime.now(timezone.utc) - started).total_seconds()
    logger.info("backfill done: %d rows in %.1fs", processed, elapsed)

    # Also refresh search_keywords once so the keyword table has data too.
    logger.info("triggering refresh_search_keywords()...")
    async with AsyncSessionLocal() as session:
        try:
            await session.execute(text("SELECT refresh_search_keywords()"))
            await session.commit()
            logger.info("refresh_search_keywords() done")
        except Exception as e:
            # Best effort: the message notes the worker repeats this at 03:00.
            logger.exception("refresh_search_keywords failed: %s (worker 03:00 会再跑)", e)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the backfill and return a process exit code.

    Returns:
        0 when ``backfill()`` finishes, 1 when the run is interrupted with
        Ctrl-C. Any other exception propagates to the caller.
    """
    exit_code = 0
    try:
        asyncio.run(backfill())
    except KeyboardInterrupt:
        logger.warning("interrupted")
        exit_code = 1
    return exit_code
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
160
backend/app/services/search.py
Normal file
160
backend/app/services/search.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""搜索建议服务:混合 A(高频词)+ B(真实标题) + 冷启动 fallback。
|
||||
|
||||
- A: search_keywords(prefix_keys @> ARRAY['美'], ORDER BY weight DESC)
|
||||
- B: search_title_suggestions(prefix_keys @> ARRAY['美'], ORDER BY published_at DESC)
|
||||
- fallback: 任一表空时回退实时 ILIKE 查 articles(冷启动 / worker 没刷新过)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from sqlalchemy import desc, select
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.article import Article
|
||||
from app.models.search_keyword import SearchKeyword
|
||||
from app.models.search_title_suggestion import SearchTitleSuggestion
|
||||
|
||||
logger = logging.getLogger("news.search")
|
||||
|
||||
|
||||
class SearchService:
    """Search-suggestion service.

    Given a prefix, returns two groups of candidates:

    - titles: real article titles from search_title_suggestions, newest first
    - keywords: high-frequency words from search_keywords, heaviest first

    Cold-start fallback: when one of the two precomputed tables holds no rows
    at all, that group is computed live from the articles table instead. A
    prefix that simply has no match does not trigger the live queries.
    """

    def __init__(self, session: AsyncSession):
        self.session = session

    @staticmethod
    def _escape_like(value: str) -> str:
        """Escape LIKE metacharacters so *value* is matched literally.

        Backslash is used as the escape character, so ``\\``, ``%`` and ``_``
        are each prefixed with a backslash.
        """
        return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    async def _is_empty(self, model) -> bool:
        """Return True when the table mapped by *model* contains no rows."""
        first = (await self.session.execute(select(model.id).limit(1))).first()
        return first is None

    async def suggestions(
        self,
        q: str,
        limit: int = 10,
    ) -> dict[str, str | list[dict]]:
        """Return search suggestions for a prefix.

        Args:
            q: The prefix typed by the user; surrounding whitespace is ignored.
            limit: Maximum number of entries per group.

        Returns:
            ``{"query": q, "titles": [...], "keywords": [...]}`` where
            each title is ``{"id", "published_at", "lang"}`` (``published_at``
            is an ISO-8601 string or ``None``) and each keyword is
            ``{"word", "weight", "source"}``. A blank query yields two empty
            groups without touching the database.
        """
        q = q.strip()
        if not q:
            return {"query": q, "titles": [], "keywords": []}

        # 1) Real titles (plan B): prefix_keys @> ARRAY[q], newest first.
        #    NULLS LAST because PostgreSQL sorts NULLs first under DESC, which
        #    would put undated articles at the top.
        title_rows = await self.session.execute(
            select(
                SearchTitleSuggestion.article_id,
                SearchTitleSuggestion.published_at,
                SearchTitleSuggestion.title_lang,
            )
            .where(SearchTitleSuggestion.prefix_keys.contains([q]))
            .order_by(desc(SearchTitleSuggestion.published_at).nulls_last())
            .limit(limit)
        )
        titles = [
            {
                "id": row.article_id,
                "published_at": row.published_at.isoformat() if row.published_at else None,
                "lang": row.title_lang,
            }
            for row in title_rows.all()
        ]

        # 2) High-frequency keywords (plan A): prefix_keys @> ARRAY[q], by weight.
        kw_rows = await self.session.execute(
            select(SearchKeyword.keyword, SearchKeyword.weight, SearchKeyword.source)
            .where(SearchKeyword.prefix_keys.contains([q]))
            .order_by(desc(SearchKeyword.weight))
            .limit(limit)
        )
        keywords = [
            {"word": row.keyword, "weight": row.weight, "source": row.source}
            for row in kw_rows.all()
        ]

        # 3) Cold-start fallback. Only when the precomputed table is really
        #    empty (fresh database / truncated); otherwise every prefix with
        #    no match would pay for a live scan of the articles table.
        if not titles and await self._is_empty(SearchTitleSuggestion):
            titles = await self._fallback_titles(q, limit)
        if not keywords and await self._is_empty(SearchKeyword):
            keywords = await self._fallback_keywords(q, limit)

        return {"query": q, "titles": titles, "keywords": keywords}

    async def _fallback_titles(self, q: str, limit: int) -> list[dict]:
        """Live fallback: prefix-match article titles directly on articles.

        Matches ``title_zh`` or ``title`` case-insensitively against
        ``q`` as a literal prefix, restricted to non-duplicate articles
        published within the last 7 days, newest first.
        """
        from datetime import datetime, timedelta, timezone

        from sqlalchemy import or_

        # NOTE(review): the cutoff is a naive UTC datetime, as in the original
        # code; confirm this matches how articles.published_at is stored.
        since = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=7)

        # Escape %, _ and \ so user input cannot inject wildcards.
        pattern = f"{self._escape_like(q)}%"
        stmt = (
            select(Article.id, Article.published_at, Article.title_zh, Article.title)
            .where(
                Article.published_at >= since,
                Article.duplicate_of.is_(None),
                or_(
                    Article.title_zh.ilike(pattern, escape="\\"),
                    Article.title.ilike(pattern, escape="\\"),
                ),
            )
            .order_by(desc(Article.published_at))
            .limit(limit)
        )
        rows = (await self.session.execute(stmt)).all()
        return [
            {
                "id": row.id,
                "published_at": row.published_at.isoformat() if row.published_at else None,
                "lang": "zh" if row.title_zh else "src",
            }
            for row in rows
        ]

    async def _fallback_keywords(self, q: str, limit: int) -> list[dict]:
        """Live fallback: aggregate word frequencies with ``ts_stat``.

        Builds a 'simple' tsvector from ``title_zh`` + ``body_zh_text`` of
        every article that has either, and returns the words starting with
        ``q`` ordered by total occurrences. Each entry is tagged with
        source ``'ts_stat_live'``.
        """
        from sqlalchemy import text

        # ts_stat() takes the document query as a *text* argument, so the
        # inner SELECT is passed as a dollar-quoted string, not a subquery.
        sql = text(
            """
            SELECT word, nentry::int AS weight
            FROM ts_stat(
                $stat$
                SELECT to_tsvector(
                    'simple',
                    coalesce(title_zh, '') || ' ' || coalesce(body_zh_text, '')
                )
                FROM articles
                WHERE title_zh IS NOT NULL OR body_zh_text IS NOT NULL
                $stat$
            )
            WHERE word LIKE :prefix
            ORDER BY nentry DESC
            LIMIT :lim
            """
        )
        # The 'simple' configuration lowercases tokens, so lowercase the
        # prefix too. Backslash is PostgreSQL's default LIKE escape character,
        # so the escaped prefix needs no explicit ESCAPE clause.
        prefix = f"{self._escape_like(q.lower())}%"
        rows = (await self.session.execute(sql, {"prefix": prefix, "lim": limit})).all()
        return [{"word": r.word, "weight": r.weight, "source": "ts_stat_live"} for r in rows]
|
||||
@@ -7,13 +7,13 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import logging
|
||||
import signal
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.date import DateTrigger
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import select, text
|
||||
|
||||
from app.config import settings
|
||||
from app.database import AsyncSessionLocal
|
||||
@@ -28,6 +28,22 @@ logging.basicConfig(
|
||||
)
|
||||
|
||||
|
||||
async def _refresh_search_keywords() -> None:
    """Refresh the search_keywords table (scheduled job).

    Runs ``SELECT refresh_search_keywords()`` in its own session and commits.
    Per the original notes, that PostgreSQL function is created by migration
    0009 and rebuilds the table in full rather than incrementally.

    Any failure is logged with its traceback and swallowed, so a broken
    refresh never takes the worker down.
    """
    try:
        async with AsyncSessionLocal() as db:
            refresh_stmt = text("SELECT refresh_search_keywords()")
            await db.execute(refresh_stmt)
            await db.commit()
        logger.info("search_keywords refreshed")
    except Exception as exc:
        logger.exception("search_keywords refresh failed: %s", exc)
|
||||
|
||||
|
||||
async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
|
||||
"""从 sources 表动态构建 job(可热更新)。
|
||||
|
||||
@@ -95,6 +111,23 @@ async def main() -> None:
|
||||
id="startup_run",
|
||||
)
|
||||
|
||||
# === 搜索建议相关 ===
|
||||
# 每日凌晨 03:00 刷新 search_keywords(ts_stat 词频)
|
||||
scheduler.add_job(
|
||||
_refresh_search_keywords,
|
||||
trigger=CronTrigger(hour=3, minute=0),
|
||||
id="refresh_search_keywords",
|
||||
replace_existing=True,
|
||||
)
|
||||
# 启动时延迟 10 秒跑一次(冷启动友好,worker 起来时 search_keywords 就有数据;
|
||||
# 延迟是等 DB 完全就绪 + 不和 startup_run 抢资源)
|
||||
scheduler.add_job(
|
||||
_refresh_search_keywords,
|
||||
trigger=DateTrigger(run_date=datetime.now() + timedelta(seconds=10)),
|
||||
id="startup_refresh_search_keywords",
|
||||
)
|
||||
logger.info("scheduled: refresh_search_keywords daily 03:00 + on startup (+10s)")
|
||||
|
||||
scheduler.start()
|
||||
logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user