后端(支持 api_push source 创建/调度): - schemas/source.py:SourceIn.url 改成 str(允许 api_push 的 api-push:// 占位) - admin.py create_source 简化 url 传递 - workers/__main__.py:_rebuild_jobs 跳过 api_push 源(它是被动接收,不抓取) - workers/pipeline.py:run_once 也加同条件,api_push 不进抓取循环 前端: - api/articles.ts:ArticleListItem 加 is_short_news(required)/source_ref; ArticleDetail 加 external_id;导出 IngestTokenOut;adminApi 加 list/create/revoke ingest token 三个方法 - views/Feed.vue:卡片根 class 短新闻加 short-card(淡蓝底 #f6f9fc + 左侧 3px 蓝色色条 #4f9eff);元信息栏加 📰 短讯 角标;长新闻摘要 body_zh_text 截前 200 字,短新闻不截取保留换行(white-space: pre-wrap); 短新闻不显示 AI 插图 - views/ArticleDetail.vue:tag 行加 📰 短讯 + source_ref 角标;短新闻 路径下隐藏翻译状态/重译/原文链接按钮;正文区短新闻直接渲染 body_zh_text,跳过译文/原文/AI 配图卡片;Angel + 美团双评论卡片 都保留 - views/AdminSources.vue:kind 加 api_push 选项;api_push 源 URL 字段 变只读占位、隐藏抓取间隔;列表操作列加 🔑 Token 按钮; 弹窗支持生成(raw_token 一次性显示 + 复制)/列表/撤销 文档: - docs/api-push.md:调用方契约 + 三层去重 + 限速 + lifecycle + owner 操作手册 + curl/Python 示例 + 重试策略 + 故障排查 - README.md:关键特性加 API Push;API 概览加 /api/v1/ingest 和 3 个 /admin/.../ingest-tokens 端点
136 lines
4.3 KiB
Python
136 lines
4.3 KiB
Python
"""Worker 入口:启动调度器 + 异步任务。
|
|
|
|
`docker compose exec worker python -m app.workers`
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import signal
|
|
from datetime import datetime, timezone
|
|
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
from apscheduler.triggers.cron import CronTrigger
|
|
from apscheduler.triggers.date import DateTrigger
|
|
from apscheduler.triggers.interval import IntervalTrigger
|
|
from sqlalchemy import select
|
|
|
|
from app.config import settings
|
|
from app.database import AsyncSessionLocal
|
|
from app.models.source import Source
|
|
from app.services.llm.enrichment import enrichment_loop
|
|
from app.workers.pipeline import fetch_one_source, run_once, translation_loop
|
|
|
|
# Configure root logging at import time; the level comes from application settings.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=settings.log_level,
)

# Module-wide logger used by every function in this worker entry point.
logger = logging.getLogger("news.worker")
|
|
|
|
|
|
# Prefix shared by every per-source fetch job id; it distinguishes those jobs
# from any other job registered on the same scheduler.
_SOURCE_JOB_PREFIX = "src:"


async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
    """Rebuild the per-source fetch jobs from the ``sources`` table.

    Existing fetch jobs (ids starting with ``src:``) are dropped and one job
    per enabled source is registered again, so the function can be re-run at
    any time to pick up added, changed or disabled sources. Jobs with any
    other id are left untouched.

    Only sources that are actually fetched get a job; ``api_push`` sources are
    skipped because they are received passively instead of being fetched.

    A source whose schedule cannot be turned into a trigger is logged and
    skipped so that it does not prevent the remaining sources from being
    scheduled.

    Args:
        scheduler: The scheduler whose per-source fetch jobs are replaced.
    """
    # Remove only the fetch jobs. remove_all_jobs() would also delete every
    # other job on the scheduler, including the periodic job that invokes this
    # very function, so the first scheduled rebuild would also be the last.
    for job in scheduler.get_jobs():
        if job.id.startswith(_SOURCE_JOB_PREFIX):
            scheduler.remove_job(job.id)

    async with AsyncSessionLocal() as session:
        result = await session.execute(select(Source).where(Source.enabled.is_(True)))
        sources = list(result.scalars())

    if not sources:
        logger.warning("no enabled sources; scheduler idle")
        return

    for src in sources:
        # api_push sources are not fetched (they are received passively via
        # /api/v1/ingest), so they never get a fetch job.
        if src.kind.value == "api_push":
            logger.debug("skip scheduling api_push source: %s", src.slug)
            continue

        # A crontab expression takes precedence over the plain minute interval.
        try:
            trigger = (
                CronTrigger.from_crontab(src.fetch_cron)
                if src.fetch_cron
                else IntervalTrigger(minutes=src.fetch_interval_min)
            )
        except (ValueError, TypeError):
            # Bad crontab string or unusable interval: skip this source only.
            logger.exception("invalid schedule for source %s; not scheduled", src.slug)
            continue

        scheduler.add_job(
            fetch_one_source,
            trigger=trigger,
            args=[src.id],
            id=f"{_SOURCE_JOB_PREFIX}{src.slug}",
            replace_existing=True,
            max_instances=1,
            coalesce=True,
            misfire_grace_time=300,
        )
        logger.info("scheduled %s every %s", src.slug, src.fetch_cron or f"{src.fetch_interval_min}m")
|
|
|
|
|
|
async def _daily_rebuild() -> None:
|
|
"""每天 00:30 重建 job 列表(支持运行时新增源)。"""
|
|
scheduler = AsyncIOScheduler()
|
|
# 临时实例,只为重建用
|
|
# 实际用全局 scheduler 实例
|
|
pass
|
|
|
|
|
|
def build_scheduler() -> AsyncIOScheduler:
    """Create the worker's asyncio scheduler, configured for Asia/Hong_Kong time."""
    return AsyncIOScheduler(timezone="Asia/Hong_Kong")
|
|
|
|
|
|
async def main() -> None:
    """Run the worker until a shutdown signal is received.

    Builds the scheduler, registers the per-source fetch jobs, a daily
    rebuild job and a one-shot startup run, then starts the translation and
    enrichment background tasks. On SIGINT/SIGTERM the background tasks are
    cancelled and the scheduler is shut down.
    """
    scheduler = build_scheduler()
    await _rebuild_jobs(scheduler)

    # Rebuild the job list once a day at 00:30.
    # NOTE(review): the trigger is created without an explicit timezone;
    # confirm which timezone 00:30 is evaluated in.
    daily_at_0030 = CronTrigger(hour=0, minute=30)
    scheduler.add_job(
        _rebuild_jobs,
        trigger=daily_at_0030,
        args=[scheduler],
        id="rebuild_jobs",
        replace_existing=True,
    )

    # Run once right at startup. A one-shot DateTrigger is used so that an
    # IntervalTrigger is not rejected by max_instances and spams the log.
    run_immediately = DateTrigger(run_date=datetime.now(timezone.utc))
    scheduler.add_job(run_once, trigger=run_immediately, id="startup_run")

    scheduler.start()
    logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))

    # Standalone background translation loop (not run in parallel with RSS
    # fetching; 1 article per second).
    translation_task = asyncio.create_task(translation_loop(), name="translation_loop")
    logger.info("translation_loop task scheduled (1 article/sec)")

    # Standalone background LLM enrichment loop (runs after translation).
    enrichment_task = asyncio.create_task(enrichment_loop(), name="enrichment_loop")
    logger.info("enrichment_loop task scheduled (scans translated articles)")

    shutdown_requested = asyncio.Event()

    def _request_shutdown():
        logger.info("shutdown signal received")
        shutdown_requested.set()

    running_loop = asyncio.get_running_loop()
    for signum in (signal.SIGINT, signal.SIGTERM):
        try:
            running_loop.add_signal_handler(signum, _request_shutdown)
        except NotImplementedError:
            # Not supported on some platforms (e.g. Windows).
            pass

    # Block here until one of the registered signals arrives.
    await shutdown_requested.wait()
    logger.info("stopping scheduler and background loops")

    background_tasks = (translation_task, enrichment_task)
    for task in background_tasks:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected result of the cancellation requested just above.
            pass

    scheduler.shutdown(wait=False)
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed via `python -m app.workers`: run the worker's async entry point.
    asyncio.run(main())
|