Files
diary-news/backend/app/workers/__main__.py
xiaji e274246056 feat(ingest): API Push 前端层 + 文档 + 端到端联通
后端(支持 api_push source 创建/调度):
- schemas/source.py:SourceIn.url 改成 str(允许 api_push 的 api-push:// 占位)
- admin.py create_source 简化 url 传递
- workers/__main__.py:_rebuild_jobs 跳过 api_push 源(它是被动接收,不抓取)
- workers/pipeline.py:run_once 也加同条件,api_push 不进抓取循环

前端:
- api/articles.ts:ArticleListItem 加 is_short_news(required)/source_ref;
  ArticleDetail 加 external_id;导出 IngestTokenOut;adminApi 加
  list/create/revoke ingest token 三个方法
- views/Feed.vue:卡片根 class 短新闻加 short-card(淡蓝底 #f6f9fc +
  左侧 3px 蓝色色条 #4f9eff);元信息栏加 📰 短讯 角标;长新闻摘要
  body_zh_text 截前 200 字,短新闻不截取保留换行(white-space: pre-wrap);
  短新闻不显示 AI 插图
- views/ArticleDetail.vue:tag 行加 📰 短讯 + source_ref 角标;短新闻
  路径下隐藏翻译状态/重译/原文链接按钮;正文区短新闻直接渲染
  body_zh_text,跳过译文/原文/AI 配图卡片;Angel + 美团双评论卡片
  都保留
- views/AdminSources.vue:kind 加 api_push 选项;api_push 源 URL 字段
  变只读占位、隐藏抓取间隔;列表操作列加 🔑 Token 按钮;
  弹窗支持生成(raw_token 一次性显示 + 复制)/列表/撤销

文档:
- docs/api-push.md:调用方契约 + 三层去重 + 限速 + lifecycle +
  owner 操作手册 + curl/Python 示例 + 重试策略 + 故障排查
- README.md:关键特性加 API Push;API 概览加 /api/v1/ingest 和
  3 个 /admin/.../ingest-tokens 端点
2026-06-14 16:15:21 +08:00

136 lines
4.3 KiB
Python

"""Worker 入口:启动调度器 + 异步任务。
`docker compose exec worker python -m app.workers`
"""
from __future__ import annotations
import asyncio
import logging
import signal
from datetime import datetime, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.date import DateTrigger
from apscheduler.triggers.interval import IntervalTrigger
from sqlalchemy import select
from app.config import settings
from app.database import AsyncSessionLocal
from app.models.source import Source
from app.services.llm.enrichment import enrichment_loop
from app.workers.pipeline import fetch_one_source, run_once, translation_loop
# Configure root logging at import time; the level is taken from app settings.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=settings.log_level,
)

# Module-level logger used by everything in this worker entry point.
logger = logging.getLogger("news.worker")
async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
    """Rebuild the per-source fetch jobs from the ``sources`` table (hot-reloadable).

    Every enabled source whose kind is not ``api_push`` gets one
    ``fetch_one_source`` job with id ``src:<slug>``.  The trigger is the
    source's crontab expression when ``fetch_cron`` is set, otherwise an
    interval of ``fetch_interval_min`` minutes.  ``api_push`` sources are
    received passively (via /api/v1/ingest) and are never scheduled.

    Only jobs whose id starts with ``src:`` are replaced.  Calling
    ``scheduler.remove_all_jobs()`` here would also delete any non-source
    job living in the same scheduler -- including a periodic job that
    invokes this very function -- so the rebuild would stop repeating after
    its first run.

    The new job list is fully prepared (DB query + trigger construction)
    before the old source jobs are removed, so an exception raised while
    loading or parsing leaves the existing schedule untouched.

    Args:
        scheduler: The scheduler whose ``src:*`` jobs are replaced.
    """
    job_prefix = "src:"

    # 1) Load the enabled sources; materialize inside the session scope.
    async with AsyncSessionLocal() as s:
        rows = (await s.execute(select(Source).where(Source.enabled.is_(True)))).scalars()
        sources = list(rows)

    # 2) Build every trigger up front; a malformed crontab raises here,
    #    before the current schedule has been modified.
    planned = []
    for src in sources:
        # api_push sources are not fetched, so they get no job.
        if src.kind.value == "api_push":
            logger.debug("skip scheduling api_push source: %s", src.slug)
            continue
        trigger = (
            CronTrigger.from_crontab(src.fetch_cron)
            if src.fetch_cron
            else IntervalTrigger(minutes=src.fetch_interval_min)
        )
        planned.append((src, trigger))

    # 3) Drop the stale per-source jobs only (get_jobs() returns a list, so
    #    removing while iterating is safe).
    for job in scheduler.get_jobs():
        if job.id.startswith(job_prefix):
            scheduler.remove_job(job.id)

    if not sources:
        logger.warning("no enabled sources; scheduler idle")
        return

    # 4) Register the fresh jobs.
    for src, trigger in planned:
        scheduler.add_job(
            fetch_one_source,
            trigger=trigger,
            args=[src.id],
            id=f"{job_prefix}{src.slug}",
            replace_existing=True,
            max_instances=1,
            coalesce=True,
            misfire_grace_time=300,
        )
        logger.info("scheduled %s every %s", src.slug, src.fetch_cron or f"{src.fetch_interval_min}m")
async def _daily_rebuild() -> None:
    """Placeholder for the daily 00:30 job-list rebuild; currently a no-op.

    The original stub created a throwaway scheduler instance and did nothing
    with it.  That unused object has been dropped; the coroutine is kept only
    so that any existing reference to the name keeps working.

    NOTE(review): this function does not appear to be referenced in the
    visible module -- confirm and consider deleting it.
    """
    return None
def build_scheduler() -> AsyncIOScheduler:
    """Create the worker's asyncio scheduler, configured with the Asia/Hong_Kong time zone.

    The returned scheduler has no jobs and has not been started.
    """
    return AsyncIOScheduler(timezone="Asia/Hong_Kong")
async def main() -> None:
    """Run the worker until a shutdown signal (SIGINT / SIGTERM) arrives.

    Startup sequence:
      1. build the scheduler and register the per-source fetch jobs;
      2. register the ``rebuild_jobs`` cron job (00:30 every day);
      3. register a one-shot ``startup_run`` job that calls ``run_once``;
      4. start the scheduler and spawn the translation and enrichment
         background loops as asyncio tasks.

    Shutdown always cancels both background tasks and stops the scheduler
    (without waiting for running jobs), even when this coroutine itself is
    cancelled or one of the background tasks has already failed.  A task that
    ended with an exception other than cancellation is logged instead of
    aborting the rest of the cleanup.
    """
    scheduler = build_scheduler()
    await _rebuild_jobs(scheduler)

    # Rebuild the job list once a day at 00:30.
    scheduler.add_job(
        _rebuild_jobs,
        trigger=CronTrigger(hour=0, minute=30),
        args=[scheduler],
        id="rebuild_jobs",
        replace_existing=True,
    )

    # Run once right after startup (exactly once; a DateTrigger is used so an
    # IntervalTrigger does not spam the log with max_instances rejections).
    scheduler.add_job(
        run_once,
        trigger=DateTrigger(run_date=datetime.now(timezone.utc)),
        id="startup_run",
    )

    scheduler.start()
    logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))

    # Independent translation background loop.
    translation_task = asyncio.create_task(translation_loop(), name="translation_loop")
    logger.info("translation_loop task scheduled (1 article/sec)")

    # Independent LLM enrichment background loop (runs after translation).
    enrichment_task = asyncio.create_task(enrichment_loop(), name="enrichment_loop")
    logger.info("enrichment_loop task scheduled (scans translated articles)")

    background_tasks = (translation_task, enrichment_task)
    stop = asyncio.Event()

    def _signal_handler():
        logger.info("shutdown signal received")
        stop.set()

    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, _signal_handler)
        except NotImplementedError:
            # Not supported on some platforms (e.g. Windows).
            pass

    try:
        await stop.wait()
    finally:
        logger.info("stopping scheduler and background loops")
        # Cancel everything first, then wait for all of them together, so a
        # task that already crashed cannot prevent the others from stopping.
        for task in background_tasks:
            task.cancel()
        try:
            outcomes = await asyncio.gather(*background_tasks, return_exceptions=True)
            for task, outcome in zip(background_tasks, outcomes):
                # CancelledError is the expected outcome; anything else means
                # the loop had died on its own before shutdown.
                if isinstance(outcome, BaseException) and not isinstance(
                    outcome, asyncio.CancelledError
                ):
                    logger.error(
                        "background task %s failed: %r", task.get_name(), outcome
                    )
        finally:
            scheduler.shutdown(wait=False)
# Script entry point (`python -m app.workers`): run the worker's async main.
if __name__ == "__main__":
    asyncio.run(main())