Files
diary-news/backend/app/workers/__main__.py
Mavis 8d73f4fb28 fix(llm+worker+deploy): 兼容老 prompt 模板 + 消除 startup_run 日志噪音
- enrichment: 新增 _safe_format (基于 _SafeDict),缺失占位符保留原样不抛 KeyError。
  _enrich_format / _enrich_classify / _enrich_image / _enrich_commentary
  全部走 _safe_format,数据库里老 prompt(不支持 {body})不再让整条 article 卡住。
  复现: 388183 classify 一直 KeyError,enrichment_loop 反复重试它,316 篇全卡在 n/a。
- workers.__main__: startup_run 从 IntervalTrigger(minutes=0) 改成 DateTrigger
  (只跑一次),消除 'maximum number of running instances reached' 刷屏 WARNING。
- deploy_pull: 改 _connect 自动识别 RSA / Ed25519 / ECDSA key(原硬编码 Ed25519Key)
2026-06-08 21:20:43 +08:00

128 lines
4.0 KiB
Python

"""Worker 入口:启动调度器 + 异步任务。
`docker compose exec worker python -m app.workers`
"""
from __future__ import annotations
import asyncio
import logging
import signal
from datetime import datetime, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.date import DateTrigger
from apscheduler.triggers.interval import IntervalTrigger
from sqlalchemy import select
from app.config import settings
from app.database import AsyncSessionLocal
from app.models.source import Source
from app.services.llm.enrichment import enrichment_loop
from app.workers.pipeline import fetch_one_source, run_once, translation_loop
logger = logging.getLogger("news.worker")
logging.basicConfig(
level=settings.log_level,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
"""从 sources 表动态构建 job(可热更新)。"""
scheduler.remove_all_jobs()
async with AsyncSessionLocal() as s:
rows = (await s.execute(select(Source).where(Source.enabled.is_(True)))).scalars()
sources = list(rows)
if not sources:
logger.warning("no enabled sources; scheduler idle")
return
for src in sources:
trigger = (
CronTrigger.from_crontab(src.fetch_cron)
if src.fetch_cron
else IntervalTrigger(minutes=src.fetch_interval_min)
)
scheduler.add_job(
fetch_one_source,
trigger=trigger,
args=[src.id],
id=f"src:{src.slug}",
replace_existing=True,
max_instances=1,
coalesce=True,
misfire_grace_time=300,
)
logger.info("scheduled %s every %s", src.slug, src.fetch_cron or f"{src.fetch_interval_min}m")
async def _daily_rebuild() -> None:
"""每天 00:30 重建 job 列表(支持运行时新增源)。"""
scheduler = AsyncIOScheduler()
# 临时实例,只为重建用
# 实际用全局 scheduler 实例
pass
def build_scheduler() -> AsyncIOScheduler:
sched = AsyncIOScheduler(timezone="Asia/Hong_Kong")
return sched
async def main() -> None:
scheduler = build_scheduler()
await _rebuild_jobs(scheduler)
# 每天 00:30 重建一次
scheduler.add_job(
_rebuild_jobs,
trigger=CronTrigger(hour=0, minute=30),
args=[scheduler],
id="rebuild_jobs",
replace_existing=True,
)
# 启动时立即跑一次(只一次,用 DateTrigger 避免 IntervalTrigger 被 max_instances 拒绝刷日志)
scheduler.add_job(
run_once,
trigger=DateTrigger(run_date=datetime.now(timezone.utc)),
id="startup_run",
)
scheduler.start()
logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))
# 独立的翻译后台循环(不和 RSS 抓取并行;1 篇/秒)
translation_task = asyncio.create_task(translation_loop(), name="translation_loop")
logger.info("translation_loop task scheduled (1 article/sec)")
# 独立的 LLM 增强后台循环(翻译完成后,跑 4 项 LLM 任务)
enrichment_task = asyncio.create_task(enrichment_loop(), name="enrichment_loop")
logger.info("enrichment_loop task scheduled (scans translated articles)")
stop = asyncio.Event()
def _signal_handler():
logger.info("shutdown signal received")
stop.set()
loop = asyncio.get_running_loop()
for sig in (signal.SIGINT, signal.SIGTERM):
try:
loop.add_signal_handler(sig, _signal_handler)
except NotImplementedError:
# Windows 等不支持
pass
await stop.wait()
logger.info("stopping scheduler and background loops")
for t in (translation_task, enrichment_task):
t.cancel()
try:
await t
except asyncio.CancelledError:
pass
scheduler.shutdown(wait=False)
if __name__ == "__main__":
asyncio.run(main())