feat(ingest): API Push 前端层 + 文档 + 端到端联通
后端(支持 api_push source 创建/调度):
- schemas/source.py:SourceIn.url 改成 str(允许 api_push 的 api-push:// 占位)
- admin.py create_source 简化 url 传递
- workers/__main__.py:_rebuild_jobs 跳过 api_push 源(它是被动接收,不抓取)
- workers/pipeline.py:run_once 也加同条件,api_push 不进抓取循环

前端:
- api/articles.ts:ArticleListItem 加 is_short_news(required)/source_ref;ArticleDetail 加 external_id;导出 IngestTokenOut;adminApi 加 list/create/revoke ingest token 三个方法
- views/Feed.vue:卡片根 class 短新闻加 short-card(淡蓝底 #f6f9fc + 左侧 3px 蓝色色条 #4f9eff);元信息栏加 📰 短讯 角标;长新闻摘要 body_zh_text 截前 200 字,短新闻不截取保留换行(white-space: pre-wrap);短新闻不显示 AI 插图
- views/ArticleDetail.vue:tag 行加 📰 短讯 + source_ref 角标;短新闻路径下隐藏翻译状态/重译/原文链接按钮;正文区短新闻直接渲染 body_zh_text,跳过译文/原文/AI 配图卡片;Angel + 美团双评论卡片都保留
- views/AdminSources.vue:kind 加 api_push 选项;api_push 源 URL 字段变只读占位、隐藏抓取间隔;列表操作列加 🔑 Token 按钮;弹窗支持生成(raw_token 一次性显示 + 复制)/列表/撤销

文档:
- docs/api-push.md:调用方契约 + 三层去重 + 限速 + lifecycle + owner 操作手册 + curl/Python 示例 + 重试策略 + 故障排查
- README.md:关键特性加 API Push;API 概览加 /api/v1/ingest 和 3 个 /admin/.../ingest-tokens 端点
This commit is contained in:
@@ -47,7 +47,7 @@ async def create_source(body: SourceIn, session: AsyncSession = Depends(get_sess
|
||||
name=body.name,
|
||||
slug=body.slug,
|
||||
kind=body.kind,
|
||||
url=str(body.url),
|
||||
url=body.url,
|
||||
detail_selector=body.detail_selector,
|
||||
region=body.region,
|
||||
language_src=body.language_src,
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, HttpUrl
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from app.models.source import SourceKind
|
||||
|
||||
@@ -29,11 +30,21 @@ class SourceOut(BaseModel):
|
||||
blocklist_tags: list[str] = []
|
||||
|
||||
|
||||
# url 字段:正常源要 HttpUrl(校验合法 URL),但 api_push 源是合成占位(类似 api-push://...)
|
||||
# 用 Annotated Union 区分:rss/html_list/tg_channel → HttpUrl;api_push → str
|
||||
# 但 SourceIn.kind 未知时(前端一次提交),无法静态区分。最简单的兼容:统一接受 str,
|
||||
# 入库前在 admin.create_source 里按 kind 分支校验。
|
||||
# 这里改成 str(最长 2048),保留手工校验的责任。
|
||||
SourceUrlStr = Annotated[str, Field(min_length=1, max_length=2048)]
|
||||
|
||||
|
||||
class SourceIn(BaseModel):
|
||||
name: str = Field(min_length=1, max_length=128)
|
||||
slug: str = Field(min_length=1, max_length=128, pattern=r"^[a-z0-9-]+$")
|
||||
kind: SourceKind = SourceKind.RSS
|
||||
url: HttpUrl
|
||||
# url:不再强制 HttpUrl,允许 api_push 源的合成 url(api-push://...);
|
||||
# rss/html_list/tg_channel 由 admin.create_source 在入库前手工校验
|
||||
url: str = Field(min_length=1, max_length=2048)
|
||||
region: str | None = None
|
||||
language_src: str | None = None
|
||||
priority: int = Field(default=50, ge=1, le=100)
|
||||
|
||||
@@ -29,7 +29,11 @@ logging.basicConfig(
|
||||
|
||||
|
||||
async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
|
||||
"""从 sources 表动态构建 job(可热更新)。"""
|
||||
"""从 sources 表动态构建 job(可热更新)。
|
||||
|
||||
只调度有抓取语义的源(rss / html_list / tg_channel);
|
||||
api_push 是被动接收,不进 fetch 调度。
|
||||
"""
|
||||
scheduler.remove_all_jobs()
|
||||
async with AsyncSessionLocal() as s:
|
||||
rows = (await s.execute(select(Source).where(Source.enabled.is_(True)))).scalars()
|
||||
@@ -38,6 +42,10 @@ async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
|
||||
logger.warning("no enabled sources; scheduler idle")
|
||||
return
|
||||
for src in sources:
|
||||
# api_push 源不抓取(由 /api/v1/ingest 被动接收),跳过调度
|
||||
if src.kind.value == "api_push":
|
||||
logger.debug("skip scheduling api_push source: %s", src.slug)
|
||||
continue
|
||||
trigger = (
|
||||
CronTrigger.from_crontab(src.fetch_cron)
|
||||
if src.fetch_cron
|
||||
|
||||
@@ -293,10 +293,14 @@ def _wrap_html(text: str) -> str:
|
||||
# === 全量跑(供测试 / 手动触发) ===
|
||||
async def run_once() -> None:
|
||||
async with AsyncSessionLocal() as session:
|
||||
rows = (await session.execute(select(Source).where(Source.enabled.is_(True)))).scalars()
|
||||
rows = (
|
||||
await session.execute(
|
||||
select(Source).where(Source.enabled.is_(True), Source.kind != SourceKind.API_PUSH)
|
||||
)
|
||||
).scalars()
|
||||
sources = list(rows)
|
||||
|
||||
logger.info("run_once: %d enabled sources", len(sources))
|
||||
logger.info("run_once: %d enabled sources (api_push excluded)", len(sources))
|
||||
tasks = [fetch_one_source(s.id) for s in sources]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user