feat(ingest): API Push 短新闻接口层
- POST /api/v1/ingest:鉴权(X-Ingest-Token) + 限速(每 token 2 篇/秒,
Redis 滑动桶,INGEST_RATE_PER_SEC 可调) + 三层去重(L1 external_id /
L2 content_hash / L3 DB UNIQUE 兜底,均带 reason)
- 写入字段:is_short_news=True、translation/format/image_ai_status='n/a'、
classify_status=(有 tags?'ok':'pending')、commentary_{angel,meituan}_status='pending'、
body_zh_text=body_text(走统一路径,前端/prompt 不用改)
- services/fetchers/api_push.py:compute_content_hash + synthesize_url +
normalize_published_at + build_initial_status 纯函数
- schemas/ingest.py:IngestPayload(title 1-200/body 1-5000/tags 去重去空) +
IngestResponse(article_id/content_hash/status/reason/matched_external_id)
- admin.py:POST/GET/DELETE /admin/sources/{id}/ingest-tokens — owner 生成
(raw_token 仅一次性返回)、列出、撤销
- schemas/article.py:ArticleListItem 加 is_short_news/source_ref;
ArticleDetail 加 is_short_news/source_ref/external_id
- main.py:挂 ingest router;config.py + .env.example:ingest_rate_per_sec 默认 2
短新闻由 commit 1 enrichment_loop 自动接管 classify + 双 provider commentary,
跳过 format/image。
This commit is contained in:
73
backend/app/schemas/ingest.py
Normal file
73
backend/app/schemas/ingest.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""API Push 短新闻 ingest 接口的 I/O schema。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
|
||||
# Length limits (planned in commit 1).
TITLE_MAX = 200
BODY_MAX = 5000
EXTERNAL_ID_MAX = 128
SOURCE_REF_MAX = 64
AUTHOR_MAX = 255
TAGS_MAX = 10  # at most 10 tags per payload
TAG_MAX_LEN = 32  # maximum length of a single tag
|
||||
|
||||
|
||||
class IngestPayload(BaseModel):
    """Request body accepted by POST /api/v1/ingest.

    Required: title and body.
    Recommended: external_id (used as the idempotency key).
    Optional: url, source_ref, author, published_at, tags.
    """

    external_id: str | None = Field(default=None, max_length=EXTERNAL_ID_MAX)
    title: str = Field(min_length=1, max_length=TITLE_MAX)
    body: str = Field(min_length=1, max_length=BODY_MAX)
    url: str | None = Field(default=None, max_length=2048)
    source_ref: str | None = Field(default=None, max_length=SOURCE_REF_MAX)
    author: str | None = Field(default=None, max_length=AUTHOR_MAX)
    published_at: datetime | None = None
    tags: list[str] | None = Field(default=None, max_length=TAGS_MAX)

    @field_validator("title", "body")
    @classmethod
    def _strip_and_check(cls, v: str) -> str:
        """Return ``v`` with surrounding whitespace removed.

        Raises:
            ValueError: if nothing is left once the whitespace is stripped.
        """
        trimmed = v.strip()
        if trimmed == "":
            raise ValueError("must not be empty after strip")
        return trimmed

    @field_validator("tags")
    @classmethod
    def _clean_tags(cls, v: list[str] | None) -> list[str] | None:
        """Normalize the tag list: strip, drop blanks, de-duplicate in order.

        Returns:
            The cleaned tags in first-seen order, or ``None`` when the input
            is ``None`` or no tag survives the cleaning.

        Raises:
            ValueError: if a stripped tag is longer than ``TAG_MAX_LEN``.
        """
        if v is None:
            return None

        # A dict keeps insertion order, so its keys double as an ordered set.
        unique: dict[str, None] = {}
        for raw in v:
            tag = raw.strip()
            if tag == "":
                continue
            if len(tag) > TAG_MAX_LEN:
                raise ValueError(f"tag too long: {tag[:16]}...")
            unique.setdefault(tag, None)

        cleaned = list(unique)
        return cleaned if cleaned else None
|
||||
|
||||
|
||||
class IngestResponse(BaseModel):
    """Response body returned by POST /api/v1/ingest."""

    article_id: int
    content_hash: str
    status: Literal["created", "duplicate"]

    # For a duplicate, reports which dedup layer (L1 / L2 / L3) matched.
    # Populated only when status is "duplicate".
    reason: str | None = None

    # Populated only when the L1 layer matched.
    matched_external_id: str | None = None
|
||||
Reference in New Issue
Block a user