"""I/O schema for the API Push short-news ingest endpoint."""

from __future__ import annotations

from datetime import datetime
from typing import Literal

from pydantic import BaseModel, Field, field_validator

# Length limits (planned in commit 1).
TITLE_MAX = 200
BODY_MAX = 5000
EXTERNAL_ID_MAX = 128
SOURCE_REF_MAX = 64
AUTHOR_MAX = 255
TAGS_MAX = 10  # at most 10 tags per payload
TAG_MAX_LEN = 32  # maximum length of a single tag


class IngestPayload(BaseModel):
    """Request body for POST /api/v1/ingest.

    Required:    title + body
    Recommended: external_id (idempotency key)
    Optional:    url / source_ref / author / published_at / tags
    """

    external_id: str | None = Field(default=None, max_length=EXTERNAL_ID_MAX)
    title: str = Field(min_length=1, max_length=TITLE_MAX)
    body: str = Field(min_length=1, max_length=BODY_MAX)
    url: str | None = Field(default=None, max_length=2048)
    source_ref: str | None = Field(default=None, max_length=SOURCE_REF_MAX)
    author: str | None = Field(default=None, max_length=AUTHOR_MAX)
    published_at: datetime | None = None
    tags: list[str] | None = Field(default=None, max_length=TAGS_MAX)

    @field_validator("title", "body")
    @classmethod
    def _strip_and_check(cls, v: str) -> str:
        """Return ``v`` with surrounding whitespace removed.

        Raises:
            ValueError: if nothing is left after stripping.
        """
        stripped = v.strip()
        if stripped:
            return stripped
        raise ValueError("must not be empty after strip")

    @field_validator("tags")
    @classmethod
    def _clean_tags(cls, v: list[str] | None) -> list[str] | None:
        """Normalize the tag list.

        Each tag is stripped; blank tags are dropped; duplicates are removed
        while keeping the first occurrence's position. A list that ends up
        empty is returned as ``None``.

        Raises:
            ValueError: if a stripped tag is longer than ``TAG_MAX_LEN``.
        """
        if v is None:
            return None

        # A dict preserves insertion order, so it doubles as an ordered set.
        unique: dict[str, None] = {}
        for raw in v:
            tag = raw.strip()
            if not tag:
                continue
            if len(tag) > TAG_MAX_LEN:
                # Only the first 16 characters are echoed back in the error.
                raise ValueError(f"tag too long: {tag[:16]}...")
            unique.setdefault(tag, None)

        return list(unique) or None


class IngestResponse(BaseModel):
    """Response for POST /api/v1/ingest."""

    article_id: int
    content_hash: str
    status: Literal["created", "duplicate"]
    # On a duplicate, tells which layer (L1 / L2 / L3) matched;
    # populated only when status is "duplicate".
    reason: str | None = None
    # Populated only on an L1 match.
    matched_external_id: str | None = None