- alembic 0008:articles 加 is_short_news/external_id/source_ref/content_hash (UNIQUE);sources.kind 加 'api_push';api_tokens 加 purpose + source_id
- SourceKind.API_PUSH enum;Article/ApiToken model 加新字段
- enrichment_article 短新闻跳过 format/image; enrichment_loop SQL 加 is_short_news 路径(并入'可 enrich' 条件)
- 入库侧由 commit 2(ingest 接口)负责:写 body_zh_text=body_text, format/image/commentary_meituan_status='n/a', classify/commentary_status='pending'(带 tags 时 classify='ok')

无迁移爆炸半径:articles.url 保持 NOT NULL,短新闻合成 api-push:// 占位
75 lines
2.8 KiB
Python
75 lines
2.8 KiB
Python
"""采集源模型。"""
|
|
from __future__ import annotations
|
|
|
|
import enum
|
|
from datetime import datetime
|
|
|
|
from sqlalchemy import (
|
|
JSON,
|
|
Boolean,
|
|
DateTime,
|
|
Enum,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
func,
|
|
)
|
|
from sqlalchemy.dialects.postgresql import JSONB
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from app.database import Base
|
|
|
|
|
|
class SourceKind(str, enum.Enum):
    """Kinds of collection source, identified by a lowercase string value.

    The ``str`` mixin makes every member compare equal to its raw value
    (``SourceKind.RSS == "rss"``), and ``SourceKind("rss")`` looks a member
    up from that value.
    """

    RSS = "rss"
    HTML_LIST = "html_list"
    TG_CHANNEL = "tg_channel"
    # Short news pushed in by an external caller via POST /api/v1/ingest.
    API_PUSH = "api_push"
|
|
|
|
|
|
class Source(Base):
    """ORM model for one row of the ``sources`` table (a collection source).

    Columns cover the source's identity (``name``, ``slug``, ``kind``, ``url``),
    its fetch configuration, language/region metadata, per-source tag
    blocking, and bookkeeping about the most recent fetch.
    """

    __tablename__ = "sources"

    # --- identity -----------------------------------------------------------
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String(128), nullable=False)
    slug: Mapped[str] = mapped_column(String(128), unique=True, index=True, nullable=False)
    kind: Mapped[SourceKind] = mapped_column(
        Enum(
            SourceKind,
            name="source_kind",
            # Persist the members' lowercase values ("rss", ...) rather than
            # their Python names ("RSS", ...).
            values_callable=lambda x: [e.value for e in x],
        ),
        default=SourceKind.RSS,
        nullable=False,
    )
    url: Mapped[str] = mapped_column(Text, nullable=False)

    # --- fetch configuration ------------------------------------------------
    detail_selector: Mapped[dict | None] = mapped_column(JSON)
    fetch_interval_min: Mapped[int] = mapped_column(Integer, default=60, nullable=False)
    fetch_cron: Mapped[str | None] = mapped_column(String(64))  # 5-field cron expression
    translate_to: Mapped[str] = mapped_column(String(8), default="zh", nullable=False)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
    region: Mapped[str | None] = mapped_column(String(32), index=True)
    language_src: Mapped[str | None] = mapped_column(String(8))
    priority: Mapped[int] = mapped_column(Integer, default=50, nullable=False, index=True)
    headers_json: Mapped[dict | None] = mapped_column(JSON)
    # Source-level blocked category tags (e.g. ["体育", "娱乐"], i.e. sports /
    # entertainment); used after being merged with llm_settings.blocklist_tags.
    blocklist_tags: Mapped[list[str]] = mapped_column(
        JSONB, nullable=False, default=list, server_default="[]"
    )

    # --- fetch bookkeeping --------------------------------------------------
    last_fetched_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    last_status: Mapped[str | None] = mapped_column(String(64))
    consecutive_failures: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
    )

    # Articles belonging to this source. ``lazy="noload"`` means the collection
    # is not loaded implicitly on attribute access; the cascade setting removes
    # a source's articles together with the source.
    articles: Mapped[list["Article"]] = relationship(  # noqa: F821
        back_populates="source", cascade="all, delete-orphan", lazy="noload"
    )

    def __repr__(self) -> str:
        """Return a short debugging representation: id, slug and kind value."""
        # ``kind`` can be None on an instance that has not been flushed yet
        # (the Python-side column default is applied at INSERT time), or a raw
        # string if a caller assigned one. Fall back to the value itself
        # instead of raising AttributeError from inside repr().
        kind = getattr(self.kind, "value", self.kind)
        return f"<Source id={self.id} slug={self.slug} kind={kind}>"
|