Files
diary-news/backend/alembic/versions/0008_api_push.py
xiaji 3091f291b2 feat(ingest): API Push 短新闻数据层
- alembic 0008:articles 加 is_short_news/external_id/source_ref/content_hash
  (UNIQUE);sources.kind 加 'api_push';api_tokens 加 purpose + source_id
- SourceKind.API_PUSH enum;Article/ApiToken model 加新字段
- enrichment_article 短新闻跳过 format/image;
  enrichment_loop SQL 加 is_short_news 路径(并入'可 enrich' 条件)
- 入库侧由 commit 2(ingest 接口)负责:写 body_zh_text=body_text,
  format/image/commentary_meituan_status='n/a',
  classify/commentary_status='pending'(带 tags 时 classify='ok')

无迁移爆炸半径:articles.url 保持 NOT NULL,短新闻合成 api-push:// 占位
2026-06-14 15:51:22 +08:00

116 lines
4.0 KiB
Python

"""API Push 短新闻来源
新增字段:
- articles.is_short_news BOOL NOT NULL DEFAULT false (索引)
- articles.external_id VARCHAR(128) nullable (索引)
- articles.source_ref VARCHAR(64) nullable (索引)
- articles.content_hash VARCHAR(40) nullable UNIQUE (索引,内容去重核心 key)
- articles.url 不变,保持 NOT NULL(本迁移不改动该列;短新闻入库时合成 api-push:// 占位 url)
- sources.kind ENUM 加 'api_push'
- api_tokens.purpose VARCHAR(16) NOT NULL DEFAULT 'mobile' (索引)
值域: mobile / ingest
- api_tokens.source_id INTEGER NULL FK sources.id ON DELETE CASCADE (索引,
ingest token 绑定的 source)
Revision ID: 0008
Revises: 0007
Create Date: 2026-06-14
"""
from __future__ import annotations
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0008"
down_revision: Union[str, None] = "0007"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Apply revision 0008: schema support for API-pushed short news.

    Steps, in order:

    1. Add the ``api_push`` value to the PostgreSQL ``source_kind`` enum.
    2. Add four columns to ``articles`` (``is_short_news``, ``external_id``,
       ``source_ref``, ``content_hash``), each followed by its index; the
       ``content_hash`` index is UNIQUE and is the content de-duplication key.
    3. Add ``purpose`` and ``source_id`` to ``api_tokens``, each followed by
       its index.

    ``articles.url`` is deliberately left untouched (it stays NOT NULL).
    """
    # Alembic has no dedicated operation for extending a PostgreSQL enum, so
    # the ALTER TYPE statement is executed directly. IF NOT EXISTS makes the
    # statement a no-op when the value is already present (downgrade() in this
    # file leaves the enum value in place).
    # NOTE(review): PostgreSQL does not allow a freshly added enum value to be
    # used inside the transaction that added it; nothing below uses it.
    op.execute("ALTER TYPE source_kind ADD VALUE IF NOT EXISTS 'api_push'")

    # Each entry is (table, column definition, index-is-unique). Every column
    # gets an index named "ix_<table>_<column>" right after it is added.
    #
    # articles.url is intentionally absent: per the original design note,
    # short news stores a synthesized "api-push://source-slug/content-hash"
    # placeholder URL at ingest time so downstream schemas
    # (ArticleDetail.url etc.) do not have to change.
    additions = (
        (
            "articles",
            sa.Column(
                "is_short_news",
                sa.Boolean,
                nullable=False,
                server_default=sa.text("false"),
            ),
            False,
        ),
        ("articles", sa.Column("external_id", sa.String(128), nullable=True), False),
        ("articles", sa.Column("source_ref", sa.String(64), nullable=True), False),
        # UNIQUE index: content_hash is the core de-duplication key.
        ("articles", sa.Column("content_hash", sa.String(40), nullable=True), True),
        (
            "api_tokens",
            sa.Column(
                "purpose",
                sa.String(16),
                nullable=False,
                server_default="mobile",
            ),
            False,
        ),
        (
            "api_tokens",
            sa.Column(
                "source_id",
                sa.Integer,
                sa.ForeignKey("sources.id", ondelete="CASCADE"),
                nullable=True,
            ),
            False,
        ),
    )

    for table_name, column, is_unique in additions:
        op.add_column(table_name, column)
        op.create_index(
            f"ix_{table_name}_{column.name}",
            table_name,
            [column.name],
            unique=is_unique,
        )
def downgrade() -> None:
    """Revert revision 0008 by dropping the indexes and columns it added.

    Columns are removed in the reverse of the order upgrade() created them:
    first the two ``api_tokens`` columns, then the four ``articles`` columns.
    For each column the index is dropped before the column itself.

    The ``api_push`` value added to the ``source_kind`` enum is NOT removed.
    """
    # (table, column) pairs, listed in reverse creation order.
    removals = (
        ("api_tokens", "source_id"),
        ("api_tokens", "purpose"),
        ("articles", "content_hash"),
        ("articles", "source_ref"),
        ("articles", "external_id"),
        ("articles", "is_short_news"),
    )

    for table_name, column_name in removals:
        op.drop_index(f"ix_{table_name}_{column_name}", table_name=table_name)
        op.drop_column(table_name, column_name)

    # PostgreSQL has no native way to remove a value from an enum; the type
    # would have to be rebuilt. This is left as a note for operators: if the
    # 'api_push' value must go after a downgrade, rebuild the type by hand
    # (CREATE TYPE ... + ALTER COLUMN + DROP TYPE).