Files
diary-news/backend/app/models/article.py
xiaji 3091f291b2 feat(ingest): API Push 短新闻数据层
- alembic 0008:articles 加 is_short_news/external_id/source_ref/content_hash
  (UNIQUE);sources.kind 加 'api_push';api_tokens 加 purpose + source_id
- SourceKind.API_PUSH enum;Article/ApiToken model 加新字段
- enrichment_article 短新闻跳过 format/image;
  enrichment_loop SQL 加 is_short_news 路径(并入'可 enrich' 条件)
- 入库侧由 commit 2(ingest 接口)负责:写 body_zh_text=body_text,
  format/image/commentary_meituan_status='n/a',
  classify/commentary_status='pending'(带 tags 时 classify='ok')

无迁移爆炸半径:articles.url 保持 NOT NULL,短新闻合成 api-push:// 占位
2026-06-14 15:51:22 +08:00

129 lines
5.2 KiB
Python

"""文章主表:原文 + 译文 + ML 字段预留。"""
from __future__ import annotations
from datetime import datetime
from sqlalchemy import (
BigInteger,
Boolean,
DateTime,
Float,
ForeignKey,
Index,
Integer,
String,
Text,
func,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class Article(Base):
    """ORM model for the ``articles`` table.

    One row per article. Columns are grouped into: source and original
    identifiers, API Push short-news fields, original content, translated
    content, translation status, per-step LLM enrichment statuses,
    dual-provider commentary, reserved ML fields, deduplication, and
    timestamps.
    """

    __tablename__ = "articles"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    # === Source ===
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    source: Mapped["Source"] = relationship(  # noqa: F821
        lazy="joined",
        back_populates="articles",
    )

    # === Original-article identifiers ===
    url: Mapped[str] = mapped_column(Text, nullable=False)
    url_hash: Mapped[str] = mapped_column(
        String(40),
        nullable=False,
        unique=True,
        index=True,
    )
    # ID supplied by the source site.
    guid: Mapped[str | None] = mapped_column(String(255), index=True)

    # === Fields specific to API Push short news ===
    is_short_news: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        index=True,
    )
    # Caller-supplied idempotency key.
    external_id: Mapped[str | None] = mapped_column(String(128), index=True)
    # Finer-grained source within short news.
    source_ref: Mapped[str | None] = mapped_column(String(64), index=True)
    # Content fingerprint, the core dedup key (NULLs do not take part in
    # the unique constraint).
    content_hash: Mapped[str | None] = mapped_column(String(40), index=True, unique=True)

    # === Original content ===
    title: Mapped[str] = mapped_column(Text, nullable=False)
    # Structure is preserved after extraction.
    body_html: Mapped[str | None] = mapped_column(Text)
    body_text: Mapped[str] = mapped_column(Text, default="", nullable=False)
    lang_src: Mapped[str | None] = mapped_column(String(8))
    author: Mapped[str | None] = mapped_column(String(255))
    image_url: Mapped[str | None] = mapped_column(Text)

    # === Translation ===
    title_zh: Mapped[str | None] = mapped_column(Text)
    body_zh_html: Mapped[str | None] = mapped_column(Text)
    body_zh_text: Mapped[str | None] = mapped_column(Text)
    # Body after LLM formatting.
    body_zh_formatted: Mapped[str | None] = mapped_column(Text)
    summary_zh: Mapped[str | None] = mapped_column(Text)

    # === Translation status ===
    # Values: pending / ok / partial / failed / n/a
    translation_status: Mapped[str] = mapped_column(
        String(16),
        nullable=False,
        index=True,
        default="pending",
    )
    # Values: tencent / nllb / cache / skip
    translation_engine: Mapped[str | None] = mapped_column(String(16))
    translation_chars: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    translated_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # === LLM enrichment status (each step tracks its own status) ===
    # Values: pending / ok / failed / n/a
    format_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")
    classify_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")
    image_ai_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")

    # === Dual-provider commentary (Angel = the original commentary, Meituan = LongCat) ===
    commentary_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")
    # Values: angel / meituan / several providers joined together
    commentary_engine: Mapped[str | None] = mapped_column(String(32))
    commentary_meituan_status: Mapped[str] = mapped_column(
        String(16),
        nullable=False,
        default="n/a",
    )
    commentary_meituan_model: Mapped[str | None] = mapped_column(String(64))
    commentary_meituan_error: Mapped[str | None] = mapped_column(Text)

    # === LLM enrichment content ===
    # AI-generated illustration.
    image_ai_url: Mapped[str | None] = mapped_column(Text)

    # === ML fields (reserved; all null in the MVP) ===
    category: Mapped[str | None] = mapped_column(String(32), index=True)
    # Angel commentary.
    commentary: Mapped[str | None] = mapped_column(Text)
    # Meituan commentary.
    commentary_meituan: Mapped[str | None] = mapped_column(Text)
    entities: Mapped[dict | None] = mapped_column(JSONB)
    sentiment: Mapped[float | None] = mapped_column(Float)
    topic_id: Mapped[str | None] = mapped_column(String(64), index=True)
    # Values: left/center/right
    bias: Mapped[str | None] = mapped_column(String(16))

    # === Deduplication ===
    duplicate_of: Mapped[int | None] = mapped_column(
        ForeignKey("articles.id", ondelete="SET NULL"),
        index=True,
    )

    # === Timestamps ===
    published_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        index=True,
    )
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        server_default=func.now(),
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    # Composite indexes, each pairing a column with published_at.
    __table_args__ = (
        Index("ix_articles_source_published", "source_id", "published_at"),
        Index("ix_articles_status_published", "translation_status", "published_at"),
    )

    def __repr__(self) -> str:
        """Return a short debug string with id, source id and translation status."""
        return f"<Article id={self.id} src={self.source_id} status={self.translation_status}>"