"""文章主表:原文 + 译文 + ML 字段预留。""" from __future__ import annotations from datetime import datetime from sqlalchemy import ( BigInteger, DateTime, Float, ForeignKey, Index, Integer, String, Text, func, ) from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column, relationship from app.database import Base class Article(Base): __tablename__ = "articles" id: Mapped[int] = mapped_column(BigInteger, primary_key=True) # === 来源 === source_id: Mapped[int] = mapped_column( ForeignKey("sources.id", ondelete="CASCADE"), nullable=False, index=True ) source: Mapped["Source"] = relationship(back_populates="articles", lazy="joined") # noqa: F821 # === 原文标识 === url: Mapped[str] = mapped_column(Text, nullable=False) url_hash: Mapped[str] = mapped_column(String(40), unique=True, nullable=False, index=True) guid: Mapped[str | None] = mapped_column(String(255), index=True) # 源站给的 ID # === 原文内容 === title: Mapped[str] = mapped_column(Text, nullable=False) body_html: Mapped[str | None] = mapped_column(Text) # 抽取后保留结构 body_text: Mapped[str] = mapped_column(Text, nullable=False, default="") lang_src: Mapped[str | None] = mapped_column(String(8)) author: Mapped[str | None] = mapped_column(String(255)) image_url: Mapped[str | None] = mapped_column(Text) # === 译文 === title_zh: Mapped[str | None] = mapped_column(Text) body_zh_html: Mapped[str | None] = mapped_column(Text) body_zh_text: Mapped[str | None] = mapped_column(Text) body_zh_formatted: Mapped[str | None] = mapped_column(Text) # LLM 排版后 summary_zh: Mapped[str | None] = mapped_column(Text) # === 翻译状态 === translation_status: Mapped[str] = mapped_column( String(16), default="pending", nullable=False, index=True ) # pending / ok / partial / failed / n/a translation_engine: Mapped[str | None] = mapped_column(String(16)) # tencent / nllb / cache / skip translation_chars: Mapped[int] = mapped_column(Integer, default=0, nullable=False) translated_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) # === LLM 增强状态(每个独立状态)=== format_status: Mapped[str] = mapped_column( String(16), default="n/a", nullable=False ) # pending/ok/failed/n/a classify_status: Mapped[str] = mapped_column( String(16), default="n/a", nullable=False ) image_ai_status: Mapped[str] = mapped_column( String(16), default="n/a", nullable=False ) commentary_status: Mapped[str] = mapped_column( String(16), default="n/a", nullable=False ) # === LLM 增强内容 === image_ai_url: Mapped[str | None] = mapped_column(Text) # AI 生成的插图 # === ML 字段(预留,MVP 全 null)=== category: Mapped[str | None] = mapped_column(String(32), index=True) commentary: Mapped[str | None] = mapped_column(Text) entities: Mapped[dict | None] = mapped_column(JSONB) sentiment: Mapped[float | None] = mapped_column(Float) topic_id: Mapped[str | None] = mapped_column(String(64), index=True) bias: Mapped[str | None] = mapped_column(String(16)) # left/center/right # === 去重 === duplicate_of: Mapped[int | None] = mapped_column( ForeignKey("articles.id", ondelete="SET NULL"), index=True ) # === 时间 === published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), index=True) fetched_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), nullable=False, index=True ) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), nullable=False ) __table_args__ = ( Index("ix_articles_source_published", "source_id", "published_at"), Index("ix_articles_status_published", "translation_status", "published_at"), ) def __repr__(self) -> str: return f"
"