2026-06-07 21:51:01 +08:00
|
|
|
"""文章主表:原文 + 译文 + ML 字段预留。"""
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import (
|
|
|
|
|
BigInteger,
|
|
|
|
|
DateTime,
|
|
|
|
|
Float,
|
|
|
|
|
ForeignKey,
|
|
|
|
|
Index,
|
|
|
|
|
Integer,
|
|
|
|
|
String,
|
|
|
|
|
Text,
|
|
|
|
|
func,
|
|
|
|
|
)
|
|
|
|
|
from sqlalchemy.dialects.postgresql import JSONB
|
|
|
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
|
|
|
|
|
|
from app.database import Base
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Article(Base):
    """ORM mapping for the ``articles`` table.

    A row carries the original article, its translated (``*_zh``) text,
    one status column per processing step, and reserved ML fields.
    """

    __tablename__ = "articles"
    # Composite indexes pairing source / translation status with publish time.
    __table_args__ = (
        Index("ix_articles_source_published", "source_id", "published_at"),
        Index("ix_articles_status_published", "translation_status", "published_at"),
    )

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    # === Source ===
    # Rows are removed together with their parent source (ON DELETE CASCADE).
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    source: Mapped["Source"] = relationship(lazy="joined", back_populates="articles")  # noqa: F821

    # === Original identifiers ===
    url: Mapped[str] = mapped_column(Text, nullable=False)
    url_hash: Mapped[str] = mapped_column(String(40), nullable=False, unique=True, index=True)
    # ID supplied by the source site.
    guid: Mapped[str | None] = mapped_column(String(255), index=True)

    # === Original content ===
    title: Mapped[str] = mapped_column(Text, nullable=False)
    # Extracted HTML with its structure preserved.
    body_html: Mapped[str | None] = mapped_column(Text)
    body_text: Mapped[str] = mapped_column(Text, default="", nullable=False)
    lang_src: Mapped[str | None] = mapped_column(String(8))
    author: Mapped[str | None] = mapped_column(String(255))
    image_url: Mapped[str | None] = mapped_column(Text)

    # === Translation ===
    title_zh: Mapped[str | None] = mapped_column(Text)
    body_zh_html: Mapped[str | None] = mapped_column(Text)
    body_zh_text: Mapped[str | None] = mapped_column(Text)
    # Translated body after LLM formatting.
    body_zh_formatted: Mapped[str | None] = mapped_column(Text)
    summary_zh: Mapped[str | None] = mapped_column(Text)

    # === Translation status ===
    # Values: pending / ok / partial / failed / n/a
    translation_status: Mapped[str] = mapped_column(
        String(16),
        nullable=False,
        index=True,
        default="pending",
    )
    # Values: tencent / nllb / cache / skip
    translation_engine: Mapped[str | None] = mapped_column(String(16))
    translation_chars: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    translated_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # === LLM enhancement status (one independent status per step) ===
    # format_status values: pending/ok/failed/n/a
    format_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")
    classify_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")
    image_ai_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")
    commentary_status: Mapped[str] = mapped_column(String(16), nullable=False, default="n/a")

    # === LLM enhancement content ===
    # AI-generated illustration.
    image_ai_url: Mapped[str | None] = mapped_column(Text)

    # === ML fields (reserved; all null in the MVP) ===
    category: Mapped[str | None] = mapped_column(String(32), index=True)
    commentary: Mapped[str | None] = mapped_column(Text)
    entities: Mapped[dict | None] = mapped_column(JSONB)
    sentiment: Mapped[float | None] = mapped_column(Float)
    topic_id: Mapped[str | None] = mapped_column(String(64), index=True)
    # Values: left/center/right
    bias: Mapped[str | None] = mapped_column(String(16))

    # === Deduplication ===
    # Self-referencing key; set to NULL if the referenced article is deleted.
    duplicate_of: Mapped[int | None] = mapped_column(
        ForeignKey("articles.id", ondelete="SET NULL"),
        index=True,
    )

    # === Timestamps ===
    published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), index=True)
    # fetched_at / created_at are filled by the database via now().
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        server_default=func.now(),
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    def __repr__(self) -> str:
        """Return a short debug string with id, source id and translation status."""
        ident, origin, state = self.id, self.source_id, self.translation_status
        return f"<Article id={ident} src={origin} status={state}>"
|