feat: initial MVP - FastAPI backend + Vue3 frontend + docker-compose
- backend: FastAPI + SQLAlchemy 2.0 (async) + asyncpg + Alembic
- 7 API routes: auth/me/articles/sources/bookmarks/subscriptions/admin
- models: User/Source/Article/Bookmark/Subscription/ApiToken
- services: RSS fetcher (feedparser) + Tencent TMT translator with quota + cache + local NLLB fallback
- workers: APScheduler + asyncio pipeline (fetch -> dedupe -> insert -> translate)
- seed scripts: create_user, seed_sources (5 RSS: Reuters/BBC/Al Jazeera/NHK/DW)
- frontend: Vue 3 + Vite + Naive UI + Pinia + vue-router
- pages: Login, Feed (24h), ArticleDetail, Sources, Bookmarks, AdminSources
- deploy: docker-compose (postgres/redis/api/worker/frontend/caddy)
- docs: README, DEPLOY, architecture, acceptance
This commit is contained in:
11
backend/.dockerignore
Normal file
11
backend/.dockerignore
Normal file
@@ -0,0 +1,11 @@
|
||||
__pycache__
|
||||
*.pyc
|
||||
.pytest_cache
|
||||
.mypy_cache
|
||||
.ruff_cache
|
||||
.venv
|
||||
venv
|
||||
.env
|
||||
*.egg-info
|
||||
build
|
||||
dist
|
||||
36
backend/Dockerfile
Normal file
36
backend/Dockerfile
Normal file
@@ -0,0 +1,36 @@
|
||||
FROM python:3.12-slim

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    TZ=Asia/Hong_Kong

# System dependencies (compiler toolchain, libpq headers, curl, CA bundle, tz database).
# The container's local time zone is also pinned to Asia/Hong_Kong here.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libpq-dev \
        curl \
        ca-certificates \
        tzdata \
    && ln -sf /usr/share/zoneinfo/Asia/Hong_Kong /etc/localtime \
    && echo "Asia/Hong_Kong" > /etc/timezone \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install dependencies first (to take advantage of Docker layer caching).
# NOTE(review): `pip install -e .` runs before `app/` is copied into the image;
# confirm the build backend tolerates the package directory being absent here.
COPY pyproject.toml ./
RUN pip install --upgrade pip && \
    pip install -e .

# Application code (overridden by a volume mount during development; a copy is
# kept in the image as well).
COPY app ./app
COPY alembic ./alembic
COPY alembic.ini ./

EXPOSE 8000

# Start uvicorn by default; the worker container in docker-compose uses a different command.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
42
backend/alembic.ini
Normal file
42
backend/alembic.ini
Normal file
@@ -0,0 +1,42 @@
|
||||
[alembic]
|
||||
script_location = alembic
|
||||
prepend_sys_path = .
|
||||
version_path_separator = os
|
||||
# sqlalchemy.url is injected by env.py at runtime; intentionally left empty here
|
||||
sqlalchemy.url =
|
||||
|
||||
[post_write_hooks]
|
||||
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
59
backend/alembic/env.py
Normal file
59
backend/alembic/env.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Alembic 环境配置:从 app.config 读取 URL,启用 autogenerate。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from logging.config import fileConfig
|
||||
from pathlib import Path
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
# 让 alembic 能 import app
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||
|
||||
from app.config import settings # noqa: E402
|
||||
from app.database import Base # noqa: E402
|
||||
from app.models import * # noqa: F401, F403, E402
|
||||
|
||||
# Alembic Config object for the .ini file in use.
config = context.config
if config.config_file_name is not None:
    # Configure Python logging from the ini file's logging sections.
    fileConfig(config.config_file_name)

# The ini file leaves sqlalchemy.url empty; fill it in from application settings.
config.set_main_option("sqlalchemy.url", settings.sync_database_url)

# Metadata of the application's declarative Base, handed to context.configure below.
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations without a live database connection.

    The context is configured from the database URL alone, with bound
    parameters rendered inline as literals.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations against a live database connection.

    An Engine is built from the ``sqlalchemy.*`` options of the active ini
    section (no connection pooling), and the migrations run inside a
    transaction on a single connection.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            compare_type=True,
        )
        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
|
||||
# Pick the migration runner matching the mode Alembic was invoked in.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
|
||||
24
backend/alembic/script.py.mako
Normal file
24
backend/alembic/script.py.mako
Normal file
@@ -0,0 +1,24 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
180
backend/alembic/versions/0001_initial.py
Normal file
180
backend/alembic/versions/0001_initial.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""initial schema
|
||||
|
||||
Revision ID: 0001
|
||||
Revises:
|
||||
Create Date: 2026-06-07
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0001"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the initial schema: enum types, tables and secondary indexes."""
    # === Users ===
    # The enum type is created explicitly (skipped when it already exists); the
    # column below references it with create_type=False so it is not created twice.
    user_role = postgresql.ENUM("owner", "member", name="user_role", create_type=True)
    user_role.create(op.get_bind(), checkfirst=True)

    op.create_table(
        "users",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("username", sa.String(64), unique=True, index=True, nullable=False),
        sa.Column("email", sa.String(255), unique=True, index=True),
        sa.Column("password_hash", sa.String(255), nullable=False),
        sa.Column(
            "role",
            postgresql.ENUM("owner", "member", name="user_role", create_type=False),
            nullable=False,
        ),
        sa.Column("is_active", sa.Boolean, nullable=False, server_default=sa.text("true")),
        sa.Column("display_name", sa.String(128)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.Column("last_login_at", sa.DateTime(timezone=True)),
    )

    # === Sources ===
    source_kind = postgresql.ENUM("rss", "html_list", "tg_channel", name="source_kind", create_type=True)
    source_kind.create(op.get_bind(), checkfirst=True)

    op.create_table(
        "sources",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String(128), nullable=False),
        sa.Column("slug", sa.String(128), unique=True, index=True, nullable=False),
        sa.Column(
            "kind",
            postgresql.ENUM("rss", "html_list", "tg_channel", name="source_kind", create_type=False),
            nullable=False,
        ),
        sa.Column("url", sa.Text, nullable=False),
        sa.Column("detail_selector", postgresql.JSONB),
        sa.Column("fetch_interval_min", sa.Integer, nullable=False, server_default="60"),
        sa.Column("fetch_cron", sa.String(64)),
        sa.Column("translate_to", sa.String(8), nullable=False, server_default="zh"),
        sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.text("true")),
        sa.Column("region", sa.String(32), index=True),
        sa.Column("language_src", sa.String(8)),
        sa.Column("priority", sa.Integer, nullable=False, server_default="50", index=True),
        sa.Column("headers_json", postgresql.JSONB),
        sa.Column("last_fetched_at", sa.DateTime(timezone=True)),
        sa.Column("last_status", sa.String(64)),
        sa.Column("consecutive_failures", sa.Integer, nullable=False, server_default="0"),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
    )

    # === Articles ===
    op.create_table(
        "articles",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("source_id", sa.Integer, sa.ForeignKey("sources.id", ondelete="CASCADE"), nullable=False),
        sa.Column("url", sa.Text, nullable=False),
        sa.Column("url_hash", sa.String(40), unique=True, nullable=False, index=True),
        sa.Column("guid", sa.String(255), index=True),
        sa.Column("title", sa.Text, nullable=False),
        sa.Column("body_html", sa.Text),
        sa.Column("body_text", sa.Text, nullable=False, server_default=""),
        sa.Column("lang_src", sa.String(8)),
        sa.Column("author", sa.String(255)),
        sa.Column("image_url", sa.Text),
        sa.Column("title_zh", sa.Text),
        sa.Column("body_zh_html", sa.Text),
        sa.Column("body_zh_text", sa.Text),
        sa.Column("summary_zh", sa.Text),
        sa.Column("translation_status", sa.String(16), nullable=False, server_default="pending"),
        sa.Column("translation_engine", sa.String(16)),
        sa.Column("translation_chars", sa.Integer, nullable=False, server_default="0"),
        sa.Column("translated_at", sa.DateTime(timezone=True)),
        sa.Column("category", sa.String(32), index=True),
        sa.Column("commentary", sa.Text),
        sa.Column("entities", postgresql.JSONB),
        sa.Column("sentiment", sa.Float),
        sa.Column("topic_id", sa.String(64), index=True),
        sa.Column("bias", sa.String(16)),
        # Self-reference; the pointer is nulled if the referenced article is deleted.
        sa.Column("duplicate_of", sa.BigInteger, sa.ForeignKey("articles.id", ondelete="SET NULL")),
        sa.Column("published_at", sa.DateTime(timezone=True), index=True),
        sa.Column("fetched_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
    )
    op.create_index("ix_articles_source_published", "articles", ["source_id", "published_at"])
    op.create_index("ix_articles_status_published", "articles", ["translation_status", "published_at"])

    # === Bookmarks ===
    op.create_table(
        "bookmarks",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("user_id", sa.Integer, sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
        sa.Column("article_id", sa.BigInteger, sa.ForeignKey("articles.id", ondelete="CASCADE"), nullable=False),
        sa.Column("note", sa.Text),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
        # At most one bookmark per (user, article) pair.
        sa.UniqueConstraint("user_id", "article_id", name="uq_bookmark_user_article"),
    )
    op.create_index("ix_bookmarks_user_id", "bookmarks", ["user_id"])
    op.create_index("ix_bookmarks_article_id", "bookmarks", ["article_id"])

    # === Subscriptions ===
    subscription_match = postgresql.ENUM("any", "title", "body", name="subscription_match", create_type=True)
    subscription_match.create(op.get_bind(), checkfirst=True)

    op.create_table(
        "subscriptions",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("user_id", sa.Integer, sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
        sa.Column("keyword", sa.String(255), nullable=False),
        sa.Column(
            "match_in",
            postgresql.ENUM("any", "title", "body", name="subscription_match", create_type=False),
            nullable=False,
        ),
        sa.Column("channel", sa.String(32), nullable=False, server_default="telegram"),
        sa.Column("target", sa.Text),
        sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.text("true")),
        sa.Column("last_hit_at", sa.DateTime(timezone=True)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
    )
    op.create_index("ix_subscriptions_user_id", "subscriptions", ["user_id"])

    # === API tokens ===
    op.create_table(
        "api_tokens",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("user_id", sa.Integer, sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
        sa.Column("name", sa.String(64), nullable=False),
        sa.Column("token_hash", sa.String(128), unique=True, nullable=False),
        sa.Column("last_used_at", sa.DateTime(timezone=True)),
        sa.Column("expires_at", sa.DateTime(timezone=True)),
        sa.Column("revoked_at", sa.DateTime(timezone=True)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
    )
    op.create_index("ix_api_tokens_user_id", "api_tokens", ["user_id"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop everything created by ``upgrade``, in reverse dependency order.

    Each explicitly created index is dropped *before* its owning table.
    Dropping a table removes its indexes, so a ``drop_index`` issued after
    ``drop_table`` would fail on an index that no longer exists.
    """
    op.drop_index("ix_api_tokens_user_id", table_name="api_tokens")
    op.drop_table("api_tokens")

    op.drop_index("ix_subscriptions_user_id", table_name="subscriptions")
    op.drop_table("subscriptions")

    op.drop_index("ix_bookmarks_article_id", table_name="bookmarks")
    op.drop_index("ix_bookmarks_user_id", table_name="bookmarks")
    op.drop_table("bookmarks")

    op.drop_index("ix_articles_status_published", table_name="articles")
    op.drop_index("ix_articles_source_published", table_name="articles")
    op.drop_table("articles")

    op.drop_table("sources")
    op.drop_table("users")

    # Enum types are standalone objects; remove them once no table uses them.
    op.execute("DROP TYPE IF EXISTS subscription_match")
    op.execute("DROP TYPE IF EXISTS source_kind")
    op.execute("DROP TYPE IF EXISTS user_role")
|
||||
3
backend/app/__init__.py
Normal file
3
backend/app/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""News Aggregator backend."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
1
backend/app/api/__init__.py
Normal file
1
backend/app/api/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""API routes."""
|
||||
199
backend/app/api/admin.py
Normal file
199
backend/app/api/admin.py
Normal file
@@ -0,0 +1,199 @@
|
||||
"""Admin API(仅 owner)。
|
||||
|
||||
- 源管理 CRUD
|
||||
- 手动触发抓取 / 重译
|
||||
- 源健康看板
|
||||
- 翻译配额管理
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import require_owner
|
||||
from app.database import get_session
|
||||
from app.models.article import Article
|
||||
from app.models.source import Source
|
||||
from app.models.user import User
|
||||
from app.schemas.source import SourceIn, SourceOut, SourceUpdate
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"], dependencies=[Depends(require_owner)])
|
||||
|
||||
|
||||
# === Source CRUD ===
|
||||
@router.get("/sources", response_model=list[SourceOut])
async def list_sources_all(session: AsyncSession = Depends(get_session)):
    """Return every source, ordered by id."""
    query = select(Source).order_by(Source.id)
    result = await session.execute(query)
    return [SourceOut.model_validate(source) for source in result.scalars()]
|
||||
|
||||
|
||||
@router.post("/sources", response_model=SourceOut, status_code=status.HTTP_201_CREATED)
async def create_source(body: SourceIn, session: AsyncSession = Depends(get_session)):
    """Create a source from the request body.

    Raises:
        HTTPException: 409 when the commit hits an integrity error.
    """
    columns = {
        "name": body.name,
        "slug": body.slug,
        "kind": body.kind,
        # Stored as a plain string rather than whatever type the schema declares.
        "url": str(body.url),
        "detail_selector": body.detail_selector,
        "region": body.region,
        "language_src": body.language_src,
        "priority": body.priority,
        "fetch_interval_min": body.fetch_interval_min,
        "translate_to": body.translate_to,
        "enabled": body.enabled,
        "headers_json": body.headers_json,
    }
    src = Source(**columns)
    session.add(src)

    try:
        await session.commit()
    except IntegrityError as e:
        await session.rollback()
        raise HTTPException(status.HTTP_409_CONFLICT, f"slug '{body.slug}' already exists") from e

    # Reload so server-generated values are present on the instance.
    await session.refresh(src)
    return SourceOut.model_validate(src)
|
||||
|
||||
|
||||
@router.patch("/sources/{source_id}", response_model=SourceOut)
async def update_source(
    source_id: int,
    body: SourceUpdate,
    session: AsyncSession = Depends(get_session),
):
    """Apply a partial update to a source.

    Only fields explicitly present in the request body are written.

    Raises:
        HTTPException: 404 when the source does not exist; 409 when the
            update violates a database integrity constraint.
    """
    src = (await session.execute(select(Source).where(Source.id == source_id))).scalar_one_or_none()
    if not src:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Source not found")

    for field_name, value in body.model_dump(exclude_unset=True).items():
        setattr(src, field_name, value)

    try:
        await session.commit()
    except IntegrityError as e:
        # Same policy as create_source: a constraint violation is a client
        # conflict (409), not an unhandled server error.
        await session.rollback()
        raise HTTPException(
            status.HTTP_409_CONFLICT,
            "Update conflicts with an existing source",
        ) from e

    await session.refresh(src)
    return SourceOut.model_validate(src)
|
||||
|
||||
|
||||
@router.delete("/sources/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_source(source_id: int, session: AsyncSession = Depends(get_session)):
    """Delete a source by id; responds 404 when it does not exist."""
    lookup = select(Source).where(Source.id == source_id)
    src = (await session.execute(lookup)).scalar_one_or_none()
    if not src:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Source not found")

    await session.delete(src)
    await session.commit()
    return None
|
||||
|
||||
|
||||
# === 手动触发 ===
|
||||
class TriggerResponse(BaseModel):
    # Response body of the manual-trigger endpoints (refresh / re-translate).
    # Whether the background task was queued.
    triggered: bool
    # Human-readable description of what was queued; empty by default.
    detail: str = ""
|
||||
|
||||
|
||||
@router.post("/refresh/{source_id}", response_model=TriggerResponse)
async def refresh_source(
    source_id: int,
    background: BackgroundTasks,
    session: AsyncSession = Depends(get_session),
):
    """Queue a background fetch for one source without waiting for its result.

    Raises:
        HTTPException: 404 when the source does not exist; 400 when it is disabled.
    """
    lookup = select(Source).where(Source.id == source_id)
    src = (await session.execute(lookup)).scalar_one_or_none()
    if not src:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Source not found")
    if not src.enabled:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "Source disabled")

    # Imported at call time rather than at module import time.
    from app.workers.pipeline import fetch_one_source

    # Fire-and-forget: the fetch runs as a background task after the response.
    background.add_task(fetch_one_source, source_id)
    return TriggerResponse(triggered=True, detail=f"queued fetch for {src.slug}")
|
||||
|
||||
|
||||
async def _run_fetch(source_id: int) -> None:
    """(deprecated) Thin wrapper formerly used for background fetches; see refresh_source."""
    # Imported at call time rather than at module import time.
    from app.workers.pipeline import fetch_one_source

    await fetch_one_source(source_id)
|
||||
|
||||
|
||||
@router.post("/translation/rerun/{article_id}", response_model=TriggerResponse)
async def rerun_translation(
    article_id: int,
    background: BackgroundTasks,
    session: AsyncSession = Depends(get_session),
):
    """Clear an article's stored translation and queue it for re-translation.

    Raises:
        HTTPException: 404 when the article does not exist.
    """
    lookup = select(Article).where(Article.id == article_id)
    art = (await session.execute(lookup)).scalar_one_or_none()
    if not art:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found")

    # Put the article back to "pending" and wipe the previous translation output.
    art.translation_status = "pending"
    for cleared_field in (
        "title_zh",
        "body_zh_text",
        "body_zh_html",
        "translated_at",
        "translation_engine",
    ):
        setattr(art, cleared_field, None)
    await session.commit()

    # Imported at call time rather than at module import time.
    from app.workers.pipeline import translate_article

    background.add_task(translate_article, article_id)
    return TriggerResponse(triggered=True, detail=f"queued translation for article {article_id}")
|
||||
|
||||
|
||||
# === 健康看板 ===
|
||||
class HealthOut(BaseModel):
    # One row of the source health dashboard returned by GET /admin/health.
    # The first eight fields are copied from the Source row.
    source_id: int
    slug: str
    name: str
    enabled: bool
    last_fetched_at: datetime | None
    last_status: str | None
    consecutive_failures: int
    fetch_interval_min: int
    # Number of this source's articles fetched within the last 24 hours.
    article_count_24h: int
|
||||
|
||||
|
||||
@router.get("/health", response_model=list[HealthOut])
async def health(session: AsyncSession = Depends(get_session)):
    """Return per-source health rows, highest priority first.

    Each row carries the source's fetch status fields plus the number of its
    articles fetched in the last 24 hours.
    """
    # Timezone-aware cutoff: ``fetched_at`` is a timezone-aware column, and a
    # naive value would be ambiguous against it (the container's local zone is
    # not UTC). Computed once rather than per source.
    cutoff = datetime.now(timezone.utc) - timedelta(hours=24)

    # One grouped query instead of one COUNT query per source.
    count_rows = (
        await session.execute(
            select(Article.source_id, func.count(Article.id))
            .where(Article.fetched_at >= cutoff)
            .group_by(Article.source_id)
        )
    ).all()
    recent_counts = {source_id: count for source_id, count in count_rows}

    sources = (await session.execute(select(Source).order_by(Source.priority.desc()))).scalars()
    return [
        HealthOut(
            source_id=s.id,
            slug=s.slug,
            name=s.name,
            enabled=s.enabled,
            last_fetched_at=s.last_fetched_at,
            last_status=s.last_status,
            consecutive_failures=s.consecutive_failures,
            fetch_interval_min=s.fetch_interval_min,
            # Sources with no recent articles are absent from the grouped result.
            article_count_24h=recent_counts.get(s.id, 0),
        )
        for s in sources
    ]
|
||||
|
||||
|
||||
# === 翻译配额(管理员视图) ===
|
||||
class QuotaReset(BaseModel):
    # Request body of POST /admin/translation/quota/reset.
    # Value to store as the current month's used-character counter (default 0).
    used_chars: int = 0
|
||||
|
||||
|
||||
@router.post("/translation/quota/reset")
async def reset_quota(payload: QuotaReset) -> dict[str, Any]:
    """Overwrite the current UTC month's translation counter in Redis.

    Returns:
        The Redis key that was written and the value stored under it.
    """
    # Imported at call time rather than at module import time.
    from app.redis_client import get_redis

    now = datetime.now(timezone.utc)
    key = f"translation:month:{now:%Y%m}"

    redis_conn = get_redis()
    await redis_conn.set(key, payload.used_chars)
    return {"key": key, "value": payload.used_chars}
|
||||
194
backend/app/api/articles.py
Normal file
194
backend/app/api/articles.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""/articles 列表与详情。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy import and_, desc, func, or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import get_current_user
|
||||
from app.database import get_session
|
||||
from app.models.article import Article
|
||||
from app.models.bookmark import Bookmark
|
||||
from app.models.source import Source
|
||||
from app.models.user import User
|
||||
from app.schemas.article import (
|
||||
ArticleDetail,
|
||||
ArticleListItem,
|
||||
ArticleListResponse,
|
||||
SourceBrief,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/articles", tags=["articles"])
|
||||
|
||||
|
||||
def _encode_cursor(article: Article) -> str:
    """Encode an article's id and fetch time as an opaque pagination cursor.

    The cursor is URL-safe base64 over a JSON object with keys ``id`` and
    ``ts`` (``fetched_at`` as whole epoch seconds).
    """
    fetched_epoch = int(article.fetched_at.timestamp())
    raw = json.dumps({"id": article.id, "ts": fetched_epoch}).encode()
    return base64.urlsafe_b64encode(raw).decode()
|
||||
|
||||
|
||||
def _decode_cursor(cur: str) -> tuple[int, datetime]:
    """Decode a pagination cursor produced by ``_encode_cursor``.

    Args:
        cur: URL-safe base64 text wrapping a JSON object with ``id`` and ``ts``.

    Returns:
        ``(article_id, timestamp)``; the timestamp is timezone-aware UTC, so
        it does not depend on the server's local time zone.

    Raises:
        HTTPException: 400 when the cursor is malformed.
    """
    from datetime import timezone

    try:
        data = json.loads(base64.urlsafe_b64decode(cur.encode()).decode())
        return int(data["id"]), datetime.fromtimestamp(int(data["ts"]), tz=timezone.utc)
    except (ValueError, KeyError, TypeError, OverflowError, OSError) as exc:
        # ValueError also covers bad base64, bad UTF-8 and bad JSON.
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "Invalid cursor") from exc
|
||||
|
||||
|
||||
@router.get("", response_model=ArticleListResponse)
async def list_articles(
    since: datetime | None = Query(default=None, description="起时间 UTC"),
    until: datetime | None = Query(default=None, description="止时间 UTC"),
    source: str | None = Query(default=None, description="逗号分隔 source slug"),
    category: str | None = None,
    q: str | None = Query(default=None, description="标题/正文搜索"),
    lang: Annotated[str, Query(pattern=r"^(src|zh|both)$")] = "both",
    limit: int = Query(default=50, ge=1, le=200),
    cursor: str | None = None,
    starred_only: bool = False,
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    """List non-duplicate articles, newest first, with optional filters.

    Args:
        since: Lower bound on ``published_at``. When ``since``, ``until`` and
            ``cursor`` are all omitted, defaults to the last 24 hours.
        until: Upper bound on ``published_at``.
        source: Comma-separated source slugs to include.
        category: Exact category to match.
        q: Case-insensitive substring matched literally against the original
            title and body text.
        lang: ``"zh"`` keeps only articles that have a translated title;
            ``"src"`` and ``"both"`` apply no extra filter.
        limit: Page size.
        cursor: Opaque cursor taken from a previous page's ``next_cursor``.
        starred_only: Restrict to articles bookmarked by the current user.

    Returns:
        An ``ArticleListResponse`` whose ``next_cursor`` is set only when
        more rows exist beyond this page.
    """
    stmt = (
        select(Article, Source)
        .join(Source, Source.id == Article.source_id)
        .where(Article.duplicate_of.is_(None))
    )

    # Default window: the past 24 hours.
    if since is None and until is None and cursor is None:
        since = _default_since_24h()

    if since:
        stmt = stmt.where(Article.published_at >= since)
    if until:
        stmt = stmt.where(Article.published_at <= until)
    if category:
        stmt = stmt.where(Article.category == category)

    if source:
        slugs = [s.strip() for s in source.split(",") if s.strip()]
        if slugs:
            stmt = stmt.where(Source.slug.in_(slugs))

    if q:
        # Escape LIKE metacharacters so user-typed "%" and "_" match literally
        # instead of acting as wildcards.
        escaped = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        pattern = f"%{escaped}%"
        stmt = stmt.where(
            or_(
                Article.title.ilike(pattern, escape="\\"),
                Article.body_text.ilike(pattern, escape="\\"),
            )
        )

    # Language filter: "src" and "both" need no condition, the original text
    # is always present.
    if lang == "zh":
        stmt = stmt.where(Article.title_zh.is_not(None))

    if cursor:
        # NOTE(review): the cursor filters on id only, while the ordering below
        # is (published_at DESC, id DESC). Rows whose id order disagrees with
        # their published_at order may be skipped or repeated across pages;
        # confirm whether a (published_at, id) keyset is intended.
        last_id, _ = _decode_cursor(cursor)
        stmt = stmt.where(Article.id < last_id)

    if starred_only:
        stmt = stmt.join(Bookmark, and_(Bookmark.article_id == Article.id, Bookmark.user_id == user.id))

    # Fetch one extra row to learn whether another page exists.
    stmt = stmt.order_by(desc(Article.published_at), desc(Article.id)).limit(limit + 1)

    rows = (await session.execute(stmt)).all()
    has_more = len(rows) > limit
    rows = rows[:limit]

    # Resolve is_starred for the whole page with a single query.
    ids = [a.id for a, _ in rows]
    starred_ids: set[int] = set()
    if ids:
        bm_rows = (
            await session.execute(
                select(Bookmark.article_id).where(
                    Bookmark.user_id == user.id, Bookmark.article_id.in_(ids)
                )
            )
        ).all()
        starred_ids = {b[0] for b in bm_rows}

    items = [
        ArticleListItem(
            id=art.id,
            source=SourceBrief.model_validate(src),
            title=art.title,
            title_zh=art.title_zh,
            summary_zh=art.summary_zh,
            lang_src=art.lang_src,
            translation_status=art.translation_status,
            category=art.category,
            published_at=art.published_at,
            fetched_at=art.fetched_at,
            image_url=art.image_url,
            is_starred=art.id in starred_ids,
        )
        for art, src in rows
    ]

    next_cursor = _encode_cursor(rows[-1][0]) if has_more and rows else None
    return ArticleListResponse(items=items, next_cursor=next_cursor, total=None)
|
||||
|
||||
|
||||
@router.get("/{article_id}", response_model=ArticleDetail)
async def get_article(
    article_id: int,
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    """Return one article with its source and the caller's bookmark state.

    Raises:
        HTTPException: 404 when the article does not exist.
    """
    # The await must be parenthesised before calling ``.first()``: written as
    # ``await session.execute(...).first()`` the method would be looked up on
    # the un-awaited coroutine and raise AttributeError.
    result = await session.execute(
        select(Article, Source)
        .join(Source, Source.id == Article.source_id)
        .where(Article.id == article_id)
    )
    row = result.first()
    if not row:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found")
    article, source = row

    is_starred = (
        await session.execute(
            select(Bookmark.id).where(
                Bookmark.user_id == user.id, Bookmark.article_id == article.id
            )
        )
    ).first() is not None

    return ArticleDetail(
        id=article.id,
        source=SourceBrief.model_validate(source),
        url=article.url,
        title=article.title,
        body_html=article.body_html,
        body_text=article.body_text,
        title_zh=article.title_zh,
        body_zh_html=article.body_zh_html,
        body_zh_text=article.body_zh_text,
        summary_zh=article.summary_zh,
        lang_src=article.lang_src,
        author=article.author,
        image_url=article.image_url,
        translation_status=article.translation_status,
        translation_engine=article.translation_engine,
        translated_at=article.translated_at,
        category=article.category,
        commentary=article.commentary,
        entities=article.entities,
        sentiment=article.sentiment,
        duplicate_of=article.duplicate_of,
        published_at=article.published_at,
        fetched_at=article.fetched_at,
        is_starred=is_starred,
    )
|
||||
|
||||
|
||||
def _default_since_24h() -> datetime:
    """Return the timezone-aware UTC instant 24 hours before now.

    An aware value is returned (instead of the naive, deprecated
    ``datetime.utcnow()``) because it is compared against the timezone-aware
    ``published_at`` column, where a naive value would be ambiguous.
    """
    from datetime import timedelta, timezone

    return datetime.now(timezone.utc) - timedelta(hours=24)
|
||||
65
backend/app/api/auth.py
Normal file
65
backend/app/api/auth.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""登录/刷新/登出。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from jwt.exceptions import InvalidTokenError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import settings
|
||||
from app.core.security import (
|
||||
create_access_token,
|
||||
create_refresh_token,
|
||||
decode_token,
|
||||
verify_password,
|
||||
)
|
||||
from app.database import get_session
|
||||
from app.models.user import User
|
||||
from app.schemas.auth import LoginRequest, RefreshRequest, TokenPair
|
||||
|
||||
router = APIRouter(prefix="/auth", tags=["auth"])
|
||||
|
||||
|
||||
def _pair_for(user: User) -> TokenPair:
    """Issue a fresh access/refresh token pair for ``user``.

    The access token carries the user's role as an extra claim;
    ``expires_in`` is the access-token lifetime in seconds.
    """
    role_claim = {"role": user.role.value}
    access_token = create_access_token(user.id, extra=role_claim)
    refresh_token = create_refresh_token(user.id)
    lifetime_seconds = settings.access_token_ttl_min * 60
    return TokenPair(
        access_token=access_token,
        refresh_token=refresh_token,
        expires_in=lifetime_seconds,
    )
|
||||
|
||||
|
||||
@router.post("/login", response_model=TokenPair)
async def login(body: LoginRequest, session: AsyncSession = Depends(get_session)):
    """Authenticate by username and password and return a token pair.

    Raises:
        HTTPException: 401 when the user is unknown, inactive, or the
            password does not verify (one message for all three cases).
    """
    # The await must complete before ``.scalars()`` is called: written as
    # ``await session.execute(...).scalars().first()`` the chain would be
    # evaluated on the un-awaited coroutine and raise AttributeError.
    result = await session.execute(select(User).where(User.username == body.username))
    user = result.scalars().first()

    if not user or not user.is_active or not verify_password(body.password, user.password_hash):
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid credentials")

    # Build the tokens while the user's attributes are still loaded; whether
    # they stay readable after commit depends on session configuration not
    # visible here.
    tokens = _pair_for(user)

    user.last_login_at = datetime.now(timezone.utc)
    await session.commit()
    return tokens
|
||||
|
||||
|
||||
@router.post("/refresh", response_model=TokenPair)
async def refresh(body: RefreshRequest, session: AsyncSession = Depends(get_session)):
    """Exchange a valid refresh token for a new token pair.

    Raises:
        HTTPException: 401 when the token is invalid, is not a refresh token,
            or its subject is not an active user.
    """
    try:
        payload = decode_token(body.refresh_token)
        if payload.get("type") != "refresh":
            raise InvalidTokenError("wrong type")
        uid = int(payload["sub"])
    except (InvalidTokenError, KeyError, ValueError) as exc:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid refresh token") from exc

    # The await must complete before ``.scalars()`` is called: written as
    # ``await session.execute(...).scalars().first()`` the chain would be
    # evaluated on the un-awaited coroutine and raise AttributeError.
    result = await session.execute(
        select(User).where(User.id == uid, User.is_active.is_(True))
    )
    user = result.scalars().first()
    if not user:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")
    return _pair_for(user)
|
||||
73
backend/app/api/bookmarks.py
Normal file
73
backend/app/api/bookmarks.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""/bookmarks 收藏。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import get_current_user
|
||||
from app.database import get_session
|
||||
from app.models.article import Article
|
||||
from app.models.bookmark import Bookmark
|
||||
from app.models.user import User
|
||||
from app.schemas.misc import BookmarkIn, BookmarkOut
|
||||
|
||||
# Router for the bookmark endpoints; all routes below are mounted under /bookmarks.
router = APIRouter(prefix="/bookmarks", tags=["bookmarks"])
|
||||
|
||||
|
||||
@router.post("", response_model=BookmarkOut, status_code=status.HTTP_201_CREATED)
async def add(
    body: BookmarkIn,
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    # Bookmark an article for the current user.
    # 404 when the article does not exist. If the user already bookmarked the
    # article, the existing row is returned unchanged instead of a new one.
    article_query = select(Article).where(Article.id == body.article_id)
    article = (await session.execute(article_query)).scalar_one_or_none()
    if not article:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Article not found")

    bookmark_query = select(Bookmark).where(
        Bookmark.user_id == user.id, Bookmark.article_id == body.article_id
    )
    bookmark = (await session.execute(bookmark_query)).scalar_one_or_none()
    if not bookmark:
        # Not bookmarked yet: insert, then reload server-generated columns.
        bookmark = Bookmark(user_id=user.id, article_id=body.article_id, note=body.note)
        session.add(bookmark)
        await session.commit()
        await session.refresh(bookmark)
    return BookmarkOut.model_validate(bookmark)
|
||||
|
||||
|
||||
@router.delete("/{article_id}", status_code=status.HTTP_204_NO_CONTENT)
async def remove(
    article_id: int,
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    # Delete the current user's bookmark for `article_id`.
    # A missing bookmark is not an error: the handler returns normally either way.
    lookup = select(Bookmark).where(
        Bookmark.user_id == user.id, Bookmark.article_id == article_id
    )
    bookmark = (await session.execute(lookup)).scalar_one_or_none()
    if not bookmark:
        return None
    await session.delete(bookmark)
    await session.commit()
    return None
|
||||
|
||||
|
||||
@router.get("", response_model=list[BookmarkOut])
async def list_mine(
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    # Return the current user's bookmarks, newest first.
    query = (
        select(Bookmark)
        .where(Bookmark.user_id == user.id)
        .order_by(Bookmark.created_at.desc())
    )
    result = await session.execute(query)
    return [BookmarkOut.model_validate(bookmark) for bookmark in result.scalars()]
|
||||
68
backend/app/api/me.py
Normal file
68
backend/app/api/me.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""/me 当前用户信息 + 翻译配额。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import settings
|
||||
from app.core.deps import get_current_user
|
||||
from app.database import get_session
|
||||
from app.models.user import User
|
||||
from app.redis_client import get_redis
|
||||
|
||||
# Router for the current-user endpoints; all routes below are mounted under /me.
router = APIRouter(prefix="/me", tags=["me"])
|
||||
|
||||
|
||||
# Response body of GET /me: the profile of the authenticated user.
class MeOut(BaseModel):
    id: int
    username: str
    email: str | None
    role: str  # filled from `user.role.value` by the /me handler
    display_name: str | None
    created_at: datetime
|
||||
|
||||
|
||||
# Response body of GET /me/usage: translation quota for the current month.
class UsageOut(BaseModel):
    month: str  # formatted as YYYYMM by the /me/usage handler
    used_chars: int  # counter read from Redis (0 when the key is absent)
    quota_chars: int  # settings.tencent_tmt_quota_month
    remaining_chars: int  # max(0, quota_chars - used_chars)
    buffered_quota: int  # int(quota_chars * (1 - settings.tencent_tmt_quota_buffer))
    pct_used: float  # used / quota * 100, rounded to 2 decimals; 0.0 when quota is 0
|
||||
|
||||
|
||||
@router.get("", response_model=MeOut)
async def me(user: User = Depends(get_current_user)):
    # Return the authenticated user's profile; the role enum is flattened
    # to its string value.
    profile = {
        "id": user.id,
        "username": user.username,
        "email": user.email,
        "role": user.role.value,
        "display_name": user.display_name,
        "created_at": user.created_at,
    }
    return MeOut(**profile)
|
||||
|
||||
|
||||
@router.get("/usage", response_model=UsageOut)
async def usage(
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),  # noqa: ARG001
):
    # Report this month's translation character usage against the quota.
    # The counter lives in Redis under a per-month key derived from the
    # current UTC time; a missing key counts as 0.
    now = datetime.now(timezone.utc)
    redis = get_redis()
    raw_used = await redis.get(f"translation:month:{now:%Y%m}")
    used = int(raw_used or 0)
    quota = settings.tencent_tmt_quota_month
    # Guard against a zero quota before dividing.
    if quota:
        pct = round(used / quota * 100, 2)
    else:
        pct = 0.0
    return UsageOut(
        month=f"{now:%Y%m}",
        used_chars=used,
        quota_chars=quota,
        remaining_chars=max(0, quota - used),
        buffered_quota=int(quota * (1 - settings.tencent_tmt_quota_buffer)),
        pct_used=pct,
    )
|
||||
25
backend/app/api/sources.py
Normal file
25
backend/app/api/sources.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""/sources 源列表(只读,所有登录用户可看)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import get_current_user
|
||||
from app.database import get_session
|
||||
from app.models.source import Source
|
||||
from app.models.user import User
|
||||
from app.schemas.source import SourceOut
|
||||
|
||||
# Router for the read-only source listing; mounted under /sources.
router = APIRouter(prefix="/sources", tags=["sources"])
|
||||
|
||||
|
||||
@router.get("", response_model=list[SourceOut])
async def list_sources(
    user: User = Depends(get_current_user),  # noqa: ARG001
    session: AsyncSession = Depends(get_session),
):
    # List every source, highest priority first, ties broken by name.
    # `user` is unused in the body; the dependency only enforces authentication.
    query = select(Source).order_by(Source.priority.desc(), Source.name)
    result = await session.execute(query)
    return [SourceOut.model_validate(source) for source in result.scalars()]
|
||||
68
backend/app/api/subscriptions.py
Normal file
68
backend/app/api/subscriptions.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""/subscriptions 关键词订阅。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import get_current_user
|
||||
from app.database import get_session
|
||||
from app.models.subscription import Subscription
|
||||
from app.models.user import User
|
||||
from app.schemas.misc import SubscriptionIn, SubscriptionOut
|
||||
|
||||
# Router for keyword subscriptions; all routes below are mounted under /subscriptions.
router = APIRouter(prefix="/subscriptions", tags=["subscriptions"])
|
||||
|
||||
|
||||
@router.post("", response_model=SubscriptionOut, status_code=status.HTTP_201_CREATED)
async def create(
    body: SubscriptionIn,
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    # Create a keyword subscription owned by the current user and return it.
    subscription = Subscription(
        user_id=user.id,
        keyword=body.keyword,
        match_in=body.match_in,
        channel=body.channel,
        target=body.target,
    )
    session.add(subscription)
    await session.commit()
    # Reload so server-generated columns are populated before serialising.
    await session.refresh(subscription)
    return SubscriptionOut.model_validate(subscription)
|
||||
|
||||
|
||||
@router.get("", response_model=list[SubscriptionOut])
async def list_mine(
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    # Return the current user's subscriptions, newest first.
    query = (
        select(Subscription)
        .where(Subscription.user_id == user.id)
        .order_by(Subscription.created_at.desc())
    )
    result = await session.execute(query)
    return [SubscriptionOut.model_validate(item) for item in result.scalars()]
|
||||
|
||||
|
||||
@router.delete("/{sub_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete(
    sub_id: int,
    user: User = Depends(get_current_user),
    session: AsyncSession = Depends(get_session),
):
    # Delete one of the current user's subscriptions.
    # The lookup is scoped to the caller, so another user's subscription id
    # yields the same 404 as a non-existent one.
    lookup = select(Subscription).where(
        Subscription.id == sub_id, Subscription.user_id == user.id
    )
    subscription = (await session.execute(lookup)).scalar_one_or_none()
    if not subscription:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Subscription not found")
    await session.delete(subscription)
    await session.commit()
    return None
|
||||
104
backend/app/config.py
Normal file
104
backend/app/config.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""应用配置:从 .env / 环境变量读取,集中管理所有开关。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import Field, field_validator
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    # Application settings read from environment variables and an optional
    # `.env` file. Fields without a default (database credentials, Redis
    # password, JWT secret) are required.
    #
    # The connection-URL properties percent-encode the credentials so that
    # characters such as `@`, `:`, `/` or `#` in a password cannot corrupt
    # the URL.

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ===== General =====
    tz: str = "Asia/Hong_Kong"
    log_level: str = "INFO"

    # ===== Database =====
    postgres_user: str
    postgres_password: str
    postgres_db: str
    postgres_host: str = "postgres"
    postgres_port: int = 5432

    @property
    def database_url(self) -> str:
        """Return the async (asyncpg) SQLAlchemy URL with escaped credentials."""
        from urllib.parse import quote

        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+asyncpg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    @property
    def sync_database_url(self) -> str:
        """Return the synchronous (psycopg2) URL used by Alembic, credentials escaped."""
        from urllib.parse import quote

        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+psycopg2://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    # ===== Redis =====
    redis_host: str = "redis"
    redis_port: int = 6379
    redis_password: str
    redis_db: int = 0

    @property
    def redis_url(self) -> str:
        """Return the Redis URL (password-only auth) with the password escaped."""
        from urllib.parse import quote

        password = quote(self.redis_password, safe="")
        return f"redis://:{password}@{self.redis_host}:{self.redis_port}/{self.redis_db}"

    # ===== JWT =====
    jwt_secret: str
    jwt_algorithm: str = "HS256"
    access_token_ttl_min: int = 60
    refresh_token_ttl_day: int = 14

    # ===== Tencent Cloud TMT (machine translation) =====
    tencentcloud_secret_id: str = ""
    tencentcloud_secret_key: str = ""
    tencentcloud_region: str = "ap-hongkong"
    tencent_tmt_endpoint: str = "tmt.tencentcloudapi.com"
    tencent_tmt_quota_month: int = 5_000_000
    tencent_tmt_quota_buffer: float = 0.05
    tencent_tmt_max_chars_per_req: int = 4500

    @field_validator("tencent_tmt_quota_buffer")
    @classmethod
    def _check_buffer(cls, v: float) -> float:
        """Reject a quota buffer outside the inclusive range 0.0-0.5."""
        if not 0.0 <= v <= 0.5:
            raise ValueError("buffer 必须在 0~0.5")
        return v

    # ===== Local translation =====
    local_translate_enabled: bool = False
    local_translate_model: str = "nllb-200-distilled-600M"
    local_translate_device: str = "cpu"

    # ===== Fetching =====
    fetch_global_qps: int = 4
    fetch_timeout: int = 20
    fetch_fail_pause_threshold: int = 3
    fetch_max_retries: int = 2

    # ===== Caddy / domain =====
    domain: str = ""
    acme_email: str = ""

    # ===== Internal paths (adjustable after deployment) =====
    # Two directory levels above the directory containing this file.
    project_root: Path = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
@lru_cache
def get_settings() -> Settings:
    """Build the Settings object on first call and return the cached instance afterwards."""
    # Required fields come from the environment/.env at runtime, hence the
    # call-arg ignore for the type checker.
    return Settings()  # type: ignore[call-arg]


# Module-level instance, created at import time; other modules do
# `from app.config import settings`.
settings = get_settings()
|
||||
1
backend/app/core/__init__.py
Normal file
1
backend/app/core/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""core utilities."""
|
||||
77
backend/app/core/deps.py
Normal file
77
backend/app/core/deps.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""通用依赖:获取当前用户、要求 owner。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from jwt.exceptions import InvalidTokenError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.security import decode_token, hash_api_token
|
||||
from app.database import get_session
|
||||
from app.models.api_token import ApiToken
|
||||
from app.models.user import User, UserRole
|
||||
|
||||
# Bearer-token extractor. With auto_error=False the dependency can yield None,
# which _resolve_user turns into its own 401 response.
_bearer = HTTPBearer(auto_error=False)
|
||||
|
||||
|
||||
async def _resolve_user(
    creds: HTTPAuthorizationCredentials | None = Depends(_bearer),
    session: AsyncSession = Depends(get_session),
) -> User:
    """Resolve the bearer credential to an active user.

    The credential is first looked up as an API token (by hash); if no usable
    API token matches, it is decoded as a JWT access token.

    Raises:
        HTTPException: 401 when credentials are missing, the API token has
            expired, the JWT is invalid or not an access token, or the user
            does not exist / is inactive.
    """
    if creds is None or not creds.credentials:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Missing credentials")

    token = creds.credentials

    # NOTE: each `session.execute(...)` is awaited before `.scalars()` is
    # called. Attribute access binds tighter than `await`, so
    # `await session.execute(...).scalars()` would hit the coroutine object
    # and raise AttributeError.

    # 1) Try the API token path first (lookup by hash, non-revoked only).
    h = hash_api_token(token)
    api_result = await session.execute(
        select(ApiToken).where(ApiToken.token_hash == h, ApiToken.revoked_at.is_(None))
    )
    api_row = api_result.scalars().first()
    if api_row:
        if api_row.expires_at and api_row.expires_at < datetime.now(timezone.utc):
            raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Token expired")
        owner_result = await session.execute(select(User).where(User.id == api_row.user_id))
        user = owner_result.scalars().first()
        if user and user.is_active:
            # Track last use so stale tokens can be identified.
            api_row.last_used_at = datetime.now(timezone.utc)
            await session.commit()
            return user
        # Owner missing/inactive: fall through; the JWT path will reject it.

    # 2) Fall back to a JWT access token.
    try:
        payload = decode_token(token)
        if payload.get("type") != "access":
            raise InvalidTokenError("wrong type")
        uid = int(payload["sub"])
    except (InvalidTokenError, KeyError, TypeError, ValueError):
        # TypeError covers a `sub` claim that is not str/int (e.g. null).
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid token") from None

    jwt_result = await session.execute(
        select(User).where(User.id == uid, User.is_active.is_(True))
    )
    user = jwt_result.scalars().first()
    if user is None:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found or inactive")
    return user
|
||||
|
||||
|
||||
async def get_current_user(user: User = Depends(_resolve_user)) -> User:
    """Public dependency returning the authenticated user resolved by _resolve_user."""
    return user
|
||||
|
||||
|
||||
async def require_owner(user: User = Depends(get_current_user)) -> User:
    """Dependency that lets only users with the OWNER role through.

    Raises:
        HTTPException: 403 "Owner only" for any other role.
    """
    if user.role == UserRole.OWNER:
        return user
    raise HTTPException(status.HTTP_403_FORBIDDEN, "Owner only")
|
||||
73
backend/app/core/security.py
Normal file
73
backend/app/core/security.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""鉴权核心:密码哈希 + JWT 编解码 + API Token。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import secrets
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import jwt
|
||||
from passlib.context import CryptContext
|
||||
|
||||
from app.config import settings
|
||||
|
||||
# bcrypt 4.0.1 is compatible with passlib 1.7.4.
# Shared password-hashing context: bcrypt scheme, 12 rounds.
pwd_ctx = CryptContext(schemes=["bcrypt"], deprecated="auto", bcrypt__rounds=12)
|
||||
|
||||
|
||||
def hash_password(plain: str) -> str:
    """Return the hash of *plain* produced by the shared password context."""
    return pwd_ctx.hash(plain)
|
||||
|
||||
|
||||
def verify_password(plain: str, hashed: str) -> bool:
    """Check *plain* against *hashed*.

    Any exception raised during verification is treated as a failed match, so
    this function returns False instead of raising.
    """
    try:
        matches = pwd_ctx.verify(plain, hashed)
    except Exception:
        return False
    else:
        return matches
|
||||
|
||||
|
||||
# === JWT ===
def create_access_token(subject: str | int, extra: dict[str, Any] | None = None) -> str:
    """Encode a signed access token for *subject*.

    The token carries `sub`, `type="access"`, `iat` and `exp` (now plus
    `settings.access_token_ttl_min` minutes). Entries in *extra* are merged
    last, so they override the standard claims on key collision.
    """
    issued_at = datetime.now(timezone.utc)
    expires_at = issued_at + timedelta(minutes=settings.access_token_ttl_min)
    claims: dict[str, Any] = {
        "sub": str(subject),
        "type": "access",
        "iat": int(issued_at.timestamp()),
        "exp": int(expires_at.timestamp()),
    }
    if extra:
        claims.update(extra)
    return jwt.encode(claims, settings.jwt_secret, algorithm=settings.jwt_algorithm)
|
||||
|
||||
|
||||
def create_refresh_token(subject: str | int) -> str:
    """Encode a signed refresh token for *subject*.

    The token carries `sub`, `type="refresh"`, `iat`, `exp` (now plus
    `settings.refresh_token_ttl_day` days) and a random `jti`.
    """
    issued_at = datetime.now(timezone.utc)
    expires_at = issued_at + timedelta(days=settings.refresh_token_ttl_day)
    claims = {
        "sub": str(subject),
        "type": "refresh",
        "iat": int(issued_at.timestamp()),
        "exp": int(expires_at.timestamp()),
        "jti": secrets.token_urlsafe(16),
    }
    return jwt.encode(claims, settings.jwt_secret, algorithm=settings.jwt_algorithm)
|
||||
|
||||
|
||||
def decode_token(token: str) -> dict[str, Any]:
    """Decode *token* with the configured secret and algorithm and return its claims.

    Errors raised by `jwt.decode` propagate to the caller; the callers in this
    project catch `InvalidTokenError`.
    """
    return jwt.decode(token, settings.jwt_secret, algorithms=[settings.jwt_algorithm])
|
||||
|
||||
|
||||
# === API token (for the Android client) ===
def generate_api_token() -> tuple[str, str]:
    """Return `(raw_token, token_hash)`; the raw token is meant to be shown only once."""
    token = secrets.token_urlsafe(32)
    digest = hash_api_token(token)
    return token, digest
|
||||
|
||||
|
||||
def hash_api_token(raw: str) -> str:
    """Return the hex SHA-256 digest of *raw*.

    A plain, unsalted hash is used on purpose: the token is already random.
    """
    hasher = hashlib.sha256()
    hasher.update(raw.encode())
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def constant_time_eq(a: str, b: str) -> bool:
    """Compare two strings without short-circuiting on the first difference.

    `hmac.compare_digest` only accepts ASCII-only `str` arguments and raises
    TypeError otherwise, so both values are encoded to UTF-8 bytes first. This
    keeps the ASCII behaviour unchanged and makes non-ASCII input comparable.
    """
    return hmac.compare_digest(a.encode("utf-8"), b.encode("utf-8"))
|
||||
52
backend/app/database.py
Normal file
52
backend/app/database.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""异步 SQLAlchemy 数据库连接。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncGenerator
|
||||
|
||||
from sqlalchemy.ext.asyncio import (
|
||||
AsyncSession,
|
||||
async_sessionmaker,
|
||||
create_async_engine,
|
||||
)
|
||||
from sqlalchemy.orm import DeclarativeBase
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Base class for all ORM models."""
|
||||
|
||||
|
||||
# Process-wide async engine built from settings.database_url, with SQL echo
# disabled and explicit pool settings.
engine = create_async_engine(
    settings.database_url,
    echo=False,
    pool_size=5,
    max_overflow=10,
    pool_pre_ping=True,
    pool_recycle=1800,
)
|
||||
|
||||
# Session factory bound to the engine above. expire_on_commit is off, so
# objects can still be read after commit (handlers serialise them then);
# autoflush is off, so pending changes are only flushed explicitly or on commit.
AsyncSessionLocal = async_sessionmaker(
    bind=engine,
    class_=AsyncSession,
    expire_on_commit=False,
    autoflush=False,
)
|
||||
|
||||
|
||||
async def get_session() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency: yield one session per request and close it afterwards."""
    async with AsyncSessionLocal() as session:
        try:
            yield session
        finally:
            # Runs whether or not the request handler raised.
            await session.close()
|
||||
|
||||
|
||||
async def init_db() -> None:
    """Create all tables from the ORM metadata. For development; use Alembic in production."""
    # import models to register them
    from app.models import article, source, user  # noqa: F401

    async with engine.begin() as conn:
        # create_all is synchronous, so it is run through run_sync.
        await conn.run_sync(Base.metadata.create_all)
|
||||
117
backend/app/main.py
Normal file
117
backend/app/main.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""FastAPI 入口。
|
||||
|
||||
- 注册路由
|
||||
- 启动 / 关闭事件:连接池、调度器
|
||||
- CORS
|
||||
- 全局异常处理
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
|
||||
from app.api import admin, articles, auth, bookmarks, me, sources, subscriptions
|
||||
from app.config import settings
|
||||
from app.database import engine
|
||||
from app.redis_client import close_redis, get_redis
|
||||
|
||||
# Logger for the API process; root logging is configured with the level from settings.
logger = logging.getLogger("news.api")
logging.basicConfig(
    level=settings.log_level,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: ping Redis on startup, release Redis and the DB engine on shutdown."""
    # Startup
    logger.info("api starting, tz=%s", settings.tz)
    # Ping Redis so a connection problem surfaces at startup.
    await get_redis().ping()
    yield
    # Shutdown
    logger.info("api shutting down")
    await close_redis()
    await engine.dispose()
|
||||
|
||||
|
||||
# The FastAPI application. Swagger docs are served at /api/docs only when
# log_level is "DEBUG"; ReDoc is always disabled.
app = FastAPI(
    title="Diary News",
    description="Private news aggregator",
    version="0.1.0",
    default_response_class=JSONResponse,
    lifespan=lifespan,
    docs_url="/api/docs" if settings.log_level == "DEBUG" else None,
    redoc_url=None,
)
|
||||
|
||||
# CORS: web front end + Android client; left wide open for now (private deployment).
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — restrict allow_origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # open for the MVP; tighten for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
# === Global exception handling (RFC 7807) ===
@app.exception_handler(StarletteHTTPException)
async def http_exc_handler(request: Request, exc: StarletteHTTPException):
    """Render an HTTPException as an RFC 7807-style JSON body.

    A string `detail` becomes the title; any other detail is replaced by the
    generic title "Error". The exception's headers are forwarded.
    """
    detail = exc.detail
    title = detail if isinstance(detail, str) else "Error"
    problem = {
        "type": "about:blank",
        "title": title,
        "status": exc.status_code,
        "instance": str(request.url),
    }
    return JSONResponse(
        status_code=exc.status_code,
        content=problem,
        headers=exc.headers or None,
    )
|
||||
|
||||
|
||||
@app.exception_handler(RequestValidationError)
async def validation_exc_handler(request: Request, exc: RequestValidationError):
    """Render a request validation failure as a 422 problem body including the error list."""
    problem = {
        "type": "about:blank",
        "title": "Validation Error",
        "status": 422,
        "errors": exc.errors(),
        "instance": str(request.url),
    }
    return JSONResponse(status_code=422, content=problem)
|
||||
|
||||
|
||||
# === Routes ===
# Every API router is mounted under the versioned prefix.
API_PREFIX = "/api/v1"

app.include_router(auth.router, prefix=API_PREFIX)
app.include_router(me.router, prefix=API_PREFIX)
app.include_router(articles.router, prefix=API_PREFIX)
app.include_router(sources.router, prefix=API_PREFIX)
app.include_router(bookmarks.router, prefix=API_PREFIX)
app.include_router(subscriptions.router, prefix=API_PREFIX)
app.include_router(admin.router, prefix=API_PREFIX)
|
||||
|
||||
|
||||
# === Health check ===
@app.get("/healthz", include_in_schema=False)
async def healthz():
    # Liveness probe: 200 {"status": "ok"} when Redis answers a ping,
    # otherwise 503 with the error text in the "redis" field.
    try:
        await get_redis().ping()
    except Exception as e:
        degraded = {"status": "degraded", "redis": str(e)}
        return JSONResponse(degraded, status_code=503)
    else:
        return {"status": "ok"}
|
||||
|
||||
|
||||
@app.get("/", include_in_schema=False)
async def root():
    # Service banner. "docs" reports the docs URL the app was actually
    # configured with: "/api/docs" when log_level is "DEBUG", otherwise null,
    # instead of always advertising a route that may not be mounted.
    return {"name": "diary-news", "version": app.version, "docs": app.docs_url}
|
||||
21
backend/app/models/__init__.py
Normal file
21
backend/app/models/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""所有 ORM 模型。
|
||||
|
||||
新模型请在这里 import,确保 Alembic 自动发现。
|
||||
"""
|
||||
from app.models.api_token import ApiToken # noqa: F401
|
||||
from app.models.article import Article # noqa: F401
|
||||
from app.models.bookmark import Bookmark # noqa: F401
|
||||
from app.models.source import Source, SourceKind # noqa: F401
|
||||
from app.models.subscription import Subscription # noqa: F401
|
||||
from app.models.user import User, UserRole # noqa: F401
|
||||
|
||||
# Public names re-exported by the models package.
__all__ = [
    "ApiToken",
    "Article",
    "Bookmark",
    "Source",
    "SourceKind",
    "Subscription",
    "User",
    "UserRole",
]
|
||||
27
backend/app/models/api_token.py
Normal file
27
backend/app/models/api_token.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""API Token(给 Android 用,可独立撤销)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, String, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class ApiToken(Base):
    """Long-lived API token belonging to a user; can be revoked independently."""

    __tablename__ = "api_tokens"

    id: Mapped[int] = mapped_column(primary_key=True)
    user_id: Mapped[int] = mapped_column(
        ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    name: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. "Xiaomi-14"
    token_hash: Mapped[str] = mapped_column(String(128), unique=True, nullable=False, index=True)
    # Only the hash is stored; the raw token is returned to the user once.
    last_used_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    revoked_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
|
||||
91
backend/app/models/article.py
Normal file
91
backend/app/models/article.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""文章主表:原文 + 译文 + ML 字段预留。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
BigInteger,
|
||||
DateTime,
|
||||
Float,
|
||||
ForeignKey,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
func,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Article(Base):
    """Main article table: original content, translation, and reserved ML fields."""

    __tablename__ = "articles"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)

    # === Source ===
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id", ondelete="CASCADE"), nullable=False, index=True
    )
    source: Mapped["Source"] = relationship(back_populates="articles", lazy="joined")  # noqa: F821

    # === Original identifiers ===
    url: Mapped[str] = mapped_column(Text, nullable=False)
    url_hash: Mapped[str] = mapped_column(String(40), unique=True, nullable=False, index=True)
    guid: Mapped[str | None] = mapped_column(String(255), index=True)  # ID supplied by the source site

    # === Original content ===
    title: Mapped[str] = mapped_column(Text, nullable=False)
    body_html: Mapped[str | None] = mapped_column(Text)  # extracted body, structure preserved
    body_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
    lang_src: Mapped[str | None] = mapped_column(String(8))
    author: Mapped[str | None] = mapped_column(String(255))
    image_url: Mapped[str | None] = mapped_column(Text)

    # === Translation ===
    title_zh: Mapped[str | None] = mapped_column(Text)
    body_zh_html: Mapped[str | None] = mapped_column(Text)
    body_zh_text: Mapped[str | None] = mapped_column(Text)
    summary_zh: Mapped[str | None] = mapped_column(Text)

    # === Translation status ===
    translation_status: Mapped[str] = mapped_column(
        String(16), default="pending", nullable=False, index=True
    )
    # pending / ok / partial / failed / n/a
    translation_engine: Mapped[str | None] = mapped_column(String(16))
    # tencent / nllb / cache / skip
    translation_chars: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    translated_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # === ML fields (reserved; all null in the MVP) ===
    category: Mapped[str | None] = mapped_column(String(32), index=True)
    commentary: Mapped[str | None] = mapped_column(Text)
    entities: Mapped[dict | None] = mapped_column(JSONB)
    sentiment: Mapped[float | None] = mapped_column(Float)
    topic_id: Mapped[str | None] = mapped_column(String(64), index=True)
    bias: Mapped[str | None] = mapped_column(String(16))  # left/center/right

    # === Deduplication ===
    # Self-referencing foreign key to another article row.
    duplicate_of: Mapped[int | None] = mapped_column(
        ForeignKey("articles.id", ondelete="SET NULL"), index=True
    )

    # === Timestamps ===
    published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), index=True)
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # Composite indexes on (source_id, published_at) and (translation_status, published_at).
    __table_args__ = (
        Index("ix_articles_source_published", "source_id", "published_at"),
        Index("ix_articles_status_published", "translation_status", "published_at"),
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, source id, translation status)."""
        return f"<Article id={self.id} src={self.source_id} status={self.translation_status}>"
|
||||
27
backend/app/models/bookmark.py
Normal file
27
backend/app/models/bookmark.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""收藏。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, UniqueConstraint, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Bookmark(Base):
    """A user's bookmark of an article, with an optional note."""

    __tablename__ = "bookmarks"

    id: Mapped[int] = mapped_column(primary_key=True)
    user_id: Mapped[int] = mapped_column(
        ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    article_id: Mapped[int] = mapped_column(
        ForeignKey("articles.id", ondelete="CASCADE"), nullable=False, index=True
    )
    note: Mapped[str | None] = mapped_column()
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    # A user can bookmark a given article at most once.
    __table_args__ = (UniqueConstraint("user_id", "article_id", name="uq_bookmark_user_article"),)
|
||||
64
backend/app/models/source.py
Normal file
64
backend/app/models/source.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""采集源模型。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
JSON,
|
||||
Boolean,
|
||||
DateTime,
|
||||
Enum,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
func,
|
||||
)
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class SourceKind(str, enum.Enum):
    """Kind of collection source; members are also plain strings."""

    RSS = "rss"
    HTML_LIST = "html_list"
    TG_CHANNEL = "tg_channel"
|
||||
|
||||
|
||||
class Source(Base):
    """A collection source (feed/site/channel) that articles are fetched from."""

    __tablename__ = "sources"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String(128), nullable=False)
    slug: Mapped[str] = mapped_column(String(128), unique=True, index=True, nullable=False)
    kind: Mapped[SourceKind] = mapped_column(
        Enum(SourceKind, name="source_kind"),
        default=SourceKind.RSS,
        nullable=False,
    )
    url: Mapped[str] = mapped_column(Text, nullable=False)
    detail_selector: Mapped[dict | None] = mapped_column(JSON)
    fetch_interval_min: Mapped[int] = mapped_column(Integer, default=60, nullable=False)
    fetch_cron: Mapped[str | None] = mapped_column(String(64))  # 5-field cron expression
    translate_to: Mapped[str] = mapped_column(String(8), default="zh", nullable=False)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
    region: Mapped[str | None] = mapped_column(String(32), index=True)
    language_src: Mapped[str | None] = mapped_column(String(8))
    priority: Mapped[int] = mapped_column(Integer, default=50, nullable=False, index=True)
    headers_json: Mapped[dict | None] = mapped_column(JSON)
    last_fetched_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    last_status: Mapped[str | None] = mapped_column(String(64))
    consecutive_failures: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
    )

    # Articles are never loaded implicitly through this relationship (lazy="noload").
    articles: Mapped[list["Article"]] = relationship(  # noqa: F821
        back_populates="source", cascade="all, delete-orphan", lazy="noload"
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, slug, kind)."""
        # `kind` only has an insert-time column default, so it is None on an
        # instance built without `kind` until flushed; do not dereference
        # `.value` blindly or repr() would raise AttributeError.
        kind = getattr(self.kind, "value", self.kind)
        return f"<Source id={self.id} slug={self.slug} kind={kind}>"
|
||||
48
backend/app/models/subscription.py
Normal file
48
backend/app/models/subscription.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""关键词订阅(命中即通知)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
DateTime,
|
||||
Enum,
|
||||
ForeignKey,
|
||||
String,
|
||||
Text,
|
||||
func,
|
||||
)
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class SubscriptionMatch(str, enum.Enum):
    """Which part of an article a subscription keyword is matched against."""

    ANY = "any"  # title or body
    TITLE = "title"
    BODY = "body"
|
||||
|
||||
|
||||
class Subscription(Base):
    """ORM row of the ``subscriptions`` table: one keyword subscription of a user."""

    __tablename__ = "subscriptions"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id: Mapped[int] = mapped_column(
        ForeignKey("users.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    # Plain keyword; matching uses ILIKE '%kw%'. Regex/lucene may be added later.
    keyword: Mapped[str] = mapped_column(String(255), nullable=False)
    match_in: Mapped[SubscriptionMatch] = mapped_column(
        Enum(SubscriptionMatch, name="subscription_match"),
        nullable=False,
        default=SubscriptionMatch.ANY,
    )
    # Notification channel: telegram / email / web.
    channel: Mapped[str] = mapped_column(String(32), nullable=False, default="telegram")
    # Channel-specific destination: chat_id / email.
    target: Mapped[str | None] = mapped_column(Text)
    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    last_hit_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    # Filled in by the database (server-side NOW()).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
41
backend/app/models/user.py
Normal file
41
backend/app/models/user.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""用户模型。
|
||||
|
||||
Phase 1 仅 owner + member 两级,后续扩展。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, Enum, String, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class UserRole(str, enum.Enum):
    """Role of a user account; members double as plain ``str`` values."""

    OWNER = "owner"
    MEMBER = "member"
|
||||
|
||||
|
||||
class User(Base):
    """ORM row of the ``users`` table."""

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(primary_key=True)
    username: Mapped[str] = mapped_column(String(64), unique=True, index=True, nullable=False)
    email: Mapped[str | None] = mapped_column(String(255), unique=True, index=True)
    password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
    # Column default: applied when the row is inserted, not when the object
    # is constructed.
    role: Mapped[UserRole] = mapped_column(
        Enum(UserRole, name="user_role"),
        default=UserRole.MEMBER,
        nullable=False,
    )
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
    display_name: Mapped[str | None] = mapped_column(String(128))
    # Filled in by the database (server-side NOW()).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    last_login_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    def __repr__(self) -> str:
        """Return a short debug representation that never raises.

        ``role`` is still ``None`` on an instance that has not been flushed
        yet (its default is applied at INSERT time), so it is dereferenced
        defensively instead of calling ``.value`` unconditionally.
        """
        role = self.role.value if self.role is not None else None
        return f"<User id={self.id} username={self.username} role={role}>"
|
||||
31
backend/app/redis_client.py
Normal file
31
backend/app/redis_client.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Redis 客户端(单例)。用于:
|
||||
- 翻译缓存
|
||||
- 翻译字符配额(月度)
|
||||
- 限流(后续)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import redis.asyncio as redis_async
|
||||
|
||||
from app.config import settings
|
||||
|
||||
_pool: redis_async.Redis | None = None
|
||||
|
||||
|
||||
def get_redis() -> redis_async.Redis:
    """Return the module-wide Redis client, creating it on first call.

    The client is built from ``settings.redis_url`` with UTF-8 decoding of
    responses and at most 20 connections, then stored in the module global
    ``_pool`` and reused by later calls.
    """
    global _pool
    if _pool is not None:
        return _pool
    _pool = redis_async.from_url(
        settings.redis_url,
        encoding="utf-8",
        decode_responses=True,
        max_connections=20,
    )
    return _pool
|
||||
|
||||
|
||||
async def close_redis() -> None:
    """Close the shared Redis client, if one was created, and forget it.

    Does nothing when no client exists. After a successful close the module
    global ``_pool`` is reset to ``None`` so ``get_redis`` builds a new one.
    """
    global _pool
    if _pool is None:
        return
    await _pool.aclose()
    _pool = None
|
||||
1
backend/app/schemas/__init__.py
Normal file
1
backend/app/schemas/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Pydantic schemas for API I/O."""
|
||||
83
backend/app/schemas/article.py
Normal file
83
backend/app/schemas/article.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""Article schemas."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class SourceBrief(BaseModel):
    """Compact source description embedded in article payloads."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = ConfigDict(from_attributes=True)

    id: int
    name: str
    slug: str
    region: str | None = None
|
||||
|
||||
|
||||
class ArticleListItem(BaseModel):
    """One entry of an article list: a trimmed-down set of fields."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = ConfigDict(from_attributes=True)

    id: int
    source: SourceBrief
    title: str
    title_zh: str | None = None
    summary_zh: str | None = None
    lang_src: str | None = None
    translation_status: str
    category: str | None = None
    published_at: datetime | None = None
    fetched_at: datetime
    image_url: str | None = None
    is_starred: bool = False
|
||||
|
||||
|
||||
class ArticleDetail(BaseModel):
    """Full article payload: source text, translation and analysis fields."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = ConfigDict(from_attributes=True)

    id: int
    source: SourceBrief
    url: str

    # Original content.
    title: str
    body_html: str | None = None
    body_text: str

    # Translated content.
    title_zh: str | None = None
    body_zh_html: str | None = None
    body_zh_text: str | None = None
    summary_zh: str | None = None

    lang_src: str | None = None
    author: str | None = None
    image_url: str | None = None

    # Translation bookkeeping.
    translation_status: str
    translation_engine: str | None = None
    translated_at: datetime | None = None

    category: str | None = None
    commentary: str | None = None
    entities: dict | None = None
    sentiment: float | None = None
    duplicate_of: int | None = None

    published_at: datetime | None = None
    fetched_at: datetime
    is_starred: bool = False
|
||||
|
||||
|
||||
class ArticleListResponse(BaseModel):
    """Envelope of an article list response."""

    items: list[ArticleListItem]
    # Optional paging cursor and optional total count; both default to None.
    next_cursor: str | None = None
    total: int | None = None
|
||||
|
||||
|
||||
class ArticleQuery(BaseModel):
    """Reference shape of the article list query string.

    Kept as a parsing reference; the FastAPI routes use ``Query`` parameters
    directly.
    """

    since: datetime | None = None
    until: datetime | None = None
    source: str | None = None  # comma-separated slugs
    category: str | None = None
    q: str | None = None
    # One of "src", "zh" or "both".
    lang: str = Field(default="both", pattern=r"^(src|zh|both)$")
    # Page size, 1..200.
    limit: int = Field(default=50, ge=1, le=200)
    cursor: str | None = None
    starred_only: bool = False
|
||||
20
backend/app/schemas/auth.py
Normal file
20
backend/app/schemas/auth.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""Auth schemas."""
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class LoginRequest(BaseModel):
    """Credentials submitted to log in."""

    # 1..64 characters.
    username: str = Field(min_length=1, max_length=64)
    # 6..128 characters.
    password: str = Field(min_length=6, max_length=128)
|
||||
|
||||
|
||||
class TokenPair(BaseModel):
    """Access/refresh token pair returned to the client."""

    access_token: str
    refresh_token: str
    token_type: str = "bearer"
    expires_in: int  # seconds
|
||||
|
||||
|
||||
class RefreshRequest(BaseModel):
    """Request body carrying a refresh token."""

    refresh_token: str
|
||||
43
backend/app/schemas/misc.py
Normal file
43
backend/app/schemas/misc.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Bookmark / Subscription schemas."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from app.models.subscription import SubscriptionMatch
|
||||
|
||||
|
||||
class BookmarkIn(BaseModel):
    """Input payload for bookmarking an article."""

    article_id: int
    note: str | None = None
|
||||
|
||||
|
||||
class BookmarkOut(BaseModel):
    """Bookmark as returned by the API."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = ConfigDict(from_attributes=True)

    id: int
    user_id: int
    article_id: int
    note: str | None = None
    created_at: datetime
|
||||
|
||||
|
||||
class SubscriptionIn(BaseModel):
    """Input payload for creating a keyword subscription.

    Length limits mirror the ``subscriptions`` table columns so that an
    over-long value is rejected at validation time instead of reaching the
    database.
    """

    # 1..255 characters (column is String(255)).
    keyword: str = Field(min_length=1, max_length=255)
    match_in: SubscriptionMatch = SubscriptionMatch.ANY
    # Notification channel; at most 32 characters (column is String(32)).
    channel: str = Field(default="telegram", max_length=32)
    # Channel-specific destination, e.g. chat_id / email.
    target: str | None = None
|
||||
|
||||
|
||||
class SubscriptionOut(BaseModel):
    """Keyword subscription as returned by the API."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = ConfigDict(from_attributes=True)

    id: int
    keyword: str
    match_in: SubscriptionMatch
    channel: str
    target: str | None = None
    enabled: bool
    last_hit_at: datetime | None = None
    created_at: datetime
|
||||
51
backend/app/schemas/source.py
Normal file
51
backend/app/schemas/source.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Source schemas."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, HttpUrl
|
||||
|
||||
from app.models.source import SourceKind
|
||||
|
||||
|
||||
class SourceOut(BaseModel):
    """News source as returned by the API."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = ConfigDict(from_attributes=True)

    id: int
    name: str
    slug: str
    kind: SourceKind
    url: str
    enabled: bool
    region: str | None = None
    language_src: str | None = None
    priority: int
    fetch_interval_min: int
    translate_to: str

    # Fetch status fields.
    last_fetched_at: datetime | None = None
    last_status: str | None = None
    consecutive_failures: int = 0
|
||||
|
||||
|
||||
class SourceIn(BaseModel):
    """Input payload for creating a news source.

    String limits on ``region``, ``language_src`` and ``translate_to`` mirror
    the ``Source`` columns (String(32) / String(8) / String(8)) so that an
    over-long value is rejected at validation time instead of reaching the
    database.
    """

    name: str = Field(min_length=1, max_length=128)
    # Lower-case letters, digits and hyphens only.
    slug: str = Field(min_length=1, max_length=128, pattern=r"^[a-z0-9-]+$")
    kind: SourceKind = SourceKind.RSS
    url: HttpUrl
    region: str | None = Field(default=None, max_length=32)
    language_src: str | None = Field(default=None, max_length=8)
    priority: int = Field(default=50, ge=1, le=100)
    # Minutes between fetches: 5 minutes .. 24 hours.
    fetch_interval_min: int = Field(default=60, ge=5, le=1440)
    translate_to: str = Field(default="zh", max_length=8)
    enabled: bool = True
    detail_selector: dict | None = None
    headers_json: dict | None = None
|
||||
|
||||
|
||||
class SourceUpdate(BaseModel):
    """Partial update of a news source; every field is optional.

    When a value is supplied it is held to the same limits as on creation:
    ``name`` follows ``SourceIn`` (1..128 characters), and ``region`` /
    ``translate_to`` follow the ``Source`` column sizes (String(32) /
    String(8)).
    """

    name: str | None = Field(default=None, min_length=1, max_length=128)
    enabled: bool | None = None
    priority: int | None = Field(default=None, ge=1, le=100)
    # Minutes between fetches: 5 minutes .. 24 hours.
    fetch_interval_min: int | None = Field(default=None, ge=5, le=1440)
    region: str | None = Field(default=None, max_length=32)
    translate_to: str | None = Field(default=None, max_length=8)
|
||||
1
backend/app/scripts/__init__.py
Normal file
1
backend/app/scripts/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""命令行脚本集合。"""
|
||||
56
backend/app/scripts/create_user.py
Normal file
56
backend/app/scripts/create_user.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""创建用户(默认 owner)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
from getpass import getpass
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.core.security import hash_password
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.user import User, UserRole
|
||||
|
||||
|
||||
async def main(username: str, password: str, email: str | None, role: UserRole) -> int:
    """Create a user and return a process exit code.

    Args:
        username: Login name; must not exist yet.
        password: Plain-text password; stored only as a hash.
        email: Optional e-mail address.
        role: Role assigned to the new user.

    Returns:
        0 when the user was created, 1 when the username already exists or
        the insert violates a database constraint (for example a duplicate
        e-mail, or a concurrent insert of the same username).
    """
    # Local import: this script only imports ``select`` at module level.
    from sqlalchemy.exc import IntegrityError

    async with AsyncSessionLocal() as session:
        exists = (
            await session.execute(select(User).where(User.username == username))
        ).scalar_one_or_none()
        if exists:
            print(f"user '{username}' already exists (id={exists.id})", file=sys.stderr)
            return 1

        u = User(
            username=username,
            email=email,
            password_hash=hash_password(password),
            role=role,
            is_active=True,
        )
        session.add(u)
        try:
            await session.commit()
        except IntegrityError:
            # The pre-check above only covers the username and is not atomic;
            # report constraint violations instead of dumping a traceback.
            await session.rollback()
            print(
                f"could not create user '{username}': username or email already in use",
                file=sys.stderr,
            )
            return 1

        await session.refresh(u)
        print(f"created user id={u.id} username={u.username} role={u.role.value}")
        return 0
|
||||
|
||||
|
||||
def cli() -> None:
    """Parse command-line arguments, create the user and exit.

    The password comes from ``--password`` or, when that is missing or
    empty, from two interactive prompts that must match. In both cases it
    must be 6..128 characters long, the same bounds the login request schema
    enforces; otherwise the created account could never log in.

    Exit codes: 2 for an unusable password, otherwise the code returned by
    ``main``.
    """
    min_len, max_len = 6, 128

    p = argparse.ArgumentParser()
    p.add_argument("--username", required=True)
    p.add_argument("--password", default=None, help="缺省则交互输入")
    p.add_argument("--email", default=None)
    p.add_argument("--role", choices=["owner", "member"], default="member")
    args = p.parse_args()

    password = args.password
    if not password:
        pw1 = getpass("password: ")
        pw2 = getpass("password (again): ")
        if pw1 != pw2 or len(pw1) < min_len:
            print("passwords differ or too short", file=sys.stderr)
            sys.exit(2)
        password = pw1

    # Applies to both paths; previously --password skipped validation.
    if not min_len <= len(password) <= max_len:
        print(f"password must be {min_len}-{max_len} characters long", file=sys.stderr)
        sys.exit(2)

    rc = asyncio.run(main(args.username, password, args.email, UserRole(args.role)))
    sys.exit(rc)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
114
backend/app/scripts/seed_sources.py
Normal file
114
backend/app/scripts/seed_sources.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""种子:导入 MVP 5 源。
|
||||
|
||||
- Reuters World
|
||||
- BBC World
|
||||
- Al Jazeera
|
||||
- NHK World
|
||||
- DW
|
||||
|
||||
RSS 链接为公开 feed,实际链接可能变更;若 fetch 失败,先看 /admin/health。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.source import Source, SourceKind
|
||||
|
||||
# Seed rows for the MVP sources. Every row is an English-language RSS feed
# that is enabled and translated to "zh"; only the tuple fields below vary.
SEEDS = [
    {
        "name": name,
        "slug": slug,
        "kind": SourceKind.RSS,
        "url": url,
        "region": region,
        "language_src": "en",
        "priority": priority,
        "fetch_interval_min": interval_min,
        "translate_to": "zh",
        "enabled": True,
    }
    for name, slug, url, region, priority, interval_min in (
        ("Reuters World", "reuters-world", "https://feeds.reuters.com/Reuters/worldNews", "global", 90, 30),
        ("BBC World", "bbc-world", "https://feeds.bbci.co.uk/news/world/rss.xml", "global", 85, 30),
        ("Al Jazeera", "aljazeera", "https://www.aljazeera.com/xml/rss/all.xml", "mena", 80, 45),
        ("NHK World", "nhk-world", "https://www3.nhk.or.jp/rss/news/cat0.xml", "asia", 70, 60),
        ("DW (Deutsche Welle)", "dw", "https://rss.dw.com/xml/rss-en-all", "eu", 70, 60),
    )
]
|
||||
|
||||
|
||||
async def main() -> int:
    """Insert the ``SEEDS`` rows that do not exist yet and return 0.

    Rows whose slug already exists are skipped via ON CONFLICT DO NOTHING.
    Each insert runs inside its own SAVEPOINT, so a row that fails with an
    IntegrityError is rolled back on its own; rows inserted earlier in the
    same transaction are kept and the reported count stays accurate.
    """
    async with AsyncSessionLocal() as session:
        inserted = 0
        for row in SEEDS:
            stmt = (
                pg_insert(Source)
                .values(**row)
                .on_conflict_do_nothing(index_elements=["slug"])
                .returning(Source.id)
            )
            try:
                # begin_nested() opens a SAVEPOINT and rolls back only this
                # row if the insert raises.
                async with session.begin_nested():
                    rid = (await session.execute(stmt)).scalar_one_or_none()
            except IntegrityError as e:
                print(f"  ! {row['slug']}: {e}", file=sys.stderr)
                continue

            if rid is not None:
                inserted += 1
                print(f"  + {row['slug']} (id={rid})")
            else:
                print(f"  = {row['slug']} (already exists)")

        await session.commit()
        print(f"seeded {inserted} new source(s)")
        return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(asyncio.run(main()))
|
||||
1
backend/app/services/__init__.py
Normal file
1
backend/app/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Services (fetchers / translation)."""
|
||||
12
backend/app/services/fetchers/__init__.py
Normal file
12
backend/app/services/fetchers/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Fetcher implementations."""
|
||||
from app.services.fetchers.base import BaseFetcher, FetchedItem
|
||||
from app.services.fetchers.rss import RSSFetcher
|
||||
|
||||
__all__ = ["BaseFetcher", "FetchedItem", "RSSFetcher"]
|
||||
|
||||
|
||||
def get_fetcher(kind: str, **kwargs) -> BaseFetcher:
    """Build the fetcher for a source kind.

    Args:
        kind: Source kind; only ``"rss"`` is implemented.
        **kwargs: Passed through to the fetcher constructor.

    Raises:
        NotImplementedError: For any other kind (html_list / tg_channel are
            left for Phase 2).
    """
    if kind != "rss":
        raise NotImplementedError(f"fetcher not implemented for kind={kind}")
    return RSSFetcher(**kwargs)
|
||||
67
backend/app/services/fetchers/base.py
Normal file
67
backend/app/services/fetchers/base.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""Fetcher 抽象基类 + 通用工具。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
def normalize_url(url: str) -> str:
    """Return a canonical form of *url*, used as input for ``url_hash``.

    Surrounding whitespace is stripped, scheme and network location are
    lower-cased, ``utm_*`` query parameters (case-insensitive) and the
    fragment are dropped, and trailing slashes are removed from the path
    (an empty path becomes ``/``).
    """
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    parts = urlsplit(url.strip())

    kept_params = []
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        if key.lower().startswith("utm_"):
            continue
        kept_params.append((key, value))

    trimmed_path = parts.path.rstrip("/")
    if not trimmed_path:
        trimmed_path = "/"

    return urlunsplit(
        (
            parts.scheme.lower(),
            parts.netloc.lower(),
            trimmed_path,
            urlencode(kept_params),
            "",  # fragment is always discarded
        )
    )
|
||||
|
||||
|
||||
def url_hash(url: str) -> str:
    """Return the SHA-1 hex digest of the normalized form of *url*."""
    canonical = normalize_url(url)
    digest = hashlib.sha1(canonical.encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
@dataclass
class FetchedItem:
    """Uniform fetcher result: one entry waiting to be stored."""

    # Required fields.
    url: str
    title: str

    # Content.
    body_html: str | None = None
    body_text: str = ""

    # Optional metadata.
    published_at: datetime | None = None
    lang: str | None = None
    author: str | None = None
    image_url: str | None = None
    guid: str | None = None

    # Free-form extra data; every instance gets its own dict.
    raw: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class BaseFetcher(ABC):
    """Abstract base of all fetchers: holds the target URL and HTTP headers."""

    def __init__(self, url: str, headers: dict | None = None):
        """Store *url* and *headers*.

        When *headers* is ``None`` or empty, a default header set containing
        only a ``User-Agent`` is used instead.
        """
        self.url = url
        if headers:
            self.headers = headers
        else:
            self.headers = {"User-Agent": "DiaryNews/0.1 (+https://github.com/)"}

    @abstractmethod
    async def fetch(self) -> list[FetchedItem]:
        """Download and parse the source; return a list of FetchedItem."""

    async def _http_get(self) -> bytes:
        """GET ``self.url`` and return the raw response body.

        Redirects are followed and the timeout comes from
        ``settings.fetch_timeout``. ``raise_for_status()`` is called on the
        response before the body is returned.
        """
        client_options = {
            "timeout": settings.fetch_timeout,
            "follow_redirects": True,
            "headers": self.headers,
        }
        async with httpx.AsyncClient(**client_options) as client:
            response = await client.get(self.url)
            response.raise_for_status()
            return response.content
|
||||
100
backend/app/services/fetchers/rss.py
Normal file
100
backend/app/services/fetchers/rss.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""RSS / Atom fetcher(基于 feedparser)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
import feedparser
|
||||
from dateutil import parser as dtp
|
||||
|
||||
from app.services.fetchers.base import BaseFetcher, FetchedItem
|
||||
|
||||
|
||||
class RSSFetcher(BaseFetcher):
    """Fetcher for RSS / Atom feeds, parsed with feedparser."""

    async def fetch(self) -> list[FetchedItem]:
        """Download the feed and convert its entries to FetchedItem objects.

        Entries without a link/id or without a non-blank title are skipped.

        Raises:
            RuntimeError: When feedparser flags the document as malformed
                and no entry at all could be extracted.
        """
        raw = await self._http_get()
        # Pass the raw bytes through unchanged: feedparser accepts byte
        # strings and detects the character encoding itself. Decoding here
        # as UTF-8 would garble feeds published in any other encoding.
        feed = feedparser.parse(raw)
        if feed.bozo and not feed.entries:
            # The whole document failed to parse.
            raise RuntimeError(f"RSS parse failed: {feed.bozo_exception}")

        items: list[FetchedItem] = []
        for e in feed.entries:
            url = e.get("link") or e.get("id")
            if not url:
                continue
            title = (e.get("title") or "").strip()
            if not title:
                continue

            body_html, body_text = self._extract_body(e)
            published_at = _parse_dt(
                e.get("published") or e.get("updated") or e.get("created")
            )
            items.append(
                FetchedItem(
                    url=url,
                    title=title,
                    body_html=body_html,
                    body_text=body_text,
                    published_at=published_at,
                    lang=e.get("language") or feed.feed.get("language"),
                    author=e.get("author"),
                    image_url=self._extract_image(e),
                    guid=e.get("id") or e.get("guid"),
                )
            )
        return items

    @staticmethod
    def _extract_body(e) -> tuple[str | None, str]:
        """Return ``(body_html, body_text)`` for one feed entry."""
        body_html = None
        contents = e.get("content")
        if contents:
            # Longest content block wins; a missing or None value counts as
            # empty instead of crashing len().
            longest = max(contents, key=lambda c: len(c.get("value") or ""))
            body_html = longest.get("value")
        if not body_html:
            body_html = e.get("summary")
        if not body_html:
            return body_html, ""

        from bs4 import BeautifulSoup

        soup = BeautifulSoup(body_html, "lxml")
        # Drop script/style/noscript elements before extracting text.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        return body_html, soup.get_text(separator="\n", strip=True)

    @staticmethod
    def _extract_image(e) -> str | None:
        """Return the entry's image URL, or None.

        Tried in order: first ``media_content`` item, first
        ``media_thumbnail`` item, first enclosure with an ``image/*`` type.
        """
        for key in ("media_content", "media_thumbnail"):
            media = e.get(key)
            if not media:
                continue
            try:
                candidate = media[0].get("url")
            except (IndexError, KeyError, TypeError, AttributeError):
                candidate = None
            if candidate:
                return candidate

        for enc in e.get("enclosures") or []:
            if (enc.get("type") or "").startswith("image/"):
                return enc.get("href") or enc.get("url")
        return None
|
||||
|
||||
|
||||
def _parse_dt(s: str | None) -> datetime | None:
    """Parse a feed date string into a timezone-aware UTC datetime.

    Parsing is best-effort: dateutil is tried first, then the e-mail
    (RFC 2822 style) date parser. A result without timezone information is
    taken to be UTC.

    Returns:
        The parsed datetime converted to UTC, or ``None`` when *s* is empty,
        cannot be parsed, or lies outside the representable range.
    """
    if not s:
        return None
    try:
        dt = dtp.parse(s)
    except (ValueError, TypeError, OverflowError):
        # dateutil's ParserError is a ValueError subclass; OverflowError is
        # raised for out-of-range numeric fields.
        try:
            dt = parsedate_to_datetime(s)
        except Exception:
            # Deliberate catch-all: a bad date must never fail the fetch.
            return None
    if dt is None:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    try:
        return dt.astimezone(timezone.utc)
    except (OverflowError, ValueError):
        # Placeholder dates at the edge of the datetime range (e.g. year 1
        # with a positive offset) cannot be shifted to UTC.
        return None
|
||||
1
backend/app/services/translation/__init__.py
Normal file
1
backend/app/services/translation/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Translation services."""
|
||||
26
backend/app/services/translation/base.py
Normal file
26
backend/app/services/translation/base.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""翻译后端抽象。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TranslationResult:
    """Outcome of one translation call."""

    text: str  # resulting text
    engine: str  # name of the engine that produced it
    chars: int  # character count attributed to the call
    cached: bool = False  # whether the text was served from cache
|
||||
|
||||
|
||||
class BaseTranslator(ABC):
    """Abstract translation backend."""

    # Engine identifier; subclasses override it.
    name: str = "base"

    @abstractmethod
    async def translate(self, text: str, source: str = "auto", target: str = "zh") -> TranslationResult:
        """Translate *text* from *source* to *target*.

        This is a coroutine; implementations raise an exception on failure.
        """
|
||||
|
||||
|
||||
def count_chars(s: str) -> int:
    """Return an approximate character count of *s* (Unicode code points).

    Tencent TMT bills by character count.
    """
    n_code_points = len(s)
    return n_code_points
|
||||
62
backend/app/services/translation/local.py
Normal file
62
backend/app/services/translation/local.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""本地翻译(降级用,需要 transformers + 模型文件)。
|
||||
|
||||
默认关闭。启用方式:
|
||||
- LOCAL_TRANSLATE_ENABLED=true
|
||||
- 容器内预装模型(Volume 挂载)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from app.config import settings
|
||||
from app.services.translation.base import BaseTranslator, TranslationResult
|
||||
|
||||
logger = logging.getLogger("news.translate.local")
|
||||
|
||||
|
||||
class LocalTranslator(BaseTranslator):
    """Local translation backend built on a transformers pipeline.

    The model is loaded lazily on the first translation, so importing or
    constructing the class stays cheap.
    """

    name = "nllb"

    def __init__(self):
        """Prepare an unloaded translator.

        Raises:
            RuntimeError: When local translation is disabled in settings.
        """
        if not settings.local_translate_enabled:
            raise RuntimeError("LocalTranslator disabled in settings")
        import threading

        # Lazy model loading (avoids loading a large model at import time).
        self._pipe = None
        # Loading now happens on executor threads; the lock keeps concurrent
        # first calls from loading the model more than once.
        self._load_lock = threading.Lock()

    def _ensure_loaded(self):
        """Load tokenizer, model and pipeline once; later calls return at once."""
        if self._pipe is not None:
            return
        with self._load_lock:
            if self._pipe is not None:
                return
            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

            model_name = settings.local_translate_model
            logger.info("loading local translation model: %s", model_name)
            tok = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
            self._pipe = pipeline(
                "translation",
                model=model,
                tokenizer=tok,
                device=settings.local_translate_device,
            )

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate *text* with the local model.

        Blank input is returned unchanged with ``chars=0``. Both the one-time
        model load and the inference run in the default executor so that
        neither blocks the event loop.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        import asyncio

        # Simple convention for language codes: "en"/"auto" -> eng_Latn and
        # "zh" -> zho_Hans; anything else is passed through unchanged.
        src = "eng_Latn" if source in ("en", "auto") else source
        tgt = "zho_Hans" if target == "zh" else target

        def _run():
            self._ensure_loaded()
            return self._pipe(text, src_lang=src, tgt_lang=tgt, max_length=2000)

        loop = asyncio.get_running_loop()
        out = await loop.run_in_executor(None, _run)
        return TranslationResult(
            text=out[0]["translation_text"], engine=self.name, chars=len(text)
        )
|
||||
146
backend/app/services/translation/service.py
Normal file
146
backend/app/services/translation/service.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""翻译服务门面:配额检查 + 缓存 + 引擎选择 + 月度计数。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Protocol
|
||||
|
||||
from app.config import settings
|
||||
from app.redis_client import get_redis
|
||||
from app.services.translation.base import BaseTranslator, TranslationResult
|
||||
from app.services.translation.local import LocalTranslator
|
||||
from app.services.translation.tencent import TencentTranslator
|
||||
|
||||
logger = logging.getLogger("news.translate.service")
|
||||
|
||||
|
||||
# Cache key
def _cache_key(text: str, src: str, tgt: str) -> str:
    """Return the cache key for a (source language, target language, text) triple.

    The key is ``translation:cache:`` followed by the SHA-1 hex digest of
    ``"{src}|{tgt}|{text}"``.
    """
    payload = f"{src}|{tgt}|{text}".encode()
    digest = hashlib.sha1(payload).hexdigest()
    return "translation:cache:" + digest
|
||||
|
||||
|
||||
def _month_key() -> str:
    """Return the usage-counter key for the current UTC month (``YYYYMM``)."""
    stamp = datetime.now(timezone.utc).strftime("%Y%m")
    return "translation:month:" + stamp
|
||||
|
||||
|
||||
class TranslationService:
    """Translation facade: cache lookup, quota check, engine choice and
    monthly usage accounting."""

    def __init__(self):
        # Engines are created lazily on first use.
        self._tencent: BaseTranslator | None = None
        self._local: BaseTranslator | None = None
        self._sem = asyncio.Semaphore(3)  # cap on concurrent engine calls

    def _primary(self) -> BaseTranslator:
        """Return the Tencent translator, creating it on first use."""
        if self._tencent is None:
            self._tencent = TencentTranslator()
        return self._tencent

    def _fallback(self) -> BaseTranslator | None:
        """Return the local translator, or None when disabled or failing to init."""
        if self._local is None and settings.local_translate_enabled:
            try:
                self._local = LocalTranslator()
            except Exception as e:
                logger.warning("local translator init failed: %s", e)
                self._local = None
        return self._local

    async def can_use_tencent(self, chars: int) -> bool:
        """Tell whether *chars* more characters fit in this month's Tencent quota.

        Returns False when either Tencent credential is missing; the
        translator constructor requires both, so checking only the id would
        let ``_primary()`` raise later. The usable quota is the monthly
        quota reduced by the configured buffer fraction.
        """
        if not (settings.tencentcloud_secret_id and settings.tencentcloud_secret_key):
            return False
        r = get_redis()
        used = int(await r.get(_month_key()) or 0)
        buffered = int(
            settings.tencent_tmt_quota_month * (1 - settings.tencent_tmt_quota_buffer)
        )
        return (used + chars) <= buffered

    async def add_usage(self, chars: int) -> None:
        """Add *chars* to this month's usage counter and refresh its TTL."""
        r = get_redis()
        key = _month_key()
        async with r.pipeline(transaction=False) as pipe:
            pipe.incrby(key, chars)
            # Expire one day after the first day of next month, so the
            # counter outlives the month it counts.
            now = datetime.now(timezone.utc)
            if now.month == 12:
                next_month = now.replace(year=now.year + 1, month=1, day=1)
            else:
                next_month = now.replace(month=now.month + 1, day=1)
            ttl = int((next_month - now).total_seconds()) + 86400
            pipe.expire(key, ttl)
            await pipe.execute()

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate *text*, preferring cache, then Tencent, then the local engine.

        Blank input is returned unchanged. When no engine is usable, or every
        attempt fails, the original text is returned with an explanatory
        suffix and ``engine="skip"``. Such placeholder results are never
        cached, so a transient failure cannot stick in the cache for 30 days.
        """
        if not text.strip():
            return TranslationResult(text=text, engine="skip", chars=0)

        chars = len(text)

        # 1) Cache
        r = get_redis()
        ck = _cache_key(text, source, target)
        cached = await r.get(ck)
        if cached is not None:
            return TranslationResult(text=cached, engine="cache", chars=chars, cached=True)

        # 2) Pick an engine
        engine: BaseTranslator
        if await self.can_use_tencent(chars):
            engine = self._primary()
        else:
            fb = self._fallback()
            if fb is None:
                # No local engine: return the original text with a marker.
                return TranslationResult(
                    text=text + "\n\n[本条未翻译:配额耗尽且未启用本地翻译]",
                    engine="skip",
                    chars=chars,
                )
            engine = fb
            logger.info("fallback to local translator for %d chars", chars)

        # 3) Call it, degrading to the fallback engine on failure
        res: TranslationResult | None = None
        async with self._sem:
            try:
                res = await engine.translate(text, source=source, target=target)
            except Exception as e:
                logger.exception("translate failed with %s: %s", engine.name, e)
                failure: Exception = e
                fb = self._fallback()
                if fb is not None and engine is not fb:
                    try:
                        res = await fb.translate(text, source=source, target=target)
                    except Exception as fb_err:
                        # The fallback failing too must not escape either.
                        logger.exception(
                            "fallback translate failed with %s: %s", fb.name, fb_err
                        )
                        failure = fb_err
                if res is None:
                    res = TranslationResult(
                        text=text + f"\n\n[翻译失败: {failure}]",
                        engine="skip",
                        chars=chars,
                    )

        # 4) Cache real translations only, from either engine
        if res.engine != "skip":
            try:
                await r.set(ck, res.text, ex=60 * 60 * 24 * 30)  # 30 days
            except Exception as e:
                # Best effort: a cache outage must not fail the translation.
                logger.warning("translation cache write failed: %s", e)

        # 5) Usage accounting (Tencent only)
        if res.engine == "tencent":
            try:
                await self.add_usage(res.chars or chars)
            except Exception as e:
                logger.warning("add_usage failed: %s", e)

        return res
|
||||
|
||||
|
||||
# 全局单例
|
||||
service = TranslationService()
|
||||
|
||||
|
||||
# Lets the backend worker call the service directly
class _Protocol(Protocol):
    """Structural type of anything exposing the ``translate`` coroutine."""

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        ...
|
||||
74
backend/app/services/translation/tencent.py
Normal file
74
backend/app/services/translation/tencent.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""腾讯云文本翻译 TMT。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
from typing import Any
|
||||
|
||||
from tencentcloud.common import credential
|
||||
from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
|
||||
TencentCloudSDKException,
|
||||
)
|
||||
from tencentcloud.tmt.v20180321 import models, tmt_client
|
||||
|
||||
from app.config import settings
|
||||
from app.services.translation.base import BaseTranslator, TranslationResult
|
||||
|
||||
logger = logging.getLogger("news.translate.tencent")
|
||||
|
||||
# Mapping of common language codes; each listed code maps to itself.
_LANG_MAP = {
    code: code
    for code in ("en", "zh", "ja", "ko", "fr", "de", "es", "ru", "ar")
}
|
||||
|
||||
|
||||
class TencentTranslator(BaseTranslator):
    """Translation backend that calls Tencent Cloud TMT ``TextTranslate``."""

    name = "tencent"

    def __init__(self):
        """Build the TMT client from the configured credentials and region.

        Raises:
            RuntimeError: When the secret id or the secret key is missing.
        """
        secret_id = settings.tencentcloud_secret_id
        secret_key = settings.tencentcloud_secret_key
        if not secret_id or not secret_key:
            raise RuntimeError("Tencent Cloud credentials missing")
        self.cred = credential.Credential(secret_id, secret_key)
        self.client = tmt_client.TmtClient(self.cred, settings.tencentcloud_region)

    async def translate(
        self, text: str, source: str = "auto", target: str = "zh"
    ) -> TranslationResult:
        """Translate *text* via TMT, retrying once after an SDK error.

        Blank input is returned unchanged with ``chars=0``. A second
        consecutive ``TencentCloudSDKException`` is re-raised to the caller.
        """
        if not text.strip():
            return TranslationResult(text=text, engine=self.name, chars=0)

        # Codes missing from the map (including "auto") pass through as-is.
        source = _LANG_MAP.get(source, source)
        target = _LANG_MAP.get(target, target)

        attempt = 0
        while True:
            try:
                req = models.TextTranslateRequest()
                req.SourceText = text
                req.Source = source
                req.Target = target
                req.ProjectId = 0
                # The SDK call is synchronous, so run it on a worker thread.
                resp: Any = await asyncio.to_thread(self.client.TextTranslate, req)
                translated = getattr(resp, "TargetText", "") or ""
                return TranslationResult(
                    text=translated, engine=self.name, chars=len(text), cached=False
                )
            except TencentCloudSDKException as e:
                logger.warning("tencent translate attempt %s failed: %s", attempt, e)
                if attempt != 0:
                    raise
                # Short randomized pause before the single retry.
                await asyncio.sleep(0.5 + random.random())
                attempt += 1
|
||||
1
backend/app/workers/__init__.py
Normal file
1
backend/app/workers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Background workers (fetch + translate + scheduler)."""
|
||||
112
backend/app/workers/__main__.py
Normal file
112
backend/app/workers/__main__.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Worker 入口:启动调度器 + 异步任务。
|
||||
|
||||
`docker compose exec worker python -m app.workers`
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import signal
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.config import settings
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.source import Source
|
||||
from app.workers.pipeline import fetch_one_source, run_once
|
||||
|
||||
# Logger used by the worker entry point.
logger = logging.getLogger("news.worker")
# Root logging is configured at import time; the level comes from settings.log_level.
logging.basicConfig(
    level=settings.log_level,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
|
||||
|
||||
|
||||
async def _rebuild_jobs(scheduler: AsyncIOScheduler) -> None:
    """Rebuild the per-source fetch jobs from the enabled rows of ``sources``.

    Each enabled source gets one job with id ``src:<slug>``. The trigger is a
    cron trigger when ``fetch_cron`` is set, otherwise an interval trigger of
    ``fetch_interval_min`` minutes.

    Only jobs whose id starts with ``src:`` are replaced; other jobs on the
    scheduler are left in place. A source whose trigger cannot be built is
    logged and skipped instead of aborting the whole rebuild.

    Args:
        scheduler: the scheduler whose per-source jobs are replaced.
    """
    job_prefix = "src:"

    # Query first: if the database is unreachable the existing jobs stay intact.
    async with AsyncSessionLocal() as s:
        rows = (await s.execute(select(Source).where(Source.enabled.is_(True)))).scalars()
        sources = list(rows)

    # Remove only per-source jobs. remove_all_jobs() would also delete the
    # periodic job that invokes this function, so it would never fire again.
    for job in scheduler.get_jobs():
        if job.id.startswith(job_prefix):
            scheduler.remove_job(job.id)

    if not sources:
        logger.warning("no enabled sources; scheduler idle")
        return

    for src in sources:
        try:
            if src.fetch_cron:
                # Evaluate the crontab in the scheduler's timezone rather than
                # whatever the host's local timezone happens to be.
                trigger = CronTrigger.from_crontab(src.fetch_cron, timezone=scheduler.timezone)
            else:
                trigger = IntervalTrigger(minutes=src.fetch_interval_min)
        except (TypeError, ValueError):
            logger.exception("invalid schedule for source %s; job not scheduled", src.slug)
            continue
        scheduler.add_job(
            fetch_one_source,
            trigger=trigger,
            args=[src.id],
            id=f"{job_prefix}{src.slug}",
            replace_existing=True,
            max_instances=1,
            coalesce=True,
            misfire_grace_time=300,
        )
        logger.info("scheduled %s every %s", src.slug, src.fetch_cron or f"{src.fetch_interval_min}m")
|
||||
|
||||
|
||||
async def _daily_rebuild() -> None:
|
||||
"""每天 00:30 重建 job 列表(支持运行时新增源)。"""
|
||||
scheduler = AsyncIOScheduler()
|
||||
# 临时实例,只为重建用
|
||||
# 实际用全局 scheduler 实例
|
||||
pass
|
||||
|
||||
|
||||
def build_scheduler() -> AsyncIOScheduler:
    """Return a new asyncio scheduler configured for the Asia/Hong_Kong timezone."""
    return AsyncIOScheduler(timezone="Asia/Hong_Kong")
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the worker: schedule jobs, start the scheduler, wait for a signal.

    Steps:
      1. Build the scheduler and create the per-source fetch jobs.
      2. Register a daily 00:30 job that rebuilds the per-source jobs.
      3. Register a one-shot job that runs ``run_once`` right after startup.
      4. Block until SIGINT/SIGTERM is received, then shut the scheduler down
         without waiting for running jobs.
    """
    scheduler = build_scheduler()
    await _rebuild_jobs(scheduler)

    # Rebuild the job list once a day at 00:30 in the scheduler's timezone.
    scheduler.add_job(
        _rebuild_jobs,
        trigger=CronTrigger(hour=0, minute=30, timezone=scheduler.timezone),
        args=[scheduler],
        id="rebuild_jobs",
        replace_existing=True,
    )

    # Run the full pipeline exactly once at startup. A date trigger is a true
    # one-shot; IntervalTrigger(minutes=0) is not, because APScheduler 3.x
    # coerces a zero-length interval to one second and keeps re-firing the job.
    scheduler.add_job(
        run_once,
        trigger="date",
        run_date=datetime.now(timezone.utc),
        id="startup_run",
        # Never drop the startup run just because start() took a moment.
        misfire_grace_time=None,
    )

    scheduler.start()
    logger.info("scheduler started with %d jobs", len(scheduler.get_jobs()))

    stop = asyncio.Event()

    def _signal_handler():
        logger.info("shutdown signal received")
        stop.set()

    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, _signal_handler)
        except NotImplementedError:
            # Event loops on platforms such as Windows do not support this.
            pass

    await stop.wait()
    logger.info("stopping scheduler")
    scheduler.shutdown(wait=False)
|
||||
|
||||
|
||||
# Entry point for `python -m app.workers`: run the async main() to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
274
backend/app/workers/pipeline.py
Normal file
274
backend/app/workers/pipeline.py
Normal file
@@ -0,0 +1,274 @@
|
||||
"""核心 pipeline:
|
||||
- 抓取(去重 + 入库)
|
||||
- 翻译(分块 + 配额管理)
|
||||
- 手动 run_once / fetch_one_source / translate_article
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
|
||||
from app.config import settings
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.article import Article
|
||||
from app.models.source import Source, SourceKind
|
||||
from app.services.fetchers import get_fetcher
|
||||
from app.services.fetchers.base import FetchedItem, url_hash
|
||||
from app.services.translation.service import service as translation_service
|
||||
|
||||
logger = logging.getLogger("news.pipeline")
|
||||
|
||||
TRANSLATE_BODY_MAX = 8000  # Maximum number of body characters translated per article.
# NOTE(review): despite the name, this is a single module-wide semaphore, so it
# caps concurrent fetch_one_source() runs across all sources at two.
SEM_PER_SOURCE = asyncio.Semaphore(2)
|
||||
|
||||
|
||||
# === 抓取 + 入库 ===
|
||||
async def fetch_one_source(source_id: int) -> None:
    """Fetch one source, store its new articles and translate a batch of them.

    The whole run is guarded by ``SEM_PER_SOURCE``. A missing or disabled
    source is logged and skipped. Fetcher errors are logged and recorded with
    ``_mark_failure`` rather than raised. After a successful insert, up to 20
    pending articles of the source are translated.

    Args:
        source_id: primary key of the ``Source`` row to fetch.
    """
    async with SEM_PER_SOURCE:
        async with AsyncSessionLocal() as session:
            lookup = select(Source).where(Source.id == source_id)
            src = (await session.execute(lookup)).scalar_one_or_none()
            if not (src and src.enabled):
                logger.info("source %s disabled or missing", source_id)
                return

        try:
            fetcher = get_fetcher(src.kind.value, url=src.url, headers=src.headers_json)
            items = await fetcher.fetch()
        except Exception as e:
            logger.exception("fetch failed for %s: %s", src.slug, e)
            failure_status = f"fetch: {type(e).__name__}: {e}"
            await _mark_failure(source_id, failure_status)
            return

        # An empty feed still counts as a successful fetch.
        if not items:
            await _mark_success(source_id, n_new=0)
            return

        inserted_count = await _bulk_insert(src, items)
        await _mark_success(source_id, n_new=inserted_count)
        logger.info("source %s: %d new articles", src.slug, inserted_count)

        # Once stored, start translating this source's pending articles.
        await _translate_recent_for_source(source_id, max_n=20)
|
||||
|
||||
|
||||
async def _mark_failure(source_id: int, status: str) -> None:
    """Record a failed fetch on the source row.

    Stores ``status``, increments ``consecutive_failures`` and stamps
    ``last_fetched_at`` with the current UTC time. Once the failure count
    reaches ``settings.fetch_fail_pause_threshold`` the stored
    ``fetch_interval_min`` is doubled, capped at 720 minutes. Does nothing
    when the source does not exist.

    Args:
        source_id: primary key of the ``Source`` row.
        status: text stored in ``last_status``.
    """
    async with AsyncSessionLocal() as session:
        result = await session.execute(select(Source).where(Source.id == source_id))
        src = result.scalar_one_or_none()
        if not src:
            return

        src.last_status = status
        src.consecutive_failures = src.consecutive_failures + 1
        src.last_fetched_at = datetime.now(timezone.utc)

        threshold_reached = src.consecutive_failures >= settings.fetch_fail_pause_threshold
        if threshold_reached:
            # Back off: double the interval, but never beyond 720 minutes.
            doubled = src.fetch_interval_min * 2
            src.fetch_interval_min = min(720, doubled)
            logger.warning(
                "source %s paused, interval bumped to %dm",
                src.slug,
                src.fetch_interval_min,
            )

        await session.commit()
|
||||
|
||||
|
||||
async def _mark_success(source_id: int, n_new: int) -> None:
    """Record a successful fetch on the source row.

    Sets ``last_status`` to ``ok:new=<n_new>``, resets ``consecutive_failures``
    to zero and stamps ``last_fetched_at`` with the current UTC time. Does
    nothing when the source does not exist.

    Args:
        source_id: primary key of the ``Source`` row.
        n_new: number of newly inserted articles to report in the status.
    """
    async with AsyncSessionLocal() as session:
        result = await session.execute(select(Source).where(Source.id == source_id))
        src = result.scalar_one_or_none()
        if not src:
            return

        src.last_fetched_at = datetime.now(timezone.utc)
        src.consecutive_failures = 0
        src.last_status = f"ok:new={n_new}"
        await session.commit()
|
||||
|
||||
|
||||
async def _bulk_insert(src: Source, items: list[FetchedItem]) -> int:
|
||||
"""用 PG ON CONFLICT DO NOTHING 去重;返回新插入行数。"""
|
||||
if not items:
|
||||
return 0
|
||||
rows = []
|
||||
for it in items:
|
||||
if not it.title or not it.url:
|
||||
continue
|
||||
rows.append(
|
||||
{
|
||||
"source_id": src.id,
|
||||
"url": it.url,
|
||||
"url_hash": url_hash(it.url),
|
||||
"guid": it.guid,
|
||||
"title": it.title[:512],
|
||||
"body_html": (it.body_html or "")[:65535],
|
||||
"body_text": (it.body_text or "")[:65535],
|
||||
"lang_src": it.lang or src.language_src,
|
||||
"author": it.author,
|
||||
"image_url": it.image_url,
|
||||
"published_at": it.published_at,
|
||||
"translation_status": "pending",
|
||||
"translate_to": src.translate_to,
|
||||
}
|
||||
)
|
||||
if not rows:
|
||||
return 0
|
||||
|
||||
async with AsyncSessionLocal() as session:
|
||||
stmt = (
|
||||
pg_insert(Article)
|
||||
.values(rows)
|
||||
.on_conflict_do_nothing(index_elements=["url_hash"])
|
||||
.returning(Article.id)
|
||||
)
|
||||
result = await session.execute(stmt)
|
||||
inserted_ids = [r[0] for r in result.all()]
|
||||
await session.commit()
|
||||
return len(inserted_ids)
|
||||
|
||||
|
||||
# === 翻译 ===
|
||||
async def _translate_recent_for_source(source_id: int, max_n: int = 20) -> None:
    """Translate the newest pending articles of one source, one at a time.

    Picks up to ``max_n`` articles with ``translation_status == "pending"``,
    newest ``published_at`` first (NULLs last, ties broken by highest id),
    and passes each id to ``translate_article`` sequentially.

    Args:
        source_id: primary key of the source whose articles are translated.
        max_n: maximum number of articles handled per call.
    """
    async with AsyncSessionLocal() as session:
        # Only ids are needed here; selecting the id column avoids loading the
        # large body columns of every candidate row.
        result = await session.execute(
            select(Article.id)
            .where(Article.source_id == source_id, Article.translation_status == "pending")
            .order_by(Article.published_at.desc().nullslast(), Article.id.desc())
            .limit(max_n)
        )
        article_ids = list(result.scalars())

    for aid in article_ids:
        await translate_article(aid)
|
||||
|
||||
|
||||
async def translate_article(article_id: int) -> None:
    """Translate one article's title and body into Chinese and store the result.

    Only articles whose status is ``pending`` or ``failed`` are processed.
    The body is cut to ``TRANSLATE_BODY_MAX`` characters, split into chunks
    and translated chunk by chunk. Any exception during translation is
    logged and the article is marked ``failed``; it is not raised.

    The stored status is ``ok`` when the title was translated and the body
    was either translated or empty to begin with, otherwise ``partial``.
    The stored engine is the one reported most often by the translation
    results, falling back to ``"tencent"`` when a result carries no engine.

    Args:
        article_id: primary key of the ``Article`` row.
    """
    async with AsyncSessionLocal() as session:
        art = (
            await session.execute(select(Article).where(Article.id == article_id))
        ).scalar_one_or_none()
        if not art:
            return
        if art.translation_status not in ("pending", "failed"):
            return
        # Copy what is needed so no session stays open during the API calls.
        title = art.title
        body_text = (art.body_text or "")[:TRANSLATE_BODY_MAX]
        lang_src = art.lang_src or "auto"

    target = "zh"
    if not body_text and not title:
        return

    total_chars = 0
    engines: list[str] = []
    try:
        tr_title = await translation_service.translate(title, source=lang_src, target=target)
        total_chars += tr_title.chars
        engines.append(getattr(tr_title, "engine", None) or "tencent")

        # Split the body into request-sized chunks, then stitch the results.
        chunks = _chunk_text(body_text, max_chars=settings.tencent_tmt_max_chars_per_req)
        translated_chunks: list[str] = []
        for ch in chunks:
            tr = await translation_service.translate(ch, source=lang_src, target=target)
            total_chars += tr.chars
            translated_chunks.append(tr.text)
            engines.append(getattr(tr, "engine", None) or "tencent")
        tr_body = "\n\n".join(translated_chunks)
    except Exception as e:
        logger.exception("translate article %s failed: %s", article_id, e)
        async with AsyncSessionLocal() as session:
            art = (
                await session.execute(select(Article).where(Article.id == article_id))
            ).scalar_one_or_none()
            if art:
                art.translation_status = "failed"
                await session.commit()
        return

    # Label with the engine that actually did the work instead of assuming
    # Tencent; ties resolve to the engine seen first.
    engine_label = max(dict.fromkeys(engines), key=engines.count)
    # An article without body text has nothing left to translate, so it must
    # not be stuck in "partial" forever.
    body_done = bool(tr_body) or not chunks
    status = "ok" if (tr_title.text and body_done) else "partial"

    # Write the translation back.
    async with AsyncSessionLocal() as session:
        art = (
            await session.execute(select(Article).where(Article.id == article_id))
        ).scalar_one_or_none()
        if art:
            art.title_zh = tr_title.text if tr_title.text else None
            art.body_zh_text = tr_body or None
            art.body_zh_html = _wrap_html(tr_body) if tr_body else None
            art.translation_status = status
            art.translation_engine = engine_label
            art.translation_chars = total_chars
            art.translated_at = datetime.now(timezone.utc)
            await session.commit()
    logger.info("article %s translated: %d chars, %s", article_id, total_chars, engine_label)
|
||||
|
||||
|
||||
def _chunk_text(text: str, max_chars: int) -> list[str]:
|
||||
if not text:
|
||||
return []
|
||||
paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
|
||||
chunks: list[str] = []
|
||||
cur = ""
|
||||
for p in paragraphs:
|
||||
if len(p) > max_chars:
|
||||
# 单段过长:按句号切
|
||||
sentences = _split_long_para(p, max_chars)
|
||||
for s in sentences:
|
||||
if len(cur) + len(s) + 2 > max_chars:
|
||||
if cur:
|
||||
chunks.append(cur)
|
||||
cur = s
|
||||
else:
|
||||
cur = (cur + "\n\n" + s).strip() if cur else s
|
||||
else:
|
||||
if len(cur) + len(p) + 2 > max_chars:
|
||||
if cur:
|
||||
chunks.append(cur)
|
||||
cur = p
|
||||
else:
|
||||
cur = (cur + "\n\n" + p).strip() if cur else p
|
||||
if cur:
|
||||
chunks.append(cur)
|
||||
return chunks
|
||||
|
||||
|
||||
def _split_long_para(para: str, max_chars: int) -> list[str]:
|
||||
parts: list[str] = []
|
||||
cur = ""
|
||||
for ch in para:
|
||||
cur += ch
|
||||
if ch in ".!?。!?" and len(cur) >= max_chars // 2:
|
||||
parts.append(cur.strip())
|
||||
cur = ""
|
||||
if cur.strip():
|
||||
parts.append(cur.strip())
|
||||
if not parts:
|
||||
return [para[:max_chars]]
|
||||
return parts
|
||||
|
||||
|
||||
def _wrap_html(text: str) -> str:
|
||||
"""把译文包成 HTML 段落。"""
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
parts = [f"<p>{p.strip()}</p>" for p in text.split("\n\n") if p.strip()]
|
||||
return "\n".join(parts) if parts else ""
|
||||
|
||||
|
||||
# === 全量跑(供测试 / 手动触发) ===
|
||||
async def run_once() -> None:
    """Fetch every enabled source once, concurrently.

    Intended for tests and manual triggering. All sources are fetched with
    ``asyncio.gather``; a failure in one source does not cancel the others.
    Exceptions returned by the gather are logged instead of being silently
    discarded.
    """
    async with AsyncSessionLocal() as session:
        rows = (await session.execute(select(Source).where(Source.enabled.is_(True)))).scalars()
        sources = list(rows)

    logger.info("run_once: %d enabled sources", len(sources))
    outcomes = await asyncio.gather(
        *(fetch_one_source(s.id) for s in sources), return_exceptions=True
    )
    # return_exceptions=True hands failures back as values; surface them.
    for src, outcome in zip(sources, outcomes):
        if isinstance(outcome, BaseException):
            logger.error("run_once: source %s failed: %r", src.id, outcome, exc_info=outcome)
|
||||
72
backend/pyproject.toml
Normal file
72
backend/pyproject.toml
Normal file
@@ -0,0 +1,72 @@
|
||||
[project]
|
||||
name = "news-aggregator"
|
||||
version = "0.1.0"
|
||||
description = "Private news aggregator with multi-source RSS, translation, web + Android clients"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
# web
|
||||
"fastapi>=0.115.0",
|
||||
"uvicorn[standard]>=0.32.0",
|
||||
"pydantic>=2.9.0",
|
||||
"pydantic-settings>=2.6.0",
|
||||
"python-multipart>=0.0.12",
|
||||
# db
|
||||
"sqlalchemy[asyncio]>=2.0.36",
|
||||
"asyncpg>=0.30.0",
|
||||
"alembic>=1.14.0",
|
||||
"psycopg2-binary>=2.9.10", # alembic sync driver
|
||||
# cache / queue
|
||||
"redis>=5.2.0",
|
||||
# auth
|
||||
"passlib[bcrypt]>=1.7.4",
|
||||
"bcrypt==4.0.1", # 锁版本,passlib 与新版 bcrypt 不兼容
|
||||
"pyjwt>=2.10.0",
|
||||
# fetch / parse
|
||||
"feedparser>=6.0.11",
|
||||
"httpx>=0.28.0",
|
||||
"trafilatura>=2.0.0",
|
||||
"beautifulsoup4>=4.12.3",
|
||||
"lxml>=5.3.0",
|
||||
"python-dateutil>=2.9.0",
|
||||
# translation
|
||||
"tencentcloud-sdk-python>=3.0.1200",
|
||||
# scheduling
|
||||
"apscheduler>=3.10.4",
|
||||
# observability
|
||||
"structlog>=24.4.0",
|
||||
"orjson>=3.10.10",
|
||||
# util
|
||||
"pydantic-extra-types>=2.10.0",
|
||||
"email-validator>=2.2.0",
|
||||
"python-slugify>=8.0.4",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=8.3.0",
|
||||
"pytest-asyncio>=0.24.0",
|
||||
"ruff>=0.7.0",
|
||||
"mypy>=1.13.0",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 110
|
||||
target-version = "py312"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "B", "UP", "W"]
|
||||
ignore = ["E501"]
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.12"
|
||||
ignore_missing_imports = true
|
||||
strict_optional = true
|
||||
warn_unused_ignores = true
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["app*"]
|
||||
Reference in New Issue
Block a user