- POST /api/v1/ingest:鉴权(X-Ingest-Token) + 限速(每 token 2 篇/秒,
Redis 滑动桶,INGEST_RATE_PER_SEC 可调) + 三层去重(L1 external_id /
L2 content_hash / L3 DB UNIQUE 兜底,均带 reason)
- 写入字段:is_short_news=True、translation/format/image_ai_status='n/a'、
classify_status=(有 tags?'ok':'pending')、commentary_{angel,meituan}_status='pending'、
body_zh_text=body_text(走统一路径,前端/prompt 不用改)
- services/fetchers/api_push.py:compute_content_hash + synthesize_url +
normalize_published_at + build_initial_status 纯函数
- schemas/ingest.py:IngestPayload(title 1-200/body 1-5000/tags 去重去空) +
IngestResponse(article_id/content_hash/status/reason/matched_external_id)
- admin.py:POST/GET/DELETE /admin/sources/{id}/ingest-tokens — owner 生成
(raw_token 仅一次性返回)、列出、撤销
- schemas/article.py:ArticleListItem 加 is_short_news/source_ref;
ArticleDetail 加 is_short_news/source_ref/external_id
- main.py:挂 ingest router;config.py + .env.example:ingest_rate_per_sec 默认 2
短新闻由 commit 1 enrichment_loop 自动接管 classify + 双 provider commentary,
跳过 format/image。
168 lines
5.9 KiB
Python
168 lines
5.9 KiB
Python
"""应用配置:从 .env / 环境变量读取,集中管理所有开关。"""
|
|
from __future__ import annotations
|
|
|
|
from functools import lru_cache
|
|
from pathlib import Path
|
|
|
|
from pydantic import Field, field_validator
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Central application settings, read from ``.env`` / environment variables.

    Field names are matched case-insensitively against environment variables
    and unknown variables are ignored (see ``model_config``). Fields without a
    default (``postgres_user``, ``postgres_password``, ``postgres_db``,
    ``redis_password``, ``jwt_secret``) must be supplied by the environment.

    The ``*_url`` properties assemble connection URLs from the individual
    fields; credentials are percent-encoded so that reserved characters in a
    password cannot corrupt the URL.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ===== General =====
    tz: str = "Asia/Hong_Kong"
    log_level: str = "INFO"

    # ===== Database =====
    postgres_user: str
    postgres_password: str
    postgres_db: str
    postgres_host: str = "postgres"
    postgres_port: int = 5432

    def _postgres_url(self, driver: str) -> str:
        """Build a PostgreSQL URL for the given SQLAlchemy driver name.

        The user and password are percent-encoded (``safe=""``) so characters
        such as ``@``, ``:``, ``/`` or ``%`` do not break URL parsing. For
        credentials made only of unreserved characters the result is identical
        to plain interpolation.
        """
        from urllib.parse import quote

        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        # NOTE(review): if this URL is passed through configparser-style
        # interpolation (e.g. alembic's set_main_option), the caller may need
        # to double any "%" — confirm against the alembic env setup.
        return (
            f"postgresql+{driver}://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    @property
    def database_url(self) -> str:
        """Async database URL (asyncpg driver)."""
        return self._postgres_url("asyncpg")

    @property
    def sync_database_url(self) -> str:
        """Synchronous database URL (psycopg2 driver), used by alembic."""
        return self._postgres_url("psycopg2")

    # ===== Redis =====
    redis_host: str = "redis"
    redis_port: int = 6379
    redis_password: str
    redis_db: int = 0

    @property
    def redis_url(self) -> str:
        """Redis URL with password-only auth (empty username).

        The password is percent-encoded so reserved characters cannot be
        mistaken for URL delimiters.
        """
        from urllib.parse import quote

        password = quote(self.redis_password, safe="")
        return (
            f"redis://:{password}@{self.redis_host}:{self.redis_port}/{self.redis_db}"
        )

    # ===== JWT =====
    jwt_secret: str
    jwt_algorithm: str = "HS256"
    access_token_ttl_min: int = 60
    refresh_token_ttl_day: int = 14

    # ===== Tencent Cloud TMT =====
    tencentcloud_secret_id: str = ""
    tencentcloud_secret_key: str = ""
    tencentcloud_region: str = "ap-hongkong"
    tencent_tmt_endpoint: str = "tmt.tencentcloudapi.com"
    tencent_tmt_quota_month: int = 5_000_000
    tencent_tmt_quota_buffer: float = 0.05
    tencent_tmt_max_chars_per_req: int = 4500

    # ===== Zhipu GLM (OpenAI-compatible, primary translation channel) =====
    # Usage: Zhipu open platform GLM-4 series, called via the OpenAI protocol.
    # Leaving api_key empty disables this provider.
    zhipu_api_key: str = ""
    zhipu_base_url: str = "https://open.bigmodel.cn/api/paas/v4"
    zhipu_chat_model: str = "glm-4-flash"
    zhipu_model: str = "glm-4-flash"  # kept for compatibility with the old field name
    # One call every 2 seconds (user request, 6/11: lower the frequency to
    # avoid triggering rate limiting).
    zhipu_interval_sec: float = 2.0

    # ===== iFlytek Spark (WebSocket, secondary translation channel) =====
    # Usage: iFlytek Spark v1.1 Spark Lite; WebSocket auth needs
    # APPID + APIKey + APISecret.
    # Leaving any one of them empty disables this provider.
    # (The earlier implementation used OpenAI-compatible + APIPassword; it has
    # been switched to WebSocket auth, hence the renamed fields.)
    spark_appid: str = ""
    spark_api_key: str = ""  # APIKey used for WebSocket auth
    spark_api_secret: str = ""  # APISecret used for WebSocket auth
    spark_domain: str = "lite"  # v1.1 Spark Lite
    # Legacy field name (leave empty; only serves as a hint when the WebSocket
    # fields are not filled in).
    spark_api_password: str = ""
    # One call every 2 seconds (user request, 6/11: lower the frequency to
    # avoid triggering rate limiting).
    spark_interval_sec: float = 2.0

    @field_validator("tencent_tmt_quota_buffer")
    @classmethod
    def _check_buffer(cls, v: float) -> float:
        """Reject a TMT quota buffer outside the inclusive range 0.0 to 0.5."""
        if not 0.0 <= v <= 0.5:
            raise ValueError("buffer 必须在 0~0.5")
        return v

    # ===== Local translation =====
    local_translate_enabled: bool = False
    local_translate_model: str = "nllb-200-distilled-600M"
    local_translate_device: str = "cpu"

    # ===== Tencent MaaS (OpenAI-compatible fallback translation channel) =====
    # Usage: translation model provided by Tencent Cloud MaaS, called via the
    # OpenAI protocol.
    # Leaving api_key empty disables this provider.
    tencent_maas_api_key: str = ""
    tencent_maas_base_url: str = "https://maas-api.hivoice.cn/v1"
    tencent_maas_model: str = "u2"
    # One call every 2 seconds (same throttle as Zhipu / Spark).
    tencent_maas_interval_sec: float = 2.0

    # ===== Fetching =====
    fetch_global_qps: int = 4
    fetch_timeout: int = 20
    fetch_fail_pause_threshold: int = 3
    fetch_max_retries: int = 2

    # ===== API Push short-news ingest rate limit =====
    # Sliding-window rate limit per ingest token (articles/second).
    # 2 = a single token may push at most 2 short-news items per second,
    # which guards against abuse from one source. Lowering the value requires
    # restarting the api.
    ingest_rate_per_sec: int = 2

    # ===== Site concurrent-login IP limit =====
    # Caps the number of client IPs online at the same time (anti-abuse, and
    # limits damage from a leaked token).
    # Redis ZSET sliding window: every authenticated request refreshes the
    # score; an IP idle for 30 days is evicted automatically.
    # The (limit+1)-th new IP gets a 429 at login.
    site_max_active_ips: int = 30
    site_active_ip_idle_days: int = 30
    # Whether to trust the reverse proxy's X-Forwarded-For header (must be on
    # in production behind Caddy/Nginx; turn off for direct-connect debugging).
    trust_x_forwarded_for: bool = True

    # ===== Caddy / domain =====
    domain: str = ""
    acme_email: str = ""

    # ===== Agnes LLM (smart enhancement) =====
    # Empty = LLM enhancement disabled (after translation only the default
    # formatting runs, and prompts are not read either).
    agnes_api_key: str = ""
    agnes_base_url: str = "https://apihub.agnes-ai.com/v1"
    agnes_chat_model: str = "agnes-2.0-flash"
    agnes_image_model: str = "agnes-image-2.1-flash"
    # Global interval between LLM calls (seconds), to avoid being rate limited.
    llm_interval_sec: float = 2.0

    # ===== Meituan LongCat LLM (second of the two commentary providers) =====
    # OpenAI-compatible endpoint; runs alongside Agnes, each with its own
    # prompt, and results are stored in separate columns on articles.
    # Leaving api_key empty disables the Meituan provider (Angel still works).
    meituan_api_key: str = ""
    meituan_base_url: str = "https://api.longcat.chat/openai/v1"
    meituan_chat_model: str = "LongCat-2.0-Preview"
    meituan_interval_sec: float = 2.0

    # ===== Internal paths (adjustable after deployment) =====
    project_root: Path = Path(__file__).resolve().parents[2]
|
|
|
|
|
|
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide ``Settings`` instance.

    The first call constructs ``Settings`` (reading ``.env`` / environment
    variables); later calls return the same cached object.
    """
    # Required fields are filled from the environment, not passed as arguments.
    instance = Settings()  # type: ignore[call-arg]
    return instance
|
|
|
|
|
|
# Module-level settings object, built at import time through the cached factory.
settings = get_settings()
|