Files
diary-news/backend/app/scripts/seed_sources.py

115 lines
3.0 KiB
Python
Raw Normal View History

"""种子:导入 MVP 5 源。
- Reuters World
- BBC World
- Al Jazeera
- NHK World
- DW
RSS 链接为公开 feed,实际链接可能变更; fetch 失败,先看 /admin/health
"""
from __future__ import annotations
import asyncio
import sys
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from app.database import AsyncSessionLocal
from app.models.source import Source, SourceKind
def _rss_seed(
    *,
    name: str,
    slug: str,
    url: str,
    region: str,
    priority: int,
    fetch_interval_min: int,
) -> dict[str, object]:
    """Return one seed row as a column-name -> value mapping.

    Every seed shares the same constant fields: ``kind`` is
    ``SourceKind.RSS``, ``language_src`` is ``"en"``, ``translate_to`` is
    ``"zh"`` and ``enabled`` is ``True``. Only the per-source fields are
    passed in.
    """
    return {
        "name": name,
        "slug": slug,
        "kind": SourceKind.RSS,
        "url": url,
        "region": region,
        "language_src": "en",
        "priority": priority,
        "fetch_interval_min": fetch_interval_min,
        "translate_to": "zh",
        "enabled": True,
    }


# Rows inserted by ``main``; ``slug`` is the conflict key used to skip
# sources that already exist.
SEEDS = [
    _rss_seed(
        name="Reuters World",
        slug="reuters-world",
        url="https://feeds.reuters.com/Reuters/worldNews",
        region="global",
        priority=90,
        fetch_interval_min=30,
    ),
    _rss_seed(
        name="BBC World",
        slug="bbc-world",
        url="https://feeds.bbci.co.uk/news/world/rss.xml",
        region="global",
        priority=85,
        fetch_interval_min=30,
    ),
    _rss_seed(
        name="Al Jazeera",
        slug="aljazeera",
        url="https://www.aljazeera.com/xml/rss/all.xml",
        region="mena",
        priority=80,
        fetch_interval_min=45,
    ),
    # NOTE(review): this URL looks like NHK's domestic news feed rather than
    # an NHK World one -- confirm the feed language matches language_src "en".
    _rss_seed(
        name="NHK World",
        slug="nhk-world",
        url="https://www3.nhk.or.jp/rss/news/cat0.xml",
        region="asia",
        priority=70,
        fetch_interval_min=60,
    ),
    _rss_seed(
        name="DW (Deutsche Welle)",
        slug="dw",
        url="https://rss.dw.com/xml/rss-en-all",
        region="eu",
        priority=70,
        fetch_interval_min=60,
    ),
]
async def main() -> int:
    """Insert every entry of ``SEEDS`` as a ``Source`` row, skipping known slugs.

    Each row is written with ``INSERT ... ON CONFLICT (slug) DO NOTHING
    RETURNING id``: a returned id means the row was created, no returned row
    means a source with that slug already exists, so the script can be re-run.

    Every insert runs inside its own SAVEPOINT. If one row is rejected with
    an ``IntegrityError`` only that row is undone; rows inserted earlier in
    the same run stay in the transaction and are committed at the end.
    (Rolling back the whole session here would silently discard them even
    though they were already reported as inserted.)

    Returns:
        ``0`` when every seed was inserted or already present, ``1`` when at
        least one row was rejected, so the process exit code reflects it.
    """
    inserted = 0
    failed = 0
    async with AsyncSessionLocal() as session:
        for row in SEEDS:
            stmt = (
                pg_insert(Source)
                .values(**row)
                .on_conflict_do_nothing(index_elements=["slug"])
                .returning(Source.id)
            )
            try:
                # begin_nested() opens a SAVEPOINT; leaving the block with an
                # exception rolls back to it and keeps the outer transaction.
                async with session.begin_nested():
                    result = await session.execute(stmt)
                    new_id = result.scalar_one_or_none()
            except IntegrityError as exc:
                failed += 1
                print(f" ! {row['slug']}: {exc}", file=sys.stderr)
                continue

            if new_id is not None:
                inserted += 1
                print(f" + {row['slug']} (id={new_id})")
            else:
                print(f" = {row['slug']} (already exists)")
        await session.commit()
    print(f"seeded {inserted} new source(s)")
    return 1 if failed else 0
if __name__ == "__main__":
    # Run the async seeder to completion and use its return value as the
    # process exit status.
    exit_status = asyncio.run(main())
    sys.exit(exit_status)