#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Launcher script for the integrated product system.

Provides a simplified command-line interface with three run modes:
scraping only, analysis only, or the full workflow (scraping + analysis).
"""

import argparse
import asyncio
import inspect
import os
import sys

from loguru import logger

# Main system and configuration (project-local modules).
from integrated_product_system import IntegratedProductSystem
from config import (
    DATABASE_CONFIG,
    CHROME_CONFIG,
    AI_CONFIG,
    SCRAPING_CONFIG,
    LOGGING_CONFIG,
    ANALYSIS_CONFIG,
)


def setup_logging(log_file=None, log_level="INFO"):
    """Configure loguru with a stderr sink and a rotating file sink.

    Args:
        log_file: Path of the log file. When ``None``, falls back to
            ``LOGGING_CONFIG['log_file']``.
        log_level: Minimum level applied to both sinks.
    """
    if log_file is None:
        log_file = LOGGING_CONFIG['log_file']

    # Drop previously registered sinks so only the two sinks below are active.
    logger.remove()
    logger.add(sys.stderr, level=log_level, format=LOGGING_CONFIG['log_format'])
    logger.add(log_file, level=log_level, rotation=LOGGING_CONFIG['log_rotation'])
    logger.info("日志系统初始化完成")


def print_system_info():
    """Log the configured database path, Chrome debug port and AI settings."""
    logger.info("=== 全功能产品抓取与分析系统 ===")
    logger.info(f"数据库路径: {DATABASE_CONFIG['product_db_path']}")
    logger.info(f"Chrome调试端口: {CHROME_CONFIG['debug_port']}")
    logger.info(f"AI模型: {AI_CONFIG['model']}")
    logger.info(f"API地址: {AI_CONFIG['api_url']}")
    logger.info("=" * 40)


def _resolve_skip_duplicates(args):
    """Return the ``skip_duplicates`` setting implied by *args*.

    Precedence:
        1. An explicit ``args.skip_duplicates`` attribute (kept for callers
           that build their own namespace).
        2. The ``--no-skip-duplicates`` CLI flag, which argparse stores as
           ``args.no_skip_duplicates``; when set, duplicates are not skipped.
        3. ``SCRAPING_CONFIG['skip_duplicates']``.
    """
    if hasattr(args, 'skip_duplicates'):
        return args.skip_duplicates
    if getattr(args, 'no_skip_duplicates', False):
        return False
    return SCRAPING_CONFIG['skip_duplicates']


def _build_scraping_system(args):
    """Build an ``IntegratedProductSystem`` from CLI args with config fallbacks.

    Each value uses ``or``, so a missing (``None``) or otherwise falsy CLI
    value falls back to the corresponding config entry.
    """
    return IntegratedProductSystem(
        tophub_db_path=args.tophub_db or DATABASE_CONFIG['tophub_db_path'],
        product_db_path=args.product_db or DATABASE_CONFIG['product_db_path'],
        debug_port=args.debug_port or CHROME_CONFIG['debug_port'],
        limit=args.limit or SCRAPING_CONFIG['default_limit'],
        skip_duplicates=_resolve_skip_duplicates(args),
    )


async def run_scraping_mode(args):
    """Run scraping only.

    Args:
        args: Parsed CLI namespace; reads ``tophub_db``, ``product_db``,
            ``debug_port``, ``limit``, ``urls`` and the duplicate-skipping
            options.
    """
    logger.info("运行抓取模式...")
    system = _build_scraping_system(args)

    # Initialise the database before scraping.
    system.init_database()

    await system.run_scraping(urls=args.urls)


async def run_analysis_mode(args):
    """Run analysis only.

    Args:
        args: Parsed CLI namespace; reads ``product_db`` and ``max_products``.
    """
    logger.info("运行分析模式...")
    system = IntegratedProductSystem(
        product_db_path=args.product_db or DATABASE_CONFIG['product_db_path']
    )

    # Initialise the database before analysis.
    system.init_database()

    system.analyze_products(max_products=args.max_products)


async def run_full_mode(args):
    """Run the full workflow (scraping followed by analysis).

    Args:
        args: Parsed CLI namespace; reads the same options as scraping mode
            plus ``max_products``.
    """
    logger.info("运行完整模式(抓取+分析)...")
    system = _build_scraping_system(args)

    outcome = system.run_full_workflow(max_products=args.max_products)
    # NOTE(review): whether run_full_workflow is a coroutine function is not
    # visible from this script. Await the result only when it is awaitable so
    # a coroutine is never silently dropped; a synchronous implementation
    # behaves exactly as before.
    if inspect.isawaitable(outcome):
        await outcome


def main():
    """Parse CLI arguments, set up logging and run the selected mode."""
    parser = argparse.ArgumentParser(description="全功能产品抓取与分析系统")

    # Common options.
    parser.add_argument("--mode", choices=["scraping", "analysis", "full"], default="full",
                        help="运行模式: scraping(仅抓取), analysis(仅分析), full(抓取+分析)")
    parser.add_argument("--tophub-db", help="tophub数据库路径")
    parser.add_argument("--product-db", help="产品数据库路径")
    parser.add_argument("--debug-port", type=int, help="Chrome调试端口")
    parser.add_argument("--limit", type=int, help="抓取链接数量限制")
    parser.add_argument("--max-products", type=int, help="最大分析产品数量")
    parser.add_argument("--log-file", help="日志文件路径")
    parser.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="INFO",
                        help="日志级别")
    parser.add_argument("--no-skip-duplicates", action="store_true", help="不跳过重复URL")
    parser.add_argument("--urls", nargs="+", help="指定要抓取的URL列表")

    args = parser.parse_args()

    setup_logging(args.log_file, args.log_level)
    print_system_info()

    # Dispatch on the selected mode; anything other than scraping/analysis
    # runs the full workflow, matching the parser default.
    runners = {
        "scraping": run_scraping_mode,
        "analysis": run_analysis_mode,
    }
    runner = runners.get(args.mode, run_full_mode)
    asyncio.run(runner(args))


if __name__ == "__main__":
    main()