新爬取数据

This commit is contained in:
2025-11-13 22:27:05 +08:00
parent d5344aaa4a
commit 1507416806
6 changed files with 6733 additions and 2546 deletions

View File

@@ -249,20 +249,37 @@ class TopHubScraper:
# 调用tophub_add_data_to_db.py脚本
logger.info("正在调用tophub_add_data_to_db.py...")
result = subprocess.run([sys.executable, "tophub_add_data_to_db.py"],
capture_output=True, text=True, encoding='utf-8')
if result.returncode == 0:
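# Launch the script via Popen with errors='replace' so that undecodable bytes in its output are substituted instead of raising UnicodeDecodeError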
process = subprocess.Popen([sys.executable, "tophub_add_data_to_db.py"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding='utf-8',
errors='replace') # 使用replace模式处理无法解码的字符
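# Wait for the script to finish and collect its entire stdout/stderr at once (communicate() does not stream output in real time)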
try:
stdout, stderr = process.communicate(timeout=300) # 5分钟超时
except subprocess.TimeoutExpired:
process.kill()
logger.error("tophub_add_data_to_db.py执行超时")
return
if process.returncode == 0:
logger.info("tophub_add_data_to_db.py调用成功")
if result.stdout:
logger.info(f"脚本输出: {result.stdout}")
if stdout:
logger.info(f"脚本输出: {stdout}")
else:
logger.error(f"tophub_add_data_to_db.py调用失败返回码: {result.returncode}")
if result.stderr:
logger.error(f"错误信息: {result.stderr}")
if result.stdout:
logger.info(f"脚本输出: {result.stdout}")
logger.error(f"tophub_add_data_to_db.py调用失败返回码: {process.returncode}")
if stderr:
logger.error(f"错误信息: {stderr}")
if stdout:
logger.info(f"脚本输出: {stdout}")
except UnicodeDecodeError as e:
logger.error(f"编码解码错误: {e}")
logger.info("可能是脚本输出包含非UTF-8编码字符已尝试使用replace模式处理")
except Exception as e:
logger.error(f"调用tophub_add_data_to_db.py时出错: {e}")