新爬取数据

This commit is contained in:
2025-11-13 22:27:05 +08:00
parent d5344aaa4a
commit 1507416806
6 changed files with 6733 additions and 2546 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,55 +0,0 @@
#!/usr/bin/env python3
"""
修复db_viewer.py文件中的方法位置问题
将increase_score和decrease_score方法从文件末尾移动到DatabaseViewer类内部
"""
import re
def _method_pattern(name):
    """Return the regex source that matches one ``def <name>(self):`` block.

    The match begins at the newline preceding the ``def`` line (any blank
    lines and indentation in front of ``def`` are part of the match) and
    lazily extends up to, but not including, the first of: another ``def``
    at any indentation, a blank-line-separated ``class`` / ``def`` /
    ``if __name__`` line, or the end of the text.  The pattern relies on
    ``re.DOTALL`` so that ``.`` can cross line boundaries.
    """
    return (
        r'\n\s*def ' + re.escape(name) + r'\(self\):.*?'
        r'(?=\n\s*def|\n\nclass|\n\ndef|\n\nif __name__|\Z)'
    )


def fix_db_viewer(path='db_viewer.py'):
    """Move ``increase_score`` / ``decrease_score`` after ``mark_as_not_interested``.

    The file at *path* is read as UTF-8, the source text of the two score
    methods is cut out, and both are re-inserted (increase first, then
    decrease) directly after the end of ``mark_as_not_interested``.  The
    file is rewritten in place only when all three methods were found.

    Args:
        path: File to rewrite.  Defaults to ``'db_viewer.py'`` relative to
            the current working directory.

    Returns:
        True when the file was rewritten; False when a required method is
        missing or the file could not be read, decoded or written.  A
        message describing the outcome is printed in every case.
    """
    increase_re = re.compile(_method_pattern('increase_score'), re.DOTALL)
    decrease_re = re.compile(_method_pattern('decrease_score'), re.DOTALL)
    anchor_re = re.compile(_method_pattern('mark_as_not_interested'), re.DOTALL)

    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        increase_score_match = increase_re.search(content)
        decrease_score_match = decrease_re.search(content)
        if not increase_score_match or not decrease_score_match:
            print("未找到increase_score或decrease_score方法")
            return False

        # Keep the first definition of each method verbatim.
        # NOTE(review): the text is re-inserted unchanged, so it keeps the
        # indentation it had in the file -- confirm that is method-level.
        increase_score_method = increase_score_match.group(0)
        decrease_score_method = decrease_score_match.group(0)

        # Strip every definition of the two methods from the text before
        # looking for the anchor, so the anchor search sees the final layout.
        content = increase_re.sub('', content)
        content = decrease_re.sub('', content)

        anchor_match = anchor_re.search(content)
        if not anchor_match:
            # Nothing has been written yet, so the file is left untouched.
            print("未找到mark_as_not_interested方法")
            return False

        insertion_point = anchor_match.end()
        new_content = (
            content[:insertion_point]
            + increase_score_method
            + decrease_score_method
            + content[insertion_point:]
        )

        with open(path, 'w', encoding='utf-8') as f:
            f.write(new_content)
        print("成功修复db_viewer.py文件")
        return True
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable/unwritable file.  ValueError covers
        # UnicodeError (bad UTF-8) and invalid path strings.
        print(f"修复文件时出错: {e}")
        return False
if __name__ == "__main__":
    # Script entry point: apply the fix to db_viewer.py in the current
    # working directory.  The boolean result is only reported via the
    # messages fix_db_viewer() prints; it does not affect the exit status.
    fix_db_viewer()

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -249,20 +249,37 @@ class TopHubScraper:
# 调用tophub_add_data_to_db.py脚本
logger.info("正在调用tophub_add_data_to_db.py...")
result = subprocess.run([sys.executable, "tophub_add_data_to_db.py"],
capture_output=True, text=True, encoding='utf-8')
if result.returncode == 0:
# 使用Popen方式处理可能的编码问题
process = subprocess.Popen([sys.executable, "tophub_add_data_to_db.py"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding='utf-8',
errors='replace') # 使用replace模式处理无法解码的字符
# 实时读取输出以避免编码问题
try:
stdout, stderr = process.communicate(timeout=300) # 5分钟超时
except subprocess.TimeoutExpired:
process.kill()
logger.error("tophub_add_data_to_db.py执行超时")
return
if process.returncode == 0:
logger.info("tophub_add_data_to_db.py调用成功")
if result.stdout:
logger.info(f"脚本输出: {result.stdout}")
if stdout:
logger.info(f"脚本输出: {stdout}")
else:
logger.error(f"tophub_add_data_to_db.py调用失败返回码: {result.returncode}")
if result.stderr:
logger.error(f"错误信息: {result.stderr}")
if result.stdout:
logger.info(f"脚本输出: {result.stdout}")
logger.error(f"tophub_add_data_to_db.py调用失败返回码: {process.returncode}")
if stderr:
logger.error(f"错误信息: {stderr}")
if stdout:
logger.info(f"脚本输出: {stdout}")
except UnicodeDecodeError as e:
logger.error(f"编码解码错误: {e}")
logger.info("可能是脚本输出包含非UTF-8编码字符已尝试使用replace模式处理")
except Exception as e:
logger.error(f"调用tophub_add_data_to_db.py时出错: {e}")