新爬取数据
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,55 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
修复db_viewer.py文件中的方法位置问题
|
|
||||||
将increase_score和decrease_score方法从文件末尾移动到DatabaseViewer类内部
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
def _method_pattern(name):
    """Compile the regex that captures the text of ``def <name>(self):``.

    The match begins at the newline (plus any following whitespace) that
    precedes the ``def`` line and extends lazily up to, but not including,
    the next ``def`` at any indentation, a blank-line-separated
    ``class`` / ``def`` / ``if __name__`` line, or the end of the text.
    """
    return re.compile(
        r'\n\s*def ' + name + r'\(self\):.*?'
        r'(?=\n\s*def|\n\nclass|\n\ndef|\n\nif __name__|\Z)',
        re.DOTALL,
    )


def fix_db_viewer(path='db_viewer.py'):
    """Move ``increase_score`` and ``decrease_score`` next to their sibling.

    Reads *path*, cuts the text of the ``increase_score(self)`` and
    ``decrease_score(self)`` methods out of it, re-inserts both (in that
    order) immediately after the ``mark_as_not_interested(self)`` method,
    and writes the result back to *path*.

    Args:
        path: File to rewrite in place. Defaults to ``'db_viewer.py'`` in
            the current working directory, which is what the original
            hard-coded.

    Returns:
        True when the file was rewritten; False when any of the three
        methods could not be located or the file could not be read or
        written. A diagnostic line is printed in every case.
    """
    increase_re = _method_pattern('increase_score')
    decrease_re = _method_pattern('decrease_score')
    anchor_re = _method_pattern('mark_as_not_interested')

    # Only the file I/O can realistically fail, so keep the guarded region
    # limited to it instead of wrapping the whole function.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"修复文件时出错: {str(e)}")
        return False

    # Both searches run against the untouched text so that removing one
    # method cannot shift or alter what is captured for the other.
    increase_match = increase_re.search(content)
    decrease_match = decrease_re.search(content)
    if not increase_match or not decrease_match:
        print("未找到increase_score或decrease_score方法")
        return False

    # NOTE(review): the captured text is re-inserted verbatim; its
    # indentation is not adjusted to match the insertion site.
    moved_methods = increase_match.group(0) + decrease_match.group(0)

    # Drop every occurrence of the two methods from the text.
    content = increase_re.sub('', content)
    content = decrease_re.sub('', content)

    # The anchor is looked up after the removal so that the insertion
    # offset refers to the already-trimmed text.
    anchor_match = anchor_re.search(content)
    if not anchor_match:
        print("未找到mark_as_not_interested方法")
        return False

    insertion_point = anchor_match.end()
    new_content = (
        content[:insertion_point] + moved_methods + content[insertion_point:]
    )

    try:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(new_content)
    except (OSError, UnicodeError) as e:
        print(f"修复文件时出错: {str(e)}")
        return False

    print("成功修复db_viewer.py文件")
    return True
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Propagate the outcome as the process exit status (0 on success,
    # 1 on failure) so that shell callers can detect a failed fix.
    raise SystemExit(0 if fix_db_viewer() else 1)
|
|
||||||
File diff suppressed because it is too large
Load Diff
BIN
tophub_data.db
BIN
tophub_data.db
Binary file not shown.
1100
tophub_scraper.log
1100
tophub_scraper.log
File diff suppressed because it is too large
Load Diff
@@ -249,20 +249,37 @@ class TopHubScraper:
|
|||||||
|
|
||||||
# 调用tophub_add_data_to_db.py脚本
|
# 调用tophub_add_data_to_db.py脚本
|
||||||
logger.info("正在调用tophub_add_data_to_db.py...")
|
logger.info("正在调用tophub_add_data_to_db.py...")
|
||||||
result = subprocess.run([sys.executable, "tophub_add_data_to_db.py"],
|
|
||||||
capture_output=True, text=True, encoding='utf-8')
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
# 使用Popen方式处理可能的编码问题
|
||||||
|
process = subprocess.Popen([sys.executable, "tophub_add_data_to_db.py"],
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
text=True,
|
||||||
|
encoding='utf-8',
|
||||||
|
errors='replace') # 使用replace模式处理无法解码的字符
|
||||||
|
|
||||||
|
# 实时读取输出以避免编码问题
|
||||||
|
try:
|
||||||
|
stdout, stderr = process.communicate(timeout=300) # 5分钟超时
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
process.kill()
|
||||||
|
logger.error("tophub_add_data_to_db.py执行超时")
|
||||||
|
return
|
||||||
|
|
||||||
|
if process.returncode == 0:
|
||||||
logger.info("tophub_add_data_to_db.py调用成功")
|
logger.info("tophub_add_data_to_db.py调用成功")
|
||||||
if result.stdout:
|
if stdout:
|
||||||
logger.info(f"脚本输出: {result.stdout}")
|
logger.info(f"脚本输出: {stdout}")
|
||||||
else:
|
else:
|
||||||
logger.error(f"tophub_add_data_to_db.py调用失败,返回码: {result.returncode}")
|
logger.error(f"tophub_add_data_to_db.py调用失败,返回码: {process.returncode}")
|
||||||
if result.stderr:
|
if stderr:
|
||||||
logger.error(f"错误信息: {result.stderr}")
|
logger.error(f"错误信息: {stderr}")
|
||||||
if result.stdout:
|
if stdout:
|
||||||
logger.info(f"脚本输出: {result.stdout}")
|
logger.info(f"脚本输出: {stdout}")
|
||||||
|
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
logger.error(f"编码解码错误: {e}")
|
||||||
|
logger.info("可能是脚本输出包含非UTF-8编码字符,已尝试使用replace模式处理")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"调用tophub_add_data_to_db.py时出错: {e}")
|
logger.error(f"调用tophub_add_data_to_db.py时出错: {e}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user