增加对producthunt网站的数据爬取

This commit is contained in:
2025-11-17 07:39:45 +08:00
parent 256850f752
commit d07017cf11
27 changed files with 26638 additions and 2153 deletions

124
fix_chrome_debug.py Normal file
View File

@@ -0,0 +1,124 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
修复 Chrome 调试连接问题
自动检测并处理 Chrome 调试模式连接
"""
import socket
import subprocess
import time
from loguru import logger
def check_chrome_debug_port(host="127.0.0.1", port=5003, timeout=2):
    """Return True if a TCP connection to the Chrome debug port succeeds.

    Args:
        host: Host name or IP address to probe.
        port: TCP port the Chrome remote-debugging endpoint should listen on.
        timeout: Seconds to wait for the connection attempt.

    Returns:
        True when ``connect_ex`` reports success (0). False when the
        connection is refused or times out, or when the probe itself raises
        (the error is logged in that case).
    """
    try:
        # The context manager closes the socket even when connect_ex raises
        # (e.g. an unresolvable host name), so no descriptor is leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(timeout)
            return sock.connect_ex((host, port)) == 0
    except Exception as e:
        # Best-effort probe: any failure is reported as "port not available".
        logger.error(f"检查端口 {host}:{port} 失败: {e}")
        return False
def start_chrome_debug_mode(port=5003, user_data_dir="C:\\temp\\chrome_debug"):
    """Launch Chrome with remote debugging enabled and wait for the port.

    Args:
        port: Value passed to Chrome's ``--remote-debugging-port`` flag.
        user_data_dir: Profile directory passed to ``--user-data-dir``.

    Returns:
        True once the debug port accepts connections. False if no Chrome
        executable exists at the known Windows install paths, if the port
        does not open within the startup window, or if launching raises
        (the error is logged in that case).
    """
    import os

    startup_timeout = 5.0  # seconds to keep polling for the debug port
    poll_interval = 0.5    # seconds between port probes

    try:
        logger.info(f"正在启动 Chrome 调试模式,端口: {port}")

        # Known Chrome install locations on Windows.
        chrome_paths = [
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        ]

        # Locate the binary by file existence instead of executing it:
        # running the executable as a probe has no timeout and could block
        # until that process exits.
        chrome_exe = next(
            (path for path in chrome_paths if os.path.isfile(path)), None
        )
        if not chrome_exe:
            logger.error("未找到 Chrome 可执行文件")
            return False
        logger.info(f"找到 Chrome 可执行文件: {chrome_exe}")

        cmd = [
            chrome_exe,
            f"--remote-debugging-port={port}",
            f"--user-data-dir={user_data_dir}",
            "--no-first-run",
            "--no-default-browser-check",
        ]
        logger.info(f"启动命令: {' '.join(cmd)}")

        # Discard Chrome's output. Nothing here reads the streams, and an
        # unread PIPE can stall the child once the OS pipe buffer fills.
        process = subprocess.Popen(
            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )

        # Poll until the port opens rather than sleeping a fixed interval,
        # so a fast start returns early and a slow one gets the full window.
        deadline = time.monotonic() + startup_timeout
        while True:
            if check_chrome_debug_port(port=port):
                logger.success("Chrome 调试模式启动成功")
                return True
            if time.monotonic() >= deadline:
                break
            time.sleep(poll_interval)

        logger.error("Chrome 调试模式启动失败")
        process.terminate()
        return False
    except Exception as e:
        logger.error(f"启动 Chrome 调试模式失败: {e}")
        return False
def main():
    """Check the Chrome debug port and offer ways to get it running.

    If the default debug port already accepts connections, report that and
    return. Otherwise list three options and prompt for a choice:
    1 (default) points to the Playwright script, 2 prints the manual launch
    command, 3 tries to start Chrome automatically.

    Returns:
        True when the port is already available, when option 1 is chosen,
        or when the automatic start succeeds; False otherwise (manual
        instructions shown, automatic start failed, or invalid choice).
    """
    logger.info("=== Chrome 调试模式检查与修复 ===")

    # Fast path: the port is already open, nothing to repair.
    if check_chrome_debug_port():
        logger.success("Chrome 调试端口 127.0.0.1:5003 已可用")
        logger.info("您现在可以运行: python product/new_data.py")
        return True

    logger.warning("Chrome 调试端口 127.0.0.1:5003 不可用")
    logger.info("检测到 Chrome 调试模式未启动")
    logger.info("建议使用以下替代方案:")
    logger.info("1. 使用 Playwright 版本 (推荐): python product/new_data_stealth.py")
    logger.info("2. 手动启动 Chrome 调试模式")
    logger.info("3. 自动启动 Chrome 调试模式")

    try:
        choice = input("请选择方案 (1/2/3, 默认1): ").strip() or "1"
    except EOFError:
        # stdin is closed or non-interactive: fall back to the default the
        # prompt advertises instead of crashing with a traceback.
        choice = "1"

    if choice == "1":
        logger.info("使用 Playwright 版本...")
        logger.info("运行: python product/new_data_stealth.py")
        return True

    if choice == "2":
        logger.info("请手动运行以下命令:")
        logger.info('chrome.exe --remote-debugging-port=5003 --user-data-dir="C:\\temp\\chrome_debug"')
        return False

    if choice == "3":
        if start_chrome_debug_mode():
            logger.success("Chrome 调试模式启动成功")
            logger.info("您现在可以运行: python product/new_data.py")
            return True
        logger.error("自动启动失败,请尝试手动启动")
        return False

    logger.error("无效选择")
    return False
if __name__ == "__main__":
    # Script entry point: run the check and log the overall outcome.
    success = main()
    if success:
        logger.info("修复完成")
    else:
        logger.error("修复失败,请检查上述信息")
    # Propagate the outcome as the process exit status so shells and
    # wrapper scripts can detect failure (0 = success, 1 = failure).
    raise SystemExit(0 if success else 1)