104 lines
3.7 KiB
Python
104 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Debug script that diagnoses problems in new_data_stealth.py in detail.
|
|
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
from loguru import logger
|
|
|
|
# Configure logging so that output is guaranteed to reach the console.
# The format is assembled from adjacent literals purely for readability;
# the resulting string is identical to a single-line literal.
_LOG_FORMAT = (
    "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
    "<level>{level: <8}</level> | "
    "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
    "<level>{message}</level>"
)

# Remove the handlers registered so far, then attach a single stderr sink.
logger.remove()
logger.add(sys.stderr, level="INFO", format=_LOG_FORMAT)
|
|
|
|
async def debug_stealth():
    """Run a step-by-step diagnostic of the ``product.new_data_stealth`` scraper.

    The steps, each announced in the log, are:

    1. import the modules the scraper depends on,
    2. instantiate ``ProductHuntScraper``,
    3. start the browser,
    4. navigate to a simple test page and inspect its content,
    5. take a screenshot,
    6. save the page HTML,
    7. close the browser.

    Steps 4-6 are independent: a failure in one is logged and the remaining
    steps still run. Once the browser has started, closing it is attempted
    even if the diagnostic is interrupted or cancelled.

    Returns:
        bool: ``True`` only if every step succeeded. ``False`` if an import
        failed, the browser did not start, any of steps 4-6 raised, or an
        unexpected error occurred (all of which are logged, not re-raised).
    """
    logger.info("=== 开始调试 new_data_stealth.py ===")

    try:
        # Step 1: import checks. The first two names are imported only to
        # verify that the packages are installed; they are not used below.
        logger.info("1. 测试导入模块...")
        from playwright.async_api import async_playwright  # noqa: F401
        from playwright_stealth.stealth import Stealth  # noqa: F401
        from product.new_data_stealth import ProductHuntScraper
        logger.success("✅ 所有模块导入成功")

        # Step 2: instantiation.
        logger.info("2. 测试类实例化...")
        scraper = ProductHuntScraper()
        logger.success("✅ ProductHuntScraper 实例化成功")

        # Step 3: browser start-up.
        logger.info("3. 测试浏览器启动...")
        browser_started = await scraper.start_browser()
        if not browser_started:
            logger.error("❌ 浏览器启动失败")
            return False
        logger.success("✅ 浏览器启动成功")

        # Tracks whether any of steps 4-6 failed so the final verdict is honest.
        all_passed = True
        try:
            # Step 4: navigation to a simple page.
            logger.info("4. 测试页面导航...")
            try:
                # NOTE(review): assumes scraper.page is a Playwright Page, whose
                # goto() takes keyword-only ``wait_until``/``timeout`` rather than
                # a positional options dict -- confirm against ProductHuntScraper.
                await scraper.page.goto(
                    "https://httpbin.org/user-agent",
                    wait_until="networkidle",
                    timeout=30000,
                )

                content = await scraper.page.content()
                if "user-agent" in content.lower():
                    logger.success("✅ 页面导航成功")
                else:
                    # Unexpected content is reported as a warning only and
                    # does not count as a failed step.
                    logger.warning("⚠️ 页面内容异常")
            except Exception as e:
                logger.error(f"❌ 页面导航失败: {e}")
                all_passed = False

            # Step 5: screenshot.
            logger.info("5. 测试截图功能...")
            try:
                await scraper.take_screenshot()
                logger.success("✅ 截图功能正常")
            except Exception as e:
                logger.error(f"❌ 截图功能失败: {e}")
                all_passed = False

            # Step 6: HTML dump.
            logger.info("6. 测试HTML保存功能...")
            try:
                await scraper.save_html()
                logger.success("✅ HTML保存功能正常")
            except Exception as e:
                logger.error(f"❌ HTML保存功能失败: {e}")
                all_passed = False
        finally:
            # Step 7: always release the browser once it has been started,
            # including when the task is cancelled or interrupted mid-run.
            logger.info("7. 关闭浏览器...")
            await scraper.close_browser()
            logger.success("✅ 浏览器关闭成功")

        if all_passed:
            logger.success("🎉 所有调试测试通过!")
        else:
            logger.error("❌ 部分调试步骤失败,请查看上方日志")
        return all_passed

    except ImportError as e:
        logger.error(f"❌ 导入失败: {e}")
        return False
    except Exception as e:
        # Top-level boundary of the diagnostic: report and signal failure.
        logger.error(f"❌ 调试过程中发生错误: {e}")
        return False
|
|
|
|
async def main():
    """Run the stealth diagnostics and log a closing summary of the outcome."""
    logger.info("开始调试过程...")

    passed = await debug_stealth()

    # Handle the failure case first so the success path reads straight through.
    if not passed:
        logger.error("\n💥 调试发现存在问题")
        logger.info("请检查错误信息并进一步调试")
        return

    logger.info("\n🎉 调试完成!脚本修复成功")
    logger.info("现在可以正常运行: python product/new_data_stealth.py")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the top-level coroutine and run it to completion.
    diagnostics = main()
    asyncio.run(diagnostics)