Files
tophux_scrape/debug_stealth.py

104 lines
3.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
调试脚本,详细诊断 new_data_stealth.py 的问题
"""
import asyncio
import sys
from loguru import logger
# Configure logging: clear the existing handlers, then send records to the
# console (stderr) at INFO level using a colorized single-line format.
logger.remove()
logger.add(
    sys.stderr,
    level="INFO",
    format=(
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
        "<level>{message}</level>"
    ),
)
async def debug_stealth():
    """Run a step-by-step diagnostic of the ``new_data_stealth`` scraper.

    The steps, each of which logs its own outcome, are:

    1. import the required modules,
    2. instantiate ``ProductHuntScraper``,
    3. start the browser,
    4. navigate to a test page and inspect its content,
    5. take a screenshot,
    6. save the page HTML,
    7. close the browser.

    Returns:
        bool: ``True`` only when every step completed without raising.
        ``False`` when an import failed, the browser did not start, any of
        steps 4-6 raised, or an unexpected error occurred.
    """
    logger.info("=== 开始调试 new_data_stealth.py ===")
    try:
        # Step 1: imports. The first two names are imported only to prove the
        # dependencies are installed; they are intentionally unused.
        logger.info("1. 测试导入模块...")
        from playwright.async_api import async_playwright  # noqa: F401
        from playwright_stealth.stealth import Stealth  # noqa: F401
        from product.new_data_stealth import ProductHuntScraper
        logger.success("✅ 所有模块导入成功")

        # Step 2: instantiation.
        logger.info("2. 测试类实例化...")
        scraper = ProductHuntScraper()
        logger.success("✅ ProductHuntScraper 实例化成功")

        # Step 3: browser start-up.
        logger.info("3. 测试浏览器启动...")
        browser_started = await scraper.start_browser()
        if not browser_started:
            logger.error("❌ 浏览器启动失败")
            return False
        logger.success("✅ 浏览器启动成功")

        # Tracks whether steps 4-6 all succeeded, so the final summary and
        # the return value reflect failures that were logged along the way.
        all_passed = True
        try:
            # Step 4: page navigation.
            logger.info("4. 测试页面导航...")
            try:
                # NOTE(review): assumes scraper.page is a Playwright async
                # Page. Its goto() accepts wait_until/timeout as keyword
                # arguments only; a positional options dict (pyppeteer style)
                # raises TypeError.
                await scraper.page.goto(
                    "https://httpbin.org/user-agent",
                    wait_until="networkidle",
                    timeout=30000,
                )
                content = await scraper.page.content()
                if "user-agent" in content.lower():
                    logger.success("✅ 页面导航成功")
                else:
                    # Unexpected content is reported as a warning only.
                    logger.warning("⚠️ 页面内容异常")
            except Exception as e:
                all_passed = False
                logger.error(f"❌ 页面导航失败: {e}")

            # Step 5: screenshot.
            logger.info("5. 测试截图功能...")
            try:
                await scraper.take_screenshot()
                logger.success("✅ 截图功能正常")
            except Exception as e:
                all_passed = False
                logger.error(f"❌ 截图功能失败: {e}")

            # Step 6: HTML dump.
            logger.info("6. 测试HTML保存功能...")
            try:
                await scraper.save_html()
                logger.success("✅ HTML保存功能正常")
            except Exception as e:
                all_passed = False
                logger.error(f"❌ HTML保存功能失败: {e}")
        finally:
            # Step 7: always close the browser once it has been started, even
            # if the steps above were cancelled or interrupted.
            logger.info("7. 关闭浏览器...")
            await scraper.close_browser()
            logger.success("✅ 浏览器关闭成功")

        if all_passed:
            logger.success("🎉 所有调试测试通过!")
        return all_passed
    except ImportError as e:
        logger.error(f"❌ 导入失败: {e}")
        return False
    except Exception as e:
        # Top-level boundary of the diagnostic: report the error and signal
        # failure to the caller instead of crashing.
        logger.error(f"❌ 调试过程中发生错误: {e}")
        return False
async def main():
    """Run the stealth diagnostics and log a summary of the outcome."""
    logger.info("开始调试过程...")

    if await debug_stealth():
        logger.info("\n🎉 调试完成!脚本修复成功")
        logger.info("现在可以正常运行: python product/new_data_stealth.py")
        return

    # Diagnostics reported a failure: point the user at the logged errors.
    logger.error("\n💥 调试发现存在问题")
    logger.info("请检查错误信息并进一步调试")
if __name__ == "__main__":
    # Script entry point: drive the async main() coroutine to completion.
    asyncio.run(main())