42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
playwright_stealth 使用示例
|
|
演示如何使用 stealth 模式运行 ProductHunt 爬虫
|
|
"""
|
|
|
|
import asyncio
|
|
from loguru import logger
|
|
from product.new_data_stealth import ProductHuntScraper
|
|
|
|
async def run_stealth_scraper():
    """Run the stealth ProductHunt scraper once and log the outcome.

    A fresh ``ProductHuntScraper`` is created and its ``scrape()`` coroutine
    is awaited. A truthy result is logged as a success together with the
    names of the expected output files; a falsy result is logged as an error.

    Returns:
        The value produced by ``ProductHuntScraper.scrape()``, unchanged.
    """
    logger.info("开始运行 stealth 版本的 ProductHunt 爬虫")

    # Build the scraper and perform the crawl in one step.
    outcome = await ProductHuntScraper().scrape()

    # Failure path first, so the success reporting below stays flat.
    if not outcome:
        logger.error("Stealth 爬虫执行失败")
        return outcome

    logger.success("Stealth 爬虫执行成功!")

    # NOTE(review): these file names are only logged here; presumably the
    # scraper itself writes them — confirm against ProductHuntScraper.
    generated_file_notes = (
        "生成的文件:",
        "- product_info_stealth.json: 产品信息数据",
        "- product_page_stealth.html: 页面HTML内容",
        "- product_screenshot_stealth.png: 页面截图",
    )
    for note in generated_file_notes:
        logger.info(note)

    return outcome
|
|
|
|
def main():
    """Log the example banner and run the stealth scraper to completion.

    The coroutine returned by ``run_stealth_scraper()`` is driven with
    ``asyncio.run``; its result is not used and nothing is returned.
    """
    banner_lines = (
        "=== playwright_stealth 使用示例 ===",
        "此示例演示如何使用 playwright_stealth 模块增强浏览器反检测能力",
    )
    for banner_line in banner_lines:
        logger.info(banner_line)

    # Drive the async scraping task from this synchronous entry point.
    scraping_task = run_stealth_scraper()
    asyncio.run(scraping_task)
|
|
|
|
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()