刷新今日的新闻数据
This commit is contained in:
@@ -8,6 +8,7 @@ import asyncio
|
||||
from playwright.async_api import async_playwright
|
||||
from loguru import logger
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# 配置日志
|
||||
logger.remove()
|
||||
@@ -120,33 +121,74 @@ class ProductHuntScraper:
|
||||
try:
|
||||
product_info = {}
|
||||
|
||||
# 提取产品名称
|
||||
# 提取产品名称(h1标签)
|
||||
name_element = await self.page.query_selector("h1")
|
||||
if name_element:
|
||||
product_info["name"] = await name_element.text_content()
|
||||
product_info["name"] = (await name_element.text_content()).strip()
|
||||
logger.info(f"产品名称: {product_info['name']}")
|
||||
|
||||
# 提取产品描述
|
||||
desc_element = await self.page.query_selector("[data-testid='product-description']")
|
||||
if not desc_element:
|
||||
desc_element = await self.page.query_selector(".styles_description__")
|
||||
# 提取产品简介(class为"relative text-16 font-normal text-gray-700"的div)
|
||||
logger.info("正在提取产品简介...")
|
||||
try:
|
||||
intro_div = await self.page.query_selector('div.relative.text-16.font-normal.text-gray-700')
|
||||
if intro_div:
|
||||
product_info["introduction"] = (await intro_div.text_content()).strip()
|
||||
logger.info(f"产品简介: {product_info['introduction'][:200]}...")
|
||||
else:
|
||||
logger.warning("未找到class为'relative text-16 font-normal text-gray-700'的div")
|
||||
except Exception as e:
|
||||
logger.error(f"提取产品简介失败: {e}")
|
||||
|
||||
if desc_element:
|
||||
product_info["description"] = await desc_element.text_content()
|
||||
logger.info(f"产品描述: {product_info['description'][:100]}...")
|
||||
# 等待制作人发言动态加载(等待class="flex flex-col gap-2"的section标签出现)
|
||||
logger.info("等待制作人发言动态加载...")
|
||||
try:
|
||||
# 等待section标签出现,最长等待60秒
|
||||
section_element = await self.page.wait_for_selector(
|
||||
'section.flex.flex-col.gap-2',
|
||||
timeout=60000
|
||||
)
|
||||
if section_element:
|
||||
logger.success("制作人发言区域已加载")
|
||||
|
||||
# 提取制作人发言(class为"flex flex-col gap-1"的div里面的span标签)
|
||||
maker_div = await section_element.query_selector('div.flex.flex-col.gap-1')
|
||||
if maker_div:
|
||||
span_element = await maker_div.query_selector('span')
|
||||
if span_element:
|
||||
product_info["maker_statement"] = (await span_element.text_content()).strip()
|
||||
logger.info(f"制作人发言: {product_info['maker_statement'][:200]}...")
|
||||
else:
|
||||
logger.warning("在div中未找到span标签")
|
||||
else:
|
||||
logger.warning("未找到class为'flex flex-col gap-1'的div")
|
||||
else:
|
||||
logger.warning("制作人发言区域未加载")
|
||||
except Exception as e:
|
||||
logger.error(f"等待制作人发言加载失败: {e}")
|
||||
|
||||
# 提取投票数
|
||||
votes_element = await self.page.query_selector("[data-testid='vote-button']")
|
||||
if votes_element:
|
||||
votes_text = await votes_element.text_content()
|
||||
product_info["votes"] = votes_text
|
||||
logger.info(f"投票数: {votes_text}")
|
||||
# 提取用户数(class="text-14 font-medium text-gray-700"的p标签)
|
||||
logger.info("正在提取用户数...")
|
||||
try:
|
||||
user_count_element = await self.page.query_selector('p.text-14.font-medium.text-gray-700')
|
||||
if user_count_element:
|
||||
product_info["user_count"] = (await user_count_element.text_content()).strip()
|
||||
logger.info(f"用户数: {product_info['user_count']}")
|
||||
else:
|
||||
logger.warning("未找到class为'text-14 font-medium text-gray-700'的p标签")
|
||||
except Exception as e:
|
||||
logger.error(f"提取用户数失败: {e}")
|
||||
|
||||
# 提取产品链接
|
||||
website_element = await self.page.query_selector("a[href*='://']")
|
||||
if website_element:
|
||||
product_info["website"] = await website_element.get_attribute("href")
|
||||
logger.info(f"产品网站: {product_info['website']}")
|
||||
# 保存到临时文件
|
||||
temp_file_path = "temp_product_info.txt"
|
||||
with open(temp_file_path, "w", encoding="utf-8") as f:
|
||||
f.write("=== Product Hunt 产品信息 ===\n\n")
|
||||
f.write(f"产品名称: {product_info.get('name', '未获取')}\n\n")
|
||||
f.write(f"产品简介: {product_info.get('introduction', '未获取')}\n\n")
|
||||
f.write(f"制作人发言: {product_info.get('maker_statement', '未获取')}\n\n")
|
||||
f.write(f"用户数: {product_info.get('user_count', '未获取')}\n\n")
|
||||
f.write(f"提取时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
|
||||
logger.info(f"产品信息已保存到临时文件: {temp_file_path}")
|
||||
|
||||
# 截取页面截图
|
||||
screenshot_path = "product_screenshot.png"
|
||||
|
||||
Reference in New Issue
Block a user