增加今天的数据更新
This commit is contained in:
@@ -114,32 +114,51 @@ class ProductHuntScraper:
|
||||
try:
|
||||
logger.info("模拟点击制作人链接...")
|
||||
|
||||
# 查找要点击的a标签
|
||||
a_element = await self.page.query_selector(f'a[href*="{maker_link.split("/")[-1]}"]')
|
||||
if not a_element:
|
||||
# 如果找不到特定href的a标签,尝试查找包含制作人文本的a标签
|
||||
a_element = await self.page.query_selector(f'a:has-text("{maker_text}")')
|
||||
|
||||
if not a_element:
|
||||
logger.warning("未找到要点击的a标签,使用备用方法")
|
||||
# 查找包含制作人信息的div容器(class="flex flex-col gap-1")
|
||||
div_container = await self.page.query_selector('div.flex.flex-col.gap-1')
|
||||
if not div_container:
|
||||
logger.warning("未找到class='flex flex-col gap-1'的div容器,使用备用方法")
|
||||
# 备用方法:直接打开新窗口
|
||||
return await self._extract_maker_statement_direct_open(maker_link, maker_text)
|
||||
|
||||
# 获取a标签的边界框,用于点击中间位置
|
||||
bbox = await a_element.bounding_box()
|
||||
# 获取div容器的边界框,用于点击中间位置
|
||||
bbox = await div_container.bounding_box()
|
||||
if not bbox:
|
||||
logger.warning("无法获取a标签边界框,使用备用方法")
|
||||
logger.warning("无法获取div容器边界框,使用备用方法")
|
||||
return await self._extract_maker_statement_direct_open(maker_link, maker_text)
|
||||
|
||||
# 计算中间位置
|
||||
center_x = bbox['x'] + bbox['width'] / 2
|
||||
center_y = bbox['y'] + bbox['height'] / 2
|
||||
# 计算div容器中前面几个元素的高度总和
|
||||
# 获取div容器内的所有子元素
|
||||
child_elements = await div_container.query_selector_all('*')
|
||||
|
||||
logger.info(f"点击a标签中间位置: ({center_x:.1f}, {center_y:.1f})")
|
||||
# 计算前面几个元素的高度总和
|
||||
total_height = 0
|
||||
element_count = 0
|
||||
max_elements = 3 # 考虑前面3个元素的高度
|
||||
|
||||
for child in child_elements[:max_elements]:
|
||||
child_bbox = await child.bounding_box()
|
||||
if child_bbox:
|
||||
total_height += child_bbox['height']
|
||||
element_count += 1
|
||||
logger.debug(f"元素{element_count}高度: {child_bbox['height']:.1f}px")
|
||||
|
||||
# 如果无法获取子元素高度,使用div容器高度的一半
|
||||
if total_height == 0:
|
||||
center_y = bbox['y'] + bbox['height'] / 2
|
||||
logger.info("使用div容器高度的一半作为点击位置")
|
||||
else:
|
||||
# 计算点击位置:div容器的y坐标 + 前面元素高度总和
|
||||
center_y = bbox['y'] + total_height
|
||||
logger.info(f"使用前面{element_count}个元素高度总和作为点击位置")
|
||||
|
||||
center_x = bbox['x'] + bbox['width'] / 2
|
||||
|
||||
logger.info(f"点击位置: ({center_x:.1f}, {center_y:.1f})")
|
||||
|
||||
# 监听新窗口打开事件
|
||||
async with self.page.context.expect_page() as new_page_info:
|
||||
# 模拟点击a标签中间位置
|
||||
# 模拟点击计算出的位置
|
||||
await self.page.mouse.click(center_x, center_y)
|
||||
|
||||
# 获取新页面
|
||||
@@ -197,7 +216,7 @@ class ProductHuntScraper:
|
||||
new_page = await self.browser.new_page()
|
||||
|
||||
# 导航到制作人页面
|
||||
await new_page.goto(maker_link, wait_until="domcontentloaded", timeout=300000)
|
||||
await new_page.goto(maker_link, wait_until="domcontentloaded", timeout=3000000)
|
||||
|
||||
# 等待页面加载
|
||||
await new_page.wait_for_timeout(3000)
|
||||
|
||||
Reference in New Issue
Block a user