更新了抓取producthunt的代码
This commit is contained in:
Binary file not shown.
@@ -31,7 +31,7 @@ logger.add(sys.stderr, level="INFO", format="<green>{time:YYYY-MM-DD HH:mm:ss}</
|
||||
class ProductHuntScraperFull:
|
||||
"""全功能ProductHunt数据抓取器"""
|
||||
|
||||
def __init__(self, tophub_db_path=None, product_db_path=None, debug_port=9222, limit=10, skip_duplicates=True):
|
||||
def __init__(self, tophub_db_path=None, product_db_path=None, debug_port=9222, limit=0, skip_duplicates=True):
|
||||
"""
|
||||
初始化抓取器
|
||||
|
||||
@@ -68,12 +68,9 @@ class ProductHuntScraperFull:
|
||||
conn = sqlite3.connect(self.tophub_db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# 查询包含producthunt.com的链接
|
||||
if limit > 0:
|
||||
cursor.execute("SELECT url FROM articles WHERE url LIKE '%producthunt.com%' LIMIT ?", (limit,))
|
||||
else:
|
||||
cursor.execute("SELECT url FROM articles WHERE url LIKE '%producthunt.com%'")
|
||||
|
||||
# 查询包含producthunt.com的链接(去掉LIMIT限制)
|
||||
cursor.execute("SELECT url FROM articles WHERE url LIKE '%producthunt.com%'")
|
||||
|
||||
urls = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
conn.close()
|
||||
@@ -322,7 +319,7 @@ def parse_arguments():
|
||||
parser.add_argument("--tophub-db", help="tophub数据库路径", default=None)
|
||||
parser.add_argument("--product-db", help="产品数据库路径", default=None)
|
||||
parser.add_argument("--debug-port", type=int, help="Chrome调试端口", default=9222)
|
||||
parser.add_argument("--limit", type=int, help="抓取链接数量限制", default=10)
|
||||
parser.add_argument("--limit", type=int, help="抓取链接数量限制", default=0)
|
||||
parser.add_argument("--no-skip-duplicates", action="store_true", help="不跳过重复URL")
|
||||
parser.add_argument("--urls", nargs="+", help="指定要抓取的URL列表")
|
||||
parser.add_argument("--log-file", help="日志文件路径", default="producthunt_scraper.log")
|
||||
|
||||
@@ -114,7 +114,7 @@ class ProductHuntScraper:
|
||||
|
||||
# 等待页面标题包含"Product Hunt",最长等待300秒
|
||||
logger.info("等待页面标题包含'Product Hunt'...")
|
||||
max_wait_time = 300 # 最大等待时间(秒)
|
||||
max_wait_time = 60 # 最大等待时间(秒)
|
||||
wait_interval = 5 # 检查间隔(秒)
|
||||
waited_time = 0
|
||||
|
||||
@@ -129,6 +129,23 @@ class ProductHuntScraper:
|
||||
logger.success("Product Hunt网站已成功打开")
|
||||
return True
|
||||
|
||||
# 检查是否遇到Cloudflare验证
|
||||
if "Just a moment" in title or "请稍候" in title or "Checking your browser" in title:
|
||||
logger.info("遇到Cloudflare验证,等待验证完成...")
|
||||
await asyncio.sleep(10) # 等待10秒
|
||||
waited_time += 10
|
||||
continue
|
||||
|
||||
# 检查是否已成功加载页面内容
|
||||
try:
|
||||
# 尝试查找页面中的关键元素
|
||||
h1_element = await self.page.query_selector("h1")
|
||||
if h1_element:
|
||||
logger.success("检测到页面内容已加载")
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 等待一段时间后再次检查
|
||||
await asyncio.sleep(wait_interval)
|
||||
waited_time += wait_interval
|
||||
@@ -165,59 +182,231 @@ class ProductHuntScraper:
|
||||
# 记录点击制作人链接的行为
|
||||
await self.record_click("制作人链接", "点击制作人链接在当前窗口打开")
|
||||
|
||||
# 保存当前页面的URL,以便后续返回
|
||||
original_url = self.page.url
|
||||
logger.info(f"保存当前页面URL: {original_url}")
|
||||
|
||||
# 在当前页面导航到制作人链接
|
||||
logger.info(f"正在在当前窗口打开制作人链接: {maker_link}")
|
||||
await self.page.goto(maker_link, wait_until="domcontentloaded")
|
||||
|
||||
# 设置更长的超时时间来处理模态窗口
|
||||
try:
|
||||
await self.page.goto(maker_link, wait_until="domcontentloaded", timeout=60000)
|
||||
logger.success("页面导航成功")
|
||||
except Exception as e:
|
||||
logger.error(f"页面导航失败: {e}")
|
||||
# 尝试返回原始页面
|
||||
try:
|
||||
await self.page.goto(original_url, wait_until="domcontentloaded")
|
||||
logger.success(f"已返回原始页面: {original_url}")
|
||||
except Exception as return_error:
|
||||
logger.error(f"返回原始页面失败: {return_error}")
|
||||
return ""
|
||||
|
||||
# 等待页面加载
|
||||
await self.page.wait_for_load_state("networkidle")
|
||||
|
||||
# 等待title元素出现并包含产品名称(最长等待2分钟)
|
||||
logger.info("等待title元素出现并包含产品名称(最长等待2分钟)...")
|
||||
# 检查并处理可能的模态窗口
|
||||
try:
|
||||
# 等待title元素出现,最长等待2分钟
|
||||
await self.page.wait_for_selector("title", timeout=120000)
|
||||
logger.info("检查是否存在模态窗口...")
|
||||
modal_selectors = [
|
||||
"[role='dialog']",
|
||||
".modal",
|
||||
".modal-dialog",
|
||||
"[data-testid='modal']",
|
||||
"[class*='modal']",
|
||||
"[class*='overlay']",
|
||||
"[class*='dialog']",
|
||||
"[class*='popup']"
|
||||
]
|
||||
|
||||
# 检查title是否包含产品名称
|
||||
for selector in modal_selectors:
|
||||
try:
|
||||
modal_element = await self.page.query_selector(selector)
|
||||
if modal_element:
|
||||
logger.info(f"检测到模态窗口,选择器: {selector}")
|
||||
|
||||
# 尝试关闭模态窗口
|
||||
close_selectors = [
|
||||
"[aria-label='Close']",
|
||||
".close",
|
||||
".modal-close",
|
||||
"[data-testid='close']",
|
||||
"button:has-text('Close')",
|
||||
"button:has-text('关闭')",
|
||||
"button:has-text('X')"
|
||||
]
|
||||
|
||||
for close_selector in close_selectors:
|
||||
try:
|
||||
close_button = await modal_element.query_selector(close_selector)
|
||||
if close_button:
|
||||
await close_button.click()
|
||||
logger.success(f"已关闭模态窗口,使用选择器: {close_selector}")
|
||||
await self.page.wait_for_timeout(1000) # 等待关闭动画
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# 如果模态窗口仍然存在,尝试点击模态窗口外部关闭
|
||||
try:
|
||||
await self.page.mouse.click(10, 10) # 点击页面左上角
|
||||
logger.info("尝试点击页面外部关闭模态窗口")
|
||||
await self.page.wait_for_timeout(1000)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.warning(f"检查模态窗口时出错: {e}")
|
||||
|
||||
# 快速检查页面是否已加载
|
||||
logger.info("快速检查页面加载状态...")
|
||||
|
||||
# 立即尝试获取页面内容,不等待特定元素
|
||||
try:
|
||||
title_text = await self.page.title()
|
||||
logger.info(f"页面标题: {title_text}")
|
||||
|
||||
# 获取产品名称(从maker_text参数中获取)
|
||||
product_name = maker_text.strip() if maker_text else ""
|
||||
|
||||
if product_name and product_name.lower() in title_text.lower():
|
||||
logger.success(f"标题包含产品名称: {product_name}")
|
||||
else:
|
||||
logger.warning(f"标题不包含产品名称,产品名称: {product_name}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"等待title元素失败: {e}")
|
||||
logger.warning(f"获取页面标题失败: {e}")
|
||||
|
||||
# 再等待30秒,确保页面完全加载
|
||||
logger.info("再等待30秒,确保页面完全加载...")
|
||||
await self.page.wait_for_timeout(30000) # 等待30秒
|
||||
|
||||
# 提取制作人评论内容(XPath: //*[@id=\"comment-4597755\"]/div/div[2]/div/div/div)
|
||||
logger.info("正在提取制作人评论内容...")
|
||||
# 快速检查页面是否有内容
|
||||
try:
|
||||
# 使用XPath查找评论元素
|
||||
comment_element = await self.page.query_selector(
|
||||
'xpath=//*[@id="comment-4597755"]/div/div[2]/div/div/div'
|
||||
)
|
||||
if comment_element:
|
||||
maker_statement = (await comment_element.text_content()).strip()
|
||||
logger.info(f"制作人评论内容: {maker_statement[:200]}...")
|
||||
|
||||
return maker_statement
|
||||
else:
|
||||
logger.warning("未找到XPath为//*[@id=\"comment-4597755\"]/div/div[2]/div/div/div的元素")
|
||||
body_element = await self.page.query_selector("body")
|
||||
if body_element:
|
||||
body_text = await body_element.text_content()
|
||||
if len(body_text.strip()) > 10:
|
||||
logger.success("页面内容已加载")
|
||||
else:
|
||||
logger.warning("页面内容为空或过短")
|
||||
except Exception as e:
|
||||
logger.error(f"提取制作人评论内容失败: {e}")
|
||||
logger.warning(f"检查页面内容失败: {e}")
|
||||
|
||||
# 短暂等待确保DOM稳定
|
||||
logger.info("等待DOM稳定...")
|
||||
await self.page.wait_for_timeout(2000) # 等待2秒
|
||||
|
||||
# 保存模态窗口截图用于调试
|
||||
modal_screenshot = "modal_window_debug.png"
|
||||
await self.page.screenshot(path=modal_screenshot, full_page=True)
|
||||
logger.info(f"模态窗口调试截图已保存到: {modal_screenshot}")
|
||||
|
||||
# 首先检查页面内容,获取页面主要文本
|
||||
try:
|
||||
page_content = await self.page.content()
|
||||
logger.info("页面内容已获取")
|
||||
|
||||
# 检查页面是否包含常见的关键词
|
||||
keywords = ['comment', 'discussion', 'maker', 'creator', 'author', 'statement', 'description']
|
||||
found_keywords = [kw for kw in keywords if kw in page_content.lower()]
|
||||
if found_keywords:
|
||||
logger.info(f"页面包含关键词: {found_keywords}")
|
||||
else:
|
||||
logger.warning("页面未检测到常见关键词")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取页面内容失败: {e}")
|
||||
|
||||
# 提取制作人评论内容 - 针对模态窗口的多种选择器策略
|
||||
logger.info("正在提取制作人评论内容...")
|
||||
|
||||
# 策略1:尝试多种XPath选择器
|
||||
xpath_selectors = [
|
||||
# 新的主要选择器:包含prose、prose-format和richText类的div
|
||||
"//div[contains(@class, 'prose') and contains(@class, 'prose-format') and contains(@class, 'richText')]",
|
||||
# 备用选择器
|
||||
'//*[@id="comment-4597755"]/div/div[2]/div/div/div', # 原始选择器
|
||||
'//div[contains(@class, "comment")]//div[contains(@class, "text")]', # 通用评论选择器
|
||||
'//div[contains(@class, "modal")]//div[contains(@class, "content")]', # 模态窗口内容
|
||||
'//div[contains(@class, "dialog")]//div[contains(@class, "body")]', # 对话框内容
|
||||
'//section//div[contains(@class, "text")]', # section内的文本内容
|
||||
'//div[contains(@class, "launch")]//div[contains(@class, "description")]', # 发布描述
|
||||
'//article//div[contains(@class, "content")]', # 文章内容
|
||||
'//main//div[contains(@class, "text")]', # 主要内容区文本
|
||||
# 其他备用选择器
|
||||
"//div[contains(@class, 'styles_commentsContainer')]//div[contains(@class, 'styles_comment')]//div[contains(@class, 'styles_commentBody')]//p",
|
||||
"//div[contains(@class, 'comment')]//p",
|
||||
"//div[contains(@class, 'comments')]//p",
|
||||
]
|
||||
|
||||
for i, xpath in enumerate(xpath_selectors, 1):
|
||||
try:
|
||||
logger.info(f"尝试选择器 {i}/{len(xpath_selectors)}: {xpath}")
|
||||
comment_element = await self.page.query_selector(f'xpath={xpath}')
|
||||
if comment_element:
|
||||
maker_statement = (await comment_element.text_content()).strip()
|
||||
if maker_statement: # 确保有内容
|
||||
logger.success(f"使用选择器 {i} 成功提取制作人评论内容: {maker_statement[:200]}...")
|
||||
|
||||
# 提取完成后返回原始页面
|
||||
logger.info("提取完成,正在返回原始产品页面...")
|
||||
await self.page.goto(original_url, wait_until="domcontentloaded")
|
||||
logger.success(f"已成功返回原始页面: {original_url}")
|
||||
|
||||
return maker_statement
|
||||
else:
|
||||
logger.warning(f"选择器 {i} 提取的内容为空")
|
||||
except Exception as e:
|
||||
logger.warning(f"选择器 {i} 失败: {e}")
|
||||
|
||||
# 策略2:如果所有选择器都失败,尝试提取页面主要文本内容
|
||||
logger.info("所有选择器失败,尝试提取页面主要文本内容...")
|
||||
try:
|
||||
# 获取页面body文本
|
||||
body_element = await self.page.query_selector('body')
|
||||
if body_element:
|
||||
full_text = (await body_element.text_content()).strip()
|
||||
# 提取前500个字符作为制作人发言
|
||||
if len(full_text) > 100:
|
||||
maker_statement = full_text[:500]
|
||||
logger.info(f"提取页面主要文本内容: {maker_statement[:200]}...")
|
||||
|
||||
# 提取完成后返回原始页面
|
||||
logger.info("提取完成,正在返回原始产品页面...")
|
||||
await self.page.goto(original_url, wait_until="domcontentloaded")
|
||||
logger.success(f"已成功返回原始页面: {original_url}")
|
||||
|
||||
return maker_statement
|
||||
except Exception as e:
|
||||
logger.error(f"提取页面主要文本内容失败: {e}")
|
||||
|
||||
# 策略3:如果仍然失败,记录页面截图以便调试
|
||||
logger.warning("所有提取策略都失败,保存截图用于调试...")
|
||||
try:
|
||||
screenshot_path = "modal_debug_screenshot.png"
|
||||
await self.page.screenshot(path=screenshot_path, full_page=True)
|
||||
logger.info(f"模态窗口截图已保存到: {screenshot_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"保存截图失败: {e}")
|
||||
|
||||
# 即使未找到元素,也返回原始页面
|
||||
logger.info("正在返回原始产品页面...")
|
||||
await self.page.goto(original_url, wait_until="domcontentloaded")
|
||||
logger.success(f"已成功返回原始页面: {original_url}")
|
||||
|
||||
return ""
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"在当前窗口打开制作人链接失败: {e}")
|
||||
|
||||
# 保存当前页面截图用于调试
|
||||
try:
|
||||
debug_screenshot = "debug_maker_link_failure.png"
|
||||
await self.page.screenshot(path=debug_screenshot, full_page=True)
|
||||
logger.info(f"错误调试截图已保存到: {debug_screenshot}")
|
||||
except Exception as screenshot_error:
|
||||
logger.error(f"保存调试截图失败: {screenshot_error}")
|
||||
|
||||
# 发生异常时也尝试返回原始页面
|
||||
try:
|
||||
logger.info("发生异常,尝试返回原始产品页面...")
|
||||
await self.page.goto(original_url, wait_until="domcontentloaded")
|
||||
logger.success(f"已成功返回原始页面: {original_url}")
|
||||
except Exception as return_error:
|
||||
logger.error(f"返回原始页面失败: {return_error}")
|
||||
|
||||
return ""
|
||||
|
||||
async def _extract_maker_statement_direct_open(self, maker_link, maker_text):
|
||||
@@ -351,19 +540,30 @@ class ProductHuntScraper:
|
||||
maker_link = await a_element.get_attribute('href')
|
||||
|
||||
# 拼凑完整的URL
|
||||
if maker_link and not maker_link.startswith('http'):
|
||||
# 如果是相对路径,拼凑为完整URL
|
||||
base_url = "https://www.producthunt.com"
|
||||
if maker_link.startswith('/'):
|
||||
maker_link = base_url + maker_link
|
||||
if maker_link:
|
||||
if not maker_link.startswith('http'):
|
||||
# 如果是相对路径,拼凑为完整URL
|
||||
base_url = "https://www.producthunt.com"
|
||||
if maker_link.startswith('/'):
|
||||
maker_link = base_url + maker_link
|
||||
else:
|
||||
maker_link = base_url + '/' + maker_link
|
||||
|
||||
# 验证URL是否有效(不能只是根路径)
|
||||
if maker_link == "https://www.producthunt.com/" or maker_link == "https://www.producthunt.com":
|
||||
logger.warning(f"制作人链接无效,跳过提取: {maker_link}")
|
||||
product_info["maker_link"] = ""
|
||||
product_info["maker_statement"] = ""
|
||||
else:
|
||||
maker_link = base_url + '/' + maker_link
|
||||
|
||||
product_info["maker_link"] = maker_link
|
||||
logger.info(f"制作人链接: {maker_link}")
|
||||
|
||||
# 调用子函数在当前窗口中提取制作人发言
|
||||
product_info["maker_statement"] = await self.extract_maker_statement_from_current_window(maker_link, maker_text)
|
||||
product_info["maker_link"] = maker_link
|
||||
logger.info(f"制作人链接: {maker_link}")
|
||||
|
||||
# 调用子函数在当前窗口中提取制作人发言
|
||||
product_info["maker_statement"] = await self.extract_maker_statement_from_current_window(maker_link, maker_text)
|
||||
else:
|
||||
logger.warning("未获取到制作人链接")
|
||||
product_info["maker_link"] = ""
|
||||
product_info["maker_statement"] = ""
|
||||
else:
|
||||
logger.warning("未找到制作人链接的a标签")
|
||||
else:
|
||||
@@ -410,7 +610,7 @@ async def main():
|
||||
logger.info("开始ProductHunt数据抓取任务")
|
||||
|
||||
# 目标URL
|
||||
target_url = "https://www.producthunt.com/products/notion"
|
||||
target_url = "https://www.producthunt.com/products/palettebrain"
|
||||
|
||||
# 创建抓取器实例
|
||||
scraper = ProductHuntScraper(debug_port=9222)
|
||||
|
||||
7
product/product_info.json
Normal file
7
product/product_info.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"name": "Raycast",
|
||||
"introduction": "A collection of powerful productivity tools all within an extendable launcher. Fast, ergonomic and reliable.",
|
||||
"user_count": "17K followers",
|
||||
"maker_link": "https://www.producthunt.com/products/raycast/launches/product-hunt-for-raycast",
|
||||
"maker_statement": "Raycast for Windows"
|
||||
}
|
||||
Binary file not shown.
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"name": "Notion",
|
||||
"introduction": "Notion is an all-in-one workspace that combines note-taking, project management, and task organization. It allows users to create customized databases, documents, and calendars to streamline their personal and professional workflows.",
|
||||
"user_count": "15K followers",
|
||||
"maker_link": "https://www.producthunt.com/products/notion/launches/ai-meeting-notes-by-notion",
|
||||
"maker_statement": "Hey Product Hunt — I’m Frank, a product designer at Notion. Today, I’m excited to introduce you to our newest kid on the block: AI Meeting Notes — or as I like to call it, /meet.With AI Meeting Notes, you get perfect meeting memory in Notion. No bots. No app switching. Just a simple /meet command on any page or one click from your Notion Calendar.Why Notion? Because your meeting notes live right where you work — connected to your docs, projects, and team. No more copy-pasting, just instant answers, searchable history, and workflows that flow.We’re already seeing folks use it not only at work, but also at home, in therapy, even in deep conversations with partners. It’s still early — we’re just graduating from alpha — but we’re moving fast and building with heart.Try it in your next few meetings. Let us know how it goes — DMs open for feedback, bugs, ideas, anything.This is, after all, our Notion.— Frank"
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 371 KiB |
@@ -1,406 +0,0 @@
|
||||
2025-11-23 11:00:52.606 | INFO | __main__:run_scraping:229 - === 开始ProductHunt数据抓取 ===
|
||||
2025-11-23 11:00:52.607 | INFO | __main__:init_product_database:90 - 正在初始化产品数据库...
|
||||
2025-11-23 11:00:52.613 | SUCCESS | __main__:init_product_database:113 - 产品数据库初始化完成
|
||||
2025-11-23 11:00:52.613 | INFO | __main__:query_producthunt_urls:65 - 正在查询tophub_data.db数据库,限制: 10条
|
||||
2025-11-23 11:00:52.617 | SUCCESS | __main__:query_producthunt_urls:81 - 找到 10 个包含producthunt.com的链接
|
||||
2025-11-23 11:00:52.617 | INFO | __main__:run_scraping:244 - 找到 10 个ProductHunt链接
|
||||
2025-11-23 11:00:52.624 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:00:52.624 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:00:52.624 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:54.060 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:54.060 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:54.060 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:00:54.060 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/burner-2
|
||||
2025-11-23 11:00:54.061 | INFO | __main__:run_scraping:258 - URL已存在,跳过: https://www.producthunt.com/products/burner-2
|
||||
2025-11-23 11:00:54.061 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:00:54.062 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:00:54.062 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:54.697 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:54.697 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:54.697 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:00:54.697 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:00:54.698 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:00:54.698 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:55.333 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:55.333 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:55.333 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:00:55.333 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:00:55.334 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:00:55.334 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:55.956 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:55.956 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:55.956 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:00:55.957 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:00:55.958 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:00:55.958 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:56.595 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:56.595 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:56.595 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:00:56.595 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:00:56.596 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:00:56.596 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:57.200 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:57.200 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:57.201 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:00:57.201 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:00:57.202 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:00:57.202 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:57.824 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:57.824 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:57.824 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:00:57.825 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:00:57.826 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:00:57.826 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:58.451 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:58.451 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:58.452 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:00:58.452 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:00:58.453 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:00:58.453 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:00:59.070 | ERROR | playwright_get_data:connect_to_existing_chrome:61 - 连接Chrome失败: BrowserType.connect_over_cdp: connect ECONNREFUSED ::1:9222
|
||||
Call log:
|
||||
- <ws preparing> retrieving websocket url from http://localhost:9222
|
||||
|
||||
2025-11-23 11:00:59.070 | ERROR | __main__:scrape_product_info:200 - 连接Chrome失败,跳过此URL
|
||||
2025-11-23 11:00:59.070 | ERROR | __main__:run_scraping:276 - 抓取产品信息失败: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:00:59.071 | INFO | __main__:show_scraping_results:303 - === 抓取结果统计 ===
|
||||
2025-11-23 11:00:59.071 | INFO | __main__:show_scraping_results:304 - 成功抓取: 0 个产品
|
||||
2025-11-23 11:00:59.072 | INFO | __main__:show_scraping_results:305 - 跳过重复: 1 个链接
|
||||
2025-11-23 11:00:59.072 | INFO | __main__:show_scraping_results:306 - 抓取失败: 9 个链接
|
||||
2025-11-23 11:00:59.072 | INFO | __main__:show_scraping_results:307 - 数据库中的产品总数: 1
|
||||
2025-11-23 11:00:59.072 | INFO | __main__:show_scraping_results:310 - 最新抓取的产品:
|
||||
2025-11-23 11:00:59.072 | INFO | __main__:show_scraping_results:312 - - Burner: https://www.producthunt.com/products/burner-2
|
||||
2025-11-23 11:00:59.072 | SUCCESS | __main__:run_scraping:284 - === ProductHunt数据抓取完成 ===
|
||||
2025-11-23 11:01:18.968 | INFO | __main__:run_scraping:229 - === 开始ProductHunt数据抓取 ===
|
||||
2025-11-23 11:01:18.969 | INFO | __main__:init_product_database:90 - 正在初始化产品数据库...
|
||||
2025-11-23 11:01:18.970 | SUCCESS | __main__:init_product_database:113 - 产品数据库初始化完成
|
||||
2025-11-23 11:01:18.970 | INFO | __main__:query_producthunt_urls:65 - 正在查询tophub_data.db数据库,限制: 10条
|
||||
2025-11-23 11:01:18.970 | SUCCESS | __main__:query_producthunt_urls:81 - 找到 10 个包含producthunt.com的链接
|
||||
2025-11-23 11:01:18.970 | INFO | __main__:run_scraping:244 - 找到 10 个ProductHunt链接
|
||||
2025-11-23 11:01:18.973 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:01:18.973 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:01:18.974 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:01:19.626 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:01:19.626 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:01:21.582 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:01:21.672 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: Pixley AI: Pixley lets kids turn their ideas into cartoons in minutes | Product Hunt
|
||||
2025-11-23 11:01:21.672 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:01:21.672 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:01:21.672 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:01:21.673 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:01:21.724 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: Pixley AI
|
||||
2025-11-23 11:01:21.724 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:01:21.725 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:01:21.732 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: Pixley is the first platform that lets children turn their drawings and ideas into personalized, animated cartoons in minutes. Until now, making animation was slow, expensive, and impossible to person...
|
||||
2025-11-23 11:01:21.732 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:01:21.732 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:01:21.738 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 53 followers
|
||||
2025-11-23 11:01:21.738 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:01:21.738 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:01:41.743 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:01:41.751 | WARNING | playwright_get_data:extract_product_info:370 - 未找到XPath为//span[contains(@class, "absolute")]的元素
|
||||
2025-11-23 11:01:41.753 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:01:42.074 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:01:42.074 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: Pixley AI
|
||||
2025-11-23 11:01:42.080 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:01:42.093 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:01:42.094 | INFO | __main__:save_product_info:179 - 新增产品信息: Pixley AI
|
||||
2025-11-23 11:01:42.097 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: Pixley AI
|
||||
2025-11-23 11:01:42.098 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/burner-2
|
||||
2025-11-23 11:01:42.098 | INFO | __main__:run_scraping:258 - URL已存在,跳过: https://www.producthunt.com/products/burner-2
|
||||
2025-11-23 11:01:42.099 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:01:42.099 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:01:42.099 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:01:42.765 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:01:42.765 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:02:02.769 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:02:02.775 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: American Ratings Lead Magnet Portal: Get Your Verified A-I-R-S Number & Boost Global Credibility | Product Hunt
|
||||
2025-11-23 11:02:02.775 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:02:02.775 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:02:02.776 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:02:02.776 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:02:02.807 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: American Ratings Lead Magnet Portal
|
||||
2025-11-23 11:02:02.807 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:02:02.808 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:02:02.814 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: Build verified business credibility with the American Ratings Lead Magnet Portal — the trusted platform for authentic verification and global rating credentials. Get your A-I-R-S Number to showcase tr...
|
||||
2025-11-23 11:02:02.815 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:02:02.815 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:02:02.821 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 24 followers
|
||||
2025-11-23 11:02:02.821 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:02:02.821 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:02:22.834 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:02:22.842 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人链接 - 选择器: //span[contains(@class, "absolute")]/parent::a
|
||||
2025-11-23 11:02:22.852 | INFO | playwright_get_data:extract_product_info:363 - 制作人链接: https://www.producthunt.com/p/american-ratings-lead-magnet-portal/a-i-r-s-number-american-ratings-lead-magnet-webinar-channel-partner-credit-100k-25m
|
||||
2025-11-23 11:02:22.852 | INFO | playwright_get_data:record_click:75 - 记录点击: - 坐标(制作人链接, 点击制作人链接在当前窗口打开) - 选择器:
|
||||
2025-11-23 11:02:22.852 | INFO | playwright_get_data:extract_maker_statement_from_current_window:169 - 正在在当前窗口打开制作人链接: https://www.producthunt.com/p/american-ratings-lead-magnet-portal/a-i-r-s-number-american-ratings-lead-magnet-webinar-channel-partner-credit-100k-25m
|
||||
2025-11-23 11:02:55.175 | ERROR | playwright_get_data:extract_maker_statement_from_current_window:220 - 在当前窗口打开制作人链接失败: Timeout 30000ms exceeded.
|
||||
2025-11-23 11:02:55.176 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:02:55.513 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:02:55.514 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: American Ratings Lead Magnet Portal
|
||||
2025-11-23 11:02:55.519 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:02:55.529 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:02:55.532 | INFO | __main__:save_product_info:179 - 新增产品信息: American Ratings Lead Magnet Portal
|
||||
2025-11-23 11:02:55.535 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: American Ratings Lead Magnet Portal
|
||||
2025-11-23 11:02:55.536 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:02:55.537 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:02:55.537 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:02:56.193 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:02:56.194 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:02:59.528 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:02:59.549 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: Builder.io: The first AI agent for product, design, and code | Product Hunt
|
||||
2025-11-23 11:02:59.549 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:02:59.549 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:02:59.549 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:02:59.550 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:02:59.590 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: Builder.io
|
||||
2025-11-23 11:02:59.590 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:02:59.590 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:02:59.595 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: The first AI agent that unifies product, design, and code. It connects Slack, Jira, Figma, and your repo to turn ideas into production features. Edit visually with real code, sync designs bidirectiona...
|
||||
2025-11-23 11:02:59.595 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:02:59.595 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:02:59.600 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 1.9K followers
|
||||
2025-11-23 11:02:59.600 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:02:59.600 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:03:19.603 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:03:19.608 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人链接 - 选择器: //span[contains(@class, "absolute")]/parent::a
|
||||
2025-11-23 11:03:19.616 | INFO | playwright_get_data:extract_product_info:363 - 制作人链接: https://www.producthunt.com/products/builder-io/launches/fusion-1-0
|
||||
2025-11-23 11:03:19.616 | INFO | playwright_get_data:record_click:75 - 记录点击: - 坐标(制作人链接, 点击制作人链接在当前窗口打开) - 选择器:
|
||||
2025-11-23 11:03:19.616 | INFO | playwright_get_data:extract_maker_statement_from_current_window:169 - 正在在当前窗口打开制作人链接: https://www.producthunt.com/products/builder-io/launches/fusion-1-0
|
||||
2025-11-23 11:03:51.755 | ERROR | playwright_get_data:extract_maker_statement_from_current_window:220 - 在当前窗口打开制作人链接失败: Timeout 30000ms exceeded.
|
||||
=========================== logs ===========================
|
||||
"load" event fired
|
||||
============================================================
|
||||
2025-11-23 11:03:51.758 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:03:52.016 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:03:52.016 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: Builder.io
|
||||
2025-11-23 11:03:52.021 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:03:52.033 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:03:52.035 | INFO | __main__:save_product_info:179 - 新增产品信息: Builder.io
|
||||
2025-11-23 11:03:52.038 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: Builder.io
|
||||
2025-11-23 11:03:52.039 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:03:52.039 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:03:52.039 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:03:52.675 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:03:52.675 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:03:55.666 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:03:55.680 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: BeeBot for AirPods: Your social audio guide to the city | Product Hunt
|
||||
2025-11-23 11:03:55.680 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:03:55.680 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:03:55.681 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:03:55.681 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:03:55.728 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: BeeBot for AirPods
|
||||
2025-11-23 11:03:55.729 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:03:55.729 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:03:55.741 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: It’s like having that friend who knows everything that’s happening, except it whispers directly into your ears as you walk around. BeeBot gives you a few short updates a day about people, places, and ...
|
||||
2025-11-23 11:03:55.741 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:03:55.742 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:03:55.749 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 242 followers
|
||||
2025-11-23 11:03:55.749 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:03:55.749 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:04:15.761 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:04:15.768 | WARNING | playwright_get_data:extract_product_info:370 - 未找到XPath为//span[contains(@class, "absolute")]的元素
|
||||
2025-11-23 11:04:15.770 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:04:15.972 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:04:15.973 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: BeeBot for AirPods
|
||||
2025-11-23 11:04:15.979 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:04:15.988 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:04:15.991 | INFO | __main__:save_product_info:179 - 新增产品信息: BeeBot for AirPods
|
||||
2025-11-23 11:04:15.994 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: BeeBot for AirPods
|
||||
2025-11-23 11:04:15.994 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:04:15.995 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:04:15.996 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:04:16.640 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:04:16.641 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:04:29.367 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:04:29.448 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: iisee.me: Create your own AI generated expression grid | Product Hunt
|
||||
2025-11-23 11:04:29.448 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:04:29.449 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:04:29.449 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:04:29.449 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:04:29.521 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: iisee.me
|
||||
2025-11-23 11:04:29.521 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:04:29.522 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:04:29.528 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: A silly AI experiment that turns your photo into a grid of faces that track your mouse. Built in under 8 hours just for fun....
|
||||
2025-11-23 11:04:29.528 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:04:29.528 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:04:29.534 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 172 followers
|
||||
2025-11-23 11:04:29.535 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:04:29.535 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:04:49.544 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:04:49.552 | WARNING | playwright_get_data:extract_product_info:370 - 未找到XPath为//span[contains(@class, "absolute")]的元素
|
||||
2025-11-23 11:04:49.553 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:04:49.765 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:04:49.765 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: iisee.me
|
||||
2025-11-23 11:04:49.769 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:04:49.781 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:04:49.783 | INFO | __main__:save_product_info:179 - 新增产品信息: iisee.me
|
||||
2025-11-23 11:04:49.786 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: iisee.me
|
||||
2025-11-23 11:04:49.786 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:04:49.787 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:04:49.787 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:04:50.463 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:04:50.463 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:04:51.994 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:04:52.011 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: Melodic Mind: Create, learn, and grow as a musician | Product Hunt
|
||||
2025-11-23 11:04:52.011 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:04:52.011 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:04:52.012 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:04:52.012 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:04:52.039 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: Melodic Mind
|
||||
2025-11-23 11:04:52.039 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:04:52.039 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:04:52.047 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: Melodic Mind is an all-in-one music superapp built to help you create, learn, and grow as a musician — no matter your level. It has 20+ different apps that solve every need you have and help you on yo...
|
||||
2025-11-23 11:04:52.048 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:04:52.048 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:04:52.053 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 159 followers
|
||||
2025-11-23 11:04:52.053 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:04:52.053 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:05:12.061 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:05:12.065 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人链接 - 选择器: //span[contains(@class, "absolute")]/parent::a
|
||||
2025-11-23 11:05:12.074 | INFO | playwright_get_data:extract_product_info:363 - 制作人链接: https://www.producthunt.com/p/melodic-mind-2/q-a-4
|
||||
2025-11-23 11:05:12.074 | INFO | playwright_get_data:record_click:75 - 记录点击: - 坐标(制作人链接, 点击制作人链接在当前窗口打开) - 选择器:
|
||||
2025-11-23 11:05:12.075 | INFO | playwright_get_data:extract_maker_statement_from_current_window:169 - 正在在当前窗口打开制作人链接: https://www.producthunt.com/p/melodic-mind-2/q-a-4
|
||||
2025-11-23 11:05:15.198 | INFO | playwright_get_data:extract_maker_statement_from_current_window:176 - 等待title元素出现并包含产品名称(最长等待2分钟)...
|
||||
2025-11-23 11:07:15.214 | ERROR | playwright_get_data:extract_maker_statement_from_current_window:194 - 等待title元素失败: Page.wait_for_selector: Timeout 120000ms exceeded.
|
||||
Call log:
|
||||
- waiting for locator("title") to be visible
|
||||
239 × locator resolved to hidden <title>Q&A : Melodic Mind Discussion Forums | Product Hu…</title>
|
||||
|
||||
2025-11-23 11:07:15.214 | INFO | playwright_get_data:extract_maker_statement_from_current_window:197 - 再等待30秒,确保页面完全加载...
|
||||
2025-11-23 11:07:45.227 | INFO | playwright_get_data:extract_maker_statement_from_current_window:201 - 正在提取制作人评论内容...
|
||||
2025-11-23 11:07:45.231 | WARNING | playwright_get_data:extract_maker_statement_from_current_window:213 - 未找到XPath为//*[@id="comment-4597755"]/div/div[2]/div/div/div的元素
|
||||
2025-11-23 11:07:45.233 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:07:45.476 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:07:45.479 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: Melodic Mind
|
||||
2025-11-23 11:07:45.483 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:07:45.495 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:07:45.496 | INFO | __main__:save_product_info:179 - 新增产品信息: Melodic Mind
|
||||
2025-11-23 11:07:45.499 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: Melodic Mind
|
||||
2025-11-23 11:07:45.499 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:07:45.500 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:07:45.500 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:07:46.146 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:07:46.146 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:07:49.097 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:07:49.112 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: agor: Orchestrate multiple AI coding agents with your team | Product Hunt
|
||||
2025-11-23 11:07:49.112 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:07:49.113 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:07:49.113 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:07:49.113 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:07:49.185 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: agor
|
||||
2025-11-23 11:07:49.186 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:07:49.186 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:07:49.191 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: Next-gen agent orchestration for AI coding. Multiplayer workspace for Claude Code, Codex, and Gemini....
|
||||
2025-11-23 11:07:49.191 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:07:49.191 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:07:49.199 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 133 followers
|
||||
2025-11-23 11:07:49.199 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:07:49.200 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:08:09.216 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:08:09.223 | WARNING | playwright_get_data:extract_product_info:370 - 未找到XPath为//span[contains(@class, "absolute")]的元素
|
||||
2025-11-23 11:08:09.226 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:08:09.428 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:08:09.428 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: agor
|
||||
2025-11-23 11:08:09.433 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:08:09.442 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:08:09.444 | INFO | __main__:save_product_info:179 - 新增产品信息: agor
|
||||
2025-11-23 11:08:09.447 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: agor
|
||||
2025-11-23 11:08:09.447 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:08:09.448 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:08:09.448 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:08:10.097 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:08:10.097 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:08:11.298 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:08:11.306 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: QuiteInbox: Take back control of your inbox | Product Hunt
|
||||
2025-11-23 11:08:11.307 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:08:11.308 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:08:11.308 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:08:11.308 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:08:11.337 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: QuiteInbox
|
||||
2025-11-23 11:08:11.338 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:08:11.338 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:08:11.344 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: Unsubscribe from unwanted emails in seconds. No servers. No tracking. Everything happens locally in your browser. 100% free and open source....
|
||||
2025-11-23 11:08:11.344 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:08:11.345 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:08:11.354 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 149 followers
|
||||
2025-11-23 11:08:11.355 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:08:11.355 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:08:31.367 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:08:31.370 | WARNING | playwright_get_data:extract_product_info:370 - 未找到XPath为//span[contains(@class, "absolute")]的元素
|
||||
2025-11-23 11:08:31.372 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:08:31.590 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:08:31.590 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: QuiteInbox
|
||||
2025-11-23 11:08:31.595 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:08:31.604 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:08:31.607 | INFO | __main__:save_product_info:179 - 新增产品信息: QuiteInbox
|
||||
2025-11-23 11:08:31.610 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: QuiteInbox
|
||||
2025-11-23 11:08:31.610 | INFO | __main__:run_scraping:254 - 处理URL: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:08:31.611 | INFO | __main__:scrape_product_info:192 - 开始抓取: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:08:31.611 | INFO | playwright_get_data:connect_to_existing_chrome:30 - 正在连接到Chrome远程调试端口 9222
|
||||
2025-11-23 11:08:32.245 | SUCCESS | playwright_get_data:connect_to_existing_chrome:57 - 成功连接到Chrome浏览器
|
||||
2025-11-23 11:08:32.246 | INFO | playwright_get_data:navigate_to_producthunt:111 - 正在访问: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:08:33.776 | INFO | playwright_get_data:navigate_to_producthunt:116 - 等待页面标题包含'Product Hunt'...
|
||||
2025-11-23 11:08:33.813 | INFO | playwright_get_data:navigate_to_producthunt:124 - 当前页面标题: Everywhere: Every moment, Every place. Your AI: Everywhere | Product Hunt
|
||||
2025-11-23 11:08:33.813 | SUCCESS | playwright_get_data:navigate_to_producthunt:128 - 页面标题已包含'Product Hunt',等待时间: 0秒
|
||||
2025-11-23 11:08:33.813 | SUCCESS | playwright_get_data:navigate_to_producthunt:129 - Product Hunt网站已成功打开
|
||||
2025-11-23 11:08:33.813 | INFO | playwright_get_data:extract_product_info:291 - 正在提取产品名称...
|
||||
2025-11-23 11:08:33.813 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品名称 - 选择器: //h1
|
||||
2025-11-23 11:08:33.897 | INFO | playwright_get_data:extract_product_info:297 - 产品名称: Everywhere
|
||||
2025-11-23 11:08:33.897 | INFO | playwright_get_data:extract_product_info:304 - 正在提取产品简介...
|
||||
2025-11-23 11:08:33.897 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 产品简介 - 选择器: //*[@class="relative text-16 font-normal text-gray-700"]//div
|
||||
2025-11-23 11:08:33.904 | INFO | playwright_get_data:extract_product_info:310 - 产品简介: Everywhere is dedicated to liberating AI from browser tabs and standalone apps, making it a ubiquitous, native capability of your operating system. We believe true productivity gains stem from the sea...
|
||||
2025-11-23 11:08:33.904 | INFO | playwright_get_data:extract_product_info:317 - 正在提取用户数...
|
||||
2025-11-23 11:08:33.904 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 用户数 - 选择器: //*[@class="flex flex-row gap-2"]//div/div[2]/span/p
|
||||
2025-11-23 11:08:33.911 | INFO | playwright_get_data:extract_product_info:323 - 用户数: 204 followers
|
||||
2025-11-23 11:08:33.912 | INFO | playwright_get_data:extract_product_info:330 - 正在提取制作人发言链接...
|
||||
2025-11-23 11:08:33.912 | INFO | playwright_get_data:extract_product_info:333 - 等待页面元素加载...
|
||||
2025-11-23 11:08:53.915 | INFO | playwright_get_data:record_dom_selection:86 - 记录DOM选取: 制作人span标签 - 选择器: //span[contains(@class, "absolute")]
|
||||
2025-11-23 11:08:53.920 | WARNING | playwright_get_data:extract_product_info:370 - 未找到XPath为//span[contains(@class, "absolute")]的元素
|
||||
2025-11-23 11:08:53.921 | INFO | playwright_get_data:extract_product_info:384 - 产品信息已保存到临时文件: temp_product_info.txt
|
||||
2025-11-23 11:08:54.140 | INFO | playwright_get_data:extract_product_info:389 - 页面截图已保存到: product_screenshot.png
|
||||
2025-11-23 11:08:54.140 | SUCCESS | __main__:scrape_product_info:214 - 成功提取产品信息: Everywhere
|
||||
2025-11-23 11:08:54.145 | INFO | playwright_get_data:close:401 - 浏览器连接已关闭
|
||||
2025-11-23 11:08:54.155 | INFO | playwright_get_data:close:405 - Playwright实例已关闭
|
||||
2025-11-23 11:08:54.158 | INFO | __main__:save_product_info:179 - 新增产品信息: Everywhere
|
||||
2025-11-23 11:08:54.162 | SUCCESS | __main__:run_scraping:270 - 成功保存产品信息: Everywhere
|
||||
2025-11-23 11:08:54.163 | INFO | __main__:show_scraping_results:303 - === 抓取结果统计 ===
|
||||
2025-11-23 11:08:54.163 | INFO | __main__:show_scraping_results:304 - 成功抓取: 9 个产品
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:305 - 跳过重复: 1 个链接
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:306 - 抓取失败: 0 个链接
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:307 - 数据库中的产品总数: 10
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:310 - 最新抓取的产品:
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:312 - - Everywhere: https://www.producthunt.com/products/everywhere
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:312 - - QuiteInbox: https://www.producthunt.com/products/quiteinbox
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:312 - - agor: https://www.producthunt.com/products/agor
|
||||
2025-11-23 11:08:54.164 | INFO | __main__:show_scraping_results:312 - - Melodic Mind: https://www.producthunt.com/products/melodic-mind-2
|
||||
2025-11-23 11:08:54.165 | INFO | __main__:show_scraping_results:312 - - iisee.me: https://www.producthunt.com/products/iisee-me
|
||||
2025-11-23 11:08:54.165 | INFO | __main__:show_scraping_results:312 - - BeeBot for AirPods: https://www.producthunt.com/products/beebot-for-airpods
|
||||
2025-11-23 11:08:54.165 | INFO | __main__:show_scraping_results:312 - - Builder.io: https://www.producthunt.com/products/builder-io
|
||||
2025-11-23 11:08:54.165 | INFO | __main__:show_scraping_results:312 - - American Ratings Lead Magnet Portal: https://www.producthunt.com/products/american-ratings-lead-magnet-portal
|
||||
2025-11-23 11:08:54.165 | INFO | __main__:show_scraping_results:312 - - Pixley AI: https://www.producthunt.com/products/pixley-ai
|
||||
2025-11-23 11:08:54.165 | INFO | __main__:show_scraping_results:312 - - Burner: https://www.producthunt.com/products/burner-2
|
||||
2025-11-23 11:08:54.165 | SUCCESS | __main__:run_scraping:284 - === ProductHunt数据抓取完成 ===
|
||||
@@ -1,11 +0,0 @@
|
||||
=== Product Hunt 产品信息 ===
|
||||
|
||||
产品名称: Everywhere
|
||||
|
||||
产品简介: Everywhere is dedicated to liberating AI from browser tabs and standalone apps, making it a ubiquitous, native capability of your operating system. We believe true productivity gains stem from the seamless integration of AI with your current tasks. Unlike conventional tools like ChatGPT, Everywhere perceives and understands any content on your screen in real-time. No need to screenshot, copy, or switch apps—simply use a hotkey to get the help you need, right where you are.
|
||||
|
||||
制作人发言: 未获取
|
||||
|
||||
用户数: 204 followers
|
||||
|
||||
提取时间: 2025-11-23 11:08:53
|
||||
Reference in New Issue
Block a user