# Ocean Engine Trend Insight (巨量算数): area guide, scenic-spot data
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from loguru import logger
import time
import json
import os

# Configure logging: additionally write INFO+ records to a rotating file.
logger.add("jusuan_scraper.log", rotation="10 MB", level="INFO")

# Default page to scrape and default output path (the values the script
# originally hard-coded inside the function).
DEFAULT_TARGET_URL = (
    "https://trendinsight.oceanengine.com/area"
    "?dates=daily-20251112_weekly-20251109_monthly-202510"
    "&area=%5B%2211%22%5D&category_id=3&rankStyle=monthly"
)
DEFAULT_OUTPUT_FILE = "jusuan_scenic_spots_data.json"


def _connect_driver(chrome_options):
    """Return a Chrome WebDriver built from *chrome_options*.

    Three strategies are tried in order, each only if the previous one failed:

    1. Selenium's default driver resolution.
    2. A driver installed by ``webdriver-manager`` (optional dependency,
       imported lazily so the script still runs without it).
    3. A short list of common ``chromedriver.exe`` locations on Windows.

    Raises:
        RuntimeError: if none of the strategies produced a driver.
    """
    # Strategy 1: default Chrome driver.
    try:
        driver = webdriver.Chrome(options=chrome_options)
        logger.info("使用默认Chrome驱动连接成功")
        return driver
    except Exception as e:
        logger.warning(f"使用默认Chrome驱动失败: {e}")

    # Strategy 2: let webdriver-manager download/locate the driver.
    try:
        from webdriver_manager.chrome import ChromeDriverManager

        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=chrome_options)
        logger.info("使用webdriver-manager连接成功")
        return driver
    except Exception as e2:
        logger.warning(f"使用webdriver-manager失败: {e2}")

    # Strategy 3: probe well-known ChromeDriver paths.
    common_paths = [
        r"C:\chromedriver\chromedriver.exe",
        r"C:\Program Files\Google\Chrome\Application\chromedriver.exe",
        r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe",
        os.path.join(os.getcwd(), "chromedriver.exe"),
    ]
    for path in common_paths:
        if not os.path.exists(path):
            continue
        try:
            driver = webdriver.Chrome(service=Service(path), options=chrome_options)
            logger.info(f"使用路径 {path} 连接成功")
            return driver
        except Exception as e3:
            logger.warning(f"使用路径 {path} 失败: {e3}")

    raise RuntimeError("所有连接Chrome浏览器的方法都失败了")


def _extract_rows(rows):
    """Build one record per element in *rows* that exposes all three fields.

    For every element the scenic-spot name, category and heat index are looked
    up via CSS attribute-substring selectors. Elements for which any lookup
    fails are logged and skipped. The ``序号`` value is the 1-based position of
    the element within *rows*, so skipped elements leave gaps in the numbering.
    """
    scraped_data = []
    for index, row in enumerate(rows, start=1):
        try:
            # Scenic-spot name (class contains "poiTitle-").
            poi_name = row.find_element(
                By.CSS_SELECTOR, '[class*="poiTitle-"]'
            ).text.strip()
            # Scenic-spot category (class contains "categoryIconBox-").
            category = row.find_element(
                By.CSS_SELECTOR, '[class*="categoryIconBox-"]'
            ).text.strip()
            # Heat index value (class contains "numberValue-").
            heat_index = row.find_element(
                By.CSS_SELECTOR, '[class*="numberValue-"]'
            ).text.strip()
        except Exception as e:
            logger.error(f"处理第 {index} 行时出错: {e}")
            continue

        scraped_data.append(
            {
                "序号": index,
                "景区名称": poi_name,
                "景区分类": category,
                "热度指数": heat_index,
            }
        )
        logger.info(f"抓取第 {index} 条数据: {poi_name} | {category} | {heat_index}")
    return scraped_data


def scrape_jusuan_data(target_url=DEFAULT_TARGET_URL, output_file=DEFAULT_OUTPUT_FILE):
    """Scrape scenic-spot data from an Ocean Engine Trend Insight page.

    Attaches to an already running Chrome instance through its remote
    debugging port (``localhost:9222``), loads *target_url*, waits for the
    table body to appear, extracts the rows and writes them to *output_file*
    as UTF-8 JSON.

    Args:
        target_url: Page to open. Defaults to the originally hard-coded URL.
        output_file: Path of the JSON file to write.

    Returns:
        The list of scraped records (possibly empty) on success, or ``None``
        if any step raised; the error is logged rather than propagated.
    """
    # Bound before the try block so the finally clause can always inspect it,
    # even when option setup itself fails.
    driver = None
    try:
        # Chrome options: attach to the debugging port (must match the port
        # given on the Chrome command line).
        chrome_options = Options()
        chrome_options.add_experimental_option("debuggerAddress", "localhost:9222")

        logger.info("正在连接到Chrome浏览器...")
        driver = _connect_driver(chrome_options)

        # Open the target page.
        logger.info(f"正在访问网页: {target_url}")
        driver.get(target_url)

        # Wait (up to 15 s) until the table body is present in the DOM.
        logger.info("等待页面加载完成...")
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "byted-table-body"))
        )

        table_body = driver.find_element(By.CLASS_NAME, "byted-table-body")
        logger.info("找到表格主体,开始抓取数据...")

        # NOTE(review): this matches every descendant <div> of the table body,
        # not only row containers, so nested wrappers may be visited too.
        # Confirm against the live DOM and tighten the selector if needed.
        rows = table_body.find_elements(By.TAG_NAME, "div")
        logger.info(f"找到 {len(rows)} 行数据")

        scraped_data = _extract_rows(rows)

        # Persist the result as JSON, keeping non-ASCII text readable.
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(scraped_data, f, ensure_ascii=False, indent=2)

        logger.info(f"数据抓取完成,共 {len(scraped_data)} 条记录,已保存到 {output_file}")

        # Log the first five records as a preview.
        logger.info("前5条数据预览:")
        for position, data in enumerate(scraped_data[:5], start=1):
            logger.info(
                f"{position}. {data['景区名称']} | {data['景区分类']} | {data['热度指数']}"
            )

        return scraped_data

    except Exception as e:
        logger.error(f"抓取过程中发生错误: {str(e)}")
        return None

    finally:
        # Release the WebDriver session. The original author's note says this
        # is meant to drop the connection without closing the browser itself.
        # Cleanup stays best-effort: a failure here must not mask the result.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                logger.debug(f"关闭WebDriver连接时出错: {quit_error}")


if __name__ == "__main__":
    logger.info("开始执行巨量算数景区数据抓取...")
    logger.info("请确保Chrome浏览器已通过以下命令启动:")
    logger.info('"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" --remote-debugging-port=9222 --user-data-dir="C:\\tmp"')

    result = scrape_jusuan_data()

    if result:
        logger.info("抓取任务完成")
    else:
        # Reached both when scraping raised (None) and when no rows were found.
        logger.error("抓取任务失败")
        logger.info("请尝试安装webdriver-manager: pip install webdriver-manager")
        logger.info("或者手动下载ChromeDriver并放在系统PATH中")