diff --git a/.trae/skills/pyinstaller-one/SKILL.md b/.trae/skills/pyinstaller-one/SKILL.md index b260fd9..346babf 100644 --- a/.trae/skills/pyinstaller-one/SKILL.md +++ b/.trae/skills/pyinstaller-one/SKILL.md @@ -1,6 +1,6 @@ --- name: pyinstaller-one -description: 当执行pyinstaller打包的时候,执行这个技能。 +description: 用pyinstaller打包,执行这个技能。 --- ## 元数据 diff --git a/README.md b/README.md index 825a9a9..3c7a7e2 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,12 @@ - **精确数值显示**: 实时显示当前平均情感分数和整体情绪标签 - **发光特效**: 采用发光效果增强视觉体验 -### 4. 股票数据实时监控 +### 3. 股票数据实时监控 - **上证指数监控**: 通过新浪财经API实时获取上证指数数据 -- **交易时间识别**: 自动识别交易时间,非交易时间自动跳过数据获取 -- **波形图展示**: 以时间轴形式展示股票价格走势,支持上午和下午交易时段 +- **波形图展示**: 以时间轴形式展示股票价格走势 - **数据点标记**: 在波形图上精确标记每个数据点的时间和价格 -### 5. 上证所网页截图功能 +### 4. 上证所网页截图功能 - **自动化截图**: 使用Playwright自动截取上证所网页指定区域 - **元素定位**: 通过XPath精确定位图表元素进行截图 - **定时更新**: 每5分钟自动更新一次截图 @@ -183,11 +182,6 @@ python main.py - **隐藏**: 隐藏到系统托盘 - **退出**: 完全退出程序 -### 交易时间说明 -- 上午交易时间: 9:30-11:30 -- 下午交易时间: 13:00-15:00 -- 非交易时间自动跳过数据获取,显示静态图表 - ## 高级功能 ### 批量分析模式 @@ -254,7 +248,6 @@ pyinstaller build.spec 2. **网络环境**: 确保网络连接稳定,以保证爬取和API调用的正常进行 3. **反爬虫**: 遵守目标网站的robots.txt协议和使用条款 4. **数据隐私**: 所有数据本地存储,不会上传到第三方服务器 -5. **交易时间**: 系统仅在交易时间获取实时股票数据 ## 开发扩展 diff --git a/pyinstaller_log.txt b/pyinstaller_log.txt new file mode 100644 index 0000000..4defcb0 --- /dev/null +++ b/pyinstaller_log.txt @@ -0,0 +1 @@ +Aborting build process due to attempt to collect multiple Qt bindings packages: attempting to run hook for 'PyQt5', while hook for 'PySide6' has already been run! PyInstaller does not support multiple Qt bindings packages in a frozen application - either ensure that the build environment has only one Qt bindings package installed, or exclude the extraneous bindings packages via the module exclusion mechanism (--exclude command-line option, or excludes list in the spec file). diff --git a/requirements.txt b/requirements.txt index 418135f..03cf950 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ beautifulsoup4>=4.12.0 lxml>=4.9.0 openai>=1.0.0 selenium>=4.15.0 +playwright>=1.40.0 diff --git a/spider.py b/spider.py index 8373b8a..f2d8cf2 100644 --- a/spider.py +++ b/spider.py @@ -281,6 +281,7 @@ class SpiderManager: """ from playwright.sync_api import sync_playwright import os + import sys url = "https://www.sse.com.cn/" xpath = "//div[contains(@class,'gray_bg')]//div[contains(@class,'col-md-7')]" @@ -288,12 +289,34 @@ class SpiderManager: logger.info(f"开始爬取上证所网页截图: {url}") logger.info(f"目标XPath: {xpath}") - output_dir = os.path.dirname(os.path.abspath(__file__)) + # 获取当前脚本目录 + if getattr(sys, 'frozen', False): + # 打包后的环境 + current_dir = os.path.dirname(sys.executable) + # 设置Playwright浏览器路径 + playwright_dir = os.path.join(current_dir, '_internal', 'ms-playwright') + logger.info(f"打包环境,Playwright浏览器路径: {playwright_dir}") + else: + # 开发环境 + current_dir = os.path.dirname(os.path.abspath(__file__)) + playwright_dir = None + logger.info(f"开发环境,当前目录: {current_dir}") + + output_dir = current_dir screenshot_path = os.path.join(output_dir, "sse_screenshot.png") try: with sync_playwright() as p: - browser = p.chromium.launch(headless=True) + # 设置浏览器路径 + browser_launch_options = { + 'headless': True + } + + if playwright_dir: + browser_launch_options['executable_path'] = os.path.join(playwright_dir, 'chromium-1091', 'chrome-win', 'chrome.exe') + logger.info(f"使用自定义浏览器路径: {browser_launch_options['executable_path']}") + + browser = p.chromium.launch(**browser_launch_options) page = browser.new_page() page.set_default_timeout(60000) @@ -330,62 +353,7 @@ class SpiderManager: except Exception as e: logger.error(f"爬取上证所截图失败: {e}") + logger.exception(e) # 记录详细异常 return "" - def fetch_sse_screenshot(self) -> str: - """ - 爬取上证所网页指定元素截图 - 返回截图文件路径 - """ - from playwright.sync_api import sync_playwright - import os - url = "https://www.sse.com.cn/" - xpath = "//div[contains(@class,'gray_bg')]//div[contains(@class,'col-md-7')]" - - logger.info(f"开始爬取上证所网页截图: {url}") - logger.info(f"目标XPath: {xpath}") - - output_dir = os.path.dirname(os.path.abspath(__file__)) - screenshot_path = os.path.join(output_dir, "sse_screenshot.png") - - try: - with sync_playwright() as p: - browser = p.chromium.launch(headless=True) - page = browser.new_page() - - page.set_default_timeout(60000) - logger.info("正在访问页面...") - page.goto(url, wait_until="networkidle") - - logger.info("等待页面加载完成...") - page.wait_for_load_state("domcontentloaded") - page.wait_for_timeout(5000) - - logger.info(f"查找XPath元素: {xpath}") - element = page.locator(f"xpath={xpath}") - - if element.count() > 0: - logger.info("✓ 找到目标元素") - - is_visible = element.is_visible() - logger.info(f"元素可见: {is_visible}") - - if not is_visible: - logger.info("元素不可见,尝试滚动到可见区域...") - element.scroll_into_view_if_needed() - page.wait_for_timeout(2000) - - logger.info(f"正在截取元素截图到: {screenshot_path}") - element.screenshot(path=screenshot_path) - logger.info("✓ 截屏成功") - browser.close() - return screenshot_path - else: - logger.warning("✗ 未找到目标元素") - browser.close() - return "" - - except Exception as e: - logger.error(f"爬取上证所截图失败: {e}") - return "" diff --git a/sse_screenshot.png b/sse_screenshot.png index 17d8763..124f8db 100644 Binary files a/sse_screenshot.png and b/sse_screenshot.png differ