feat(spider): 增强Playwright截图功能以支持打包环境
添加对打包环境的支持,自动检测运行环境并设置正确的浏览器路径;移除重复的截图方法,优化错误日志记录;更新 requirements.txt,添加 playwright 依赖。
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: pyinstaller-one
|
||||
description: 当执行pyinstaller打包的时候,执行这个技能。
|
||||
description: 使用 PyInstaller 打包时,执行这个技能。
|
||||
---
|
||||
|
||||
## 元数据
|
||||
|
||||
13
README.md
13
README.md
@@ -27,13 +27,12 @@
|
||||
- **精确数值显示**: 实时显示当前平均情感分数和整体情绪标签
|
||||
- **发光特效**: 采用发光效果增强视觉体验
|
||||
|
||||
### 4. 股票数据实时监控
|
||||
### 3. 股票数据实时监控
|
||||
- **上证指数监控**: 通过新浪财经API实时获取上证指数数据
|
||||
- **交易时间识别**: 自动识别交易时间,非交易时间自动跳过数据获取
|
||||
- **波形图展示**: 以时间轴形式展示股票价格走势,支持上午和下午交易时段
|
||||
- **波形图展示**: 以时间轴形式展示股票价格走势
|
||||
- **数据点标记**: 在波形图上精确标记每个数据点的时间和价格
|
||||
|
||||
### 5. 上证所网页截图功能
|
||||
### 4. 上证所网页截图功能
|
||||
- **自动化截图**: 使用Playwright自动截取上证所网页指定区域
|
||||
- **元素定位**: 通过XPath精确定位图表元素进行截图
|
||||
- **定时更新**: 每5分钟自动更新一次截图
|
||||
@@ -183,11 +182,6 @@ python main.py
|
||||
- **隐藏**: 隐藏到系统托盘
|
||||
- **退出**: 完全退出程序
|
||||
|
||||
### 交易时间说明
|
||||
- 上午交易时间: 9:30-11:30
|
||||
- 下午交易时间: 13:00-15:00
|
||||
- 非交易时间自动跳过数据获取,显示静态图表
|
||||
|
||||
## 高级功能
|
||||
|
||||
### 批量分析模式
|
||||
@@ -254,7 +248,6 @@ pyinstaller build.spec
|
||||
2. **网络环境**: 确保网络连接稳定,以保证爬取和API调用的正常进行
|
||||
3. **反爬虫**: 遵守目标网站的robots.txt协议和使用条款
|
||||
4. **数据隐私**: 所有数据本地存储,不会上传到第三方服务器
|
||||
5. **交易时间**: 系统仅在交易时间获取实时股票数据
|
||||
|
||||
## 开发扩展
|
||||
|
||||
|
||||
1
pyinstaller_log.txt
Normal file
1
pyinstaller_log.txt
Normal file
@@ -0,0 +1 @@
|
||||
Aborting build process due to attempt to collect multiple Qt bindings packages: attempting to run hook for 'PyQt5', while hook for 'PySide6' has already been run! PyInstaller does not support multiple Qt bindings packages in a frozen application - either ensure that the build environment has only one Qt bindings package installed, or exclude the extraneous bindings packages via the module exclusion mechanism (--exclude command-line option, or excludes list in the spec file).
|
||||
@@ -4,3 +4,4 @@ beautifulsoup4>=4.12.0
|
||||
lxml>=4.9.0
|
||||
openai>=1.0.0
|
||||
selenium>=4.15.0
|
||||
playwright>=1.40.0
|
||||
|
||||
84
spider.py
84
spider.py
@@ -281,6 +281,7 @@ class SpiderManager:
|
||||
"""
|
||||
from playwright.sync_api import sync_playwright
|
||||
import os
|
||||
import sys
|
||||
|
||||
url = "https://www.sse.com.cn/"
|
||||
xpath = "//div[contains(@class,'gray_bg')]//div[contains(@class,'col-md-7')]"
|
||||
@@ -288,12 +289,34 @@ class SpiderManager:
|
||||
logger.info(f"开始爬取上证所网页截图: {url}")
|
||||
logger.info(f"目标XPath: {xpath}")
|
||||
|
||||
output_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
# 获取当前脚本目录
|
||||
if getattr(sys, 'frozen', False):
|
||||
# 打包后的环境
|
||||
current_dir = os.path.dirname(sys.executable)
|
||||
# 设置Playwright浏览器路径
|
||||
playwright_dir = os.path.join(current_dir, '_internal', 'ms-playwright')
|
||||
logger.info(f"打包环境,Playwright浏览器路径: {playwright_dir}")
|
||||
else:
|
||||
# 开发环境
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
playwright_dir = None
|
||||
logger.info(f"开发环境,当前目录: {current_dir}")
|
||||
|
||||
output_dir = current_dir
|
||||
screenshot_path = os.path.join(output_dir, "sse_screenshot.png")
|
||||
|
||||
try:
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch(headless=True)
|
||||
# 设置浏览器路径
|
||||
browser_launch_options = {
|
||||
'headless': True
|
||||
}
|
||||
|
||||
if playwright_dir:
|
||||
browser_launch_options['executable_path'] = os.path.join(playwright_dir, 'chromium-1091', 'chrome-win', 'chrome.exe')
|
||||
logger.info(f"使用自定义浏览器路径: {browser_launch_options['executable_path']}")
|
||||
|
||||
browser = p.chromium.launch(**browser_launch_options)
|
||||
page = browser.new_page()
|
||||
|
||||
page.set_default_timeout(60000)
|
||||
@@ -330,62 +353,7 @@ class SpiderManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"爬取上证所截图失败: {e}")
|
||||
logger.exception(e) # 记录详细异常
|
||||
return ""
|
||||
|
||||
def fetch_sse_screenshot(self) -> str:
    """
    Capture a screenshot of one element on the Shanghai Stock Exchange home page.

    Launches headless Chromium through Playwright, loads the page, locates the
    target element by XPath and writes its screenshot to ``sse_screenshot.png``
    in the directory containing this source file.

    Returns:
        The screenshot file path on success, or an empty string when the
        target element is not found or any error occurs while scraping.
    """
    from playwright.sync_api import sync_playwright
    import os

    url = "https://www.sse.com.cn/"
    xpath = "//div[contains(@class,'gray_bg')]//div[contains(@class,'col-md-7')]"

    logger.info(f"开始爬取上证所网页截图: {url}")
    logger.info(f"目标XPath: {xpath}")

    output_dir = os.path.dirname(os.path.abspath(__file__))
    screenshot_path = os.path.join(output_dir, "sse_screenshot.png")

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.set_default_timeout(60000)

                logger.info("正在访问页面...")
                page.goto(url, wait_until="networkidle")

                logger.info("等待页面加载完成...")
                page.wait_for_load_state("domcontentloaded")
                # Fixed grace period after load before looking up the element.
                page.wait_for_timeout(5000)

                logger.info(f"查找XPath元素: {xpath}")
                matches = page.locator(f"xpath={xpath}")

                if matches.count() == 0:
                    logger.warning("✗ 未找到目标元素")
                    return ""

                logger.info("✓ 找到目标元素")

                # Locator actions are strict: with several matches,
                # is_visible()/screenshot() would raise. Pin to the first match.
                element = matches.first

                is_visible = element.is_visible()
                logger.info(f"元素可见: {is_visible}")

                if not is_visible:
                    logger.info("元素不可见,尝试滚动到可见区域...")
                    element.scroll_into_view_if_needed()
                    page.wait_for_timeout(2000)

                logger.info(f"正在截取元素截图到: {screenshot_path}")
                element.screenshot(path=screenshot_path)
                logger.info("✓ 截屏成功")
                return screenshot_path
            finally:
                # Close the browser on every path, including failures.
                browser.close()

    except Exception as e:
        # Best-effort contract: callers get "" on failure; keep the traceback
        # in the log so failures can be diagnosed.
        logger.exception(f"爬取上证所截图失败: {e}")
        return ""
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 62 KiB After Width: | Height: | Size: 59 KiB |
Reference in New Issue
Block a user