diff --git a/README.md b/README.md
index d31b424..5804fb8 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,11 @@
- 🎯 **智能区域检测**:使用灰度阈值 + 连续行判定算法,自动识别内容区块(div)和空白间隔
- 📜 **自动滚动截屏**:根据内容高度自动计算滚动距离,连续截屏
- 🔤 **OCR文字识别**:支持 Umi-OCR 和自定义HTTP OCR服务
+- 🖥️ **图形界面**:美观的PySide6 GUI界面,操作更简单
- ⌨️ **热键触发**:按 `Ctrl+F9` 快速启动
- 🖱️ **框选区域**:拖动鼠标选择截图区域
- 🛑 **智能停止**:检测到重复内容时自动停止
+- 📱 **系统托盘**:最小化到托盘,不占用任务栏
## 适用场景
@@ -42,6 +44,7 @@ pip install -r requirements.txt
- mouse >= 0.7.1
- requests >= 2.31.0
- loguru >= 0.7.0
+- pyside6 >= 6.5.0
### 3. 安装OCR引擎(二选一)
@@ -59,7 +62,22 @@ pip install -r requirements.txt
## 使用方法
-### 启动程序
+### 图形界面方式(推荐)
+
+```bash
+python gui.py
+```
+
+界面功能:
+- **开始截屏**按钮:启动截屏OCR流程
+- **停止**按钮:手动停止当前任务
+- **清空日志**按钮:清空日志显示区域
+- **日志显示**:彩色日志输出,带时间戳
+- **进度条**:显示当前任务进度
+- **状态标签**:显示当前运行状态
+- **系统托盘**:关闭窗口会最小化到托盘
+
+### 命令行方式
```bash
python main.py
@@ -67,8 +85,10 @@ python main.py
### 操作流程
-1. **等待热键**:程序启动后会显示 `等待热键 Ctrl+F9 启动...`
-2. **触发截屏**:按 `Ctrl+F9`
+1. **启动程序**:运行 `python gui.py` 或 `python main.py`
+2. **触发截屏**:
+   - GUI方式:点击「开始截屏」按钮
+   - 命令行:按 `Ctrl+F9`
3. **检查服务**:程序会检查OCR服务是否运行
4. **框选区域**:按住鼠标左键拖动,选择要截图的区域
5. **自动处理**:程序会自动:
@@ -138,7 +158,8 @@ class Config:
```
long-screen-cut/
-├── main.py # 主程序
+├── main.py # 主程序(命令行版)
+├── gui.py # 图形界面程序(PySide6)
├── umi_ocr_client.py # Umi-OCR HTTP客户端
├── ocr_server_example.py # OCR服务示例(Flask)
├── requirements.txt # Python依赖
diff --git a/gui.py b/gui.py
new file mode 100644
index 0000000..f75beae
--- /dev/null
+++ b/gui.py
@@ -0,0 +1,582 @@
+"""
+滚动截屏OCR工具 - PySide6 GUI界面
+美观现代的界面设计
+"""
+
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+from datetime import datetime
+
+from PySide6.QtWidgets import (
+ QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
+ QPushButton, QTextEdit, QLabel, QFrame, QProgressBar,
+ QSystemTrayIcon, QMenu, QStyle
+)
+from PySide6.QtCore import Qt, QThread, Signal, QTimer, QSize
+from PySide6.QtGui import QFont, QIcon, QColor, QPalette, QFontDatabase
+
+from loguru import logger
+
+# 导入核心逻辑
+from main import ScrollCaptureOCR, Config
+
+
+class LogHandler:
+ """日志处理器,将日志输出到GUI"""
+ def __init__(self, signal):
+ self.signal = signal
+
+ def write(self, message):
+ if message.strip():
+ self.signal.emit(message.strip())
+
+ def flush(self):
+ pass
+
+
+class CaptureWorker(QThread):
+ """后台工作线程,执行截屏OCR任务"""
+ log_signal = Signal(str)
+ progress_signal = Signal(int)
+ status_signal = Signal(str)
+ finished_signal = Signal()
+ error_signal = Signal(str)
+
+ def __init__(self, scroll_capture: ScrollCaptureOCR):
+ super().__init__()
+ self.scroll_capture = scroll_capture
+ self.is_running = False
+
+ def run(self):
+ """运行截屏任务"""
+ self.is_running = True
+ try:
+ self.status_signal.emit("运行中")
+
+ # 重置状态
+ self.scroll_capture.previous_ocr_result = []
+ self.scroll_capture.scroll_count = 0
+ self.scroll_capture.all_results = []
+
+ # 循环处理
+ while self.is_running and self.scroll_capture.process_once():
+ progress = min(self.scroll_capture.scroll_count * 10, 90)
+ self.progress_signal.emit(progress)
+ self.log_signal.emit(f"第 {self.scroll_capture.scroll_count} 次截屏完成")
+
+ # 保存最终结果
+ if self.scroll_capture.all_results:
+ self.scroll_capture.save_final_result()
+ self.log_signal.emit(f"✓ 共处理 {len(self.scroll_capture.all_results)} 次截屏")
+
+ self.progress_signal.emit(100)
+ self.status_signal.emit("完成")
+
+ except Exception as e:
+ self.error_signal.emit(str(e))
+ self.status_signal.emit("错误")
+ finally:
+ self.is_running = False
+ self.finished_signal.emit()
+
+ def stop(self):
+ """停止任务"""
+ self.is_running = False
+ self.status_signal.emit("已停止")
+
+
+class ModernButton(QPushButton):
+ """现代风格按钮"""
+ def __init__(self, text, parent=None, primary=True):
+ super().__init__(text, parent)
+ self.primary = primary
+ self.setMinimumHeight(40)
+ self.setFont(QFont("Microsoft YaHei", 11, QFont.Bold if primary else QFont.Normal))
+ self.setCursor(Qt.PointingHandCursor)
+ self.update_style()
+
+ def update_style(self):
+ if self.primary:
+ self.setStyleSheet("""
+ QPushButton {
+ background-color: #4CAF50;
+ color: white;
+ border: none;
+ border-radius: 8px;
+ padding: 10px 30px;
+ }
+ QPushButton:hover {
+ background-color: #45a049;
+ }
+ QPushButton:pressed {
+ background-color: #3d8b40;
+ }
+ QPushButton:disabled {
+ background-color: #cccccc;
+ color: #666666;
+ }
+ """)
+ else:
+ self.setStyleSheet("""
+ QPushButton {
+ background-color: #f5f5f5;
+ color: #333333;
+ border: 2px solid #dddddd;
+ border-radius: 8px;
+ padding: 10px 30px;
+ }
+ QPushButton:hover {
+ background-color: #e8e8e8;
+ border-color: #cccccc;
+ }
+ QPushButton:pressed {
+ background-color: #d8d8d8;
+ }
+ """)
+
+
+class LogTextEdit(QTextEdit):
+ """带样式的日志显示框"""
+ def __init__(self, parent=None):
+ super().__init__(parent)
+ self.setReadOnly(True)
+ self.setFont(QFont("Consolas", 10))
+ self.setStyleSheet("""
+ QTextEdit {
+ background-color: #1e1e1e;
+ color: #d4d4d4;
+ border: 1px solid #3e3e3e;
+ border-radius: 8px;
+ padding: 10px;
+ }
+ """)
+ self.setPlaceholderText("日志信息将显示在这里...")
+
+ def append_log(self, message: str, level: str = "INFO"):
+ """添加带颜色的日志"""
+ timestamp = datetime.now().strftime("%H:%M:%S")
+
+ color_map = {
+ "INFO": "#4CAF50",
+ "WARNING": "#FF9800",
+ "ERROR": "#F44336",
+ "DEBUG": "#2196F3"
+ }
+ color = color_map.get(level, "#d4d4d4")
+
+ html = f'[{timestamp}] {message}'
+ self.append(html)
+
+ # 自动滚动到底部
+ scrollbar = self.verticalScrollBar()
+ scrollbar.setValue(scrollbar.maximum())
+
+
+class MainWindow(QMainWindow):
+ """主窗口"""
+ def __init__(self):
+ super().__init__()
+ self.setWindowTitle("滚动截屏OCR工具")
+ self.setMinimumSize(800, 600)
+ self.resize(900, 700)
+
+ # 初始化核心逻辑
+ self.scroll_capture = ScrollCaptureOCR()
+ self.worker: Optional[CaptureWorker] = None
+
+ # 设置应用样式
+ self.setup_styles()
+
+ # 创建UI
+ self.setup_ui()
+
+ # 设置系统托盘
+ self.setup_tray()
+
+ # 重定向日志
+ self.setup_logging()
+
+ def setup_styles(self):
+ """设置应用样式"""
+ self.setStyleSheet("""
+ QMainWindow {
+ background-color: #f8f9fa;
+ }
+ QLabel {
+ color: #333333;
+ }
+ QFrame {
+ border: none;
+ }
+ """)
+
+ def setup_ui(self):
+ """设置UI界面"""
+ # 中央部件
+ central_widget = QWidget()
+ self.setCentralWidget(central_widget)
+
+ # 主布局
+ main_layout = QVBoxLayout(central_widget)
+ main_layout.setSpacing(20)
+ main_layout.setContentsMargins(30, 30, 30, 30)
+
+ # === 标题区域 ===
+ title_layout = QHBoxLayout()
+
+ title_label = QLabel("滚动截屏OCR工具")
+ title_label.setFont(QFont("Microsoft YaHei", 20, QFont.Bold))
+ title_label.setStyleSheet("color: #2c3e50;")
+ title_layout.addWidget(title_label)
+
+ title_layout.addStretch()
+
+ # 状态标签
+ self.status_label = QLabel("就绪")
+ self.status_label.setFont(QFont("Microsoft YaHei", 11))
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #4CAF50;
+ background-color: #e8f5e9;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+ title_layout.addWidget(self.status_label)
+
+ main_layout.addLayout(title_layout)
+
+ # === 信息卡片 ===
+ info_card = QFrame()
+ info_card.setStyleSheet("""
+ QFrame {
+ background-color: white;
+ border-radius: 12px;
+ padding: 15px;
+ }
+ """)
+ info_layout = QHBoxLayout(info_card)
+ info_layout.setSpacing(30)
+
+ # 使用说明
+ help_text = QLabel(
+ "使用步骤:
"
+ "1. 点击「开始截屏」按钮
"
+ "2. 按住鼠标左键拖动选择区域
"
+ "3. 程序自动滚动截屏并OCR识别
"
+ "4. 检测到重复内容时自动停止"
+ )
+ help_text.setFont(QFont("Microsoft YaHei", 10))
+ help_text.setStyleSheet("color: #555555; line-height: 1.6;")
+ info_layout.addWidget(help_text)
+
+ info_layout.addStretch()
+
+ # 配置信息
+ config_text = QLabel(
+ f"当前配置:
"
+ f"OCR引擎: {Config.OCR_ENGINE}
"
+ f"灰度阈值: {Config.GRAY_THRESHOLD}
"
+ f"输出目录: {Config.OUTPUT_DIR}"
+ )
+ config_text.setFont(QFont("Microsoft YaHei", 10))
+ config_text.setStyleSheet("color: #666666;")
+ config_text.setAlignment(Qt.AlignRight)
+ info_layout.addWidget(config_text)
+
+ main_layout.addWidget(info_card)
+
+ # === 进度条 ===
+ self.progress_bar = QProgressBar()
+ self.progress_bar.setMaximumHeight(8)
+ self.progress_bar.setTextVisible(False)
+ self.progress_bar.setStyleSheet("""
+ QProgressBar {
+ background-color: #e0e0e0;
+ border-radius: 4px;
+ }
+ QProgressBar::chunk {
+ background-color: #4CAF50;
+ border-radius: 4px;
+ }
+ """)
+ self.progress_bar.setValue(0)
+ main_layout.addWidget(self.progress_bar)
+
+ # === 日志区域 ===
+ log_label = QLabel("运行日志")
+ log_label.setFont(QFont("Microsoft YaHei", 12, QFont.Bold))
+ log_label.setStyleSheet("color: #2c3e50; margin-top: 10px;")
+ main_layout.addWidget(log_label)
+
+ self.log_text = LogTextEdit()
+ main_layout.addWidget(self.log_text)
+
+ # === 按钮区域 ===
+ button_layout = QHBoxLayout()
+ button_layout.setSpacing(15)
+
+ button_layout.addStretch()
+
+ # 停止按钮
+ self.stop_btn = ModernButton("停止", primary=False)
+ self.stop_btn.setEnabled(False)
+ self.stop_btn.clicked.connect(self.stop_capture)
+ button_layout.addWidget(self.stop_btn)
+
+ # 开始按钮
+ self.start_btn = ModernButton("开始截屏", primary=True)
+ self.start_btn.clicked.connect(self.start_capture)
+ button_layout.addWidget(self.start_btn)
+
+ # 清空日志按钮
+ self.clear_btn = ModernButton("清空日志", primary=False)
+ self.clear_btn.clicked.connect(self.clear_logs)
+ button_layout.addWidget(self.clear_btn)
+
+ button_layout.addStretch()
+
+ main_layout.addLayout(button_layout)
+
+ # === 底部信息 ===
+ footer = QLabel("按 Ctrl+F9 也可以快速启动 | 输出目录: ./output/")
+ footer.setFont(QFont("Microsoft YaHei", 9))
+ footer.setStyleSheet("color: #999999; margin-top: 10px;")
+ footer.setAlignment(Qt.AlignCenter)
+ main_layout.addWidget(footer)
+
+ def setup_tray(self):
+ """设置系统托盘"""
+ self.tray_icon = QSystemTrayIcon(self)
+ self.tray_icon.setIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
+
+ # 托盘菜单
+ tray_menu = QMenu()
+ show_action = tray_menu.addAction("显示窗口")
+ show_action.triggered.connect(self.show)
+
+ tray_menu.addSeparator()
+
+ start_action = tray_menu.addAction("开始截屏")
+ start_action.triggered.connect(self.start_capture)
+
+ stop_action = tray_menu.addAction("停止")
+ stop_action.triggered.connect(self.stop_capture)
+
+ tray_menu.addSeparator()
+
+ quit_action = tray_menu.addAction("退出")
+ quit_action.triggered.connect(self.quit_app)
+
+ self.tray_icon.setContextMenu(tray_menu)
+ self.tray_icon.show()
+
+ def setup_logging(self):
+ """设置日志重定向"""
+ # 创建自定义日志处理器
+ import logging
+
+ class QtHandler(logging.Handler):
+ def __init__(self, callback):
+ super().__init__()
+ self.callback = callback
+
+ def emit(self, record):
+ msg = self.format(record)
+ self.callback(msg, record.levelname)
+
+ # 配置loguru输出到GUI
+ logger.add(self.log_to_gui, format="{message}")
+
+ def log_to_gui(self, message):
+ """将日志输出到GUI"""
+ # 在主线程中更新UI
+ QTimer.singleShot(0, lambda: self._append_log_safe(message))
+
+ def _append_log_safe(self, message: str):
+ """安全地添加日志(在主线程中调用)"""
+ level = "INFO"
+ if "错误" in message or "失败" in message or "✗" in message:
+ level = "ERROR"
+ elif "警告" in message:
+ level = "WARNING"
+ elif "完成" in message or "✓" in message:
+ level = "INFO"
+
+ self.log_text.append_log(message, level)
+
+ def start_capture(self):
+ """开始截屏"""
+ # 检查OCR服务
+ if not self.scroll_capture.ocr_engine.check_service():
+ self.log_text.append_log("✗ OCR服务未运行", "ERROR")
+ if Config.OCR_ENGINE == "umi":
+ self.log_text.append_log("请先启动Umi-OCR并开启HTTP服务", "WARNING")
+ self.log_text.append_log("设置 → HTTP接口 → 启用HTTP服务", "INFO")
+ return
+
+ # 选择区域
+ self.log_text.append_log("请在屏幕上拖动选择截图区域...", "INFO")
+ self.status_label.setText("选择区域")
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #FF9800;
+ background-color: #fff3e0;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+
+ try:
+ self.scroll_capture.capture_region = self.scroll_capture.region_selector.select_region()
+ except Exception as e:
+ self.log_text.append_log(f"区域选择失败: {e}", "ERROR")
+ self.status_label.setText("就绪")
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #4CAF50;
+ background-color: #e8f5e9;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+ return
+
+ # 启动工作线程
+ self.worker = CaptureWorker(self.scroll_capture)
+ self.worker.log_signal.connect(lambda msg: self.log_text.append_log(msg, "INFO"))
+ self.worker.progress_signal.connect(self.update_progress)
+ self.worker.status_signal.connect(self.update_status)
+ self.worker.finished_signal.connect(self.on_finished)
+ self.worker.error_signal.connect(self.on_error)
+
+ self.worker.start()
+
+ # 更新UI状态
+ self.start_btn.setEnabled(False)
+ self.stop_btn.setEnabled(True)
+ self.progress_bar.setValue(0)
+
+ def stop_capture(self):
+ """停止截屏"""
+ if self.worker and self.worker.isRunning():
+ self.worker.stop()
+ self.worker.wait(1000)
+ self.log_text.append_log("用户手动停止", "WARNING")
+
+ self.start_btn.setEnabled(True)
+ self.stop_btn.setEnabled(False)
+ self.status_label.setText("已停止")
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #F44336;
+ background-color: #ffebee;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+
+ def update_progress(self, value: int):
+ """更新进度条"""
+ self.progress_bar.setValue(value)
+
+ def update_status(self, status: str):
+ """更新状态标签"""
+ self.status_label.setText(status)
+
+ if status == "运行中":
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #2196F3;
+ background-color: #e3f2fd;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+ elif status == "完成":
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #4CAF50;
+ background-color: #e8f5e9;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+ elif status == "错误":
+ self.status_label.setStyleSheet("""
+ QLabel {
+ color: #F44336;
+ background-color: #ffebee;
+ padding: 6px 16px;
+ border-radius: 16px;
+ }
+ """)
+
+ def on_finished(self):
+ """任务完成回调"""
+ self.start_btn.setEnabled(True)
+ self.stop_btn.setEnabled(False)
+ self.log_text.append_log("✓ 截屏OCR任务已完成", "INFO")
+
+ def on_error(self, error_msg: str):
+ """错误回调"""
+ self.log_text.append_log(f"✗ 错误: {error_msg}", "ERROR")
+ self.start_btn.setEnabled(True)
+ self.stop_btn.setEnabled(False)
+
+ def clear_logs(self):
+ """清空日志"""
+ self.log_text.clear()
+ self.log_text.append_log("日志已清空", "INFO")
+
+ def quit_app(self):
+ """退出应用"""
+ if self.worker and self.worker.isRunning():
+ self.worker.stop()
+ self.worker.wait(2000)
+ self.tray_icon.hide()
+ QApplication.quit()
+
+ def closeEvent(self, event):
+ """关闭事件"""
+ # 最小化到托盘而不是关闭
+ if self.tray_icon.isVisible():
+ self.hide()
+ self.tray_icon.showMessage(
+ "滚动截屏OCR工具",
+ "程序已最小化到系统托盘",
+ QSystemTrayIcon.Information,
+ 2000
+ )
+ event.ignore()
+ else:
+ event.accept()
+
+
+def main():
+ """入口函数"""
+ app = QApplication(sys.argv)
+
+ # 设置应用信息
+ app.setApplicationName("滚动截屏OCR工具")
+ app.setApplicationVersion("1.0.0")
+
+ # 设置全局字体
+ font = QFont("Microsoft YaHei", 10)
+ app.setFont(font)
+
+ # 创建并显示主窗口
+ window = MainWindow()
+ window.show()
+
+ # 显示启动提示
+ window.log_text.append_log("程序已启动,点击「开始截屏」按钮开始", "INFO")
+ window.log_text.append_log(f"OCR引擎: {Config.OCR_ENGINE}", "INFO")
+
+ sys.exit(app.exec())
+
+
+if __name__ == "__main__":
+ main()
diff --git a/requirements.txt b/requirements.txt
index 7c87a26..042817a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ keyboard>=0.13.5
mouse>=0.7.1
requests>=2.31.0
loguru>=0.7.0
+pyside6>=6.5.0