diff --git a/gui.py b/gui.py
index f75beae..e49d035 100644
--- a/gui.py
+++ b/gui.py
@@ -43,6 +43,7 @@ class CaptureWorker(QThread):
status_signal = Signal(str)
finished_signal = Signal()
error_signal = Signal(str)
+ div_processed_signal = Signal(int, int) # (当前div序号, 总div数)
def __init__(self, scroll_capture: ScrollCaptureOCR):
super().__init__()
@@ -59,17 +60,24 @@ class CaptureWorker(QThread):
self.scroll_capture.previous_ocr_result = []
self.scroll_capture.scroll_count = 0
self.scroll_capture.all_results = []
+ self.scroll_capture.processed_divs = []
+ self.scroll_capture.last_div_signature = None
+ self.scroll_capture.total_scroll_distance = 0
+ self.scroll_capture.is_first_capture = True
# 循环处理
while self.is_running and self.scroll_capture.process_once():
progress = min(self.scroll_capture.scroll_count * 10, 90)
self.progress_signal.emit(progress)
- self.log_signal.emit(f"第 {self.scroll_capture.scroll_count} 次截屏完成")
+ self.log_signal.emit(f"✓ 第 {self.scroll_capture.scroll_count} 次截屏完成,"
+ f"累计滚动 {self.scroll_capture.total_scroll_distance} 像素")
# 保存最终结果
if self.scroll_capture.all_results:
self.scroll_capture.save_final_result()
- self.log_signal.emit(f"✓ 共处理 {len(self.scroll_capture.all_results)} 次截屏")
+ total_divs = sum(len(result.get('texts', [])) for result in self.scroll_capture.all_results)
+ self.log_signal.emit(f"✓ 共处理 {len(self.scroll_capture.all_results)} 次截屏,"
+ f"识别 {total_divs} 个内容区域")
self.progress_signal.emit(100)
self.status_signal.emit("完成")
@@ -263,10 +271,11 @@ class MainWindow(QMainWindow):
# 使用说明
help_text = QLabel(
"使用步骤:
"
- "1. 点击「开始截屏」按钮
"
+ "1. 点击「开始」按钮
"
"2. 按住鼠标左键拖动选择区域
"
- "3. 程序自动滚动截屏并OCR识别
"
- "4. 检测到重复内容时自动停止"
+ "3. 程序自动分割div并逐个OCR
"
+ "4. 智能计算滚动距离,自动翻页
"
+ "5. 完成后点击「结束」按钮"
)
help_text.setFont(QFont("Microsoft YaHei", 10))
help_text.setStyleSheet("color: #555555; line-height: 1.6;")
@@ -320,16 +329,10 @@ class MainWindow(QMainWindow):
button_layout.addStretch()
- # 停止按钮
- self.stop_btn = ModernButton("停止", primary=False)
- self.stop_btn.setEnabled(False)
- self.stop_btn.clicked.connect(self.stop_capture)
- button_layout.addWidget(self.stop_btn)
-
- # 开始按钮
- self.start_btn = ModernButton("开始截屏", primary=True)
- self.start_btn.clicked.connect(self.start_capture)
- button_layout.addWidget(self.start_btn)
+ # 操作按钮(开始/结束 二合一)
+ self.action_btn = ModernButton("开始", primary=True)
+ self.action_btn.clicked.connect(self.on_action_button_clicked)
+ button_layout.addWidget(self.action_btn)
# 清空日志按钮
self.clear_btn = ModernButton("清空日志", primary=False)
@@ -341,7 +344,7 @@ class MainWindow(QMainWindow):
main_layout.addLayout(button_layout)
# === 底部信息 ===
- footer = QLabel("按 Ctrl+F9 也可以快速启动 | 输出目录: ./output/")
+ footer = QLabel("点击「开始」按钮启动 | 输出目录: ./output/")
footer.setFont(QFont("Microsoft YaHei", 9))
footer.setStyleSheet("color: #999999; margin-top: 10px;")
footer.setAlignment(Qt.AlignCenter)
@@ -359,10 +362,10 @@ class MainWindow(QMainWindow):
tray_menu.addSeparator()
- start_action = tray_menu.addAction("开始截屏")
+ start_action = tray_menu.addAction("开始")
start_action.triggered.connect(self.start_capture)
- stop_action = tray_menu.addAction("停止")
+ stop_action = tray_menu.addAction("结束")
stop_action.triggered.connect(self.stop_capture)
tray_menu.addSeparator()
@@ -407,6 +410,13 @@ class MainWindow(QMainWindow):
self.log_text.append_log(message, level)
+ def on_action_button_clicked(self):
+ """操作按钮点击事件"""
+ if self.action_btn.text() == "开始":
+ self.start_capture()
+ else:
+ self.stop_capture()
+
def start_capture(self):
"""开始截屏"""
# 检查OCR服务
@@ -454,9 +464,9 @@ class MainWindow(QMainWindow):
self.worker.start()
- # 更新UI状态
- self.start_btn.setEnabled(False)
- self.stop_btn.setEnabled(True)
+ # 更新UI状态 - 按钮变为"结束"
+ self.action_btn.setText("结束")
+ self.action_btn.update_style()
self.progress_bar.setValue(0)
def stop_capture(self):
@@ -464,15 +474,16 @@ class MainWindow(QMainWindow):
if self.worker and self.worker.isRunning():
self.worker.stop()
self.worker.wait(1000)
- self.log_text.append_log("用户手动停止", "WARNING")
+ self.log_text.append_log("用户手动结束", "WARNING")
- self.start_btn.setEnabled(True)
- self.stop_btn.setEnabled(False)
- self.status_label.setText("已停止")
+ # 更新UI状态 - 按钮恢复为"开始"
+ self.action_btn.setText("开始")
+ self.action_btn.update_style()
+ self.status_label.setText("就绪")
self.status_label.setStyleSheet("""
QLabel {
- color: #F44336;
- background-color: #ffebee;
+ color: #4CAF50;
+ background-color: #e8f5e9;
padding: 6px 16px;
border-radius: 16px;
}
@@ -516,15 +527,17 @@ class MainWindow(QMainWindow):
def on_finished(self):
"""任务完成回调"""
- self.start_btn.setEnabled(True)
- self.stop_btn.setEnabled(False)
+ # Task finished: put the combined start/stop button back to its idle "开始" (start) caption and restyle it
+ self.action_btn.setText("开始")
+ self.action_btn.update_style()
self.log_text.append_log("✓ 截屏OCR任务已完成", "INFO")
def on_error(self, error_msg: str):
"""错误回调"""
self.log_text.append_log(f"✗ 错误: {error_msg}", "ERROR")
- self.start_btn.setEnabled(True)
- self.stop_btn.setEnabled(False)
+ # After logging the error, put the combined start/stop button back to its idle "开始" (start) caption and restyle it
+ self.action_btn.setText("开始")
+ self.action_btn.update_style()
def clear_logs(self):
"""清空日志"""
@@ -572,7 +585,7 @@ def main():
window.show()
# 显示启动提示
- window.log_text.append_log("程序已启动,点击「开始截屏」按钮开始", "INFO")
+ window.log_text.append_log("程序已启动,点击「开始」按钮开始", "INFO")
window.log_text.append_log(f"OCR引擎: {Config.OCR_ENGINE}", "INFO")
sys.exit(app.exec())
diff --git a/main.py b/main.py
index df400bc..15daf71 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,7 @@ import base64
import io
import tempfile
from dataclasses import dataclass, field
-from typing import List, Tuple, Optional, Callable
+from typing import List, Tuple, Optional, Callable, Dict
from pathlib import Path
import cv2
@@ -381,6 +381,12 @@ class ScrollCaptureOCR:
self.scroll_count = 0
self.all_results: List[dict] = []
+ # 新增:记录已处理的div信息
+ self.processed_divs: List[Dict] = [] # 已处理的所有div信息
+ self.last_div_signature: Optional[str] = None # 最后一个div的签名(用于去重)
+ self.total_scroll_distance: int = 0 # 累计滚动距离
+ self.is_first_capture: bool = True # 是否是第一次截图
+
# 创建输出目录
Path(self.config.OUTPUT_DIR).mkdir(exist_ok=True)
@@ -393,6 +399,21 @@ class ScrollCaptureOCR:
screenshot = pyautogui.screenshot(region=(left, top, right - left, bottom - top))
return cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
+ def get_div_signature(self, div: DivRegion, image: np.ndarray) -> str:
+ """
+ 生成div的签名,用于判断是否是同一个div
+ 使用div的图像内容的哈希值
+ """
+ import hashlib
+ div_image = image[div.top:div.bottom, div.left:div.right]
+ # 缩小图像以加快计算
+ small = cv2.resize(div_image, (32, 32))
+ # 计算平均哈希
+ gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
+ avg = gray.mean()
+ hash_str = ''.join(['1' if p > avg else '0' for p in gray.flatten()])
+ return hashlib.md5(hash_str.encode()).hexdigest()[:16]
+
def scroll_screen(self, distance: int):
"""在截图区域执行滚动"""
if not self.capture_region:
@@ -460,13 +481,19 @@ class ScrollCaptureOCR:
"""
执行一次处理循环
返回False表示应该停止
+
+ 新逻辑:
+ 1. 第一次截图:处理所有div
+ 2. 后续截图:只处理新的div(跳过已处理的)
+ 3. 滚动距离 = 最后一个新div的底部位置 + 空白间隔
"""
logger.info(f"=== 第 {self.scroll_count + 1} 次截屏 ===")
print(f"\n>>> 第 {self.scroll_count + 1} 次截屏处理中...")
# 1. 截取当前屏幕
image = self.capture_screen()
- logger.info(f"截图完成,尺寸: {image.shape[1]}x{image.shape[0]}")
+ height, width = image.shape[:2]
+ logger.info(f"截图完成,尺寸: {width}x{height}")
# 2. 分析图像,定位div边界
analysis = self.image_analyzer.analyze(image)
@@ -476,32 +503,101 @@ class ScrollCaptureOCR:
print("警告: 未检测到内容区域")
return False
- # 3. OCR提取文字
- current_texts = self.ocr_engine.recognize_divs(image, analysis.divs)
- print(f"识别到 {len(current_texts)} 段文字")
+ logger.info(f"检测到 {len(analysis.divs)} 个div区域")
+ print(f"检测到 {len(analysis.divs)} 个内容区域")
+
+ # 3. 识别新的div(跳过已处理的)
+ new_divs = []
+ current_texts = []
+ last_processed_signature = None
+
+ for i, div in enumerate(analysis.divs):
+ # 生成div签名
+ div_signature = self.get_div_signature(div, image)
+
+ # 检查是否是已处理的div
+ is_processed = False
+ if not self.is_first_capture and self.processed_divs:
+ # 与已处理的div比较
+ for processed in self.processed_divs:
+ if processed.get('signature') == div_signature:
+ is_processed = True
+ logger.info(f"跳过已处理的div {i+1}")
+ break
+
+ if is_processed:
+ continue
+
+ # 新的div,进行处理
+ new_divs.append({
+ 'div': div,
+ 'signature': div_signature,
+ 'index': i
+ })
+
+ logger.info(f"处理新的div {i+1},位置: {div.top}-{div.bottom}")
+ print(f" 处理新区域 {i+1}/{len(analysis.divs)}...")
+
+ # 截取单个div区域
+ div_image = image[div.top:div.bottom, div.left:div.right]
+
+ # OCR识别
+ texts = self.ocr_engine.recognize(div_image)
+ div.text = "\n".join(texts)
+ current_texts.extend(texts)
+
+ # 保存单个div的结果
+ self.save_div_result(self.scroll_count, i, div_image, texts, div)
+
+ # 记录处理的div
+ self.processed_divs.append({
+ 'signature': div_signature,
+ 'text': div.text,
+ 'scroll_count': self.scroll_count,
+ 'div_index': i
+ })
+
+ last_processed_signature = div_signature
+ logger.info(f" 识别到 {len(texts)} 段文字")
+
+ # 如果没有新的div,说明已经到底
+ if not new_divs:
+ logger.info("没有新的div需要处理,可能已到达底部")
+ print("✓ 没有新的内容需要处理")
+ return False
+
+ print(f"✓ 本次处理 {len(new_divs)} 个新区域,共识别 {len(current_texts)} 段文字")
for i, text in enumerate(current_texts[:3], 1):
preview = text[:50] + "..." if len(text) > 50 else text
print(f" [{i}] {preview}")
if len(current_texts) > 3:
print(f" ... 还有 {len(current_texts) - 3} 段文字")
- # 4. 保存结果
+ # 4. 保存完整截图结果
self.save_result(self.scroll_count, image, current_texts)
- # 5. 判断是否到达底部(OCR结果重复)
+ # 5. 判断是否到达底部(没有新内容或OCR结果重复)
if self.check_duplicate(current_texts):
print("\n>>> 检测到内容重复,已到达底部,处理完成 <<<")
return False
self.previous_ocr_result = current_texts
- # 6. 计算滚动距离
- scroll_distance = self.image_analyzer.calculate_scroll_distance(analysis)
+ # 6. 计算滚动距离 - 基于最后一个新div的位置
+ if new_divs:
+ last_new_div = new_divs[-1]['div']
+ scroll_distance = self.calculate_scroll_based_on_last_div(
+ last_new_div, analysis.gaps, height
+ )
+ else:
+ scroll_distance = int(height * 0.8) # 默认滚动80%高度
# 7. 执行滚动
self.scroll_screen(scroll_distance)
+ self.total_scroll_distance += scroll_distance
self.scroll_count += 1
+ self.is_first_capture = False # 标记不再是第一次
# 检查最大滚动次数
if self.scroll_count >= self.config.MAX_SCROLL_COUNT:
@@ -511,6 +607,67 @@ class ScrollCaptureOCR:
return True
+ def calculate_scroll_based_on_last_div(self, last_div: DivRegion, gaps: List[GapInfo], image_height: int) -> int:
+ """
+ 基于最后一个div计算滚动距离
+
+ 策略:滚动到最后一个div的底部 + 其后空白间隔
+ 这样可以让下一个新div出现在截图区域的顶部
+ """
+ last_div_bottom = last_div.bottom
+
+ # 查找最后一个div之后的空白间隔
+ last_gap_height = 0
+ for gap in gaps:
+ if gap.start_row >= last_div.bottom:
+ last_gap_height = gap.height
+ break
+
+ # 计算滚动距离
+ scroll_distance = last_div_bottom + last_gap_height
+
+ # 确保至少滚动一定距离(避免 stuck)
+ min_scroll = 50
+ scroll_distance = max(scroll_distance, min_scroll)
+
+ # 限制最大滚动距离(不超过图片高度的90%,保留一些重叠)
+ max_scroll = int(image_height * 0.9)
+ scroll_distance = min(scroll_distance, max_scroll)
+
+ logger.info(f"滚动距离计算: 最后div底部={last_div_bottom}, "
+ f"空白间隔={last_gap_height}, 滚动距离={scroll_distance}")
+ print(f" 滚动距离: {scroll_distance} 像素 (基于最后一个内容区域)")
+
+ return int(scroll_distance)
+
+ def save_div_result(self, scroll_index: int, div_index: int, image: np.ndarray, texts: List[str], div: DivRegion):
+ """保存单个div的结果"""
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
+
+ # 保存div图片
+ div_dir = Path(self.config.OUTPUT_DIR) / f"scroll_{scroll_index:03d}"
+ div_dir.mkdir(parents=True, exist_ok=True)
+
+ image_path = div_dir / f"div_{div_index:02d}_{timestamp}.png"
+ cv2.imwrite(str(image_path), image)
+
+ # 保存div OCR结果
+ result = {
+ "scroll_index": scroll_index,
+ "div_index": div_index,
+ "timestamp": timestamp,
+ "div_position": {"top": div.top, "bottom": div.bottom, "left": div.left, "right": div.right},
+ "image_path": str(image_path),
+ "texts": texts
+ }
+
+ # 也添加到总结果中
+ if not hasattr(self, '_div_results'):
+ self._div_results = []
+ self._div_results.append(result)
+
+ logger.debug(f"保存div结果: scroll={scroll_index}, div={div_index}, 文字数={len(texts)}")
+
def run(self):
"""主运行流程"""
print("=" * 60)