feat: 优化滚动截屏逻辑并改进UI交互
- 新增div签名机制用于内容去重
- 实现基于最后一个div位置的智能滚动计算
- 合并开始/停止按钮为单一操作按钮
- 增加处理进度和滚动距离的详细日志
- 优化UI状态显示和提示信息
This commit is contained in:
175
main.py
175
main.py
@@ -9,7 +9,7 @@ import base64
|
||||
import io
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Tuple, Optional, Callable
|
||||
from typing import List, Tuple, Optional, Callable, Dict
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
@@ -381,6 +381,12 @@ class ScrollCaptureOCR:
|
||||
self.scroll_count = 0
|
||||
self.all_results: List[dict] = []
|
||||
|
||||
# 新增:记录已处理的div信息
|
||||
self.processed_divs: List[Dict] = [] # 已处理的所有div信息
|
||||
self.last_div_signature: Optional[str] = None # 最后一个div的签名(用于去重)
|
||||
self.total_scroll_distance: int = 0 # 累计滚动距离
|
||||
self.is_first_capture: bool = True # 是否是第一次截图
|
||||
|
||||
# 创建输出目录
|
||||
Path(self.config.OUTPUT_DIR).mkdir(exist_ok=True)
|
||||
|
||||
@@ -393,6 +399,21 @@ class ScrollCaptureOCR:
|
||||
screenshot = pyautogui.screenshot(region=(left, top, right - left, bottom - top))
|
||||
return cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
|
||||
|
||||
def get_div_signature(self, div: DivRegion, image: np.ndarray) -> str:
    """
    Build a short content signature for a div so that repeated captures of
    the same div can be recognized.

    The div region is cropped from ``image``, shrunk to 32x32, converted to
    grayscale and thresholded against its own mean brightness (an average
    hash). The resulting 1024 '0'/'1' flags are digested with MD5 and the
    first 16 hex characters of the digest are returned.

    Args:
        div: Region whose ``top``/``bottom``/``left``/``right`` bounds are
            used to crop ``image``.
        image: Screenshot to crop from. It is passed through
            ``cv2.COLOR_BGR2GRAY``, so presumably a 3-channel BGR image —
            confirm against the caller.

    Returns:
        A 16-character hexadecimal signature string.
    """
    import hashlib

    div_image = image[div.top:div.bottom, div.left:div.right]

    # Shrink first so the hashing cost does not depend on the div size.
    small = cv2.resize(div_image, (32, 32))
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    avg = gray.mean()

    # One vectorized comparison instead of a Python loop over 1024 pixels.
    # Adding ord('0') turns each 0/1 flag into the ASCII byte for '0'/'1',
    # so the digest input is byte-identical to encoding the equivalent
    # '0'/'1' string and previously produced signatures stay valid.
    bits = (gray.ravel() > avg).astype(np.uint8) + np.uint8(ord('0'))
    return hashlib.md5(bits.tobytes()).hexdigest()[:16]
|
||||
|
||||
def scroll_screen(self, distance: int):
|
||||
"""在截图区域执行滚动"""
|
||||
if not self.capture_region:
|
||||
@@ -460,13 +481,19 @@ class ScrollCaptureOCR:
|
||||
"""
|
||||
执行一次处理循环
|
||||
返回False表示应该停止
|
||||
|
||||
新逻辑:
|
||||
1. 第一次截图:处理所有div
|
||||
2. 后续截图:只处理新的div(跳过已处理的)
|
||||
3. 滚动距离 = 最后一个新div的底部位置 + 空白间隔
|
||||
"""
|
||||
logger.info(f"=== 第 {self.scroll_count + 1} 次截屏 ===")
|
||||
print(f"\n>>> 第 {self.scroll_count + 1} 次截屏处理中...")
|
||||
|
||||
# 1. 截取当前屏幕
|
||||
image = self.capture_screen()
|
||||
logger.info(f"截图完成,尺寸: {image.shape[1]}x{image.shape[0]}")
|
||||
height, width = image.shape[:2]
|
||||
logger.info(f"截图完成,尺寸: {width}x{height}")
|
||||
|
||||
# 2. 分析图像,定位div边界
|
||||
analysis = self.image_analyzer.analyze(image)
|
||||
@@ -476,32 +503,101 @@ class ScrollCaptureOCR:
|
||||
print("警告: 未检测到内容区域")
|
||||
return False
|
||||
|
||||
# 3. OCR提取文字
|
||||
current_texts = self.ocr_engine.recognize_divs(image, analysis.divs)
|
||||
print(f"识别到 {len(current_texts)} 段文字")
|
||||
logger.info(f"检测到 {len(analysis.divs)} 个div区域")
|
||||
print(f"检测到 {len(analysis.divs)} 个内容区域")
|
||||
|
||||
# 3. 识别新的div(跳过已处理的)
|
||||
new_divs = []
|
||||
current_texts = []
|
||||
last_processed_signature = None
|
||||
|
||||
for i, div in enumerate(analysis.divs):
|
||||
# 生成div签名
|
||||
div_signature = self.get_div_signature(div, image)
|
||||
|
||||
# 检查是否是已处理的div
|
||||
is_processed = False
|
||||
if not self.is_first_capture and self.processed_divs:
|
||||
# 与已处理的div比较
|
||||
for processed in self.processed_divs:
|
||||
if processed.get('signature') == div_signature:
|
||||
is_processed = True
|
||||
logger.info(f"跳过已处理的div {i+1}")
|
||||
break
|
||||
|
||||
if is_processed:
|
||||
continue
|
||||
|
||||
# 新的div,进行处理
|
||||
new_divs.append({
|
||||
'div': div,
|
||||
'signature': div_signature,
|
||||
'index': i
|
||||
})
|
||||
|
||||
logger.info(f"处理新的div {i+1},位置: {div.top}-{div.bottom}")
|
||||
print(f" 处理新区域 {i+1}/{len(analysis.divs)}...")
|
||||
|
||||
# 截取单个div区域
|
||||
div_image = image[div.top:div.bottom, div.left:div.right]
|
||||
|
||||
# OCR识别
|
||||
texts = self.ocr_engine.recognize(div_image)
|
||||
div.text = "\n".join(texts)
|
||||
current_texts.extend(texts)
|
||||
|
||||
# 保存单个div的结果
|
||||
self.save_div_result(self.scroll_count, i, div_image, texts, div)
|
||||
|
||||
# 记录处理的div
|
||||
self.processed_divs.append({
|
||||
'signature': div_signature,
|
||||
'text': div.text,
|
||||
'scroll_count': self.scroll_count,
|
||||
'div_index': i
|
||||
})
|
||||
|
||||
last_processed_signature = div_signature
|
||||
logger.info(f" 识别到 {len(texts)} 段文字")
|
||||
|
||||
# 如果没有新的div,说明已经到底
|
||||
if not new_divs:
|
||||
logger.info("没有新的div需要处理,可能已到达底部")
|
||||
print("✓ 没有新的内容需要处理")
|
||||
return False
|
||||
|
||||
print(f"✓ 本次处理 {len(new_divs)} 个新区域,共识别 {len(current_texts)} 段文字")
|
||||
for i, text in enumerate(current_texts[:3], 1):
|
||||
preview = text[:50] + "..." if len(text) > 50 else text
|
||||
print(f" [{i}] {preview}")
|
||||
if len(current_texts) > 3:
|
||||
print(f" ... 还有 {len(current_texts) - 3} 段文字")
|
||||
|
||||
# 4. 保存结果
|
||||
# 4. 保存完整截图结果
|
||||
self.save_result(self.scroll_count, image, current_texts)
|
||||
|
||||
# 5. 判断是否到达底部(OCR结果重复)
|
||||
# 5. 判断是否到达底部(没有新内容或OCR结果重复)
|
||||
if self.check_duplicate(current_texts):
|
||||
print("\n>>> 检测到内容重复,已到达底部,处理完成 <<<")
|
||||
return False
|
||||
|
||||
self.previous_ocr_result = current_texts
|
||||
|
||||
# 6. 计算滚动距离
|
||||
scroll_distance = self.image_analyzer.calculate_scroll_distance(analysis)
|
||||
# 6. 计算滚动距离 - 基于最后一个新div的位置
|
||||
if new_divs:
|
||||
last_new_div = new_divs[-1]['div']
|
||||
scroll_distance = self.calculate_scroll_based_on_last_div(
|
||||
last_new_div, analysis.gaps, height
|
||||
)
|
||||
else:
|
||||
scroll_distance = int(height * 0.8) # 默认滚动80%高度
|
||||
|
||||
# 7. 执行滚动
|
||||
self.scroll_screen(scroll_distance)
|
||||
self.total_scroll_distance += scroll_distance
|
||||
|
||||
self.scroll_count += 1
|
||||
self.is_first_capture = False # 标记不再是第一次
|
||||
|
||||
# 检查最大滚动次数
|
||||
if self.scroll_count >= self.config.MAX_SCROLL_COUNT:
|
||||
@@ -511,6 +607,67 @@ class ScrollCaptureOCR:
|
||||
|
||||
return True
|
||||
|
||||
def calculate_scroll_based_on_last_div(self, last_div: DivRegion, gaps: List[GapInfo], image_height: int) -> int:
    """
    Compute the scroll distance from the position of the last div.

    The distance is the bottom edge of ``last_div`` plus the height of the
    first gap in ``gaps`` whose ``start_row`` is at or below that edge
    (0 when there is no such gap). The intent is that the next unseen div
    ends up at the top of the capture region after scrolling.

    Args:
        last_div: The last div handled in the current capture.
        gaps: Gap records; only ``start_row`` and ``height`` are read here.
        image_height: Height of the captured image in pixels.

    Returns:
        The scroll distance as an ``int``: raised to at least 50, then
        capped at 90% of ``image_height``.
    """
    last_div_bottom = last_div.bottom

    # Height of the first gap that begins at or below the div's bottom edge.
    last_gap_height = next(
        (gap.height for gap in gaps if gap.start_row >= last_div_bottom),
        0,
    )

    # Lower bound keeps the capture from getting stuck in place; the upper
    # bound (90% of the image) leaves some overlap between captures.
    floor = 50
    ceiling = int(image_height * 0.9)
    scroll_distance = min(max(last_div_bottom + last_gap_height, floor), ceiling)

    logger.info(f"滚动距离计算: 最后div底部={last_div_bottom}, "
                f"空白间隔={last_gap_height}, 滚动距离={scroll_distance}")
    print(f" 滚动距离: {scroll_distance} 像素 (基于最后一个内容区域)")

    return int(scroll_distance)
|
||||
|
||||
def save_div_result(self, scroll_index: int, div_index: int, image: np.ndarray, texts: List[str], div: DivRegion):
    """
    Write one div's image to disk and remember its OCR result in memory.

    The image is written as
    ``<OUTPUT_DIR>/scroll_<scroll_index>/div_<div_index>_<timestamp>.png``.
    The OCR result itself is not written to disk by this method; it is
    appended to ``self._div_results``, which is created on first use.

    Args:
        scroll_index: Index of the capture this div belongs to.
        div_index: Index of the div within that capture.
        image: Image data to write (the caller passes the cropped div).
        texts: OCR text segments recognized for this div.
        div: Region whose ``top``/``bottom``/``left``/``right`` bounds are
            recorded in the result.
    """
    timestamp = time.strftime("%Y%m%d_%H%M%S")

    # One sub-directory per capture keeps the div images grouped together.
    div_dir = Path(self.config.OUTPUT_DIR) / f"scroll_{scroll_index:03d}"
    div_dir.mkdir(parents=True, exist_ok=True)

    image_path = div_dir / f"div_{div_index:02d}_{timestamp}.png"
    # cv2.imwrite signals failure through its return value rather than by
    # raising, so check it instead of silently recording a missing file.
    if not cv2.imwrite(str(image_path), image):
        logger.warning(f"保存div图片失败: {image_path}")

    result = {
        "scroll_index": scroll_index,
        "div_index": div_index,
        "timestamp": timestamp,
        "div_position": {"top": div.top, "bottom": div.bottom, "left": div.left, "right": div.right},
        "image_path": str(image_path),
        "texts": texts
    }

    # The accumulator is created lazily on the first call.
    if not hasattr(self, '_div_results'):
        self._div_results = []
    self._div_results.append(result)

    logger.debug(f"保存div结果: scroll={scroll_index}, div={div_index}, 文字数={len(texts)}")
|
||||
|
||||
def run(self):
|
||||
"""主运行流程"""
|
||||
print("=" * 60)
|
||||
|
||||
Reference in New Issue
Block a user