feat: 初始提交 - 滚动截屏OCR工具

- 实现智能区域检测算法（灰度阈值 + 连续行判定）
- 支持Umi-OCR和自定义HTTP OCR服务
- 添加热键触发和鼠标框选区域功能
- 实现自动滚动和智能停止逻辑
- 添加完整的README文档
2026-03-06 15:07:51 +08:00
commit 8600c0f576
6 changed files with 1247 additions and 0 deletions
--- a/ocr_server_example.py
+++ b/ocr_server_example.py
@@ -0,0 +1,146 @@
+"""
+OCR服务示例实现
+这是一个简单的OCR HTTP服务示例，使用 PaddleOCR 或 Tesseract 作为后端
+你可以根据实际需求修改此文件或使用其他OCR服务
+
+启动方式: python ocr_server_example.py
+服务地址: http://localhost:8000
+"""
+
+import base64
+import io
+from typing import List
+
+try:
+    from flask import Flask, request, jsonify
+except ImportError:
+    print("请先安装Flask: pip install flask")
+    raise
+
+try:
+    from PIL import Image
+except ImportError:
+    print("请先安装Pillow: pip install pillow")
+    raise
+
+app = Flask(__name__)
+
+# Try the OCR engines in priority order: PaddleOCR > Tesseract > mock.
+ocr_engine = None
+ocr_type = None
+
+try:
+    from paddleocr import PaddleOCR
+    ocr_engine = PaddleOCR(
+        use_angle_cls=True,
+        lang='ch',
+        show_log=False
+    )
+    ocr_type = "paddle"
+    print("使用 PaddleOCR 引擎")
+except ImportError:  # NOTE(review): only ImportError falls back; any other PaddleOCR(...) failure propagates at import time
+    try:
+        import pytesseract
+        ocr_engine = pytesseract  # the module itself is used as the engine object
+        ocr_type = "tesseract"
+        print("使用 Tesseract OCR 引擎")
+    except ImportError:
+        ocr_type = "mock"  # ocr_engine stays None in mock mode
+        print("警告: 未找到OCR引擎，使用模拟模式")
+        print("建议安装 PaddleOCR: pip install paddleocr")
+        print("或安装 Tesseract + pytesseract: pip install pytesseract")
+
+
+def recognize_with_paddle(image: Image.Image) -> List[str]:
+    """Recognize text in `image` with PaddleOCR, keeping only results whose confidence exceeds 0.5."""
+    import numpy as np
+    img_array = np.array(image)
+    result = ocr_engine.ocr(img_array, cls=True)
+
+    texts = []
+    if result and result[0]:  # only the first entry of the result is read
+        for line in result[0]:
+            if line:
+                text = line[1][0]  # recognized text
+                confidence = line[1][1]  # confidence score
+                if confidence > 0.5:  # drop low-confidence results
+                    texts.append(text)
+    return texts
+
+
+def recognize_with_tesseract(image: Image.Image) -> List[str]:
+    """Recognize text in `image` with Tesseract and return its non-blank lines, stripped."""
+    text = ocr_engine.image_to_string(image, lang='chi_sim+eng')
+    # Split on newlines, dropping lines that are empty after stripping
+    lines = [line.strip() for line in text.split('\n') if line.strip()]
+    return lines
+
+
+def recognize_mock(image: Image.Image) -> List[str]:
+    """Mock OCR for testing: ignores `image` and returns one fixed placeholder string."""
+    return ["[模拟OCR] 请安装实际的OCR引擎"]
+
+
+def recognize_image(image: Image.Image) -> List[str]:
+    """Dispatch `image` to the recognizer selected by the module-level `ocr_type`."""
+    if ocr_type == "paddle":
+        return recognize_with_paddle(image)
+    elif ocr_type == "tesseract":
+        return recognize_with_tesseract(image)
+    else:  # any other ocr_type value uses the mock recognizer
+        return recognize_mock(image)
+
+
+@app.route('/ocr', methods=['POST'])
+def ocr_endpoint():
+    """
+    OCR API endpoint.
+    Accepts JSON: {"image": "<base64-encoded image>"}
+    Returns JSON: {"texts": [...], "count": len(texts), "engine": ocr_type}; 400 if "image" is missing, 500 on any exception.
+    """
+    try:
+        data = request.get_json()
+        if not data or 'image' not in data:
+            return jsonify({"error": "缺少image字段"}), 400
+
+        # Decode the base64 payload and open it as an image
+        img_base64 = data['image']
+        img_data = base64.b64decode(img_base64)
+        image = Image.open(io.BytesIO(img_data))
+
+        # Convert to RGB (e.g. when the image is RGBA or another mode)
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+
+        # Run OCR
+        texts = recognize_image(image)
+
+        return jsonify({
+            "texts": texts,
+            "count": len(texts),
+            "engine": ocr_type
+        })
+
+    except Exception as e:  # NOTE(review): base64/image decode failures from bad input also land here and are reported as 500
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Health-check endpoint: returns status "ok" and the active engine name as JSON."""
+    return jsonify({
+        "status": "ok",
+        "engine": ocr_type
+    })
+
+
+if __name__ == '__main__':  # script entry point: print a startup banner, then serve
+    print("=" * 60)
+    print("OCR HTTP 服务")
+    print("=" * 60)
+    print(f"OCR引擎: {ocr_type}")
+    print("API地址: http://localhost:8000/ocr")
+    print("健康检查: http://localhost:8000/health")
+    print("=" * 60)
+    print("\n启动服务中...")
+    app.run(host='0.0.0.0', port=8000, debug=False)  # NOTE(review): binds to 0.0.0.0, while the banner above advertises localhost