feat: 初始提交 - 滚动截屏OCR工具

- 实现智能区域检测算法(灰度阈值 + 连续行判定)
- 支持Umi-OCR和自定义HTTP OCR服务
- 添加热键触发和鼠标框选区域功能
- 实现自动滚动和智能停止逻辑
- 添加完整的README文档
This commit is contained in:
2026-03-06 15:07:51 +08:00
commit 8600c0f576
6 changed files with 1247 additions and 0 deletions

146
ocr_server_example.py Normal file
View File

@@ -0,0 +1,146 @@
"""
OCR服务示例实现
这是一个简单的OCR HTTP服务示例使用 PaddleOCR 或 Tesseract 作为后端
你可以根据实际需求修改此文件或使用其他OCR服务
启动方式: python ocr_server_example.py
服务地址: http://localhost:8000
"""
import base64
import io
from typing import List
try:
from flask import Flask, request, jsonify
except ImportError:
print("请先安装Flask: pip install flask")
raise
try:
from PIL import Image
except ImportError:
print("请先安装Pillow: pip install pillow")
raise
app = Flask(__name__)

# Pick an OCR backend in priority order: PaddleOCR > Tesseract > mock.
# Each step only runs if no earlier step has already set ``ocr_type``.
ocr_engine = None
ocr_type = None

# First choice: PaddleOCR. The constructor stays inside the try so that an
# ImportError raised while it loads its own dependencies also falls through.
try:
    from paddleocr import PaddleOCR

    ocr_engine = PaddleOCR(use_angle_cls=True, lang='ch', show_log=False)
    ocr_type = "paddle"
    print("使用 PaddleOCR 引擎")
except ImportError:
    pass

# Second choice: Tesseract through the pytesseract wrapper module.
if ocr_type is None:
    try:
        import pytesseract

        ocr_engine = pytesseract
        ocr_type = "tesseract"
        print("使用 Tesseract OCR 引擎")
    except ImportError:
        pass

# Last resort: no engine could be imported, so run in mock mode.
if ocr_type is None:
    ocr_type = "mock"
    print("警告: 未找到OCR引擎使用模拟模式")
    print("建议安装 PaddleOCR: pip install paddleocr")
    print("或安装 Tesseract + pytesseract: pip install pytesseract")
def recognize_with_paddle(
    image: Image.Image, *, min_confidence: float = 0.5
) -> List[str]:
    """Recognize text in *image* with the PaddleOCR engine.

    Args:
        image: PIL image to run recognition on.
        min_confidence: Only lines whose confidence is strictly greater
            than this value are kept. Defaults to 0.5, the cut-off that
            was previously hard-coded.

    Returns:
        The text of each accepted line, in the order the engine reported
        them. Empty when the engine returned nothing for the image.
    """
    # Imported here rather than at module level: only this backend uses it.
    import numpy as np

    result = ocr_engine.ocr(np.array(image), cls=True)

    # Only the first entry of the result is consulted; bail out early when
    # it is missing or empty.
    if not result or not result[0]:
        return []

    # Each line carries (text, confidence) at index 1; falsy entries are
    # skipped and low-confidence lines are filtered out.
    return [
        line[1][0]
        for line in result[0]
        if line and line[1][1] > min_confidence
    ]
def recognize_with_tesseract(image: Image.Image) -> List[str]:
    """Recognize text in *image* with Tesseract.

    Returns the engine's output split on newlines, with surrounding
    whitespace stripped from each line and blank lines dropped.
    """
    raw_output = ocr_engine.image_to_string(image, lang='chi_sim+eng')

    recognized = []
    for raw_line in raw_output.split('\n'):
        cleaned = raw_line.strip()
        if cleaned:  # skip lines that are empty or whitespace-only
            recognized.append(cleaned)
    return recognized
def recognize_mock(image: Image.Image) -> List[str]:
    """Stand-in recognizer for testing when no OCR engine is available.

    The image is ignored; a single fixed placeholder line is returned.
    """
    placeholder = "[模拟OCR] 请安装实际的OCR引擎"
    return [placeholder]
def recognize_image(image: Image.Image) -> List[str]:
    """Run recognition with the backend named by the module-level ``ocr_type``.

    "paddle" and "tesseract" map to their recognizers; any other value
    falls back to the mock recognizer.
    """
    backends = {
        "paddle": recognize_with_paddle,
        "tesseract": recognize_with_tesseract,
    }
    recognizer = backends.get(ocr_type, recognize_mock)
    return recognizer(image)
def _decode_request_image(img_base64):
    """Turn the request's base64 payload into an RGB PIL image.

    Raises:
        ValueError: if the payload is not base64-decodable or the decoded
            bytes cannot be read as an image.
    """
    try:
        img_data = base64.b64decode(img_base64)
    except (TypeError, ValueError) as exc:
        # TypeError: payload is not a string; ValueError covers
        # binascii.Error (bad padding) and non-ASCII strings.
        raise ValueError("image字段不是有效的base64数据") from exc

    try:
        image = Image.open(io.BytesIO(img_data))
        # Normalise RGBA / palette / greyscale etc. to RGB.
        if image.mode != 'RGB':
            image = image.convert('RGB')
    except OSError as exc:
        # Unrecognised or truncated image data surfaces as OSError.
        raise ValueError(f"无法解析图片数据: {exc}") from exc
    return image


@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """OCR API endpoint.

    Request JSON:  {"image": "<base64-encoded image>"}
    Response JSON: {"texts": [...], "count": <int>, "engine": <engine name>}

    Returns HTTP 400 with {"error": ...} when the request is malformed
    (body is not a JSON object, ``image`` is missing, the payload is not
    valid base64, or the bytes are not a readable image). Any other
    failure returns HTTP 500 with {"error": ...}.
    """
    try:
        # silent=True yields None for a missing or unparsable JSON body
        # instead of raising, so it is reported as a client error below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({"error": "缺少image字段"}), 400

        try:
            image = _decode_request_image(data['image'])
        except ValueError as exc:
            return jsonify({"error": str(exc)}), 400

        # Run OCR with whichever engine was selected at import time.
        texts = recognize_image(image)
        return jsonify({
            "texts": texts,
            "count": len(texts),
            "engine": ocr_type,
        })
    except Exception as e:
        # Top-level boundary: always answer with JSON, never an HTML error page.
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: reports a fixed "ok" status and the active engine."""
    status_payload = {"status": "ok", "engine": ocr_type}
    return jsonify(status_payload)
if __name__ == '__main__':
    # Print the startup banner, then start the server on port 8000.
    banner_rule = "=" * 60
    banner_lines = (
        banner_rule,
        "OCR HTTP 服务",
        banner_rule,
        f"OCR引擎: {ocr_type}",
        "API地址: http://localhost:8000/ocr",
        "健康检查: http://localhost:8000/health",
        banner_rule,
        "\n启动服务中...",
    )
    for banner_line in banner_lines:
        print(banner_line)

    app.run(host='0.0.0.0', port=8000, debug=False)