Files
long-screen-cut/ocr_server_example.py
xiaji 8600c0f576 feat: 初始提交 - 滚动截屏OCR工具
- 实现智能区域检测算法(灰度阈值 + 连续行判定)
- 支持Umi-OCR和自定义HTTP OCR服务
- 添加热键触发和鼠标框选区域功能
- 实现自动滚动和智能停止逻辑
- 添加完整的README文档
2026-03-06 15:07:51 +08:00

147 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
OCR服务示例实现
这是一个简单的OCR HTTP服务示例，使用 PaddleOCR 或 Tesseract 作为后端。
你可以根据实际需求修改此文件，或使用其他OCR服务。
启动方式: python ocr_server_example.py
服务地址: http://localhost:8000
"""
import base64
import io
from typing import List
try:
from flask import Flask, request, jsonify
except ImportError:
print("请先安装Flask: pip install flask")
raise
try:
from PIL import Image
except ImportError:
print("请先安装Pillow: pip install pillow")
raise
app = Flask(__name__)

# Select the OCR backend in priority order: PaddleOCR > Tesseract > mock.
# ocr_engine holds the backend object (it stays None in mock mode) and
# ocr_type is the label the request handlers dispatch on and report.
ocr_engine = None
ocr_type = None

try:
    from paddleocr import PaddleOCR

    ocr_engine = PaddleOCR(use_angle_cls=True, lang='ch', show_log=False)
    ocr_type = "paddle"
    print("使用 PaddleOCR 引擎")
except ImportError:
    # PaddleOCR is not usable; ocr_type is still None, so the Tesseract
    # stage below runs next.
    pass

if ocr_type is None:
    try:
        import pytesseract
    except ImportError:
        # Neither backend could be imported: fall back to the mock engine
        # and tell the operator how to install a real one.
        ocr_type = "mock"
        print("警告: 未找到OCR引擎使用模拟模式")
        print("建议安装 PaddleOCR: pip install paddleocr")
        print("或安装 Tesseract + pytesseract: pip install pytesseract")
    else:
        ocr_engine = pytesseract
        ocr_type = "tesseract"
        print("使用 Tesseract OCR 引擎")
def recognize_with_paddle(image: Image.Image) -> List[str]:
    """Run the PaddleOCR backend on *image* and return the recognized strings.

    The image is converted to a NumPy array and passed to the module-level
    ``ocr_engine`` with ``cls=True``. Only the first element of the returned
    result is read, and entries whose confidence is not above 0.5 are dropped.

    Args:
        image: The image to recognize.

    Returns:
        The text of every retained entry, in the order the engine returned
        them; an empty list when the engine returned nothing.
    """
    import numpy as np

    pixels = np.array(image)
    pages = ocr_engine.ocr(pixels, cls=True)
    if not pages or not pages[0]:
        return []

    # Each non-empty entry is read as entry[1] == (text, confidence);
    # low-confidence entries are filtered out.
    return [
        entry[1][0]
        for entry in pages[0]
        if entry and entry[1][1] > 0.5
    ]
def recognize_with_tesseract(image: Image.Image) -> List[str]:
    """Run the Tesseract backend on *image* and return its non-blank lines.

    Calls ``image_to_string`` on the module-level ``ocr_engine`` with the
    ``chi_sim+eng`` language setting, then splits the output on newlines.

    Args:
        image: The image to recognize.

    Returns:
        Each output line with surrounding whitespace stripped; lines that are
        empty after stripping are omitted.
    """
    raw_text = ocr_engine.image_to_string(image, lang='chi_sim+eng')
    stripped_rows = (row.strip() for row in raw_text.split('\n'))
    return [row for row in stripped_rows if row]
def recognize_mock(image: Image.Image) -> List[str]:
    """Return a fixed placeholder result; used for testing without an engine.

    Args:
        image: Accepted for signature parity with the real backends; it is
            not inspected.

    Returns:
        A one-element list containing the placeholder message.
    """
    placeholder = "[模拟OCR] 请安装实际的OCR引擎"
    return [placeholder]
def recognize_image(image: Image.Image) -> List[str]:
    """Recognize *image* with the backend named by the module-level ``ocr_type``.

    Args:
        image: The image to recognize.

    Returns:
        The list of strings produced by the selected backend. Any
        ``ocr_type`` other than ``"paddle"`` or ``"tesseract"`` uses the
        mock recognizer.
    """
    backends = {
        "paddle": recognize_with_paddle,
        "tesseract": recognize_with_tesseract,
    }
    recognizer = backends.get(ocr_type, recognize_mock)
    return recognizer(image)
@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """Handle ``POST /ocr``: decode a base64 image and return the OCR text.

    Request body (JSON): ``{"image": "<base64-encoded image>"}``.

    Responses:
        200: ``{"texts": [...], "count": <len(texts)>, "engine": <ocr_type>}``.
        400: ``{"error": ...}`` when the body is not a JSON object, has no
            ``image`` field, the field is not a string, or it cannot be
            decoded into an image.
        500: ``{"error": ...}`` for any other failure; the traceback is
            written to ``app.logger``.
    """
    try:
        # silent=True returns None for a malformed body or a non-JSON content
        # type instead of raising, so those requests get the 400 below rather
        # than being caught by the generic handler and reported as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({"error": "缺少image字段"}), 400

        img_base64 = data['image']
        if not isinstance(img_base64, str):
            return jsonify({"error": "image字段必须是base64字符串"}), 400

        try:
            img_data = base64.b64decode(img_base64)
            image = Image.open(io.BytesIO(img_data))
            # Image.open only identifies the file; load() forces the pixel
            # data to be decoded here so a truncated or corrupt image is
            # reported as a client error instead of failing later in OCR.
            image.load()
        except (ValueError, OSError) as e:
            # Bad base64 raises binascii.Error (a ValueError); unreadable
            # image bytes raise OSError subclasses.
            return jsonify({"error": f"无法解析图片: {e}"}), 400

        # Normalize RGBA, palette, grayscale, etc. to RGB before recognition.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        texts = recognize_image(image)
        return jsonify({
            "texts": texts,
            "count": len(texts),
            "engine": ocr_type
        })
    except Exception as e:
        # Top-level boundary: keep the JSON error contract for callers, but
        # record the traceback instead of swallowing it.
        app.logger.exception("OCR请求处理失败")
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health_check():
    """Handle ``GET /health``: report liveness and the active engine label.

    Returns:
        A JSON response with ``status`` set to ``"ok"`` and ``engine`` set to
        the module-level ``ocr_type``.
    """
    payload = {"status": "ok", "engine": ocr_type}
    return jsonify(payload)
if __name__ == '__main__':
    # Print the startup banner, then serve on all interfaces at port 8000
    # with Flask's debug mode off.
    rule = "=" * 60
    banner = (
        rule,
        "OCR HTTP 服务",
        rule,
        f"OCR引擎: {ocr_type}",
        "API地址: http://localhost:8000/ocr",
        "健康检查: http://localhost:8000/health",
        rule,
        "\n启动服务中...",
    )
    for banner_line in banner:
        print(banner_line)

    app.run(host='0.0.0.0', port=8000, debug=False)