# long-screen-cut/ocr_server_example.py

"""
OCR服务示例实现
这是一个简单的OCR HTTP服务示例，使用 PaddleOCR 或 Tesseract 作为后端
你可以根据实际需求修改此文件或使用其他OCR服务

启动方式: python ocr_server_example.py
服务地址: http://localhost:8000
"""

import base64
import io
from typing import List

try:
    from flask import Flask, request, jsonify
except ImportError:
    print("请先安装Flask: pip install flask")
    raise

try:
    from PIL import Image
except ImportError:
    print("请先安装Pillow: pip install pillow")
    raise

# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# Select an OCR backend once, at import time, in priority order:
# PaddleOCR > Tesseract (via pytesseract) > mock.
# ocr_engine: the PaddleOCR instance or the pytesseract module; stays None in mock mode.
# ocr_type:   "paddle", "tesseract" or "mock" once the selection below has run.
ocr_engine = None
ocr_type = None

try:
    from paddleocr import PaddleOCR
    ocr_engine = PaddleOCR(
        use_angle_cls=True,
        lang='ch',
        show_log=False
    )
    ocr_type = "paddle"
    print("使用 PaddleOCR 引擎")
except ImportError:
    # NOTE: only ImportError triggers the fallback. Any other exception raised
    # while importing or constructing PaddleOCR propagates and aborts the
    # import of this module.
    try:
        import pytesseract
        # The pytesseract module itself serves as the engine object.
        ocr_engine = pytesseract
        ocr_type = "tesseract"
        print("使用 Tesseract OCR 引擎")
    except ImportError:
        # Neither backend is importable: run in mock mode (ocr_engine stays None)
        # and print installation hints.
        ocr_type = "mock"
        print("警告: 未找到OCR引擎，使用模拟模式")
        print("建议安装 PaddleOCR: pip install paddleocr")
        print("或安装 Tesseract + pytesseract: pip install pytesseract")


def recognize_with_paddle(image: Image.Image) -> List[str]:
    """Recognize text in *image* with the module-level PaddleOCR engine.

    The image is converted to a NumPy array and passed to
    ``ocr_engine.ocr(..., cls=True)``. Only the first element of the result
    is read; each non-empty entry in it is expected to carry the text at
    ``entry[1][0]`` and its confidence at ``entry[1][1]``.

    Returns:
        The texts whose confidence is strictly greater than 0.5, in result
        order; an empty list when the engine reports nothing.
    """
    import numpy as np

    result = ocr_engine.ocr(np.array(image), cls=True)
    if not result or not result[0]:
        return []

    recognized = []
    for entry in result[0]:
        if not entry:
            continue
        content, score = entry[1][0], entry[1][1]
        # Drop low-confidence detections.
        if score > 0.5:
            recognized.append(content)
    return recognized


def recognize_with_tesseract(image: Image.Image) -> List[str]:
    """Recognize text in *image* with the module-level pytesseract engine.

    Calls ``ocr_engine.image_to_string`` with ``lang='chi_sim+eng'`` and
    breaks the returned string into lines on newline characters.

    Returns:
        The lines with surrounding whitespace stripped, skipping any line
        that is empty after stripping.
    """
    raw_text = ocr_engine.image_to_string(image, lang='chi_sim+eng')
    stripped_lines = (piece.strip() for piece in raw_text.split('\n'))
    return [piece for piece in stripped_lines if piece]


def recognize_mock(image: Image.Image) -> List[str]:
    """Stand-in recognizer used for testing when no OCR engine is available.

    *image* is ignored. Every call returns a new single-element list holding
    a fixed placeholder message.
    """
    placeholder = "[模拟OCR] 请安装实际的OCR引擎"
    return [placeholder]


def recognize_image(image: Image.Image) -> List[str]:
    """Recognize text in *image* with whichever backend ``ocr_type`` names.

    ``"paddle"`` and ``"tesseract"`` select their respective recognizers;
    any other value falls back to the mock recognizer.
    """
    recognizers = {
        "paddle": recognize_with_paddle,
        "tesseract": recognize_with_tesseract,
    }
    recognizer = recognizers.get(ocr_type, recognize_mock)
    return recognizer(image)


@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image submitted as JSON.

    Request body: ``{"image": "<base64-encoded image>"}``
    Success body: ``{"texts": [...], "count": len(texts), "engine": ocr_type}``

    Returns:
        A JSON response. Unusable requests (body is not a JSON object, no
        ``image`` field, payload that is not valid base64 or not a readable
        image) get ``{"error": ...}`` with status 400. Any other failure
        gets ``{"error": str(exc)}`` with status 500.
    """
    try:
        # silent=True makes get_json() return None for a missing or malformed
        # JSON body instead of raising, so a bad request is answered with the
        # 400 below rather than being caught and reported as a server error.
        data = request.get_json(silent=True)
        # Valid JSON that is not an object (list, string, number) has no
        # usable 'image' field either.
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({"error": "缺少image字段"}), 400

        # Decode the base64 payload.
        try:
            img_data = base64.b64decode(data['image'])
        except (ValueError, TypeError):
            # binascii.Error (e.g. bad padding) is a ValueError; TypeError
            # covers a non-string value such as a number or null.
            return jsonify({"error": "image字段不是有效的base64编码"}), 400

        try:
            image = Image.open(io.BytesIO(img_data))
            # Normalize to RGB (the input may be RGBA or another mode).
            if image.mode != 'RGB':
                image = image.convert('RGB')
        except (OSError, ValueError):
            # Bytes that are not a readable image are the client's fault.
            return jsonify({"error": "无法解析图片数据"}), 400

        # Run OCR with the configured backend.
        texts = recognize_image(image)

        return jsonify({
            "texts": texts,
            "count": len(texts),
            "engine": ocr_type
        })

    except Exception as e:
        # Last-resort boundary: keep the JSON error contract for unexpected
        # failures, but record the traceback so they can be diagnosed.
        app.logger.exception("OCR请求处理失败")
        return jsonify({"error": str(e)}), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: report an "ok" status and the active OCR backend name."""
    payload = {"status": "ok", "engine": ocr_type}
    return jsonify(payload)


if __name__ == '__main__':
    # Print a startup banner, then serve on all interfaces at port 8000.
    separator = "=" * 60
    banner_lines = (
        separator,
        "OCR HTTP 服务",
        separator,
        f"OCR引擎: {ocr_type}",
        "API地址: http://localhost:8000/ocr",
        "健康检查: http://localhost:8000/health",
        separator,
        "\n启动服务中...",
    )
    for banner_line in banner_lines:
        print(banner_line)
    app.run(host='0.0.0.0', port=8000, debug=False)