229 lines
7.0 KiB
Python
229 lines
7.0 KiB
Python
|
|
"""
|
|||
|
|
Umi-OCR HTTP客户端
|
|||
|
|
用于调用Umi-OCR的argv接口进行OCR识别
|
|||
|
|
|
|||
|
|
Umi-OCR 接口文档:
|
|||
|
|
- 服务地址: http://127.0.0.1:1224
|
|||
|
|
- argv接口: POST /argv
|
|||
|
|
- 请求格式: JSON数组,如 ["--screenshot"] 或 ["--path", "图片路径"]
|
|||
|
|
- 返回格式: 纯文本字符串
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import time
|
|||
|
|
import requests
|
|||
|
|
from typing import List, Optional, Union
|
|||
|
|
from pathlib import Path
|
|||
|
|
from loguru import logger
|
|||
|
|
|
|||
|
|
|
|||
|
|
class UmiOCRClient:
    """HTTP client for a running Umi-OCR instance.

    Commands are sent to the ``/argv`` endpoint as a JSON array of
    command-line arguments (for example ``["--screenshot"]`` or
    ``["--path", "<image path>"]``). The response body is returned to the
    caller as the recognized text.
    """

    DEFAULT_HOST = "127.0.0.1"
    DEFAULT_PORT = 1224

    def __init__(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT):
        """Store the endpoint and precompute the URLs used by the client.

        Args:
            host: Host name or IP address of the Umi-OCR HTTP service.
            port: TCP port of the Umi-OCR HTTP service.
        """
        self.host = host
        self.port = port
        self.base_url = f"http://{host}:{port}"
        self.argv_url = f"{self.base_url}/argv"

    def is_service_running(self, timeout: float = 2.0) -> bool:
        """Check whether the Umi-OCR HTTP service answers on ``base_url``.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            True when a GET on ``base_url`` returns HTTP 200; False on any
            other status, connection failure, timeout, or unexpected error.
        """
        try:
            response = requests.get(self.base_url, timeout=timeout)
        except requests.exceptions.ConnectionError:
            logger.warning(f"无法连接到Umi-OCR服务: {self.base_url}")
            return False
        except requests.exceptions.Timeout:
            logger.warning(f"连接Umi-OCR服务超时: {self.base_url}")
            return False
        except Exception as e:
            logger.error(f"检查Umi-OCR服务状态时出错: {e}")
            return False
        return response.status_code == 200

    def _post_argv(self, args: List[str], timeout: float) -> str:
        """POST *args* to the argv endpoint and return the response text.

        Raises whatever ``requests`` raises (including ``HTTPError`` from
        ``raise_for_status``); callers decide how to log and recover.
        """
        response = requests.post(
            self.argv_url,
            headers={"Content-Type": "application/json"},
            json=args,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.text

    def recognize_screenshot(self, timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to run a screenshot recognition.

        Equivalent to the command line ``Umi-OCR --screenshot``.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            The recognized text, or None when the service is unavailable or
            the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行,请先启动Umi-OCR")
            return None

        try:
            text = self._post_argv(["--screenshot"], timeout)
            logger.info(f"截图OCR完成,识别到 {len(text)} 个字符")
            return text
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR截图识别失败: {e}")
            return None

    def recognize_image(self, image_path: Union[str, Path], timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to recognize the image at *image_path*.

        Equivalent to the command line ``Umi-OCR --path "<image path>"``.

        Args:
            image_path: Path of the image file.
            timeout: Request timeout in seconds.

        Returns:
            The recognized text, or None when the service is unavailable,
            the path does not exist, or the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行,请先启动Umi-OCR")
            return None

        image_path = Path(image_path)
        if not image_path.exists():
            logger.error(f"图片文件不存在: {image_path}")
            return None

        try:
            # Send an absolute, normalized path: the service resolves paths
            # on its own side, not relative to this process's working dir.
            abs_path = str(image_path.resolve())
            text = self._post_argv(["--path", abs_path], timeout)
            logger.info(f"图片OCR完成: {image_path.name}, 识别到 {len(text)} 个字符")
            return text
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR图片识别失败: {e}")
            return None

    def recognize_images(
        self, image_paths: List[Union[str, Path]], timeout: float = 30.0
    ) -> List[Optional[str]]:
        """Recognize several images one after another.

        Args:
            image_paths: Paths of the images to recognize.
            timeout: Per-image request timeout in seconds.

        Returns:
            One entry per input path, in order; the entry is None for any
            image whose recognition failed.
        """
        results: List[Optional[str]] = []
        for index, path in enumerate(image_paths):
            if index:
                # Short pause between consecutive requests so the service is
                # not hit too quickly; nothing to wait for before the first
                # request or after the last one.
                time.sleep(0.1)
            results.append(self.recognize_image(path, timeout))
        return results
|
|||
|
|
|
|||
|
|
|
|||
|
|
def check_and_wait_for_service(client: UmiOCRClient, max_wait: float = 10.0, interval: float = 1.0) -> bool:
    """Poll the Umi-OCR service until it responds or *max_wait* elapses.

    Args:
        client: UmiOCRClient instance used to probe the service.
        max_wait: Maximum total time to wait, in seconds.
        interval: Pause between two probes, in seconds.

    Returns:
        True as soon as ``client.is_service_running()`` succeeds, False when
        the deadline passes without a successful probe.
    """
    # A monotonic clock is immune to wall-clock adjustments (NTP, DST, manual
    # changes) that could otherwise shorten or extend the wait.
    deadline = time.monotonic() + max_wait

    while True:
        # Probe first so the service is checked at least once, even when
        # max_wait is zero or negative.
        if client.is_service_running():
            logger.info("Umi-OCR服务已就绪")
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        logger.info("等待Umi-OCR服务启动...")
        # Never sleep past the deadline.
        time.sleep(min(interval, remaining))

    logger.error(f"等待Umi-OCR服务超时({max_wait}秒)")
    return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 便捷函数
|
|||
|
|
def recognize_screenshot(
    host: str = UmiOCRClient.DEFAULT_HOST,
    port: int = UmiOCRClient.DEFAULT_PORT,
) -> Optional[str]:
    """Convenience wrapper: run a screenshot recognition with a throwaway client.

    Args:
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        Whatever ``UmiOCRClient.recognize_screenshot`` returns for a client
        built from *host* and *port*.
    """
    return UmiOCRClient(host, port).recognize_screenshot()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def recognize_image(
    image_path: Union[str, Path],
    host: str = UmiOCRClient.DEFAULT_HOST,
    port: int = UmiOCRClient.DEFAULT_PORT,
) -> Optional[str]:
    """Convenience wrapper: recognize one image with a throwaway client.

    Args:
        image_path: Path of the image file to recognize.
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        Whatever ``UmiOCRClient.recognize_image`` returns for *image_path*
        on a client built from *host* and *port*.
    """
    return UmiOCRClient(host, port).recognize_image(image_path)
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Manual smoke test: needs a running Umi-OCR instance with its HTTP
    # service enabled.
    print("=" * 60)
    print("Umi-OCR 客户端测试")
    print("=" * 60)

    client = UmiOCRClient()

    # Step 1: check that the service is reachable.
    print("\n1. 检查服务状态...")
    if client.is_service_running():
        print("✓ Umi-OCR服务运行中")
    else:
        print("✗ Umi-OCR服务未运行")
        print("请先启动Umi-OCR软件并开启HTTP服务(设置->HTTP接口->启用)")
        # Raise SystemExit directly: the bare exit() helper is injected by the
        # site module and is not guaranteed to exist (e.g. with `python -S`).
        raise SystemExit(1)

    # Step 2: exercise screenshot recognition.
    print("\n2. 测试截图识别...")
    print("请在5秒内准备好要截图的内容...")
    # Give the operator time to bring the target content on screen.
    time.sleep(5)

    result = client.recognize_screenshot()
    if result:
        print(f"✓ 识别成功,内容:\n{result[:200]}...")
    else:
        print("✗ 识别失败")

    print("\n" + "=" * 60)
    print("测试完成")
    print("=" * 60)
|