Files
long-screen-cut/umi_ocr_client.py

229 lines
7.0 KiB
Python
Raw Normal View History

"""
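HTTP client for Umi-OCR.
Calls Umi-OCR's argv endpoint to run OCR recognition.
Umi-OCR API notes:
- Service address: http://127.0.0.1:1224
- argv endpoint: POST /argv
- Request format: JSON array, e.g. ["--screenshot"] or ["--path", "<image path>"]
- Response format: plain text string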
"""
import time
import requests
from typing import List, Optional, Union
from pathlib import Path
from loguru import logger
class UmiOCRClient:
    """HTTP client for the ``/argv`` endpoint of a running Umi-OCR instance.

    The recognition methods probe the service with a GET on the base URL
    before posting, and return ``None`` (after logging the reason) instead of
    raising when the service is unavailable or the request fails.
    """

    DEFAULT_HOST = "127.0.0.1"
    DEFAULT_PORT = 1224

    def __init__(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT):
        """Store the endpoint and precompute the URLs used by every request.

        Args:
            host: Host name or IP address of the Umi-OCR HTTP service.
            port: TCP port of the Umi-OCR HTTP service.
        """
        self.host = host
        self.port = port
        self.base_url = f"http://{host}:{port}"
        self.argv_url = f"{self.base_url}/argv"

    def is_service_running(self, timeout: float = 2.0) -> bool:
        """Check whether the Umi-OCR HTTP service answers on the base URL.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            True only when the GET succeeds with status code 200; any
            connection error, timeout or other exception yields False.
        """
        try:
            response = requests.get(self.base_url, timeout=timeout)
            return response.status_code == 200
        except requests.exceptions.ConnectionError:
            logger.warning(f"无法连接到Umi-OCR服务: {self.base_url}")
            return False
        except requests.exceptions.Timeout:
            logger.warning(f"连接Umi-OCR服务超时: {self.base_url}")
            return False
        except Exception as e:
            logger.error(f"检查Umi-OCR服务状态时出错: {e}")
            return False

    def _post_argv(self, argv: List[str], timeout: float) -> str:
        """POST an argv list to the service and return the response body.

        Shared by the recognition methods so the request is built in one
        place. Exceptions from ``requests`` (including HTTP error statuses via
        ``raise_for_status``) propagate to the caller, which logs them.
        """
        response = requests.post(
            self.argv_url,
            headers={"Content-Type": "application/json"},
            json=argv,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.text

    def recognize_screenshot(self, timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to run a screenshot recognition.

        Equivalent to the command line ``Umi-OCR --screenshot``.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            The response text, or None when the service is not running or
            the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行请先启动Umi-OCR")
            return None
        try:
            text = self._post_argv(["--screenshot"], timeout)
            logger.info(f"截图OCR完成识别到 {len(text)} 个字符")
            return text
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR截图识别失败: {e}")
            return None

    def recognize_image(self, image_path: Union[str, Path], timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to recognize the image at ``image_path``.

        Equivalent to the command line ``Umi-OCR --path "<image path>"``.

        Args:
            image_path: Path of the image file.
            timeout: Request timeout in seconds.

        Returns:
            The response text, or None when the service is not running, the
            path does not exist, or the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行请先启动Umi-OCR")
            return None
        image_path = Path(image_path)
        if not image_path.exists():
            logger.error(f"图片文件不存在: {image_path}")
            return None
        try:
            # Send an absolute, normalized path: the service resolves paths
            # relative to its own working directory, not ours.
            abs_path = str(image_path.resolve())
            text = self._post_argv(["--path", abs_path], timeout)
            logger.info(f"图片OCR完成: {image_path.name}, 识别到 {len(text)} 个字符")
            return text
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR图片识别失败: {e}")
            return None

    def recognize_images(
        self, image_paths: List[Union[str, Path]], timeout: float = 30.0
    ) -> List[Optional[str]]:
        """Recognize several images one after another.

        Args:
            image_paths: Paths of the images to recognize.
            timeout: Request timeout in seconds, applied to each image.

        Returns:
            One entry per input path, in order; the entry is None for every
            image whose recognition failed.
        """
        results: List[Optional[str]] = []
        for index, path in enumerate(image_paths):
            if index:
                # Short pause between requests so the service is not flooded;
                # nothing to wait for before the first or after the last one.
                time.sleep(0.1)
            results.append(self.recognize_image(path, timeout))
        return results
def check_and_wait_for_service(client: UmiOCRClient, max_wait: float = 10.0, interval: float = 1.0) -> bool:
    """Poll the Umi-OCR service until it responds or ``max_wait`` elapses.

    The service is always probed at least once, even when ``max_wait`` is
    zero or negative.

    Args:
        client: UmiOCRClient instance used to probe the service.
        max_wait: Maximum time to keep waiting, in seconds.
        interval: Pause between two probes, in seconds.

    Returns:
        True as soon as a probe succeeds, False once the deadline has passed.
    """
    # A monotonic clock keeps the deadline correct even if the wall clock is
    # adjusted while we are waiting.
    deadline = time.monotonic() + max_wait
    while True:
        if client.is_service_running():
            logger.info("Umi-OCR服务已就绪")
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        logger.info("等待Umi-OCR服务启动...")
        # Never sleep past the deadline, and never pass a negative value to
        # time.sleep (it raises ValueError).
        time.sleep(max(0.0, min(interval, remaining)))
    logger.error(f"等待Umi-OCR服务超时{max_wait}秒)")
    return False
# 便捷函数
def recognize_screenshot(
    host: str = UmiOCRClient.DEFAULT_HOST,
    port: int = UmiOCRClient.DEFAULT_PORT,
) -> Optional[str]:
    """Convenience wrapper: run a screenshot recognition with a throwaway client.

    Args:
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        Whatever ``UmiOCRClient.recognize_screenshot`` returns for that endpoint.
    """
    return UmiOCRClient(host, port).recognize_screenshot()
def recognize_image(
    image_path: Union[str, Path],
    host: str = UmiOCRClient.DEFAULT_HOST,
    port: int = UmiOCRClient.DEFAULT_PORT,
) -> Optional[str]:
    """Convenience wrapper: recognize one image with a throwaway client.

    Args:
        image_path: Path of the image file.
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        Whatever ``UmiOCRClient.recognize_image`` returns for that path.
    """
    return UmiOCRClient(host, port).recognize_image(image_path)
if __name__ == "__main__":
    # Manual smoke test: needs a running Umi-OCR instance with its HTTP
    # service enabled.
    banner = "=" * 60
    print(banner)
    print("Umi-OCR 客户端测试")
    print(banner)
    client = UmiOCRClient()

    # Step 1: make sure the service is reachable before going further.
    print("\n1. 检查服务状态...")
    if not client.is_service_running():
        print("✗ Umi-OCR服务未运行")
        print("请先启动Umi-OCR软件并开启HTTP服务设置->HTTP接口->启用)")
        # The bare exit() helper is injected by the site module and is not
        # guaranteed to exist; SystemExit always works.
        raise SystemExit(1)
    print("✓ Umi-OCR服务运行中")

    # Step 2: screenshot recognition, after giving the user time to set up.
    print("\n2. 测试截图识别...")
    print("请在5秒内准备好要截图的内容...")
    time.sleep(5)
    result = client.recognize_screenshot()
    if result:
        # Only the first 200 characters are shown.
        print(f"✓ 识别成功,内容:\n{result[:200]}...")
    else:
        print("✗ 识别失败")

    print("\n" + banner)
    print("测试完成")
    print(banner)