- 实现智能区域检测算法(灰度阈值 + 连续行判定)
- 支持Umi-OCR和自定义HTTP OCR服务
- 添加热键触发和鼠标框选区域功能
- 实现自动滚动和智能停止逻辑
- 添加完整的README文档
229 lines
7.0 KiB
Python
229 lines
7.0 KiB
Python
"""
Umi-OCR HTTP client.

Calls Umi-OCR's argv interface to perform OCR.

Umi-OCR interface notes:
- Service address: http://127.0.0.1:1224
- argv endpoint: POST /argv
- Request format: JSON array, e.g. ["--screenshot"] or ["--path", "image path"]
- Response format: plain-text string
"""
|
||
|
||
import time
|
||
import requests
|
||
from typing import List, Optional, Union
|
||
from pathlib import Path
|
||
from loguru import logger
|
||
|
||
|
||
class UmiOCRClient:
    """HTTP client for the Umi-OCR argv interface.

    Commands are sent to the ``/argv`` endpoint as a JSON array of
    command-line arguments, and the response body is read as plain text.
    """

    DEFAULT_HOST = "127.0.0.1"
    DEFAULT_PORT = 1224

    def __init__(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT):
        """Store the service address and derive the endpoint URLs.

        Args:
            host: Host name or IP address of the Umi-OCR HTTP service.
            port: TCP port of the Umi-OCR HTTP service.
        """
        self.host = host
        self.port = port
        self.base_url = f"http://{host}:{port}"
        self.argv_url = f"{self.base_url}/argv"

    def is_service_running(self, timeout: float = 2.0) -> bool:
        """Check whether the Umi-OCR HTTP service answers at ``base_url``.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            True only if a GET on the base URL returns HTTP 200; False for
            any other status code or if the request raises.
        """
        try:
            response = requests.get(self.base_url, timeout=timeout)
            return response.status_code == 200
        except requests.exceptions.ConnectionError:
            logger.warning(f"无法连接到Umi-OCR服务: {self.base_url}")
            return False
        except requests.exceptions.Timeout:
            logger.warning(f"连接Umi-OCR服务超时: {self.base_url}")
            return False
        except Exception as e:
            logger.error(f"检查Umi-OCR服务状态时出错: {e}")
            return False

    def _post_argv(self, argv: List[str], timeout: float) -> str:
        """POST an argument list to the argv endpoint and return the body text.

        Shared by the public recognize_* methods so the request is built in
        exactly one place.

        Args:
            argv: Command-line style arguments, sent as a JSON array.
            timeout: Request timeout in seconds.

        Returns:
            The response body as text.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-success HTTP status.
        """
        response = requests.post(
            self.argv_url,
            headers={"Content-Type": "application/json"},
            json=argv,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.text

    def recognize_screenshot(self, timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to run screenshot recognition.

        Equivalent to the command line: Umi-OCR --screenshot

        Args:
            timeout: Request timeout in seconds.

        Returns:
            The recognized text, or None if the service is not reachable or
            the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行,请先启动Umi-OCR")
            return None

        try:
            text = self._post_argv(["--screenshot"], timeout)
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR截图识别失败: {e}")
            return None

        logger.info(f"截图OCR完成,识别到 {len(text)} 个字符")
        return text

    def recognize_image(self, image_path: Union[str, Path], timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to recognize a specific image file.

        Equivalent to the command line: Umi-OCR --path "image path"

        Args:
            image_path: Path of the image file.
            timeout: Request timeout in seconds.

        Returns:
            The recognized text, or None if the service is not reachable,
            the path does not exist, or the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行,请先启动Umi-OCR")
            return None

        image_path = Path(image_path)
        if not image_path.exists():
            logger.error(f"图片文件不存在: {image_path}")
            return None

        try:
            # Send an absolute, normalized path rather than whatever
            # relative form the caller supplied.
            abs_path = str(image_path.resolve())
            text = self._post_argv(["--path", abs_path], timeout)
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR图片识别失败: {e}")
            return None

        logger.info(f"图片OCR完成: {image_path.name}, 识别到 {len(text)} 个字符")
        return text

    def recognize_images(self, image_paths: List[Union[str, Path]], timeout: float = 30.0) -> List[Optional[str]]:
        """Recognize several images one after another.

        Args:
            image_paths: Image paths to process, in order.
            timeout: Request timeout in seconds, applied to each image.

        Returns:
            One entry per input path, in the same order; the entry is None
            for any image that could not be recognized.
        """
        results: List[Optional[str]] = []
        for index, path in enumerate(image_paths):
            if index:
                # Short pause between consecutive requests to avoid sending
                # them too quickly; nothing to wait for before the first
                # request or after the last one.
                time.sleep(0.1)
            results.append(self.recognize_image(path, timeout))
        return results
|
||
|
||
|
||
def check_and_wait_for_service(client: UmiOCRClient, max_wait: float = 10.0, interval: float = 1.0) -> bool:
    """Poll the Umi-OCR service until it responds or the wait budget runs out.

    The service is always probed at least once, even when ``max_wait`` is
    zero or negative. Each probe uses the client's own request timeout, so
    the total elapsed time can exceed ``max_wait`` by up to one probe.

    Args:
        client: UmiOCRClient instance to probe.
        max_wait: Maximum time to keep waiting, in seconds.
        interval: Pause between probes, in seconds.

    Returns:
        True as soon as a probe succeeds, False if the budget is exhausted.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + max_wait

    while True:
        if client.is_service_running():
            logger.info("Umi-OCR服务已就绪")
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        logger.info("等待Umi-OCR服务启动...")
        # Never sleep past the deadline, and never pass a negative duration.
        time.sleep(max(0.0, min(interval, remaining)))

    logger.error(f"等待Umi-OCR服务超时({max_wait}秒)")
    return False
|
||
|
||
|
||
# 便捷函数
|
||
def recognize_screenshot(
    host: str = UmiOCRClient.DEFAULT_HOST,
    port: int = UmiOCRClient.DEFAULT_PORT,
) -> Optional[str]:
    """Convenience wrapper: run screenshot OCR with a one-off client.

    Args:
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        The result of ``UmiOCRClient.recognize_screenshot`` for a client
        built from ``host`` and ``port``.
    """
    return UmiOCRClient(host, port).recognize_screenshot()
|
||
|
||
|
||
def recognize_image(
    image_path: Union[str, Path],
    host: str = UmiOCRClient.DEFAULT_HOST,
    port: int = UmiOCRClient.DEFAULT_PORT,
) -> Optional[str]:
    """Convenience wrapper: run OCR on one image with a one-off client.

    Args:
        image_path: Path of the image file to recognize.
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        The result of ``UmiOCRClient.recognize_image`` for a client built
        from ``host`` and ``port``.
    """
    return UmiOCRClient(host, port).recognize_image(image_path)
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: check the service, then run one screenshot OCR.
    print("=" * 60)
    print("Umi-OCR 客户端测试")
    print("=" * 60)

    client = UmiOCRClient()

    # Step 1: verify that the HTTP service responds.
    print("\n1. 检查服务状态...")
    if client.is_service_running():
        print("✓ Umi-OCR服务运行中")
    else:
        print("✗ Umi-OCR服务未运行")
        print("请先启动Umi-OCR软件并开启HTTP服务(设置->HTTP接口->启用)")
        # Raise SystemExit directly: the bare exit() helper is injected by
        # the site module and is missing under `python -S` or frozen builds.
        raise SystemExit(1)

    # Step 2: screenshot recognition, after a delay so the user can set up
    # the content to capture.
    print("\n2. 测试截图识别...")
    print("请在5秒内准备好要截图的内容...")
    time.sleep(5)

    result = client.recognize_screenshot()
    if result:
        # Only the first 200 characters are shown.
        print(f"✓ 识别成功,内容:\n{result[:200]}...")
    else:
        print("✗ 识别失败")

    print("\n" + "=" * 60)
    print("测试完成")
    print("=" * 60)
|