Files
long-screen-cut/umi_ocr_client.py
xiaji 8600c0f576 feat: 初始提交 - 滚动截屏OCR工具
- 实现智能区域检测算法(灰度阈值 + 连续行判定)
- 支持Umi-OCR和自定义HTTP OCR服务
- 添加热键触发和鼠标框选区域功能
- 实现自动滚动和智能停止逻辑
- 添加完整的README文档
2026-03-06 15:07:51 +08:00

229 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Umi-OCR HTTP客户端
用于调用Umi-OCR的argv接口进行OCR识别
Umi-OCR 接口文档:
- 服务地址: http://127.0.0.1:1224
- argv接口: POST /argv
- 请求格式: JSON数组,如 ["--screenshot"] 或 ["--path", "图片路径"]
- 返回格式: 纯文本字符串
"""
import time
import requests
from typing import List, Optional, Union
from pathlib import Path
from loguru import logger
class UmiOCRClient:
    """HTTP client for the Umi-OCR ``/argv`` endpoint.

    Every recognition request is sent as a JSON array of command-line style
    arguments via ``POST {base_url}/argv``; the response body is returned to
    the caller as text. Failures are logged and reported as ``None`` rather
    than raised.
    """

    DEFAULT_HOST = "127.0.0.1"
    DEFAULT_PORT = 1224

    def __init__(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT):
        """Remember the service address and derive the endpoint URLs.

        Args:
            host: Host name or IP address of the Umi-OCR HTTP service.
            port: TCP port of the Umi-OCR HTTP service.
        """
        self.host = host
        self.port = port
        self.base_url = f"http://{host}:{port}"
        self.argv_url = f"{self.base_url}/argv"

    def is_service_running(self, timeout: float = 2.0) -> bool:
        """Check whether the Umi-OCR HTTP service answers on ``base_url``.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            True only if a GET on ``base_url`` returns status code 200;
            False on any other status or on any error (which is logged).
        """
        try:
            response = requests.get(self.base_url, timeout=timeout)
            return response.status_code == 200
        except requests.exceptions.ConnectionError:
            logger.warning(f"无法连接到Umi-OCR服务: {self.base_url}")
            return False
        except requests.exceptions.Timeout:
            logger.warning(f"连接Umi-OCR服务超时: {self.base_url}")
            return False
        except Exception as e:
            # Best-effort probe: never let a health check raise.
            logger.error(f"检查Umi-OCR服务状态时出错: {e}")
            return False

    def _post_argv(self, args: List[str], timeout: float) -> str:
        """POST ``args`` as a JSON array to the argv endpoint.

        Args:
            args: Command-line style arguments, e.g. ``["--screenshot"]``.
            timeout: Request timeout in seconds.

        Returns:
            The response body as text.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                timeouts, or a non-success HTTP status.
        """
        response = requests.post(
            self.argv_url,
            headers={"Content-Type": "application/json"},
            json=args,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.text

    def recognize_screenshot(self, timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to run screenshot OCR.

        Sends the ``--screenshot`` argument to the argv endpoint.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            The recognized text, or None if the service is unavailable or
            the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行请先启动Umi-OCR")
            return None
        try:
            text = self._post_argv(["--screenshot"], timeout)
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR截图识别失败: {e}")
            return None
        else:
            logger.info(f"截图OCR完成识别到 {len(text)} 个字符")
            return text

    def recognize_image(self, image_path: Union[str, Path], timeout: float = 30.0) -> Optional[str]:
        """Ask Umi-OCR to recognize a single image file.

        Sends ``["--path", <absolute path>]`` to the argv endpoint.

        Args:
            image_path: Path of the image file to recognize.
            timeout: Request timeout in seconds.

        Returns:
            The recognized text, or None if the service is unavailable, the
            path does not exist, or the request fails.
        """
        if not self.is_service_running():
            logger.error("Umi-OCR服务未运行请先启动Umi-OCR")
            return None
        image_path = Path(image_path)
        if not image_path.exists():
            logger.error(f"图片文件不存在: {image_path}")
            return None
        try:
            # Send a resolved absolute path rather than the caller's
            # possibly relative one.
            abs_path = str(image_path.resolve())
            text = self._post_argv(["--path", abs_path], timeout)
        except requests.exceptions.Timeout:
            logger.error("Umi-OCR请求超时")
            return None
        except Exception as e:
            logger.error(f"Umi-OCR图片识别失败: {e}")
            return None
        else:
            logger.info(f"图片OCR完成: {image_path.name}, 识别到 {len(text)} 个字符")
            return text

    def recognize_images(self, image_paths: List[Union[str, Path]], timeout: float = 30.0) -> List[Optional[str]]:
        """Recognize several images one after another.

        Args:
            image_paths: Paths of the images to recognize.
            timeout: Per-image request timeout in seconds.

        Returns:
            One entry per input path, in order; an entry is None when that
            image could not be recognized.
        """
        results: List[Optional[str]] = []
        for index, path in enumerate(image_paths):
            if index:
                # Short pause between consecutive requests so the service is
                # not hammered; no pause is needed after the final image.
                time.sleep(0.1)
            results.append(self.recognize_image(path, timeout))
        return results
def check_and_wait_for_service(client: UmiOCRClient, max_wait: float = 10.0, interval: float = 1.0) -> bool:
    """Poll the Umi-OCR service until it responds or ``max_wait`` elapses.

    Args:
        client: The UmiOCRClient whose ``is_service_running`` is polled.
        max_wait: Maximum total time to wait, in seconds.
        interval: Pause between two consecutive checks, in seconds.

    Returns:
        True as soon as the service reports it is running; False if the
        waiting period ends first.
    """
    # A monotonic clock keeps the elapsed-time measurement correct even if
    # the system wall clock is adjusted while waiting.
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        if client.is_service_running():
            logger.info("Umi-OCR服务已就绪")
            return True
        logger.info("等待Umi-OCR服务启动...")
        # Never sleep past the deadline, so the total wait stays close to
        # max_wait instead of overshooting by up to one full interval.
        remaining = max(0.0, deadline - time.monotonic())
        time.sleep(min(interval, remaining))
    logger.error(f"等待Umi-OCR服务超时({max_wait}秒)")
    return False
# 便捷函数
def recognize_screenshot(host: str = UmiOCRClient.DEFAULT_HOST,
                         port: int = UmiOCRClient.DEFAULT_PORT) -> Optional[str]:
    """Convenience wrapper: run screenshot OCR with a throwaway client.

    Args:
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        Whatever ``UmiOCRClient.recognize_screenshot`` returns for a client
        built from ``host`` and ``port``.
    """
    return UmiOCRClient(host, port).recognize_screenshot()
def recognize_image(image_path: Union[str, Path],
                    host: str = UmiOCRClient.DEFAULT_HOST,
                    port: int = UmiOCRClient.DEFAULT_PORT) -> Optional[str]:
    """Convenience wrapper: recognize one image with a throwaway client.

    Args:
        image_path: Path of the image file to recognize.
        host: Host of the Umi-OCR HTTP service.
        port: Port of the Umi-OCR HTTP service.

    Returns:
        Whatever ``UmiOCRClient.recognize_image`` returns for a client built
        from ``host`` and ``port``.
    """
    return UmiOCRClient(host, port).recognize_image(image_path)
if __name__ == "__main__":
    # Manual smoke test: verify the service is reachable, then run one
    # screenshot recognition and print the beginning of the result.
    print("=" * 60)
    print("Umi-OCR 客户端测试")
    print("=" * 60)
    client = UmiOCRClient()

    # Step 1: make sure the HTTP service is reachable before going further.
    print("\n1. 检查服务状态...")
    if client.is_service_running():
        print("✓ Umi-OCR服务运行中")
    else:
        print("✗ Umi-OCR服务未运行")
        print("请先启动Umi-OCR软件并开启HTTP服务(设置->HTTP接口->启用)")
        # The bare exit() helper is injected by the site module and is not
        # guaranteed to exist; raising SystemExit always works.
        raise SystemExit(1)

    # Step 2: screenshot recognition, after a short delay so the user can
    # bring the content to capture into view.
    print("\n2. 测试截图识别...")
    print("请在5秒内准备好要截图的内容...")
    time.sleep(5)
    result = client.recognize_screenshot()
    if result:
        # Only the first 200 characters are shown to keep the output short.
        print(f"✓ 识别成功,内容:\n{result[:200]}...")
    else:
        print("✗ 识别失败")
    print("\n" + "=" * 60)
    print("测试完成")
    print("=" * 60)