""" Umi-OCR HTTP客户端 用于调用Umi-OCR的argv接口进行OCR识别 Umi-OCR 接口文档: - 服务地址: http://127.0.0.1:1224 - argv接口: POST /argv - 请求格式: JSON数组,如 ["--screenshot"] 或 ["--path", "图片路径"] - 返回格式: 纯文本字符串 """ import time import requests from typing import List, Optional, Union from pathlib import Path from loguru import logger class UmiOCRClient: """Umi-OCR HTTP客户端""" DEFAULT_HOST = "127.0.0.1" DEFAULT_PORT = 1224 def __init__(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT): self.host = host self.port = port self.base_url = f"http://{host}:{port}" self.argv_url = f"{self.base_url}/argv" def is_service_running(self, timeout: float = 2.0) -> bool: """ 检查Umi-OCR HTTP服务是否运行 Args: timeout: 请求超时时间(秒) Returns: 服务是否可用 """ try: response = requests.get( self.base_url, timeout=timeout ) return response.status_code == 200 except requests.exceptions.ConnectionError: logger.warning(f"无法连接到Umi-OCR服务: {self.base_url}") return False except requests.exceptions.Timeout: logger.warning(f"连接Umi-OCR服务超时: {self.base_url}") return False except Exception as e: logger.error(f"检查Umi-OCR服务状态时出错: {e}") return False def recognize_screenshot(self, timeout: float = 30.0) -> Optional[str]: """ 调用Umi-OCR进行截图识别 等价于命令行: Umi-OCR --screenshot Args: timeout: 请求超时时间(秒) Returns: 识别到的文字,失败返回None """ if not self.is_service_running(): logger.error("Umi-OCR服务未运行,请先启动Umi-OCR") return None try: data = ["--screenshot"] response = requests.post( self.argv_url, headers={"Content-Type": "application/json"}, json=data, timeout=timeout ) response.raise_for_status() text = response.text logger.info(f"截图OCR完成,识别到 {len(text)} 个字符") return text except requests.exceptions.Timeout: logger.error("Umi-OCR请求超时") return None except Exception as e: logger.error(f"Umi-OCR截图识别失败: {e}") return None def recognize_image(self, image_path: Union[str, Path], timeout: float = 30.0) -> Optional[str]: """ 调用Umi-OCR识别指定图片 等价于命令行: Umi-OCR --path "图片路径" Args: image_path: 图片文件路径 timeout: 请求超时时间(秒) Returns: 识别到的文字,失败返回None """ if not self.is_service_running(): logger.error("Umi-OCR服务未运行,请先启动Umi-OCR") return None image_path = Path(image_path) if not image_path.exists(): logger.error(f"图片文件不存在: {image_path}") return None try: # 转换为绝对路径并标准化 abs_path = str(image_path.resolve()) data = ["--path", abs_path] response = requests.post( self.argv_url, headers={"Content-Type": "application/json"}, json=data, timeout=timeout ) response.raise_for_status() text = response.text logger.info(f"图片OCR完成: {image_path.name}, 识别到 {len(text)} 个字符") return text except requests.exceptions.Timeout: logger.error("Umi-OCR请求超时") return None except Exception as e: logger.error(f"Umi-OCR图片识别失败: {e}") return None def recognize_images(self, image_paths: List[Union[str, Path]], timeout: float = 30.0) -> List[str]: """ 批量识别多张图片 Args: image_paths: 图片路径列表 timeout: 每张图片的请求超时时间(秒) Returns: 识别结果列表,失败的图片对应位置为None """ results = [] for path in image_paths: result = self.recognize_image(path, timeout) results.append(result) # 添加小延迟避免请求过快 time.sleep(0.1) return results def check_and_wait_for_service(client: UmiOCRClient, max_wait: float = 10.0, interval: float = 1.0) -> bool: """ 检查并等待Umi-OCR服务启动 Args: client: UmiOCRClient实例 max_wait: 最大等待时间(秒) interval: 检查间隔(秒) Returns: 服务是否可用 """ start_time = time.time() while time.time() - start_time < max_wait: if client.is_service_running(): logger.info("Umi-OCR服务已就绪") return True logger.info("等待Umi-OCR服务启动...") time.sleep(interval) logger.error(f"等待Umi-OCR服务超时({max_wait}秒)") return False # 便捷函数 def recognize_screenshot(host: str = UmiOCRClient.DEFAULT_HOST, port: int = UmiOCRClient.DEFAULT_PORT) -> Optional[str]: """便捷函数:截图识别""" client = UmiOCRClient(host, port) return client.recognize_screenshot() def recognize_image(image_path: Union[str, Path], host: str = UmiOCRClient.DEFAULT_HOST, port: int = UmiOCRClient.DEFAULT_PORT) -> Optional[str]: """便捷函数:图片识别""" client = UmiOCRClient(host, port) return client.recognize_image(image_path) if __name__ == "__main__": # 测试代码 print("=" * 60) print("Umi-OCR 客户端测试") print("=" * 60) client = UmiOCRClient() # 检查服务状态 print("\n1. 检查服务状态...") if client.is_service_running(): print("✓ Umi-OCR服务运行中") else: print("✗ Umi-OCR服务未运行") print("请先启动Umi-OCR软件并开启HTTP服务(设置->HTTP接口->启用)") exit(1) # 测试截图识别 print("\n2. 测试截图识别...") print("请在5秒内准备好要截图的内容...") time.sleep(5) result = client.recognize_screenshot() if result: print(f"✓ 识别成功,内容:\n{result[:200]}...") else: print("✗ 识别失败") print("\n" + "=" * 60) print("测试完成") print("=" * 60)