import os
import atexit  # exit-handler registration
import subprocess  # child process and pipes
from json import loads as jsonLoads, dumps as jsonDumps
from sys import platform as sysPlatform  # used to pick the silent Popen mode
from base64 import b64encode  # base64 encoding


class Rapid_pipe:
    """Drive an OCR engine executable through stdin/stdout pipes.

    Each request is one JSON line written to the child's stdin; the reply is
    one JSON line read back from its stdout.
    """

    def __init__(self, exePath: str, argument: dict = None):
        """Start the engine process and block until it reports readiness.

        Args:
            exePath: Path to the engine executable (e.g. `PaddleOCR_json.exe`).
            argument: Optional start-up options as a `{"key": value}` dict.
                See https://github.com/hiroi-sora/PaddleOCR-json for the
                available options.

        Raises:
            Exception: If the child process exits before printing
                "OCR init completed.".
        """
        # Assigned first so exit()/__del__ stay safe if anything below raises.
        self.ret = None

        # The engine runs with its own folder as the working directory.
        cwd = os.path.abspath(os.path.join(exePath, os.pardir))
        command = self._buildCommand(exePath, argument)

        # On Windows, start the child silently (no console window shown).
        startupinfo = None
        if "win32" in str(sysPlatform).lower():
            startupinfo = subprocess.STARTUPINFO()
            # NOTE(review): CREATE_NEW_CONSOLE is documented as a Popen
            # `creationflags` value rather than a STARTUPINFO.dwFlags bit;
            # kept unchanged to preserve existing behavior - confirm intent.
            startupinfo.dwFlags = (
                subprocess.CREATE_NEW_CONSOLE | subprocess.STARTF_USESHOWWINDOW
            )
            startupinfo.wShowWindow = subprocess.SW_HIDE

        self.ret = subprocess.Popen(  # open the pipes
            command,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # discard stderr output
            startupinfo=startupinfo,  # silent mode (None off Windows)
        )

        # Consume start-up output until the engine announces it is ready.
        while True:
            if self.ret.poll() is not None:  # child already exited: init failed
                raise Exception("OCR init fail.")
            initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
            if "OCR init completed." in initStr:  # init succeeded
                break

        # Make sure the child is stopped when the interpreter terminates.
        atexit.register(self.exit)

    @staticmethod
    def _buildCommand(exePath: str, argument: dict = None) -> list:
        """Build the argv list used to launch the engine.

        A list (instead of one concatenated string) lets `subprocess` do the
        platform-specific quoting, so values containing spaces or trailing
        backslashes survive, and the launch also works on POSIX where a
        command string with arguments is not split.

        Args:
            exePath: Path to the engine executable.
            argument: Optional `{"key": value}` start-up options.

        Returns:
            `[exePath, "--key=value", ...]`, with `--ensureAscii=1` appended
            unless the caller supplied an `ensureAscii` option.
        """
        options = {} if argument is None else argument
        command = [exePath]
        command.extend(f"--{key}={value}" for key, value in options.items())
        if "ensureAscii" not in options:
            command.append("--ensureAscii=1")
        return command

    def runDict(self, writeDict: dict):
        """Send one instruction dict to the engine and return its reply.

        Args:
            writeDict: Instruction dict; serialized as a single JSON line.

        Returns:
            The engine's decoded JSON reply, or a locally built
            `{"code": ..., "data": message}` dict on failure:
            901 no engine instance, 902 child exited / write failed,
            903 reading the reply failed, 904 reply is not valid JSON.
        """
        # Check the child process.
        if not self.ret:
            return {"code": 901, "data": f"引擎实例不存在。"}
        if self.ret.poll() is not None:
            return {"code": 902, "data": f"子进程已崩溃。"}

        # Send the request as one newline-terminated JSON line.
        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        try:
            self.ret.stdin.write(writeStr.encode("utf-8"))
            self.ret.stdin.flush()
        except Exception as e:
            return {"code": 902, "data": f"向识别器进程传入指令失败,疑似子进程已崩溃。{e}"}

        # Read the single-line reply.
        try:
            getStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
        except Exception as e:
            return {"code": 903, "data": f"读取识别器进程输出值失败。异常信息:[{e}]"}

        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {"code": 904, "data": f"识别器输出值反序列化JSON失败。异常信息:[{e}]。原始内容:[{getStr}]"}

    def run(self, imgPath: str):
        """Run text recognition on a local image file.

        Args:
            imgPath: Path of the image.

        Returns:
            `{"code": result code, "data": result list or error message}`.
        """
        return self.runDict({"image_path": imgPath})

    def runBase64(self, imageBase64: str):
        """Run text recognition on an image given as a base64 string.

        Args:
            imageBase64: Base64-encoded image.

        Returns:
            `{"code": result code, "data": result list or error message}`.
        """
        return self.runDict({"image_base64": imageBase64})

    def runBytes(self, imageBytes):
        """Run text recognition on raw image bytes.

        Args:
            imageBytes: Image content as bytes; it is base64-encoded and
                forwarded to `runBase64`.

        Returns:
            `{"code": result code, "data": result list or error message}`.
        """
        imageBase64 = b64encode(imageBytes).decode("utf-8")
        return self.runBase64(imageBase64)

    def exit(self):
        """Kill the engine child process, if one is running."""
        # getattr: __del__ may call this on an instance whose __init__ never
        # got as far as assigning `ret`.
        process = getattr(self, "ret", None)
        if not process:
            return
        process.kill()  # stop the child process
        self.ret = None
        atexit.unregister(self.exit)  # drop the exit handler
        print("### RapidOCR引擎子进程关闭!")

    @staticmethod
    def printResult(res: dict):
        """Pretty-print a recognition result (debugging helper).

        Args:
            res: Result dict as returned by `run` / `runDict`.
        """
        code = res["code"]
        if code == 100:  # recognition succeeded
            for index, line in enumerate(res["data"], start=1):
                print(f"{index}-置信度:{round(line['score'], 2)},文本:{line['text']}")
        elif code == 101:  # engine ran but found no text
            print("图片中未识别出文字。")
        else:
            print(f"图片识别失败。错误码:{res['code']},错误信息:{res['data']}")

    def __del__(self):
        """Stop the child process when the instance is garbage-collected."""
        self.exit()