# Listing metadata: 125 lines, 5.3 KiB, Python
import os
|
||
import atexit # 退出处理
|
||
import subprocess # 进程,管道
|
||
from json import loads as jsonLoads, dumps as jsonDumps
|
||
from sys import platform as sysPlatform # popen静默模式
|
||
from base64 import b64encode # base64 编码
|
||
|
||
|
||
class Rapid_pipe:
    """OCR client that drives a recognizer executable through pipes.

    The executable is launched once as a child process. Every request is a
    JSON object written as one line to the child's stdin, and every reply is
    one line read back from the child's stdout and parsed as JSON.
    """

    def __init__(self, exePath: str, argument: dict = None):
        """Start the recognizer process and block until it reports readiness.

        Args:
            exePath: Path to the recognizer executable (for example
                ``PaddleOCR_json.exe``).
            argument: Optional startup parameters as ``{"key": value}``.
                See https://github.com/hiroi-sora/PaddleOCR-json for the
                accepted keys.

        Raises:
            Exception: ``"OCR init fail."`` when the child exits, or closes
                its stdout, before printing ``OCR init completed.``.
        """
        # Assigned first so exit()/__del__ stay safe if anything below raises.
        self.ret = None

        # The child runs with the executable's parent folder as its cwd.
        cwd = os.path.abspath(os.path.join(exePath, os.pardir))
        # Popen resolves a relative executable against `cwd` on POSIX, so a
        # path that has a directory part is made absolute up front. A bare
        # program name is left alone so the normal lookup still applies.
        if os.path.dirname(exePath):
            exePath = os.path.abspath(exePath)
        command = self._buildCommand(exePath, argument)

        # On Windows, ask for a hidden window so no console pops up.
        startupinfo = None
        if "win32" in str(sysPlatform).lower():
            startupinfo = subprocess.STARTUPINFO()
            # NOTE(review): CREATE_NEW_CONSOLE looks like a process-creation
            # flag rather than a STARTUPINFO flag; kept as in the original,
            # confirm the intent.
            startupinfo.dwFlags = (
                subprocess.CREATE_NEW_CONSOLE | subprocess.STARTF_USESHOWWINDOW
            )
            startupinfo.wShowWindow = subprocess.SW_HIDE

        self.ret = subprocess.Popen(  # open the pipes
            command,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # discard whatever the child logs
            startupinfo=startupinfo,
        )

        # Consume start-up output until the ready marker shows up.
        while True:
            if self.ret.poll() is not None:  # child already exited
                self._terminate()
                raise Exception("OCR init fail.")
            rawLine = self.ret.stdout.readline()
            if not rawLine:
                # End of file: the marker can never arrive, so fail now
                # instead of spinning until the child finally exits.
                self._terminate()
                raise Exception("OCR init fail.")
            if "OCR init completed." in rawLine.decode("utf-8", errors="ignore"):
                break

        # Make sure the child is stopped when the interpreter exits.
        atexit.register(self.exit)

    @staticmethod
    def _buildCommand(exePath: str, argument: dict = None) -> list:
        """Return the argv list: the executable plus ``--key=value`` flags.

        One list element per flag lets ``subprocess`` do the quoting, so
        values with spaces or quotes need no manual escaping.
        ``--ensureAscii=1`` is appended unless a supplied flag already
        mentions ``ensureAscii``.
        """
        command = [exePath]
        if argument is not None:
            command.extend(f"--{key}={value}" for key, value in argument.items())
        if not any("ensureAscii" in flag for flag in command[1:]):
            command.append("--ensureAscii=1")
        return command

    def _terminate(self) -> bool:
        """Kill and reap the child, close its pipes, and clear ``self.ret``.

        Returns:
            True if there was a child process to stop, False otherwise.
        """
        # getattr: the attribute is missing when __init__ never got to run.
        process = getattr(self, "ret", None)
        self.ret = None
        if process is None:
            return False
        try:
            process.kill()
        except OSError:
            pass  # the process is already gone
        try:
            process.wait(timeout=3)  # reap it so no zombie is left behind
        except (subprocess.TimeoutExpired, OSError):
            pass
        for pipe in (process.stdin, process.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                pass  # e.g. broken pipe while flushing stdin
        return True

    def runDict(self, writeDict: dict):
        """Send one instruction dictionary to the engine and return its reply.

        Args:
            writeDict: Instruction dictionary; it is serialized to one line
                of ASCII-only JSON.

        Returns:
            The engine's reply parsed from JSON, normally
            ``{"code": ..., "data": ...}``. Local failures are reported in
            the same shape: 901 (no engine instance), 902 (child exited or
            the write failed), 903 (reading the reply failed), 904 (the
            reply is not valid JSON).
        """
        process = getattr(self, "ret", None)
        if not process:
            return {"code": 901, "data": "引擎实例不存在。"}
        if process.poll() is not None:
            return {"code": 902, "data": "子进程已崩溃。"}

        # Send the request.
        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        try:
            process.stdin.write(writeStr.encode("utf-8"))
            process.stdin.flush()
        except Exception as e:
            return {"code": 902, "data": f"向识别器进程传入指令失败,疑似子进程已崩溃。{e}"}

        # Read the single-line reply.
        try:
            getStr = process.stdout.readline().decode("utf-8", errors="ignore")
        except Exception as e:
            return {"code": 903, "data": f"读取识别器进程输出值失败。异常信息:[{e}]"}

        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {"code": 904, "data": f"识别器输出值反序列化JSON失败。异常信息:[{e}]。原始内容:[{getStr}]"}

    def run(self, imgPath: str):
        """Recognize the text in a local image file.

        Args:
            imgPath: Path of the image.

        Returns:
            Same dictionary as :meth:`runDict`.
        """
        return self.runDict({"image_path": imgPath})

    def runBase64(self, imageBase64: str):
        """Recognize the text in an image given as a base64 string.

        Args:
            imageBase64: Base64 text of the image.

        Returns:
            Same dictionary as :meth:`runDict`.
        """
        return self.runDict({"image_base64": imageBase64})

    def runBytes(self, imageBytes):
        """Recognize the text in an image given as raw bytes.

        Args:
            imageBytes: Bytes of the image; they are base64-encoded and
                handed to :meth:`runBase64`.

        Returns:
            Same dictionary as :meth:`runDict`.
        """
        return self.runBase64(b64encode(imageBytes).decode("utf-8"))

    def exit(self):
        """Stop the engine child process; a no-op when none is running."""
        if not self._terminate():
            return
        atexit.unregister(self.exit)  # the exit hook is no longer needed
        print("### RapidOCR引擎子进程关闭!")

    @staticmethod
    def printResult(res: dict):
        """Debug helper: pretty-print a recognition result.

        Args:
            res: Result dictionary as returned by :meth:`runDict`. Code 100
                lists every recognized line, code 101 means no text was
                found, and any other code is printed as a failure.
        """
        code = res["code"]
        if code == 100:
            for index, line in enumerate(res["data"], start=1):
                print(f"{index}-置信度:{round(line['score'], 2)},文本:{line['text']}")
        elif code == 101:
            print("图片中未识别出文字。")
        else:
            print(f"图片识别失败。错误码:{res['code']},错误信息:{res['data']}")

    def __del__(self):
        """Stop the child process when the instance is garbage-collected."""
        self.exit()
|