docs: 添加涉密文件自检工具实施计划

This commit is contained in:
2026-06-08 13:53:24 +08:00
commit 31161d9a5f
1838 changed files with 455407 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
from . import api_rapidocr
from . import rapidocr_config
# Plugin information
PluginInfo = dict(
    # Plugin group
    group="ocr",
    # Global option definitions
    global_options=rapidocr_config.globalOptions,
    # Local option definitions
    local_options=rapidocr_config.localOptions,
    # Interface class
    api_class=api_rapidocr.Api,
)

View File

@@ -0,0 +1,79 @@
# 调用 RapidOCR-json.exe 的 Python Api
# 项目主页:
# https://github.com/hiroi-sora/RapidOCR-json
import os
from .rapidocr import Rapid_pipe
from .rapidocr_config import LangDict
# Path of the engine executable, located in the same directory as this module
ExePath = "/".join(
    (os.path.dirname(os.path.abspath(__file__)), "RapidOCR-json.exe")
)
class Api:  # Public interface
    """Public OCR interface backed by a RapidOCR-json engine process.

    ``start`` launches (or reuses) the engine for a set of local options,
    the ``run*`` methods forward an image to the running engine, and
    ``stop`` shuts the engine down.
    """

    def __init__(self, globalArgd):
        """Check the executable path and prepare the default launch arguments.

        Args:
            globalArgd: Global option values; ``globalArgd["numThread"]`` is
                copied into the launch arguments.

        Raises:
            ValueError: If nothing exists at ``ExePath``.
        """
        # Fail early when the engine executable is missing
        if not os.path.exists(ExePath):
            raise ValueError(f'[Error] Exe path "{ExePath}" does not exist.')
        # Initial state
        self.api = None  # running Rapid_pipe instance, or None when stopped
        self.exeConfigs = {  # launch-argument dictionary for the exe
            "models": "models",
            "ensureAscii": 1,
            # det / cls / rec / keys are filled from the LangDict entry
            # selected in start()
            "det": None,
            "cls": None,
            "rec": None,
            "keys": None,
            "doAngle": 0,
            "mostAngle": 0,
            "maxSideLen": None,
            "numThread": globalArgd["numThread"],
        }

    def start(self, argd):
        """Start the engine for the given local options.

        If an engine is already running with identical launch arguments it is
        reused; if the arguments differ, the old engine is stopped and a new
        one is launched.

        Args:
            argd: Local option values; the keys ``"language"``, ``"angle"``
                and ``"maxSideLen"`` are read.

        Returns:
            ``""`` on success, or a message starting with ``"[Error]"`` on
            failure.
        """
        # Merge the local options into a copy of the current launch arguments
        tempConfigs = self.exeConfigs.copy()
        try:
            language = argd["language"]
            tempConfigs.update(LangDict[language])
            angleFlag = 1 if argd["angle"] else 0
            tempConfigs["doAngle"] = tempConfigs["mostAngle"] = angleFlag
            tempConfigs["maxSideLen"] = argd["maxSideLen"]
        except Exception as e:
            # Stop a running engine rather than only dropping the reference:
            # Rapid_pipe registers its exit handler with atexit, so an
            # abandoned instance would keep its child process alive until
            # the interpreter exits.
            self.stop()
            return f"[Error] OCR start fail. Argd: {argd}\n{e}"
        if self.api is not None:
            # Engine already running with the same arguments: nothing to do
            if tempConfigs == self.exeConfigs:
                return ""
            # Arguments changed: stop the old engine before restarting
            self.stop()
        # Launch a new engine
        self.exeConfigs = tempConfigs
        try:
            self.api = Rapid_pipe(ExePath, tempConfigs)
        except Exception as e:
            self.api = None
            return f"[Error] OCR init fail. Argd: {tempConfigs}\n{e}"
        return ""

    def stop(self):
        """Stop the engine if one is running; otherwise do nothing."""
        if self.api is None:
            return
        self.api.exit()
        self.api = None

    def runPath(self, imgPath: str):
        """Recognize the image at ``imgPath`` and return the engine result."""
        res = self.api.run(imgPath)
        return res

    def runBytes(self, imageBytes):
        """Recognize an image given as a byte stream and return the engine result."""
        res = self.api.runBytes(imageBytes)
        return res

    def runBase64(self, imageBase64):
        """Recognize an image given as a base64 string and return the engine result."""
        res = self.api.runBase64(imageBase64)
        return res

View File

@@ -0,0 +1,11 @@
key,en_US,zh_TW,ja_JP
RapidOCR本地,RapidOCR (local),RapidOCR本地,RapidOCRローカル
线程数,Number of threads,線程數,スレッド数
文字识别RapidOCR,Text Recognition (RapidOCR),文字識別RapidOCR,文字認識RapidOCR
语言/模型库,Language/Model Library,語言/模型庫,言語/モデルライブラリ
纠正文本方向,Correct Text Direction,糾正文字方向,テキスト方向の修正
启用方向分类,识别倾斜或倒置的文本。可能降低识别速度。,Enable directional classification to recognize tilted or inverted text. May reduce recognition speed.,啟用方向分類,識別傾斜或倒置的文字。 可能降低識別速度。,方向分類を有効にして、傾斜または反転したテキストを識別します。認識速度が低下する可能性があります。
限制图像边长,Limit image edge length,限制影像邊長,画像の辺の長さを制限する
(默认),(Default),(默認),(デフォルト)
无限制,Unlimited,無限制,制限なし
将边长大于该值的图片进行压缩,可以提高识别速度。可能降低识别精度。,Compressing images with side lengths greater than this value can improve recognition speed. May reduce recognition accuracy.,將邊長大於該值的圖片進行壓縮,可以提高識別速度。 可能降低識別精度。,辺の長さがこの値より大きい画像を圧縮することで、認識速度を高めることができます。認識精度が低下する可能性があります。
1 key en_US zh_TW ja_JP
2 RapidOCR(本地) RapidOCR (local) RapidOCR(本地) RapidOCR(ローカル)
3 线程数 Number of threads 線程數 スレッド数
4 文字识别(RapidOCR) Text Recognition (RapidOCR) 文字識別(RapidOCR) 文字認識(RapidOCR)
5 语言/模型库 Language/Model Library 語言/模型庫 言語/モデルライブラリ
6 纠正文本方向 Correct Text Direction 糾正文字方向 テキスト方向の修正
7 启用方向分类,识别倾斜或倒置的文本。可能降低识别速度。 Enable directional classification to recognize tilted or inverted text. May reduce recognition speed. 啟用方向分類,識別傾斜或倒置的文字。 可能降低識別速度。 方向分類を有効にして、傾斜または反転したテキストを識別します。認識速度が低下する可能性があります。
8 限制图像边长 Limit image edge length 限制影像邊長 画像の辺の長さを制限する
9 (默认) (Default) (默認) (デフォルト)
10 无限制 Unlimited 無限制 制限なし
11 将边长大于该值的图片进行压缩,可以提高识别速度。可能降低识别精度。 Compressing images with side lengths greater than this value can improve recognition speed. May reduce recognition accuracy. 將邊長大於該值的圖片進行壓縮,可以提高識別速度。 可能降低識別精度。 辺の長さがこの値より大きい画像を圧縮することで、認識速度を高めることができます。認識精度が低下する可能性があります。

View File

@@ -0,0 +1,35 @@
简体中文
ch_PP-OCRv3_det_infer.onnx
ch_ppocr_mobile_v2.0_cls_infer.onnx
rec_ch_PP-OCRv4_infer.onnx
dict_chinese.txt
English
ch_PP-OCRv3_det_infer.onnx
ch_ppocr_mobile_v2.0_cls_infer.onnx
rec_en_PP-OCRv3_infer.onnx
dict_en.txt
繁體中文
ch_PP-OCRv3_det_infer.onnx
ch_ppocr_mobile_v2.0_cls_infer.onnx
rec_chinese_cht_PP-OCRv3_infer.onnx
dict_chinese_cht.txt
日本語
ch_PP-OCRv3_det_infer.onnx
ch_ppocr_mobile_v2.0_cls_infer.onnx
rec_japan_PP-OCRv3_infer.onnx
dict_japan.txt
한국어
ch_PP-OCRv3_det_infer.onnx
ch_ppocr_mobile_v2.0_cls_infer.onnx
rec_korean_PP-OCRv3_infer.onnx
dict_korean.txt
Русский
ch_PP-OCRv3_det_infer.onnx
ch_ppocr_mobile_v2.0_cls_infer.onnx
rec_cyrillic_PP-OCRv3_infer.onnx
dict_cyrillic.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,163 @@
!
#
$
%
&
'
(
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
É
é
Ё
Є
І
Ј
Љ
Ў
А
Б
В
Г
Д
Е
Ж
З
И
Й
К
Л
М
Н
О
П
Р
С
Т
У
Ф
Х
Ц
Ч
Ш
Щ
Ъ
Ы
Ь
Э
Ю
Я
а
б
в
г
д
е
ж
з
и
й
к
л
м
н
о
п
р
с
т
у
ф
х
ц
ч
ш
щ
ъ
ы
ь
э
ю
я
ё
ђ
є
і
ј
љ
њ
ћ
ў
џ
Ґ
ґ

View File

@@ -0,0 +1,95 @@
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
\
]
^
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
{
|
}
~
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/

View File

@@ -0,0 +1,136 @@
f
e
n
c
h
_
i
m
g
/
r
v
a
l
t
w
o
d
6
1
.
p
B
u
2
à
3
R
y
4
U
E
A
5
P
O
S
T
D
7
Z
8
I
N
L
G
M
H
0
J
K
-
9
F
C
V
é
X
'
s
Q
:
è
x
b
Y
Œ
É
z
W
Ç
È
k
Ô
ô
À
Ê
q
ù
°
ê
î
*
Â
j
"
,
â
%
û
ç
ü
?
!
;
ö
(
)
ï
º
ó
ø
å
+
á
Ë
<
²
Á
Î
&
@
œ
ε
Ü
ë
[
]
í
ò
Ö
ä
ß
«
»
ú
ñ
æ
µ
³
Å
$
#

View File

@@ -0,0 +1,143 @@
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
=
>
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
]
_
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
£
§
­
°
´
µ
·
º
¿
Á
Ä
Å
É
Ï
Ô
Ö
Ü
ß
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
í
ï
ñ
ò
ó
ô
ö
ø
ù
ú
û
ü
ō
Š
Ÿ
ʒ
β
δ
з
©
ª
«
¬

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,124 @@
import os
import atexit # 退出处理
import subprocess # 进程,管道
from json import loads as jsonLoads, dumps as jsonDumps
from sys import platform as sysPlatform # popen静默模式
from base64 import b64encode # base64 编码
class Rapid_pipe:  # Drives the OCR engine in pipe mode
    """Run the OCR engine executable as a child process and talk to it
    through its stdin/stdout pipes, one JSON line per request/response."""

    def __init__(self, exePath: str, argument: dict = None):
        """Launch the engine and block until it reports that it is ready.

        Args:
            exePath: Path of the engine executable.
            argument: Launch arguments as ``{name: value}``; each entry is
                appended to the command line as ``--name=value`` (string
                values are wrapped in double quotes). Parameter reference:
                https://github.com/hiroi-sora/PaddleOCR-json

        Raises:
            Exception: If the child process exits before printing
                "OCR init completed.".
        """
        # Set first so exit() / __del__ are safe even if launching fails
        self.ret = None
        # The engine runs with the executable's parent folder as its cwd
        cwd = os.path.abspath(os.path.join(exePath, os.pardir))
        # Append the launch arguments to the command line
        if argument is not None:
            for key, value in argument.items():
                if isinstance(value, str):  # quote string values
                    exePath += f' --{key}="{value}"'
                else:
                    exePath += f" --{key}={value}"
        if "ensureAscii" not in exePath:
            exePath += " --ensureAscii=1"
        # On Windows, start the child without showing a console window
        startupinfo = None
        if "win32" in str(sysPlatform).lower():
            startupinfo = subprocess.STARTUPINFO()
            # NOTE(review): CREATE_NEW_CONSOLE is documented as a Popen
            # `creationflags` value, not a STARTUPINFO.dwFlags bit; kept
            # unchanged here - confirm the intent before touching it.
            startupinfo.dwFlags = (
                subprocess.CREATE_NEW_CONSOLE | subprocess.STARTF_USESHOWWINDOW
            )
            startupinfo.wShowWindow = subprocess.SW_HIDE
        self.ret = subprocess.Popen(  # open the pipes
            exePath,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # discard stderr
            startupinfo=startupinfo,  # hidden-window mode (Windows only)
        )
        # Read the child's output until it announces a finished init
        while True:
            if self.ret.poll() is not None:  # child already exited: init failed
                raise Exception("OCR init fail.")
            initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
            if "OCR init completed." in initStr:  # init succeeded
                break
        # Make sure the child is killed when the program terminates
        atexit.register(self.exit)

    def runDict(self, writeDict: dict):
        """Send one instruction dictionary to the engine and return its reply.

        Args:
            writeDict: Instruction dictionary; it is serialized as one JSON
                line and written to the child's stdin.

        Returns:
            The engine's reply parsed from JSON, or - when communication
            fails - ``{"code": 901..904, "data": <error message>}``.
        """
        # Check the child process
        if not self.ret:
            return {"code": 901, "data": "引擎实例不存在。"}
        if self.ret.poll() is not None:
            return {"code": 902, "data": "子进程已崩溃。"}
        # Send the request
        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        try:
            self.ret.stdin.write(writeStr.encode("utf-8"))
            self.ret.stdin.flush()
        except Exception as e:
            return {"code": 902, "data": f"向识别器进程传入指令失败,疑似子进程已崩溃。{e}"}
        # Read the reply (a single line)
        try:
            getStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
        except Exception as e:
            return {"code": 903, "data": f"读取识别器进程输出值失败。异常信息:[{e}]"}
        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {"code": 904, "data": f"识别器输出值反序列化JSON失败。异常信息[{e}]。原始内容:[{getStr}]"}

    def run(self, imgPath: str):
        """Recognize text in a local image file.

        Args:
            imgPath: Path of the image.

        Returns:
            Same as ``runDict``.
        """
        writeDict = {"image_path": imgPath}
        return self.runDict(writeDict)

    def runBase64(self, imageBase64: str):
        """Recognize text in an image given as a base64 string.

        Args:
            imageBase64: Base64 string of the image.

        Returns:
            Same as ``runDict``.
        """
        writeDict = {"image_base64": imageBase64}
        return self.runDict(writeDict)

    def runBytes(self, imageBytes):
        """Recognize text in an image given as raw bytes.

        Args:
            imageBytes: Image byte stream; it is base64-encoded and passed
                to ``runBase64``.

        Returns:
            Same as ``runDict``.
        """
        imageBase64 = b64encode(imageBytes).decode("utf-8")
        return self.runBase64(imageBase64)

    def exit(self):
        """Kill the engine child process, if there is one."""
        # getattr: __del__ may run on an instance whose __init__ never
        # reached the `self.ret` assignment
        ret = getattr(self, "ret", None)
        if not ret:
            return
        ret.kill()  # terminate the child process
        self.ret = None
        atexit.unregister(self.exit)  # drop the exit handler
        print("### RapidOCR引擎子进程关闭")

    @staticmethod
    def printResult(res: dict):
        """Debug helper: pretty-print a recognition result.

        Args:
            res: Result dictionary as returned by ``runDict``. Code 100 is
                treated as success and code 101 as "no text found".
        """
        if res["code"] == 100:  # recognition succeeded
            for index, line in enumerate(res["data"], start=1):
                print(f"{index}-置信度:{round(line['score'], 2)},文本:{line['text']}")
        elif res["code"] == 101:  # no text in the image
            print("图片中未识别出文字。")
        else:
            print(f"图片识别失败。错误码:{res['code']},错误信息:{res['data']}")

    def __del__(self):
        self.exit()

View File

@@ -0,0 +1,109 @@
import os
import psutil
from plugin_i18n import Translator
# Translation helper built from this file's path and the "i18n.csv" table name
tr = Translator(__file__, "i18n.csv")
# Path of the model config file, appended to this module's directory
MODELS_CONFIGS = "/models/configs.txt"
# Language dictionary: title -> {"det", "cls", "rec", "keys"}; filled by _getlanguageList()
LangDict = {}
# 动态获取模型库列表
def _parseLanguageConfigs(content):
    """Parse the text of configs.txt into model entries.

    Entries are groups of consecutive non-blank lines separated by blank
    lines. Only groups of exactly five lines (title, det, cls, rec, keys)
    are used; anything else is ignored. Surrounding whitespace (including a
    stray carriage return) is stripped from every line, so a trailing
    newline or extra blank lines no longer drop an entry.

    Returns:
        ``(langDict, optionsList)`` where ``langDict`` maps each title to
        ``{"det", "cls", "rec", "keys"}`` and ``optionsList`` is a list of
        ``[title, title]`` pairs in file order.
    """
    langDict = {}
    optionsList = []
    group = []
    # The extra "" acts as a final separator that flushes the last group
    for rawLine in content.split("\n") + [""]:
        line = rawLine.strip()
        if line:
            group.append(line)
            continue
        if len(group) == 5:
            title, det, cls, rec, keys = group
            langDict[title] = {
                "det": det,
                "cls": cls,
                "rec": rec,
                "keys": keys,
            }
            optionsList.append([title, title])
        group = []
    return langDict, optionsList


def _getlanguageList():
    """Read the model config file, fill ``LangDict`` and return the options.

    configs.txt format example (entries separated by a blank line):

        简体中文(V4)
        ch_PP-OCRv4_det_infer.onnx
        ch_ppocr_mobile_v2.0_cls_infer.onnx
        rec_ch_PP-OCRv4_infer.onnx
        dict_chinese.txt

    Returns:
        A list of ``[title, title]`` pairs, or ``[]`` when the file is
        missing or cannot be read (an error line is printed in that case).
    """
    configsPath = os.path.dirname(os.path.abspath(__file__)) + MODELS_CONFIGS
    try:
        with open(configsPath, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError:
        print("[Error] RapidOCR配置文件configs不存在请检查文件路径是否正确。", configsPath)
        return []
    except IOError:
        print("[Error] RapidOCR配置文件configs无法打开或读取。")
        return []
    langDict, optionsList = _parseLanguageConfigs(content)
    # Mutate the shared module-level dict in place (no rebinding needed)
    LangDict.update(langDict)
    return optionsList
# Built once at import time; the call also fills LangDict as a side effect
_LanguageList = _getlanguageList()
# Work out the default thread count
def _getThreads():
    """Return the default number of engine threads for this machine.

    When the logical core count equals the physical count or exactly twice
    it, the logical count is returned. Otherwise (treated as a big/little
    core CPU) the result is ``(logical - physical) * 2``. If the counts
    cannot be obtained or are inconsistent, a warning is printed and 4 is
    returned.
    """
    try:
        physical = psutil.cpu_count(logical=False)  # physical cores
        logical = psutil.cpu_count(logical=True)  # logical cores
        countsValid = (
            isinstance(physical, int)
            and isinstance(logical, int)
            and logical >= physical
        )
        if not countsValid:
            raise ValueError("核心数计算异常")
        # Uniform cores, with or without two threads per core
        if logical in (physical * 2, physical):
            return logical
        # Big/little core CPU: use the thread count of the big cores
        return (logical - physical) * 2
    except Exception as err:
        print("[Warning] 无法获取CPU核心数", err)
        return 4


_threads = _getThreads()
# Global option group: engine thread count
globalOptions = dict(
    title=tr("RapidOCR本地"),
    type="group",
    numThread=dict(
        title=tr("线程数"),
        default=_threads,
        min=1,
        isInt=True,
    ),
)
# Local option group: language/model set, angle correction, image side limit
localOptions = dict(
    title=tr("文字识别RapidOCR"),
    type="group",
    language=dict(
        title=tr("语言/模型库"),
        optionsList=_LanguageList,
    ),
    angle=dict(
        title=tr("纠正文本方向"),
        default=False,
        toolTip=tr("启用方向分类,识别倾斜或倒置的文本。可能降低识别速度。"),
    ),
    maxSideLen=dict(
        title=tr("限制图像边长"),
        optionsList=[
            [1024, "1024 " + tr("(默认)")],
            [2048, "2048"],
            [4096, "4096"],
            [999999, tr("无限制")],
        ],
        toolTip=tr("将边长大于该值的图片进行压缩,可以提高识别速度。可能降低识别精度。"),
    ),
)