docs: 添加涉密文件自检工具实施计划
This commit is contained in:
176
UmiOCR-data/py_src/server/ocr_server.py
Normal file
176
UmiOCR-data/py_src/server/ocr_server.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import json
|
||||
|
||||
from .bottle import request
|
||||
from ..mission.mission_ocr import MissionOCR
|
||||
from ..utils.utils import initConfigDict
|
||||
from ..ocr.output.tools import getDataText
|
||||
|
||||
|
||||
# 获取ocr配置字典。 is_format=False 时不含 format 选项。
|
||||
def get_ocr_options(is_format=True):
|
||||
opts = {}
|
||||
# OCR 的参数
|
||||
ocr_opts = MissionOCR.getLocalOptions()
|
||||
for key in ocr_opts:
|
||||
opts[f"ocr.{key}"] = ocr_opts[key]
|
||||
# 排版解析的参数
|
||||
opts["tbpu.parser"] = {
|
||||
"title": "排版解析方案",
|
||||
"toolTip": "按什么方式,解析和排序图片中的文字块",
|
||||
"default": "multi_para",
|
||||
"optionsList": [
|
||||
["multi_para", "多栏-按自然段换行"],
|
||||
["multi_line", "多栏-总是换行"],
|
||||
["multi_none", "多栏-无换行"],
|
||||
["single_para", "单栏-按自然段换行"],
|
||||
["single_line", "单栏-总是换行"],
|
||||
["single_none", "单栏-无换行"],
|
||||
["single_code", "单栏-保留缩进"],
|
||||
["none", "不做处理"],
|
||||
],
|
||||
}
|
||||
# 忽略区域
|
||||
opts["tbpu.ignoreArea"] = {
|
||||
"title": "忽略区域",
|
||||
"toolTip": "数组,每一项为[[左上角x,y],[右下角x,y]]。",
|
||||
"default": [],
|
||||
"type": "var",
|
||||
}
|
||||
# 输出格式
|
||||
if is_format:
|
||||
opts["data.format"] = {
|
||||
"title": "数据返回格式",
|
||||
"toolTip": '返回值字典中,["data"] 按什么格式表示OCR结果数据',
|
||||
"default": "dict",
|
||||
"optionsList": [
|
||||
["dict", "含有位置等信息的原始字典"],
|
||||
["text", "纯文本"],
|
||||
],
|
||||
}
|
||||
opts = initConfigDict(opts) # 格式化
|
||||
return opts
|
||||
|
||||
|
||||
# 检查ocr参数字典,返回修改后字典
|
||||
def check_ocr_options(opts):
|
||||
# 检查忽略区域参数
|
||||
if opts["tbpu.ignoreArea"]:
|
||||
new_ia = []
|
||||
ia = opts["tbpu.ignoreArea"]
|
||||
for a in ia:
|
||||
if (
|
||||
not isinstance(a, list)
|
||||
or len(a) != 2
|
||||
or not isinstance(a[0], list)
|
||||
or len(a[0]) != 2
|
||||
or not isinstance(a[1], list)
|
||||
or len(a[1]) != 2
|
||||
or not all(
|
||||
isinstance(x, (int, float))
|
||||
for x in [a[0][0], a[0][1], a[1][0], a[1][1]]
|
||||
)
|
||||
):
|
||||
raise Exception(
|
||||
f"tbpu.ignoreArea 中,每一项的格式必须是 [[x1,y1],[x2,y2]] 。当前值不合法: {ia}"
|
||||
)
|
||||
new_ia.append([[a[0][0], a[0][1]], [], [a[1][0], a[1][1]], []])
|
||||
opts["tbpu.ignoreArea"] = new_ia
|
||||
return opts
|
||||
|
||||
|
||||
# 路由函数
|
||||
def init(UmiWeb):
|
||||
@UmiWeb.route("/api/ocr/get_options")
|
||||
def _get_options_json():
|
||||
opts = get_ocr_options()
|
||||
res = json.dumps(opts)
|
||||
return res
|
||||
|
||||
"""
|
||||
执行OCR,方法:POST
|
||||
参数:
|
||||
"base64": "", # 必填
|
||||
"options": {}, # 选填,内容与 _get_options 的对应。
|
||||
"""
|
||||
|
||||
@UmiWeb.route("/api/ocr", method="POST")
|
||||
def _ocr():
|
||||
try:
|
||||
data = request.json
|
||||
except Exception as e:
|
||||
return json.dumps({"code": 800, "data": f"请求无法解析为json。 {e}"})
|
||||
if not data:
|
||||
return json.dumps({"code": 801, "data": "请求为空。"})
|
||||
if "base64" not in data:
|
||||
return json.dumps({"code": 802, "data": "请求中缺少 base64 字段。"})
|
||||
if "options" not in data:
|
||||
data["options"] = {}
|
||||
elif not isinstance(data["options"], dict):
|
||||
return json.dumps({"code": 803, "data": "请求中 options 字段必须为字典。"})
|
||||
try:
|
||||
# 补充缺失的默认参数
|
||||
opt = data["options"]
|
||||
default = get_ocr_options()
|
||||
for key in default:
|
||||
if key not in opt:
|
||||
opt[key] = default[key]["default"]
|
||||
# 检查OCR参数
|
||||
check_ocr_options(opt)
|
||||
except Exception as e:
|
||||
return json.dumps({"code": 804, "data": f"options 解释失败。 {e}"})
|
||||
# 同步执行
|
||||
resList = MissionOCR.addMissionWait(opt, {"base64": data["base64"]})
|
||||
res = resList[0]["result"]
|
||||
if opt["data.format"] == "text": # 转纯文本
|
||||
if res["code"] == 100:
|
||||
res["data"] = getDataText(res["data"])
|
||||
res = json.dumps(res)
|
||||
return res
|
||||
|
||||
|
||||
"""
|
||||
const url = "http://127.0.0.1:1224/api/ocr";
|
||||
const data = {
|
||||
// 必填
|
||||
"base64": "iVBORw0KGgoAAAANSUhEUgAAAC4AAAAXCAIAAAD7ruoFAAAACXBIWXMAABnWAAAZ1gEY0crtAAAAEXRFWHRTb2Z0d2FyZQBTbmlwYXN0ZV0Xzt0AAAHjSURBVEiJ7ZYrcsMwEEBXnR7FLuj0BPIJHJOi0DAZ2qSsMCxEgjYrDQqJdALrBJ2ASndRgeNI8ledutOCLrLl1e7T/mRkjIG/IXe/DWBldRTNEoQSpgNURe5puiiaJehrMuJSXSTgbaby0A1WzLrCCQCmyn0FwoN0V06QONWAt1nUxfnjHYA8p65GjhDKxcjedVH6JOejBPwYh21eE0Wzfe0tqIsEkGXcVcpoMH4CRZ+P0lsQp/pWJ4ripf1XFDFe8GHSHlYcSo9Es31t60RdFlN1RUmrma5oTzTVB8ZUaeeYEC9GmL6kNkDw9BANAQYo3xTNdqUkvHq+rYhDKW0Bj3RSEIpmyWyBaZaMTCrCK+tJ5Jsa07fs3E7esE66HzralRLgJKp0/BD6fJRSxvmDsb6joqkcFXGqMVVFFEHDL2gTxwCAaTabnkFUWhDCHTd9iYrGcAL1ZnqIp5Vpiqh7bCfua7FA4qN0INMcN1+cgCzj+UFxtbmvwdZvGIrI41JiqhZBWhhF8WxorkYPpQwJiWYJeA3rXE4hzcwJ+B96F9zCFHC0FcVegghvFul7oeEE8PvHeJqC0w0AUbbFIT8JnEwGbPKcS2OxU3HMTqD0r4wgEIuiKJ7i4MS16+og8/+bPZRPLa+6Ld2DSzcAAAAASUVORK5CYII=",
|
||||
"options": {
|
||||
"ocr.angle": false,
|
||||
"ocr.language": "简体中文",
|
||||
"ocr.maxSideLen": 1024,
|
||||
"tbpu.parser": "multi_para",
|
||||
"data.format": "text",
|
||||
}
|
||||
};
|
||||
|
||||
fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
body: JSON.stringify(data)
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
console.log(data);
|
||||
})
|
||||
.catch(error => {
|
||||
console.error(error);
|
||||
});
|
||||
|
||||
|
||||
|
||||
const url = "http://127.0.0.1:1224/api/ocr/get_options";
|
||||
fetch(url, {
|
||||
method: "GET",
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
console.log(data);
|
||||
})
|
||||
.catch(error => {
|
||||
console.error(error);
|
||||
});
|
||||
"""
|
||||
Reference in New Issue
Block a user