docs: 添加涉密文件自检工具实施计划

2026-06-08 13:53:24 +08:00
commit 31161d9a5f
1838 changed files with 455407 additions and 0 deletions
--- a/UmiOCR-data/py_src/server/ocr_server.py
+++ b/UmiOCR-data/py_src/server/ocr_server.py
@@ -0,0 +1,176 @@
+import json
+
+from .bottle import request
+from ..mission.mission_ocr import MissionOCR
+from ..utils.utils import initConfigDict
+from ..ocr.output.tools import getDataText
+
+
+def get_ocr_options(is_format=True):
+    """Assemble the option dictionary accepted by the OCR HTTP API.
+
+    Args:
+        is_format: If truthy (the default), the "data.format" entry is
+            included; otherwise it is omitted.
+
+    Returns:
+        The result of passing the assembled dictionary through
+        ``initConfigDict``. Entries are inserted in this order: the local
+        OCR options (each key prefixed with "ocr."), "tbpu.parser",
+        "tbpu.ignoreArea" and, optionally, "data.format".
+    """
+    # OCR options, each re-keyed under the "ocr." prefix.
+    local_opts = MissionOCR.getLocalOptions()
+    options = {f"ocr.{name}": local_opts[name] for name in local_opts}
+
+    # Layout parsing: how the text blocks in the image are parsed and sorted.
+    parser_choices = [
+        ["multi_para", "多栏-按自然段换行"],
+        ["multi_line", "多栏-总是换行"],
+        ["multi_none", "多栏-无换行"],
+        ["single_para", "单栏-按自然段换行"],
+        ["single_line", "单栏-总是换行"],
+        ["single_none", "单栏-无换行"],
+        ["single_code", "单栏-保留缩进"],
+        ["none", "不做处理"],
+    ]
+    options["tbpu.parser"] = dict(
+        title="排版解析方案",
+        toolTip="按什么方式，解析和排序图片中的文字块",
+        default="multi_para",
+        optionsList=parser_choices,
+    )
+
+    # Ignore areas: an array whose items are
+    # [[top-left x,y],[bottom-right x,y]].
+    options["tbpu.ignoreArea"] = dict(
+        title="忽略区域",
+        toolTip="数组，每一项为[[左上角x,y],[右下角x,y]]。",
+        default=[],
+        type="var",
+    )
+
+    # Representation of the "data" field in the reply.
+    if is_format:
+        format_choices = [
+            ["dict", "含有位置等信息的原始字典"],
+            ["text", "纯文本"],
+        ]
+        options["data.format"] = dict(
+            title="数据返回格式",
+            toolTip='返回值字典中，["data"] 按什么格式表示OCR结果数据',
+            default="dict",
+            optionsList=format_choices,
+        )
+
+    # Normalise the assembled description before handing it out.
+    return initConfigDict(options)
+
+
+def check_ocr_options(opts):
+    """Validate an OCR option dictionary and normalise it in place.
+
+    Only "tbpu.ignoreArea" is inspected. When it is present and non-empty,
+    every item must have the form ``[[x1, y1], [x2, y2]]`` with numeric
+    coordinates; each item is then rewritten as
+    ``[[x1, y1], [], [x2, y2], []]``.
+
+    Args:
+        opts: Option dictionary. A missing or empty "tbpu.ignoreArea"
+            entry is left untouched.
+
+    Returns:
+        The same ``opts`` object, possibly with "tbpu.ignoreArea" replaced.
+
+    Raises:
+        Exception: If "tbpu.ignoreArea" is not a sequence of well-formed
+            ``[[x1, y1], [x2, y2]]`` items.
+    """
+
+    def _is_number(value):
+        # bool is a subclass of int, but true/false is not a coordinate.
+        return isinstance(value, (int, float)) and not isinstance(value, bool)
+
+    def _is_point(point):
+        return (
+            isinstance(point, list)
+            and len(point) == 2
+            and all(_is_number(v) for v in point)
+        )
+
+    # .get(): tolerate option dicts that carry no ignore-area entry at all.
+    ia = opts.get("tbpu.ignoreArea")
+    if not ia:
+        return opts
+
+    # Check the container type first so that a non-iterable value (e.g. a
+    # bare number) is reported with the format error below instead of an
+    # unrelated TypeError raised while iterating.
+    well_formed = isinstance(ia, (list, tuple)) and all(
+        isinstance(a, list)
+        and len(a) == 2
+        and _is_point(a[0])
+        and _is_point(a[1])
+        for a in ia
+    )
+    if not well_formed:
+        raise Exception(
+            f"tbpu.ignoreArea 中，每一项的格式必须是 [[x1,y1],[x2,y2]] 。当前值不合法： {ia}"
+        )
+
+    # Expand each two-point item to four slots; the second and fourth
+    # slots are left as empty lists.
+    opts["tbpu.ignoreArea"] = [
+        [[a[0][0], a[0][1]], [], [a[1][0], a[1][1]], []] for a in ia
+    ]
+    return opts
+
+
+# Route registration
+def init(UmiWeb):
+    """Register the OCR HTTP routes on ``UmiWeb``.
+
+    Routes:
+        /api/ocr/get_options: replies with the dictionary built by
+            ``get_ocr_options()``, serialised as JSON.
+        /api/ocr (POST): runs OCR synchronously; see ``_ocr`` below.
+    """
+
+    @UmiWeb.route("/api/ocr/get_options")
+    def _get_options_json():
+        """Return the available OCR options as a JSON string."""
+        return json.dumps(get_ocr_options())
+
+    @UmiWeb.route("/api/ocr", method="POST")
+    def _ocr():
+        """Run OCR on the posted payload and return the result as JSON.
+
+        Expected JSON body:
+            "base64":  required; forwarded to ``MissionOCR`` as the input.
+            "options": optional dict whose keys match those served by
+                /api/ocr/get_options. Missing keys take their defaults.
+
+        Error replies have the form ``{"code": <int>, "data": <message>}``:
+            800  reading ``request.json`` raised an exception
+            801  the request body is empty
+            802  no "base64" field (or the body is not a JSON object)
+            803  "options" is present but is not a dict
+            804  the options could not be completed or validated
+        """
+        try:
+            data = request.json
+        except Exception as e:
+            return json.dumps({"code": 800, "data": f"请求无法解析为json。 {e}"})
+        if not data:
+            return json.dumps({"code": 801, "data": "请求为空。"})
+        # A JSON array, string or number has no "base64" field either.
+        # Rejecting it here keeps the item assignment below from raising
+        # an unhandled TypeError.
+        if not isinstance(data, dict) or "base64" not in data:
+            return json.dumps({"code": 802, "data": "请求中缺少 base64 字段。"})
+        if "options" not in data:
+            data["options"] = {}
+        elif not isinstance(data["options"], dict):
+            return json.dumps({"code": 803, "data": "请求中 options 字段必须为字典。"})
+        try:
+            # Fill in the default for every option the caller left out.
+            opt = data["options"]
+            default = get_ocr_options()
+            for key in default:
+                if key not in opt:
+                    opt[key] = default[key]["default"]
+            # Validate / normalise the OCR options.
+            check_ocr_options(opt)
+        except Exception as e:
+            return json.dumps({"code": 804, "data": f"options 解释失败。 {e}"})
+        # Run synchronously: wait for the mission and take the first result.
+        resList = MissionOCR.addMissionWait(opt, {"base64": data["base64"]})
+        res = resList[0]["result"]
+        # Flatten to plain text only when requested and the result code is
+        # 100 (presumably the success code -- confirm against MissionOCR).
+        if opt["data.format"] == "text" and res["code"] == 100:
+            res["data"] = getDataText(res["data"])
+        return json.dumps(res)
+
+
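+"""
+JavaScript usage examples for the HTTP routes (independent snippets).
+
+// POST a base64-encoded image to /api/ocr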
+const url = "http://127.0.0.1:1224/api/ocr";
+const data = {
+    // 必填
+    "base64": "iVBORw0KGgoAAAANSUhEUgAAAC4AAAAXCAIAAAD7ruoFAAAACXBIWXMAABnWAAAZ1gEY0crtAAAAEXRFWHRTb2Z0d2FyZQBTbmlwYXN0ZV0Xzt0AAAHjSURBVEiJ7ZYrcsMwEEBXnR7FLuj0BPIJHJOi0DAZ2qSsMCxEgjYrDQqJdALrBJ2ASndRgeNI8ledutOCLrLl1e7T/mRkjIG/IXe/DWBldRTNEoQSpgNURe5puiiaJehrMuJSXSTgbaby0A1WzLrCCQCmyn0FwoN0V06QONWAt1nUxfnjHYA8p65GjhDKxcjedVH6JOejBPwYh21eE0Wzfe0tqIsEkGXcVcpoMH4CRZ+P0lsQp/pWJ4ripf1XFDFe8GHSHlYcSo9Es31t60RdFlN1RUmrma5oTzTVB8ZUaeeYEC9GmL6kNkDw9BANAQYo3xTNdqUkvHq+rYhDKW0Bj3RSEIpmyWyBaZaMTCrCK+tJ5Jsa07fs3E7esE66HzralRLgJKp0/BD6fJRSxvmDsb6joqkcFXGqMVVFFEHDL2gTxwCAaTabnkFUWhDCHTd9iYrGcAL1ZnqIp5Vpiqh7bCfua7FA4qN0INMcN1+cgCzj+UFxtbmvwdZvGIrI41JiqhZBWhhF8WxorkYPpQwJiWYJeA3rXE4hzcwJ+B96F9zCFHC0FcVegghvFul7oeEE8PvHeJqC0w0AUbbFIT8JnEwGbPKcS2OxU3HMTqD0r4wgEIuiKJ7i4MS16+og8/+bPZRPLa+6Ld2DSzcAAAAASUVORK5CYII=",
+    "options": {
+        "ocr.angle": false,
+        "ocr.language": "简体中文",
+        "ocr.maxSideLen": 1024,
+        "tbpu.parser": "multi_para",
+        "data.format": "text",
+    }
+};
+
+fetch(url, {
+        method: "POST",
+        headers: {
+            "Content-Type": "application/json"
+        },
+        body: JSON.stringify(data)
+    })
+    .then(response => response.json())
+    .then(data => {
+        console.log(data);
+    })
+    .catch(error => {
+        console.error(error);
+    });
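+
+// Separate snippet (it declares its own `const url`):
+// GET the available options from /api/ocr/get_options
+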
+const url = "http://127.0.0.1:1224/api/ocr/get_options";
+fetch(url, {
+        method: "GET",
+        headers: {
+            "Content-Type": "application/json"
+        },
+    })
+    .then(response => response.json())
+    .then(data => {
+        console.log(data);
+    })
+    .catch(error => {
+        console.error(error);
+    });
+"""