Files
work-secretfile-selfcheck/UmiOCR-data/py_src/ocr/output/output_csv.py

71 lines
2.6 KiB
Python

# 输出到csv表格文件
import csv
from umi_log import logger
from .output import Output
from .tools import getDataText
class OutputCsv(Output):
    """Collect OCR results and save them as a CSV table.

    Rows are buffered in memory by ``print`` and written in one pass by
    ``onEnd``. Each row holds three cells: file name, OCR text, file path.
    """

    def __init__(self, argd):
        """Read the output settings and create an empty CSV file.

        Args:
            argd: Mapping that provides ``outputDir`` (target folder),
                ``outputFileName`` (file name without extension) and
                ``ignoreBlank`` (skip results whose code is not 100).

        Raises:
            Exception: If the output file cannot be created.
        """
        # Candidate encodings, tried in this order when saving.
        self.encodings = [
            "ansi",  # Windows local code page; unknown codec on Linux/macOS
            "ascii",  # plain English
            "gbk",  # Simplified Chinese
            "big5",  # Traditional Chinese
            "shift_jis",  # Japanese
            "euc-kr",  # Korean
            "utf-8",
        ]
        self.dir = argd["outputDir"]  # output folder
        self.fileName = argd["outputFileName"]  # file name without extension
        self.outputPath = f"{self.dir}/{self.fileName}.csv"  # full output path
        self.ignoreBlank = argd["ignoreBlank"]  # skip non-100 results
        self.writeLists = []  # buffered rows: [name, text, path]
        self.writeText = ""  # concatenation of all OCR text seen so far
        # Create (or truncate) the file up front so that an unwritable
        # location is reported now rather than after all results are in.
        try:
            with open(self.outputPath, "w", encoding="utf-8"):
                pass
        except Exception as e:
            raise Exception(
                f"Failed to create csv file. {e}\n创建csv文件失败。"
            ) from e

    def print(self, res):  # buffer one image result
        """Buffer the result of one image as a CSV row.

        Args:
            res: Result mapping with the keys ``code``, ``data``,
                ``fileName`` and ``path``.
        """
        code = res["code"]
        if code != 100 and self.ignoreBlank:
            return  # ignoreBlank drops every result that is not code 100
        if code == 100:
            textOut = getDataText(res["data"])  # joined recognition text
        elif code == 101:
            textOut = ""
        else:
            textOut = f'[Error] OCR failed. Code: {res["code"]}, Msg: {res["data"]} .\n'
        self.writeLists.append([res["fileName"], textOut, res["path"]])
        self.writeText += textOut

    def onEnd(self):  # save everything when the task ends
        """Pick an encoding and write all buffered rows to the CSV file.

        The first entry of ``self.encodings`` that can encode every cell
        that will be written (header, names, OCR text and paths) is used;
        UTF-8 is the fallback.

        Raises:
            Exception: If the CSV file cannot be written.
        """
        headers = ["Name", "OCR", "Path"]  # header row
        # Probe with every cell, not only the OCR text: a file name or path
        # may contain characters the OCR text does not, and an encoding
        # chosen from the text alone would then fail while writing the row.
        sample = "".join(
            str(cell) for row in [headers, *self.writeLists] for cell in row
        )
        encoding = "utf-8"
        for candidate in self.encodings:
            try:
                sample.encode(candidate)
            except Exception:
                # Deliberately broad: an unsupported codec name (e.g. "ansi"
                # outside Windows) and an unencodable character both simply
                # mean "try the next candidate".
                continue
            encoding = candidate
            break
        logger.info(f"csv encoding: {encoding}")
        # Overwrite the placeholder file created in __init__.
        try:
            with open(
                self.outputPath, "w", encoding=encoding, newline=""
            ) as f:
                writer = csv.writer(f)
                writer.writerow(headers)
                for writeList in self.writeLists:
                    writer.writerow(writeList)
        except Exception as e:
            raise Exception(
                f"Failed to write csv file. {e}\n写入csv文件失败。"
            ) from e