39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
|
|
from docx import Document
|
||
|
|
from .logger import logger
|
||
|
|
|
||
|
|
class WordHandler:
    """Static helpers for moving plain text in and out of .docx files."""

    @staticmethod
    def import_docx(file_path):
        """Extract the text content of a docx file.

        Args:
            file_path: Source document; handed unchanged to ``Document``.

        Returns:
            str: The text of every paragraph whose text is not blank, in
            document order, joined with one empty line between paragraphs.
            Only ``doc.paragraphs`` is read.

        Raises:
            Exception: Any error raised while opening or reading the
                document is logged and then re-raised unchanged.
        """
        try:
            doc = Document(file_path)

            # Keep the paragraph text as-is (not stripped); whitespace-only
            # paragraphs are dropped entirely.
            text = [
                paragraph.text
                for paragraph in doc.paragraphs
                if paragraph.text.strip()
            ]

            logger.info(f"成功从 {file_path} 导入文本,共 {len(text)} 段")
            return "\n\n".join(text)
        except Exception as e:
            logger.error(f"导入Word文件失败: {e}")
            raise

    @staticmethod
    def export_docx(file_path, content):
        """Write text content out as a docx file.

        The content is split on empty lines; each non-blank chunk is
        stripped and becomes one paragraph of a new document.

        Args:
            file_path: Destination; handed unchanged to ``doc.save``.
            content: Text to export, with paragraphs separated by an
                empty line.

        Returns:
            bool: ``True`` once the document has been saved.

        Raises:
            Exception: Any error raised while building or saving the
                document is logged and then re-raised unchanged.
        """
        try:
            doc = Document()

            # Split the content into paragraphs, dropping blank chunks up
            # front so the count logged below matches what is written.
            paragraphs = [
                chunk.strip()
                for chunk in content.split("\n\n")
                if chunk.strip()
            ]
            for para_text in paragraphs:
                doc.add_paragraph(para_text)

            doc.save(file_path)
            logger.info(f"成功将文本导出到 {file_path},共 {len(paragraphs)} 段")
            return True
        except Exception as e:
            logger.error(f"导出Word文件失败: {e}")
            raise
|