Files
FableFlow/03_Story/merge_md_to_pdf.py
xiaji 7bd57e7cc2 feat(03_Story): 添加章节文件重命名工具并改进PDF合并排序逻辑
新增重命名脚本支持按规则批量修改章节文件名
改进merge_md_to_pdf.py中的文件排序逻辑,支持从文件名提取数字排序
添加工作区配置文件和更新后的PDF文档
2026-01-12 09:22:44 +08:00

102 lines
2.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
将当前目录下的所有md文件按文件名中的数字排序从第一章开始合并成一个pdf文件
"""
import html
import os
import re  # regular expressions, used to pull the chapter number out of file names

import markdown
from weasyprint import HTML, CSS
def extract_chapter_number(filename: str, default: int = 9999) -> int:
    """Return the first run of digits in ``filename`` as an integer.

    Intended as a sort key so that chapters are ordered numerically
    (``10.md`` after ``2.md``) instead of lexicographically.

    :param filename: File name such as ``第1章.md``, ``10.md`` or ``章节2.md``.
    :param default: Value returned when ``filename`` contains no digits.
        The default of 9999 keeps the historical behaviour of sorting such
        files after the numbered chapters; pass another sentinel to place
        them elsewhere.
    :return: The first number found in ``filename``, or ``default``.
    """
    # \d+ matches the first sequence of one or more consecutive digits.
    match = re.search(r'\d+', filename)
    if match is None:
        return default
    # Convert to int so that comparison is numeric rather than textual.
    return int(match.group())
# Directory that contains this script; the chapters are read from it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Collect the Markdown chapters and sort them by the number in their name.
# - Only regular files are kept, so a directory whose name happens to end in
#   ".md" is not handed to open() later on.
# - The file name is the secondary sort key, which makes the order of files
#   with equal (or missing) numbers deterministic instead of depending on the
#   arbitrary order returned by os.listdir().
md_files = sorted(
    (
        entry
        for entry in os.listdir(current_dir)
        if entry.endswith('.md')
        and os.path.isfile(os.path.join(current_dir, entry))
    ),
    key=lambda entry: (extract_chapter_number(entry), entry),
)

# Show the resolved chapter order before doing any conversion work.
print(f"找到 {len(md_files)} 个md文件按章节数字排序:")
for f in md_files:
    print(f" - {f}")
# Opening part of the output document: HTML head plus the print stylesheet
# (page margins and page counter, body font, heading and paragraph layout,
# and the rule that starts each chapter title on a new page).
# Double quotes need no escaping inside a triple-quoted literal.
combined_html = """<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>小说合集</title>
<style>
@page {
margin: 2cm;
@bottom-right {
content: counter(page);
}
}
body {
font-family: "SimSun", "宋体", serif;
font-size: 12pt;
line-height: 1.8;
text-align: justify;
}
h1 {
font-size: 18pt;
text-align: center;
margin-top: 2em;
margin-bottom: 1em;
}
h2 {
font-size: 14pt;
text-align: center;
margin-top: 1.5em;
margin-bottom: 0.8em;
}
p {
margin: 0.5em 0;
text-indent: 2em;
}
.chapter-title {
page-break-before: always;
}
.chapter-title:first-child {
page-break-before: auto;
}
</style>
</head>
<body>
"""
# Render every chapter and collect the pieces; joining them once at the end
# avoids re-copying the growing document string on every iteration.
chapter_fragments = []
for md_file in md_files:
    file_path = os.path.join(current_dir, md_file)
    # 'utf-8-sig' reads plain UTF-8 as well, but drops a leading byte-order
    # mark so it is not left at the start of the text given to the converter.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()
    # Convert to HTML, keeping table and fenced-code-block formatting.
    md = markdown.Markdown(extensions=['tables', 'fenced_code'])
    html_content = md.convert(content)
    # The chapter heading is the file name without its extension. splitext
    # removes only the trailing extension (str.replace would also delete a
    # ".md" in the middle of the name), and escaping keeps characters such as
    # "<" or "&" in a file name from being interpreted as markup.
    chapter_title = html.escape(os.path.splitext(md_file)[0])
    chapter_fragments.append(f'<h1 class="chapter-title">{chapter_title}</h1>\n')
    chapter_fragments.append(html_content + '\n')
combined_html += ''.join(chapter_fragments)
# Close the document opened by the HTML skeleton.
combined_html += '</body></html>'
# Write the PDF into the same directory as the chapters.
output_pdf = os.path.join(current_dir, '小说合集.pdf')
# A document built from a string has no location of its own, so base_url is
# supplied to let relative references inside the chapters (for example
# images) be resolved against the chapter directory.
HTML(string=combined_html, base_url=current_dir).write_pdf(output_pdf)
print(f"\nPDF已生成: {output_pdf}")