2026-01-07 13:39:38 +08:00
|
|
|
|
"""
|
2026-01-12 09:22:44 +08:00
|
|
|
|
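Merge all .md files located in this script's directory into a single PDF file, ordered by the number found in each file name (starting from chapter 1).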
|
2026-01-07 13:39:38 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
2026-01-12 09:22:44 +08:00
|
|
|
|
import re # 新增:导入正则模块
|
2026-01-07 13:39:38 +08:00
|
|
|
|
import markdown
|
|
|
|
|
|
from weasyprint import HTML, CSS
|
|
|
|
|
|
|
2026-01-12 09:22:44 +08:00
|
|
|
|
def extract_chapter_number(filename):
    """
    Extract the chapter number from a file name for use as a sort key.

    :param filename: file name (e.g. 第1章.md, 10.md, 章节2.md)
    :return: the first run of digits in the name as an int; 9999 when the
        name contains no digits (intended to place such files last)
    """
    # \d+ matches one or more consecutive digits; search() stops at the first run.
    first_digits = re.search(r'\d+', filename)
    if first_digits is None:
        return 9999  # fallback for names without any digits
    # Convert to int so the comparison is numeric (10 > 2), not lexicographic.
    return int(first_digits.group(0))
|
|
|
|
|
|
|
2026-01-07 13:39:38 +08:00
|
|
|
|
# Directory that contains this script; the .md files are read from here.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Collect the Markdown files. Entries that end in ".md" but are not regular
# files (e.g. a directory named "notes.md") are skipped, because the later
# open() call would fail on them.
md_files = [
    f for f in os.listdir(current_dir)
    if f.endswith('.md') and os.path.isfile(os.path.join(current_dir, f))
]
# Sort by the number extracted from the file name. os.listdir() returns names
# in arbitrary order, so the file name itself is used as a tie-breaker to keep
# the result deterministic when two files share a number (or have none).
md_files = sorted(md_files, key=lambda x: (extract_chapter_number(x), x))

print(f"找到 {len(md_files)} 个md文件(按章节数字排序):")
for f in md_files:
    print(f" - {f}")
|
|
|
|
|
|
|
2026-01-12 09:22:44 +08:00
|
|
|
|
# Opening part of the combined HTML document: doctype, <head> with the print
# stylesheet (page margins, page-number footer, fonts, per-chapter page
# breaks) and the opening <body> tag. Chapter content is appended afterwards.
combined_html = """<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>小说合集</title>
<style>
@page {
margin: 2cm;
@bottom-right {
content: counter(page);
}
}
body {
font-family: "SimSun", "宋体", serif;
font-size: 12pt;
line-height: 1.8;
text-align: justify;
}
h1 {
font-size: 18pt;
text-align: center;
margin-top: 2em;
margin-bottom: 1em;
}
h2 {
font-size: 14pt;
text-align: center;
margin-top: 1.5em;
margin-bottom: 0.8em;
}
p {
margin: 0.5em 0;
text-indent: 2em;
}
.chapter-title {
page-break-before: always;
}
.chapter-title:first-child {
page-break-before: auto;
}
</style>
</head>
<body>
"""
|
|
|
|
|
|
|
|
|
|
|
|
# Convert each Markdown file to HTML, in sorted order, and collect the pieces.
chapter_parts = []
for md_file in md_files:
    file_path = os.path.join(current_dir, md_file)

    # Read the Markdown file. 'utf-8-sig' decodes plain UTF-8 as well, but also
    # drops a leading byte-order mark, which would otherwise stay at the start
    # of the text and keep a heading on the first line from being recognised.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Convert to HTML (tables and fenced code blocks enabled). A new converter
    # instance is created for each file.
    md = markdown.Markdown(extensions=['tables', 'fenced_code'])
    html_content = md.convert(content)

    # Chapter title = file name without its extension. splitext() removes only
    # the trailing ".md"; str.replace('.md', '') would also delete any ".md"
    # that occurs earlier in the name.
    chapter_title = os.path.splitext(md_file)[0]
    chapter_parts.append(f'<h1 class="chapter-title">{chapter_title}</h1>\n')
    chapter_parts.append(html_content + '\n')

# Join once instead of growing the string inside the loop, then close the document.
combined_html += ''.join(chapter_parts) + '</body></html>'

# Write the PDF into the same directory as the script.
output_pdf = os.path.join(current_dir, '小说合集.pdf')
HTML(string=combined_html).write_pdf(output_pdf)

print(f"\nPDF已生成: {output_pdf}")
|