""" 将当前目录下的所有md文件按文件名中的数字排序（从第一章开始），合并成一个pdf文件 """ import os import re # 新增：导入正则模块 import markdown from weasyprint import HTML, CSS def extract_chapter_number(filename): """ 正则提取文件名中的数字（核心函数） :param filename: 文件名（如：第1章.md、10.md、章节2.md） :return: 提取到的数字（int），无数字返回9999（排最后） """ # 正则匹配所有连续数字（\d+ 匹配1个及以上数字） match = re.search(r'\d+', filename) if match: return int(match.group()) # 转成整数，保证排序正确（10>2） else: return 9999 # 无数字的文件排最后 # 获取当前目录 current_dir = os.path.dirname(os.path.abspath(__file__)) # 获取所有md文件 + 按提取的数字排序（核心修正点） md_files = [f for f in os.listdir(current_dir) if f.endswith('.md')] md_files = sorted(md_files, key=lambda x: extract_chapter_number(x)) # 按数字排序 print(f"找到 {len(md_files)} 个md文件（按章节数字排序）:") for f in md_files: print(f" - {f}") # 合并所有md文件内容（原有逻辑不变） combined_html = """ 小说合集 """ for md_file in md_files: file_path = os.path.join(current_dir, md_file) # 读取md文件 with open(file_path, 'r', encoding='utf-8') as f: content = f.read() # 转换为html（保留表格、代码块格式） md = markdown.Markdown(extensions=['tables', 'fenced_code']) html_content = md.convert(content) # 添加章节标题 chapter_title = md_file.replace('.md', '') combined_html += f'

{chapter_title}

\n' combined_html += html_content + '\n' combined_html += '' # 输出pdf文件 output_pdf = os.path.join(current_dir, '小说合集.pdf') HTML(string=combined_html).write_pdf(output_pdf) print(f"\nPDF已生成: {output_pdf}")