, pub toc: Vec, } use std::path::Path; /* detect_layout: decides between BookLayout::FixedLayout and BookLayout::Reflowable. It first checks the rendition:layout metadata values for pre-paginated, then falls back to substring checks on the text of the resource at doc.root_file; if nothing matches it returns Reflowable. NOTE(review): the fallback only requires both substrings to occur somewhere in that text, not together, and the last check (rendition:layout-pre-paginated) is already implied by the one before it. NOTE(review): the parameter type reads EpubDoc followed by a stray closing angle bracket, so generic arguments look lost from this text - confirm against the original file. */ fn detect_layout(doc: &mut epub::doc::EpubDoc>) -> BookLayout { if let Some(vals) = doc.metadata.get("rendition:layout") { if vals.iter().any(|v| v == "pre-paginated") { return BookLayout::FixedLayout; } } if let Ok(opf) = doc.get_resource_str_by_path(&doc.root_file.clone()) { if opf.contains("rendition:layout") && opf.contains("pre-paginated") { return BookLayout::FixedLayout; } if opf.contains("rendition:layout-pre-paginated") { return BookLayout::FixedLayout; } } BookLayout::Reflowable } /* load_epub: opens the EPUB at path and assembles a Book. Title and creator metadata fall back to fixed placeholder strings; a failed get_cover becomes None via ok(). Each spine entry is read as a string, converted with strip_html for content and extract_title for the section title; blocks and page_block_ranges start empty. The TOC is moved out of doc with mem::take and converted by build_toc. Errors: failure to open the file or to read any spine resource is formatted into a message and returned early with the ? operator. NOTE(review): impl AsRef and Result appear here without type arguments - presumably lost from this text; confirm. */ pub fn load_epub(path: impl AsRef) -> Result { let path = path.as_ref(); let mut doc = epub::doc::EpubDoc::new(path) .map_err(|e| format!("无法打开文件: {}", e))?; let layout = detect_layout(&mut doc); let title = doc.mdata("title").unwrap_or_else(|| "未知标题".to_string()); let author = doc.mdata("creator").unwrap_or_else(|| "未知作者".to_string()); let cover = doc.get_cover().ok(); let spine = doc.spine.clone(); let mut sections = Vec::new(); for (i, href) in spine.iter().enumerate() { let raw_html = doc.get_resource_str(href) .map_err(|e| format!("读取章节失败: {}", e))?; let text = strip_html(&raw_html); /* this title shadows the book title only within the loop body; falls back to a 1-based numbered chapter label */ let title = extract_title(&raw_html) .unwrap_or_else(|| format!("第{}章", i + 1)); sections.push(Section { title, content: text, blocks: Vec::new(), page_block_ranges: Vec::new(), }); } let raw_toc = std::mem::take(&mut doc.toc); let toc = build_toc(&raw_toc, &spine); Ok(Book { title, author, cover, layout, sections, toc }) } /* extract_title: looks for a first marker with find, skips 7 bytes past it, and returns the stripped, trimmed text up to a second marker; a second lookup follows and None is returned when nothing matched. NOTE(review): both markers are empty strings as written, so find matches at offset 0 and the 7-byte skip has nothing to skip over (and the slice would panic on input shorter than 7 bytes) - this suggests the marker literals were lost from this text. The second lookup has an unbalanced string quote and uses rest and content_start without a visible definition, so this function and everything after it on this line cannot be parsed as written; restore from the original file before editing. */ fn extract_title(html: &str) -> Option { if let Some(start) = html.find("") { let rest = &html[start + 7..]; if let Some(end) = rest.find("") { return Some(strip_html(&rest[..end]).trim().to_string()); } } if let Some(start) = html.find("') { let inner = &rest[content_start + 1..]; if let Some(end) = inner.find("") { return Some(strip_html(&inner[..end]).trim().to_string()); } } } None } fn build_toc( entries: &[epub::doc::NavPoint], spine: &[String], ) -> Vec { entries .iter() .map(|e| { let content_str = e.content.to_string_lossy(); 
let section = spine .iter() .position(|s| content_str.contains(s.trim_end_matches('/'))) // unwrap_or(0) is safe: a real TOC entry should always match a spine item .unwrap_or(0); TocEntry { label: e.label.clone(), section, children: build_toc(&e.children, spine), } }) .collect() } #[cfg(test)] mod tests { use super::*; #[test] fn test_epub_loader_nonexistent_file() { let result = load_epub("nonexistent.epub"); assert!(result.is_err()); } #[test] fn test_strip_html_plain_text() { assert_eq!(strip_html("Hello World"), "Hello World"); } #[test] fn test_strip_html_simple_tags() { assert_eq!(strip_html("

Hello

"), "Hello"); } #[test] fn test_strip_html_nested_tags() { assert_eq!( strip_html("

Hello World

"), "Hello World" ); } #[test] fn test_strip_html_html_entities() { assert_eq!(strip_html("Hello & World"), "Hello & World"); assert_eq!(strip_html("Hello World"), "Hello World"); } #[test] fn test_strip_html_empty() { assert_eq!(strip_html(""), ""); } #[test] fn test_extract_title_from_title_tag() { let html = "My Book Title"; assert_eq!(extract_title(html), Some("My Book Title".to_string())); } #[test] fn test_extract_title_from_h1() { let html = "

Chapter One

text

"; assert_eq!(extract_title(html), Some("Chapter One".to_string())); } #[test] fn test_extract_title_prefers_title() { let html = "Book

Chapter

"; assert_eq!(extract_title(html), Some("Book".to_string())); } #[test] fn test_extract_title_missing() { assert_eq!(extract_title("

no title

"), None); } #[test] fn test_extract_title_empty() { assert_eq!(extract_title(""), None); } #[test] fn test_html_to_plain_paragraphs() { let html = "

第一段

第二段

"; let result = strip_html(html); assert!(result.contains("第一段")); assert!(result.contains("第二段")); assert!(result.contains('\n')); assert!(result.ends_with("第二段")); } #[test] fn test_html_to_plain_heading() { let html = "

标题

正文

"; let result = strip_html(html); assert!(result.contains("标题")); assert!(result.contains("正文")); assert!(result.contains('\n')); } #[test] fn test_html_to_plain_list() { let html = "

项目一
项目二

"; let result = strip_html(html); assert!(result.starts_with("- ")); assert!(result.contains("项目一")); assert!(result.contains("项目二")); } #[test] fn test_html_to_plain_br() { let html = "第一行
第二行"; let result = strip_html(html); assert_eq!(result, "第一行\n第二行"); } #[test] fn test_html_to_plain_skip_script() { let html = "

正文

更多正文

"; let result = strip_html(html); assert!(result.contains("正文")); assert!(result.contains("更多正文")); assert!(!result.contains("var x=1")); } #[test] fn test_html_to_plain_line_break_collapse() { let html = "

段一

段二

段三

"; let result = strip_html(html); let non_empty: Vec<&str> = result.lines().filter(|l| !l.is_empty()).collect(); assert_eq!(non_empty.len(), 3); assert_eq!(non_empty[0], "段一"); assert_eq!(non_empty[1], "段二"); assert_eq!(non_empty[2], "段三"); } #[test] fn test_build_toc_empty() { let toc = build_toc(&[], &[]); assert!(toc.is_empty()); } }