pub fn strip_html(input: &str) -> String { let mut result = String::with_capacity(input.len()); let mut in_tag = false; let mut in_entity = false; let mut entity = String::new(); for c in input.chars() { match c { '<' => in_tag = true, '>' if in_tag => in_tag = false, '&' if !in_tag => { in_entity = true; entity.clear(); } ';' if in_entity => { in_entity = false; let decoded = match entity.as_str() { "amp" => "&", "lt" => "<", "gt" => ">", "quot" => "\"", "nbsp" => " ", _ => "", }; result.push_str(decoded); } c if !in_tag && !in_entity => result.push(c), c if in_entity => entity.push(c), _ => {} } } result } #[derive(Debug, Clone)] pub struct TocEntry { pub label: String, pub section: usize, pub children: Vec, } #[derive(Debug, Clone)] pub struct Section { pub title: String, pub content: String, pub pages: Vec, } #[derive(Debug, Clone)] pub struct Book { pub title: String, pub author: String, pub cover: Option>, pub sections: Vec
, pub toc: Vec, } use std::path::Path; pub fn load_epub(path: impl AsRef) -> Result { let path = path.as_ref(); let mut doc = epub::doc::EpubDoc::new(path) .map_err(|e| format!("无法打开文件: {}", e))?; let title = doc.mdata("title").unwrap_or_else(|| "未知标题".to_string()); let author = doc.mdata("creator").unwrap_or_else(|| "未知作者".to_string()); let cover = doc.get_cover().ok(); let spine = doc.spine.clone(); let raw_toc = std::mem::take(&mut doc.toc); let mut sections = Vec::new(); for (i, href) in spine.iter().enumerate() { let raw_html = doc.get_resource_str(href) .map_err(|e| format!("读取章节失败: {}", e))?; let text = strip_html(&raw_html); let title = extract_title(&raw_html) .unwrap_or_else(|| format!("第{}章", i + 1)); sections.push(Section { title, content: text, pages: Vec::new(), }); } let toc = build_toc(&raw_toc, &spine); Ok(Book { title, author, cover, sections, toc }) } fn extract_title(html: &str) -> Option { if let Some(start) = html.find("") { let rest = &html[start + 7..]; if let Some(end) = rest.find("") { return Some(strip_html(&rest[..end]).trim().to_string()); } } if let Some(start) = html.find("') { let inner = &rest[content_start + 1..]; if let Some(end) = inner.find("") { return Some(strip_html(&inner[..end]).trim().to_string()); } } } None } fn build_toc( entries: &[epub::doc::NavPoint], spine: &[String], ) -> Vec { entries .iter() .map(|e| { let content_str = e.content.to_string_lossy(); let section = spine .iter() .position(|s| content_str.contains(s.trim_end_matches('/'))) .unwrap_or(0); TocEntry { label: e.label.clone(), section, children: build_toc(&e.children, spine), } }) .collect() } #[cfg(test)] mod tests { use super::*; #[test] fn test_epub_loader_nonexistent_file() { let result = load_epub("nonexistent.epub"); assert!(result.is_err()); } #[test] fn test_strip_html_plain_text() { assert_eq!(strip_html("Hello World"), "Hello World"); } #[test] fn test_strip_html_simple_tags() { assert_eq!(strip_html("

Hello

"), "Hello"); } #[test] fn test_strip_html_nested_tags() { assert_eq!( strip_html("

Hello World

"), "Hello World" ); } #[test] fn test_strip_html_html_entities() { assert_eq!(strip_html("Hello & World"), "Hello & World"); assert_eq!(strip_html("Hello World"), "Hello World"); } #[test] fn test_strip_html_empty() { assert_eq!(strip_html(""), ""); } }