Implement EpubLoader with load_epub, extract_title, and build_toc functions
This commit is contained in:
78
src/book.rs
78
src/book.rs
@@ -55,10 +55,88 @@ pub struct Book {
|
|||||||
pub toc: Vec<TocEntry>,
|
pub toc: Vec<TocEntry>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
pub fn load_epub(path: impl AsRef<Path>) -> Result<Book, String> {
|
||||||
|
let path = path.as_ref();
|
||||||
|
let mut doc = epub::doc::EpubDoc::new(path)
|
||||||
|
.map_err(|e| format!("无法打开文件: {}", e))?;
|
||||||
|
|
||||||
|
let title = doc.mdata("title").unwrap_or_else(|| "未知标题".to_string());
|
||||||
|
let author = doc.mdata("creator").unwrap_or_else(|| "未知作者".to_string());
|
||||||
|
let cover = doc.get_cover().ok();
|
||||||
|
let spine = doc.spine.clone();
|
||||||
|
let raw_toc = std::mem::take(&mut doc.toc);
|
||||||
|
|
||||||
|
let mut sections = Vec::new();
|
||||||
|
for (i, href) in spine.iter().enumerate() {
|
||||||
|
let raw_html = doc.get_resource_str(href)
|
||||||
|
.map_err(|e| format!("读取章节失败: {}", e))?;
|
||||||
|
let text = strip_html(&raw_html);
|
||||||
|
let title = extract_title(&raw_html)
|
||||||
|
.unwrap_or_else(|| format!("第{}章", i + 1));
|
||||||
|
sections.push(Section {
|
||||||
|
title,
|
||||||
|
content: text,
|
||||||
|
pages: Vec::new(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let toc = build_toc(&raw_toc, &spine);
|
||||||
|
|
||||||
|
Ok(Book { title, author, cover, sections, toc })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_title(html: &str) -> Option<String> {
|
||||||
|
if let Some(start) = html.find("<title>") {
|
||||||
|
let rest = &html[start + 7..];
|
||||||
|
if let Some(end) = rest.find("</title>") {
|
||||||
|
return Some(strip_html(&rest[..end]).trim().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(start) = html.find("<h1") {
|
||||||
|
let rest = &html[start..];
|
||||||
|
if let Some(content_start) = rest.find('>') {
|
||||||
|
let inner = &rest[content_start + 1..];
|
||||||
|
if let Some(end) = inner.find("</h1>") {
|
||||||
|
return Some(strip_html(&inner[..end]).trim().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_toc(
|
||||||
|
entries: &[epub::doc::NavPoint],
|
||||||
|
spine: &[String],
|
||||||
|
) -> Vec<TocEntry> {
|
||||||
|
entries
|
||||||
|
.iter()
|
||||||
|
.map(|e| {
|
||||||
|
let content_str = e.content.to_string_lossy();
|
||||||
|
let section = spine
|
||||||
|
.iter()
|
||||||
|
.position(|s| content_str.contains(s.trim_end_matches('/')))
|
||||||
|
.unwrap_or(0);
|
||||||
|
TocEntry {
|
||||||
|
label: e.label.clone(),
|
||||||
|
section,
|
||||||
|
children: build_toc(&e.children, spine),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
fn test_epub_loader_nonexistent_file() {
    // Opening a path that does not exist must surface an error, not panic.
    assert!(load_epub("nonexistent.epub").is_err());
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_strip_html_plain_text() {
|
fn test_strip_html_plain_text() {
|
||||||
assert_eq!(strip_html("Hello World"), "Hello World");
|
assert_eq!(strip_html("Hello World"), "Hello World");
|
||||||
|
|||||||
Reference in New Issue
Block a user