Files
epub-read/src/book.rs

169 lines
4.5 KiB
Rust
Raw Normal View History

/// Converts an HTML/XHTML fragment to plain text.
///
/// Everything between `<` and the next `>` is discarded, and character
/// references found outside of tags are decoded:
///
/// * named: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&nbsp;`
///   (`&nbsp;` becomes a plain space);
/// * numeric: decimal `&#NNN;` and hexadecimal `&#xHHH;` / `&#XHHH;`.
///
/// Anything that starts with `&` but is not a recognised reference (a bare
/// ampersand as in `Tom & Jerry`, an unknown name, an invalid code point, or
/// an unterminated reference at the end of the input) is copied to the output
/// verbatim, so no text is ever lost.
///
/// This is a lightweight scanner, not an HTML parser: a `>` inside an
/// attribute value ends the tag early, and the contents of `<script>` /
/// `<style>` elements are kept as text.
pub fn strip_html(input: &str) -> String {
    // Upper bound on the text buffered between `&` and `;`. Real references
    // are short; the cap keeps a stray `&` from buffering a long run of
    // alphanumeric text before it is recognised as literal.
    const MAX_ENTITY_LEN: usize = 32;

    // Decodes the text between `&` and `;`, or returns `None` when it is not
    // a supported reference.
    fn decode_entity(name: &str) -> Option<char> {
        match name {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            "nbsp" => Some(' '),
            _ => {
                let digits = name.strip_prefix('#')?;
                let hex = digits
                    .strip_prefix('x')
                    .or_else(|| digits.strip_prefix('X'));
                let code = if let Some(hex) = hex {
                    u32::from_str_radix(hex, 16).ok()?
                } else {
                    digits.parse::<u32>().ok()?
                };
                // Rejects surrogates / out-of-range values, and NUL.
                char::from_u32(code).filter(|&c| c != '\0')
            }
        }
    }

    let mut result = String::with_capacity(input.len());
    let mut in_tag = false;
    let mut in_entity = false;
    let mut entity = String::new();

    for c in input.chars() {
        if in_tag {
            if c == '>' {
                in_tag = false;
            }
            continue;
        }

        if in_entity {
            if c == ';' {
                in_entity = false;
                match decode_entity(&entity) {
                    Some(decoded) => result.push(decoded),
                    None => {
                        // Unknown reference: keep the original text.
                        result.push('&');
                        result.push_str(&entity);
                        result.push(';');
                    }
                }
                continue;
            }
            if entity.len() < MAX_ENTITY_LEN && (c.is_ascii_alphanumeric() || c == '#') {
                entity.push(c);
                continue;
            }
            // `c` cannot be part of a reference, so the `&` was literal text.
            // Flush what was buffered and handle `c` as an ordinary character.
            in_entity = false;
            result.push('&');
            result.push_str(&entity);
        }

        match c {
            '<' => in_tag = true,
            '&' => {
                in_entity = true;
                entity.clear();
            }
            _ => result.push(c),
        }
    }

    // Input ended in the middle of a would-be reference: emit it literally.
    if in_entity {
        result.push('&');
        result.push_str(&entity);
    }

    result
}
/// One node of a book's table of contents.
///
/// Entries form a tree: each node carries its own label, the section it
/// points at, and any nested sub-entries.
#[derive(Clone, Debug)]
pub struct TocEntry {
    /// Text of the entry as given by the navigation point it was built from.
    pub label: String,
    /// Zero-based index of the spine item (and therefore of the matching
    /// element in `Book::sections`) this entry refers to.
    pub section: usize,
    /// Nested entries, in document order; empty for a leaf.
    pub children: Vec<TocEntry>,
}
/// Plain-text form of a single spine item of the book.
#[derive(Clone, Debug)]
pub struct Section {
    /// Heading shown for the section. `load_epub` takes it from the
    /// document's `<title>` or first `<h1>`, falling back to the section's
    /// 1-based position.
    pub title: String,
    /// Section body with markup removed by `strip_html`.
    pub content: String,
    /// Left empty by `load_epub`.
    /// NOTE(review): presumably filled in later by pagination code with
    /// per-page offsets into `content` — confirm against the caller.
    pub pages: Vec<usize>,
}
/// An EPUB loaded fully into memory by `load_epub`.
#[derive(Clone, Debug)]
pub struct Book {
    /// Value of the `title` metadata entry, or a placeholder when absent.
    pub title: String,
    /// Value of the `creator` metadata entry, or a placeholder when absent.
    pub author: String,
    /// Raw bytes of the cover resource; `None` when it could not be read.
    pub cover: Option<Vec<u8>>,
    /// One entry per spine item, in spine order.
    pub sections: Vec<Section>,
    /// Table-of-contents tree whose entries index into `sections`.
    pub toc: Vec<TocEntry>,
}
use std::path::Path;
pub fn load_epub(path: impl AsRef<Path>) -> Result<Book, String> {
let path = path.as_ref();
let mut doc = epub::doc::EpubDoc::new(path)
.map_err(|e| format!("无法打开文件: {}", e))?;
let title = doc.mdata("title").unwrap_or_else(|| "未知标题".to_string());
let author = doc.mdata("creator").unwrap_or_else(|| "未知作者".to_string());
let cover = doc.get_cover().ok();
let spine = doc.spine.clone();
let raw_toc = std::mem::take(&mut doc.toc);
let mut sections = Vec::new();
for (i, href) in spine.iter().enumerate() {
let raw_html = doc.get_resource_str(href)
.map_err(|e| format!("读取章节失败: {}", e))?;
let text = strip_html(&raw_html);
let title = extract_title(&raw_html)
.unwrap_or_else(|| format!("{}", i + 1));
sections.push(Section {
title,
content: text,
pages: Vec::new(),
});
}
let toc = build_toc(&raw_toc, &spine);
Ok(Book { title, author, cover, sections, toc })
}
fn extract_title(html: &str) -> Option<String> {
if let Some(start) = html.find("<title>") {
let rest = &html[start + 7..];
if let Some(end) = rest.find("</title>") {
return Some(strip_html(&rest[..end]).trim().to_string());
}
}
if let Some(start) = html.find("<h1") {
let rest = &html[start..];
if let Some(content_start) = rest.find('>') {
let inner = &rest[content_start + 1..];
if let Some(end) = inner.find("</h1>") {
return Some(strip_html(&inner[..end]).trim().to_string());
}
}
}
None
}
/// Converts the EPUB navigation tree into [`TocEntry`] nodes.
///
/// For every navigation point the target section is the index of the first
/// `spine` item whose text (with trailing `/` removed) occurs as a substring
/// of the point's content path. When no spine item matches, the entry points
/// at section `0`. Children are converted recursively with the same rule.
fn build_toc(
    entries: &[epub::doc::NavPoint],
    spine: &[String],
) -> Vec<TocEntry> {
    let mut toc = Vec::with_capacity(entries.len());

    for nav_point in entries {
        let target = nav_point.content.to_string_lossy();

        // First spine item contained in the target path wins; default is 0.
        let mut section = 0;
        for (index, spine_item) in spine.iter().enumerate() {
            if target.contains(spine_item.trim_end_matches('/')) {
                section = index;
                break;
            }
        }

        toc.push(TocEntry {
            label: nav_point.label.clone(),
            section,
            children: build_toc(&nav_point.children, spine),
        });
    }

    toc
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Loading a path that does not exist must report an error.
    #[test]
    fn test_epub_loader_nonexistent_file() {
        assert!(load_epub("nonexistent.epub").is_err());
    }

    /// Text without markup passes through unchanged.
    #[test]
    fn test_strip_html_plain_text() {
        let plain = "Hello World";
        assert_eq!(strip_html(plain), "Hello World");
    }

    /// A single pair of tags is removed around its text.
    #[test]
    fn test_strip_html_simple_tags() {
        let stripped = strip_html("<p>Hello</p>");
        assert_eq!(stripped, "Hello");
    }

    /// Nested tags are all removed and inner whitespace is kept.
    #[test]
    fn test_strip_html_nested_tags() {
        let markup = "<div><p>Hello <b>World</b></p></div>";
        assert_eq!(strip_html(markup), "Hello World");
    }

    /// Named entities are decoded to their characters.
    #[test]
    fn test_strip_html_html_entities() {
        let cases = [
            ("Hello &amp; World", "Hello & World"),
            ("Hello&nbsp;World", "Hello World"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(strip_html(input), expected);
        }
    }

    /// Empty input yields empty output.
    #[test]
    fn test_strip_html_empty() {
        assert_eq!(strip_html(""), "");
    }
}