2026-05-14 20:43:29 +08:00
|
|
|
|
/// Decodes HTML character references in `text` and returns the decoded string.
///
/// Recognised forms:
/// - the named entities handled by `decode_char_reference`;
/// - decimal (`&#8217;`) and hexadecimal (`&#x2019;`) numeric references.
///
/// Anything that is not a recognised, `;`-terminated reference (a bare `&`,
/// an unknown name, a candidate longer than the scan window) is copied to
/// the output verbatim, so input text is never dropped.
fn decode_entities(text: &str) -> String {
    // Longest reference body (the part between `&` and `;`) that is considered.
    // Bounding the scan keeps a stray `&` from pairing with a distant `;`.
    const MAX_ENTITY_LEN: usize = 32;

    let mut result = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(amp) = rest.find('&') {
        result.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];

        // `;` is ASCII, so its byte offset is always a valid char boundary.
        let decoded = after
            .bytes()
            .take(MAX_ENTITY_LEN + 1)
            .position(|b| b == b';')
            .and_then(|end| decode_char_reference(&after[..end]).map(|c| (c, end)));

        match decoded {
            Some((c, end)) => {
                result.push(c);
                rest = &after[end + 1..];
            }
            None => {
                // Not a reference we understand: keep the ampersand literally
                // and continue scanning right after it.
                result.push('&');
                rest = after;
            }
        }
    }

    result.push_str(rest);
    result
}

/// Resolves the body of a character reference (the text between `&` and `;`).
///
/// Returns `None` for unknown names, malformed numbers, invalid code points and
/// non-whitespace control characters.
fn decode_char_reference(body: &str) -> Option<char> {
    if let Some(number) = body.strip_prefix('#') {
        let (digits, radix) = match number
            .strip_prefix('x')
            .or_else(|| number.strip_prefix('X'))
        {
            Some(hex) => (hex, 16),
            None => (number, 10),
        };
        // Reject empty input and signs, which `from_str_radix` would accept.
        if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
            return None;
        }
        let code = u32::from_str_radix(digits, radix).ok()?;
        return char::from_u32(code).filter(|c| !c.is_control() || c.is_whitespace());
    }

    let decoded = match body {
        "amp" => '&',
        "lt" => '<',
        "gt" => '>',
        "quot" => '"',
        "apos" => '\'',
        "nbsp" => ' ',
        "emsp" => ' ',
        "ensp" => ' ',
        "mdash" => '—',
        "ndash" => '–',
        "ldquo" => '"',
        "rdquo" => '"',
        "lsquo" => '\'',
        "rsquo" => '\'',
        "hellip" => '…',
        _ => return None,
    };
    Some(decoded)
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the tag name from the text found between `<` and `>`.
///
/// The name is the first whitespace-delimited token with any trailing `/`
/// removed, so `br/` yields `br` while a closing tag such as `/p` keeps its
/// leading slash. Empty or whitespace-only input yields an empty string.
fn tag_name_from(tag_content: &str) -> &str {
    let trimmed = tag_content.trim_start();
    let name_end = trimmed
        .find(char::is_whitespace)
        .unwrap_or(trimmed.len());
    trimmed[..name_end].trim_end_matches('/')
}
|
|
|
|
|
|
|
2026-05-22 17:56:48 +08:00
|
|
|
|
/// Extracts the value of the `id` attribute from the text between `<` and `>`.
///
/// Both `id="…"` and `id='…'` are accepted. The attribute name must start at an
/// attribute boundary (start of input or after whitespace), so look-alikes such
/// as `data-id="…"` or `valid="…"` are ignored. Returns `None` when there is no
/// `id` attribute, its value is empty, or its closing quote is missing.
fn extract_id_from_tag(tag_content: &str) -> Option<String> {
    const NEEDLE: &str = "id=";
    let mut search_from = 0;

    while let Some(offset) = tag_content[search_from..].find(NEEDLE) {
        let name_start = search_from + offset;
        let value_start = name_start + NEEDLE.len();
        // Always advance so a rejected candidate cannot be found again.
        search_from = value_start;

        let at_boundary = tag_content[..name_start]
            .chars()
            .next_back()
            .map_or(true, char::is_whitespace);
        if !at_boundary {
            continue;
        }

        let rest = &tag_content[value_start..];
        let quote = match rest.chars().next() {
            Some(c) if c == '"' || c == '\'' => c,
            _ => continue,
        };
        // Both quote characters are one byte wide.
        let value = &rest[1..];
        if let Some(end) = value.find(quote) {
            if end > 0 {
                return Some(value[..end].to_string());
            }
        }
    }

    None
}
|
|
|
|
|
|
|
2026-05-13 23:02:04 +08:00
|
|
|
|
pub fn strip_html(input: &str) -> String {
|
2026-05-14 20:43:29 +08:00
|
|
|
|
let mut out = String::with_capacity(input.len());
|
|
|
|
|
|
let mut pos = 0;
|
2026-05-15 12:09:11 +08:00
|
|
|
|
let mut heading_level: Option<u32> = None;
|
2026-05-22 17:56:48 +08:00
|
|
|
|
let mut pending_anchor: Option<String> = None;
|
2026-05-14 20:43:29 +08:00
|
|
|
|
|
|
|
|
|
|
while pos < input.len() {
|
|
|
|
|
|
// Find next '<'
|
|
|
|
|
|
let remaining = &input[pos..];
|
|
|
|
|
|
let tag_start = remaining.find('<');
|
|
|
|
|
|
|
|
|
|
|
|
let tag_start = match tag_start {
|
|
|
|
|
|
Some(s) => pos + s,
|
|
|
|
|
|
None => {
|
|
|
|
|
|
// No more tags, emit remaining text
|
|
|
|
|
|
out.push_str(&decode_entities(&input[pos..]));
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Emit text before the tag
|
|
|
|
|
|
if tag_start > pos {
|
2026-05-15 12:09:11 +08:00
|
|
|
|
let text = decode_entities(&input[pos..tag_start]);
|
2026-05-17 22:06:53 +08:00
|
|
|
|
if let Some(level) = heading_level {
|
|
|
|
|
|
out.push('\x01');
|
|
|
|
|
|
out.push(char::from_digit(level, 10).unwrap_or('1'));
|
2026-05-22 17:56:48 +08:00
|
|
|
|
if let Some(ref anchor) = pending_anchor {
|
|
|
|
|
|
out.push('\x03');
|
|
|
|
|
|
out.push_str(anchor);
|
|
|
|
|
|
out.push('\x04');
|
|
|
|
|
|
}
|
2026-05-15 12:09:11 +08:00
|
|
|
|
out.push_str(&text);
|
2026-05-17 22:06:53 +08:00
|
|
|
|
out.push('\x02');
|
2026-05-22 17:56:48 +08:00
|
|
|
|
pending_anchor = None;
|
2026-05-15 12:09:11 +08:00
|
|
|
|
} else {
|
|
|
|
|
|
out.push_str(&text);
|
|
|
|
|
|
}
|
2026-05-14 20:43:29 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Find '>' to close the tag
|
|
|
|
|
|
let tag_end = match input[tag_start..].find('>') {
|
|
|
|
|
|
Some(i) => tag_start + i,
|
|
|
|
|
|
None => {
|
|
|
|
|
|
// Unclosed tag, emit rest as text
|
|
|
|
|
|
out.push_str(&decode_entities(&input[tag_start..]));
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
let tag_content = &input[tag_start + 1..tag_end];
|
|
|
|
|
|
let name = tag_name_from(tag_content);
|
|
|
|
|
|
|
|
|
|
|
|
match name {
|
|
|
|
|
|
"script" | "style" => {
|
|
|
|
|
|
// Skip content until closing tag
|
|
|
|
|
|
let close_tag = format!("</{}", name);
|
|
|
|
|
|
if let Some(cs) = input[tag_end..].find(&close_tag) {
|
|
|
|
|
|
let close_tag_end = input[tag_end + cs..].find('>');
|
|
|
|
|
|
if let Some(ce) = close_tag_end {
|
|
|
|
|
|
pos = tag_end + cs + ce + 1;
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
2026-05-22 17:56:48 +08:00
|
|
|
|
"a" => {
|
|
|
|
|
|
// Capture anchor id
|
|
|
|
|
|
if heading_level.is_none() {
|
|
|
|
|
|
pending_anchor = extract_id_from_tag(tag_content);
|
|
|
|
|
|
}
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
2026-05-14 20:43:29 +08:00
|
|
|
|
"br" => {
|
|
|
|
|
|
if !out.is_empty() {
|
|
|
|
|
|
out.push('\n');
|
|
|
|
|
|
}
|
|
|
|
|
|
pos = tag_end + 1;
|
2026-05-13 23:02:04 +08:00
|
|
|
|
}
|
2026-05-14 20:43:29 +08:00
|
|
|
|
"hr" => {
|
|
|
|
|
|
if !out.is_empty() {
|
|
|
|
|
|
out.push_str("\n\n");
|
|
|
|
|
|
}
|
|
|
|
|
|
out.push_str("---\n\n");
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
"li" => {
|
|
|
|
|
|
if !out.is_empty() && !out.ends_with('\n') {
|
|
|
|
|
|
out.push('\n');
|
|
|
|
|
|
}
|
|
|
|
|
|
out.push_str("- ");
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
"/li" | "/dd" | "/dt" | "/ol" | "/ul" => {
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
2026-05-22 17:56:48 +08:00
|
|
|
|
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
|
|
|
|
|
|
let level = name[1..2].parse::<u32>().unwrap_or(1);
|
|
|
|
|
|
heading_level = Some(level);
|
|
|
|
|
|
if pending_anchor.is_none() {
|
|
|
|
|
|
pending_anchor = extract_id_from_tag(tag_content);
|
|
|
|
|
|
}
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
2026-05-15 12:09:11 +08:00
|
|
|
|
"p" | "div" | "blockquote" => {
|
2026-05-22 17:56:48 +08:00
|
|
|
|
if pending_anchor.is_none() {
|
|
|
|
|
|
pending_anchor = extract_id_from_tag(tag_content);
|
|
|
|
|
|
}
|
2026-05-15 12:09:11 +08:00
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
"/p" | "/div" | "/blockquote" => {
|
|
|
|
|
|
if !out.is_empty() && !out.ends_with('\n') {
|
|
|
|
|
|
out.push('\n');
|
|
|
|
|
|
}
|
|
|
|
|
|
out.push('\n');
|
2026-05-22 17:56:48 +08:00
|
|
|
|
pending_anchor = None;
|
2026-05-14 20:43:29 +08:00
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
2026-05-15 12:09:11 +08:00
|
|
|
|
"/h1" | "/h2" | "/h3" | "/h4" | "/h5" | "/h6" => {
|
|
|
|
|
|
heading_level = None;
|
2026-05-22 17:56:48 +08:00
|
|
|
|
pending_anchor = None;
|
2026-05-14 20:43:29 +08:00
|
|
|
|
if !out.is_empty() && !out.ends_with('\n') {
|
|
|
|
|
|
out.push('\n');
|
|
|
|
|
|
}
|
|
|
|
|
|
out.push('\n');
|
|
|
|
|
|
pos = tag_end + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
_ => {
|
2026-05-22 17:56:48 +08:00
|
|
|
|
if pending_anchor.is_none() {
|
|
|
|
|
|
pending_anchor = extract_id_from_tag(tag_content);
|
|
|
|
|
|
}
|
2026-05-14 20:43:29 +08:00
|
|
|
|
pos = tag_end + 1;
|
2026-05-13 23:02:04 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-05-14 20:43:29 +08:00
|
|
|
|
|
|
|
|
|
|
// Collapse 3+ consecutive newlines into 2
|
|
|
|
|
|
let out = out.trim();
|
|
|
|
|
|
let mut final_out = String::with_capacity(out.len());
|
|
|
|
|
|
let mut nl_count = 0usize;
|
|
|
|
|
|
for c in out.chars() {
|
|
|
|
|
|
if c == '\n' {
|
|
|
|
|
|
nl_count += 1;
|
|
|
|
|
|
if nl_count <= 2 {
|
|
|
|
|
|
final_out.push(c);
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
nl_count = 0;
|
|
|
|
|
|
final_out.push(c);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
final_out
|
2026-05-13 23:02:04 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// One node of the book's table of contents.
#[derive(Debug, Clone)]
pub struct TocEntry {
    /// Human-readable label shown for this entry.
    pub label: String,
    /// Index of the section this entry points to.
    pub section: usize,
    /// Fragment identifier (the part after `#`) of the entry's target, if any.
    pub anchor: Option<String>,
    /// Nested entries below this one.
    pub children: Vec<TocEntry>,
}
|
|
|
|
|
|
|
2026-05-16 12:11:26 +08:00
|
|
|
|
/// A single block of section text, either body text or a heading.
#[derive(Debug, Clone)]
pub struct ContentBlock {
    /// Text of the block.
    pub text: String,
    /// `0` for body text, `1`–`6` for `h1`–`h6`.
    pub heading_level: u8,
    /// Anchor associated with the block, if any.
    // NOTE(review): presumably the element id used as a TOC jump target — confirm against the code that builds blocks.
    pub anchor: Option<String>,
}
|
|
|
|
|
|
|
2026-05-13 23:02:04 +08:00
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
|
pub struct Section {
|
|
|
|
|
|
pub title: String,
|
|
|
|
|
|
pub content: String,
|
2026-05-16 12:11:26 +08:00
|
|
|
|
pub blocks: Vec<ContentBlock>,
|
|
|
|
|
|
pub page_block_ranges: Vec<(usize, usize)>,
|
2026-05-13 23:02:04 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-15 21:11:57 +08:00
|
|
|
|
/// Layout mode of a book.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BookLayout {
    /// Content reflows to fit the viewport.
    Reflowable,
    /// Content is pre-paginated (fixed layout).
    FixedLayout,
}

impl BookLayout {
    /// Returns the short user-facing label for this layout.
    ///
    /// The labels are string literals, so the result is `'static` and can
    /// outlive the `BookLayout` value it was obtained from.
    pub fn label(&self) -> &'static str {
        match self {
            BookLayout::Reflowable => "重排",
            BookLayout::FixedLayout => "固定",
        }
    }
}
|
|
|
|
|
|
|
2026-05-13 23:02:04 +08:00
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
|
pub struct Book {
|
|
|
|
|
|
pub title: String,
|
|
|
|
|
|
pub author: String,
|
|
|
|
|
|
pub cover: Option<Vec<u8>>,
|
2026-05-15 21:11:57 +08:00
|
|
|
|
pub layout: BookLayout,
|
2026-05-13 23:02:04 +08:00
|
|
|
|
pub sections: Vec<Section>,
|
|
|
|
|
|
pub toc: Vec<TocEntry>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-13 23:09:01 +08:00
|
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
2026-05-15 21:11:57 +08:00
|
|
|
|
fn detect_layout(doc: &mut epub::doc::EpubDoc<std::io::BufReader<std::fs::File>>) -> BookLayout {
|
|
|
|
|
|
if let Some(vals) = doc.metadata.get("rendition:layout") {
|
|
|
|
|
|
if vals.iter().any(|v| v == "pre-paginated") {
|
|
|
|
|
|
return BookLayout::FixedLayout;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if let Ok(opf) = doc.get_resource_str_by_path(&doc.root_file.clone()) {
|
|
|
|
|
|
if opf.contains("rendition:layout") && opf.contains("pre-paginated") {
|
|
|
|
|
|
return BookLayout::FixedLayout;
|
|
|
|
|
|
}
|
|
|
|
|
|
if opf.contains("rendition:layout-pre-paginated") {
|
|
|
|
|
|
return BookLayout::FixedLayout;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
BookLayout::Reflowable
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-13 23:09:01 +08:00
|
|
|
|
pub fn load_epub(path: impl AsRef<Path>) -> Result<Book, String> {
|
|
|
|
|
|
let path = path.as_ref();
|
|
|
|
|
|
let mut doc = epub::doc::EpubDoc::new(path)
|
|
|
|
|
|
.map_err(|e| format!("无法打开文件: {}", e))?;
|
|
|
|
|
|
|
2026-05-15 21:11:57 +08:00
|
|
|
|
let layout = detect_layout(&mut doc);
|
|
|
|
|
|
|
2026-05-13 23:09:01 +08:00
|
|
|
|
let title = doc.mdata("title").unwrap_or_else(|| "未知标题".to_string());
|
|
|
|
|
|
let author = doc.mdata("creator").unwrap_or_else(|| "未知作者".to_string());
|
|
|
|
|
|
let cover = doc.get_cover().ok();
|
2026-05-21 22:32:18 +08:00
|
|
|
|
let spine: Vec<String> = doc.spine.iter()
|
|
|
|
|
|
.filter(|id| {
|
|
|
|
|
|
if id.as_str() == "nav" { return false; }
|
|
|
|
|
|
if let Some((path, _)) = doc.resources.get(*id) {
|
|
|
|
|
|
let path_str = path.to_string_lossy().to_lowercase();
|
|
|
|
|
|
if path_str.ends_with("nav.xhtml") || path_str.ends_with("nav.html") {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
true
|
|
|
|
|
|
})
|
|
|
|
|
|
.cloned()
|
|
|
|
|
|
.collect();
|
2026-05-13 23:09:01 +08:00
|
|
|
|
|
2026-05-23 08:25:05 +08:00
|
|
|
|
let spine_paths: Vec<String> = spine.iter().map(|id| {
|
|
|
|
|
|
doc.resources.get(id)
|
|
|
|
|
|
.map(|(path, _)| path.to_string_lossy().to_string())
|
|
|
|
|
|
.unwrap_or_else(|| id.clone())
|
|
|
|
|
|
}).collect();
|
|
|
|
|
|
|
2026-05-13 23:09:01 +08:00
|
|
|
|
let mut sections = Vec::new();
|
|
|
|
|
|
for (i, href) in spine.iter().enumerate() {
|
|
|
|
|
|
let raw_html = doc.get_resource_str(href)
|
|
|
|
|
|
.map_err(|e| format!("读取章节失败: {}", e))?;
|
|
|
|
|
|
let text = strip_html(&raw_html);
|
|
|
|
|
|
let title = extract_title(&raw_html)
|
|
|
|
|
|
.unwrap_or_else(|| format!("第{}章", i + 1));
|
|
|
|
|
|
sections.push(Section {
|
|
|
|
|
|
title,
|
|
|
|
|
|
content: text,
|
2026-05-16 12:11:26 +08:00
|
|
|
|
blocks: Vec::new(),
|
|
|
|
|
|
page_block_ranges: Vec::new(),
|
2026-05-13 23:09:01 +08:00
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-13 23:16:50 +08:00
|
|
|
|
let raw_toc = std::mem::take(&mut doc.toc);
|
2026-05-23 08:25:05 +08:00
|
|
|
|
let toc = build_toc(&raw_toc, &spine, &spine_paths);
|
2026-05-13 23:09:01 +08:00
|
|
|
|
|
2026-05-15 21:11:57 +08:00
|
|
|
|
Ok(Book { title, author, cover, layout, sections, toc })
|
2026-05-13 23:09:01 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn extract_title(html: &str) -> Option<String> {
|
|
|
|
|
|
if let Some(start) = html.find("<title>") {
|
|
|
|
|
|
let rest = &html[start + 7..];
|
|
|
|
|
|
if let Some(end) = rest.find("</title>") {
|
|
|
|
|
|
return Some(strip_html(&rest[..end]).trim().to_string());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if let Some(start) = html.find("<h1") {
|
|
|
|
|
|
let rest = &html[start..];
|
|
|
|
|
|
if let Some(content_start) = rest.find('>') {
|
|
|
|
|
|
let inner = &rest[content_start + 1..];
|
|
|
|
|
|
if let Some(end) = inner.find("</h1>") {
|
|
|
|
|
|
return Some(strip_html(&inner[..end]).trim().to_string());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
None
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-21 22:32:18 +08:00
|
|
|
|
/// Returns the last component of `path`, treating both `/` and `\` as
/// separators.
///
/// Trailing `/` characters, then trailing `\` characters, are removed before
/// the last separator is located. A path without separators is returned as is.
fn extract_filename(path: &str) -> &str {
    let trimmed = path.trim_end_matches('/').trim_end_matches('\\');
    match trimmed.rfind(|c: char| c == '/' || c == '\\') {
        Some(separator) => &trimmed[separator + 1..],
        None => trimmed,
    }
}
|
|
|
|
|
|
|
2026-05-22 17:56:48 +08:00
|
|
|
|
/// Returns the fragment of `path`: everything after the first `#`.
///
/// Yields `None` when there is no `#` or nothing follows it.
fn extract_fragment(path: &str) -> Option<String> {
    path.splitn(2, '#')
        .nth(1)
        .filter(|fragment| !fragment.is_empty())
        .map(|fragment| fragment.to_string())
}
|
|
|
|
|
|
|
2026-05-13 23:09:01 +08:00
|
|
|
|
fn build_toc(
|
|
|
|
|
|
entries: &[epub::doc::NavPoint],
|
|
|
|
|
|
spine: &[String],
|
2026-05-23 08:25:05 +08:00
|
|
|
|
spine_paths: &[String],
|
2026-05-13 23:09:01 +08:00
|
|
|
|
) -> Vec<TocEntry> {
|
|
|
|
|
|
entries
|
|
|
|
|
|
.iter()
|
|
|
|
|
|
.map(|e| {
|
|
|
|
|
|
let content_str = e.content.to_string_lossy();
|
2026-05-22 17:56:48 +08:00
|
|
|
|
let anchor = extract_fragment(&content_str);
|
2026-05-21 22:32:18 +08:00
|
|
|
|
let content_file = extract_filename(&content_str);
|
2026-05-23 08:25:05 +08:00
|
|
|
|
let section = spine_paths
|
2026-05-13 23:09:01 +08:00
|
|
|
|
.iter()
|
2026-05-21 22:32:18 +08:00
|
|
|
|
.position(|s| {
|
|
|
|
|
|
let spine_file = extract_filename(s);
|
2026-05-23 08:25:05 +08:00
|
|
|
|
if spine_file == content_file {
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
content_str.contains(s.as_str()) || s.contains(content_str.as_ref())
|
|
|
|
|
|
})
|
|
|
|
|
|
.or_else(|| {
|
|
|
|
|
|
spine.iter().position(|s| {
|
|
|
|
|
|
let spine_file = extract_filename(s);
|
|
|
|
|
|
spine_file == content_file
|
|
|
|
|
|
|| content_str.contains(s.as_str())
|
|
|
|
|
|
|| s.contains(content_str.as_ref())
|
|
|
|
|
|
})
|
2026-05-21 22:32:18 +08:00
|
|
|
|
})
|
2026-05-13 23:09:01 +08:00
|
|
|
|
.unwrap_or(0);
|
|
|
|
|
|
TocEntry {
|
|
|
|
|
|
label: e.label.clone(),
|
|
|
|
|
|
section,
|
2026-05-22 17:56:48 +08:00
|
|
|
|
anchor,
|
2026-05-23 08:25:05 +08:00
|
|
|
|
children: build_toc(&e.children, spine, spine_paths),
|
2026-05-13 23:09:01 +08:00
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-13 23:02:04 +08:00
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_epub_loader_nonexistent_file() {
        let result = load_epub("nonexistent.epub");
        assert!(result.is_err());
    }

    #[test]
    fn test_strip_html_plain_text() {
        assert_eq!(strip_html("Hello World"), "Hello World");
    }

    #[test]
    fn test_strip_html_simple_tags() {
        assert_eq!(strip_html("<p>Hello</p>"), "Hello");
    }

    #[test]
    fn test_strip_html_nested_tags() {
        assert_eq!(
            strip_html("<div><p>Hello <b>World</b></p></div>"),
            "Hello World"
        );
    }

    #[test]
    fn test_strip_html_html_entities() {
        // The inputs must contain real entities, otherwise nothing is decoded.
        assert_eq!(strip_html("Hello &amp; World"), "Hello & World");
        assert_eq!(strip_html("1 &lt; 2 &gt; 0"), "1 < 2 > 0");
        assert_eq!(strip_html("Hello World"), "Hello World");
    }

    #[test]
    fn test_strip_html_empty() {
        assert_eq!(strip_html(""), "");
    }

    #[test]
    fn test_extract_title_from_title_tag() {
        let html = "<html><head><title>My Book Title</title></head><body></body></html>";
        assert_eq!(extract_title(html), Some("My Book Title".to_string()));
    }

    #[test]
    fn test_extract_title_from_h1() {
        let html = "<html><body><h1>Chapter One</h1><p>text</p></body></html>";
        assert_eq!(extract_title(html), Some("Chapter One".to_string()));
    }

    #[test]
    fn test_extract_title_prefers_title() {
        let html = "<html><head><title>Book</title></head><body><h1>Chapter</h1></body></html>";
        assert_eq!(extract_title(html), Some("Book".to_string()));
    }

    #[test]
    fn test_extract_title_missing() {
        assert_eq!(extract_title("<html><body><p>no title</p></body></html>"), None);
    }

    #[test]
    fn test_extract_title_empty() {
        assert_eq!(extract_title(""), None);
    }

    #[test]
    fn test_html_to_plain_paragraphs() {
        let html = "<p>第一段</p><p>第二段</p>";
        let result = strip_html(html);
        assert!(result.contains("第一段"));
        assert!(result.contains("第二段"));
        assert!(result.contains('\n'));
        assert!(result.ends_with("第二段"));
    }

    #[test]
    fn test_html_to_plain_heading() {
        let html = "<h1>标题</h1><p>正文</p>";
        let result = strip_html(html);
        assert!(result.contains("标题"));
        assert!(result.contains("正文"));
        assert!(result.contains('\n'));
    }

    #[test]
    fn test_html_to_plain_list() {
        let html = "<ul><li>项目一</li><li>项目二</li></ul>";
        let result = strip_html(html);
        assert!(result.starts_with("- "));
        assert!(result.contains("项目一"));
        assert!(result.contains("项目二"));
    }

    #[test]
    fn test_html_to_plain_br() {
        let html = "第一行<br>第二行";
        let result = strip_html(html);
        assert_eq!(result, "第一行\n第二行");
    }

    #[test]
    fn test_html_to_plain_skip_script() {
        let html = "<p>正文</p><script>var x=1;</script><p>更多正文</p>";
        let result = strip_html(html);
        assert!(result.contains("正文"));
        assert!(result.contains("更多正文"));
        assert!(!result.contains("var x=1"));
    }

    #[test]
    fn test_html_to_plain_line_break_collapse() {
        let html = "<p>段一</p><p>段二</p><p>段三</p>";
        let result = strip_html(html);
        let non_empty: Vec<&str> = result.lines().filter(|l| !l.is_empty()).collect();
        assert_eq!(non_empty.len(), 3);
        assert_eq!(non_empty[0], "段一");
        assert_eq!(non_empty[1], "段二");
        assert_eq!(non_empty[2], "段三");
    }

    #[test]
    fn test_build_toc_empty() {
        let toc = build_toc(&[], &[], &[]);
        assert!(toc.is_empty());
    }

    #[test]
    fn test_load_sample_epub_nav_filtered() {
        let book = load_epub("sample-short.epub").expect("Failed to load sample epub");
        // Nav document is filtered out, leaving only chapter_0 as the single section
        assert_eq!(book.sections.len(), 1);
        assert_eq!(book.sections[0].title, "Understanding Digital Formats");
    }

    #[test]
    fn test_toc_section_bounds() {
        let book = load_epub("sample-short.epub").expect("Failed to load sample epub");
        // All TOC section indices should be within sections range
        fn check_bounds(entries: &[TocEntry], max: usize) {
            for e in entries {
                assert!(
                    e.section < max,
                    "TOC entry '{}' maps to section {} but only {} sections exist",
                    e.label,
                    e.section,
                    max
                );
                check_bounds(&e.children, max);
            }
        }
        check_bounds(&book.toc, book.sections.len());
    }

    #[test]
    fn test_build_toc_filename_matching() {
        use epub::doc::NavPoint;
        let spine = vec![
            "OEBPS/Text/chapter1.xhtml".to_string(),
            "OEBPS/Text/chapter2.xhtml".to_string(),
        ];
        let nav_points = vec![NavPoint {
            label: "Chapter 2".to_string(),
            content: std::path::PathBuf::from("Text/chapter2.xhtml"),
            play_order: 1,
            children: vec![],
        }];
        let toc = build_toc(&nav_points, &spine, &spine);
        assert_eq!(toc.len(), 1);
        assert_eq!(toc[0].section, 1); // maps to spine index 1
        assert_eq!(toc[0].label, "Chapter 2");
    }

    #[test]
    fn test_build_toc_exact_path_match() {
        use epub::doc::NavPoint;
        let spine = vec![
            "chapter1.xhtml".to_string(),
            "chapter2.xhtml".to_string(),
        ];
        let nav_points = vec![NavPoint {
            label: "Chapter 1".to_string(),
            content: std::path::PathBuf::from("chapter1.xhtml"),
            play_order: 1,
            children: vec![],
        }];
        let toc = build_toc(&nav_points, &spine, &spine);
        assert_eq!(toc.len(), 1);
        assert_eq!(toc[0].section, 0);
    }
}
|