Add anchor-based TOC navigation: parse #fragment anchors, find exact page within section

This commit is contained in:
2026-05-22 17:56:48 +08:00
parent 21e9aba274
commit 1d2407098c
3 changed files with 109 additions and 12 deletions

View File

@@ -43,10 +43,33 @@ fn tag_name_from(tag_content: &str) -> &str {
.trim_end_matches('/')
}
/// Returns the value of the `id` attribute found in the text between a tag's
/// `<` and `>`, or `None` when there is no such attribute or its value is empty.
///
/// The input is walked as a sequence of `name`, `name="value"` or
/// `name='value'` tokens (the tag name itself is simply a token without a
/// value). Only an attribute whose name is exactly `id` is accepted, so
/// look-alikes such as `data-id="…"` or `grid="…"` are ignored, as is any
/// `id=` text that merely appears inside another attribute's quoted value.
/// Whitespace around `=` is tolerated.
///
/// Unquoted values (`id=foo`) are skipped rather than returned, and an
/// unterminated quoted value ends the scan with `None`.
fn extract_id_from_tag(tag_content: &str) -> Option<String> {
    /// Characters that separate attribute tokens: whitespace and the `/` of a
    /// self-closing tag.
    fn is_separator(c: char) -> bool {
        c.is_whitespace() || c == '/'
    }

    let mut rest = tag_content;
    loop {
        rest = rest.trim_start_matches(is_separator);
        if rest.is_empty() {
            return None;
        }

        // Attribute (or tag) name: everything up to `=`, whitespace or `/`.
        let name_len = rest
            .find(|c: char| c == '=' || is_separator(c))
            .unwrap_or(rest.len());
        let (name, after_name) = rest.split_at(name_len);

        // A token without `=` carries no value (tag name, boolean attribute).
        let after_name = after_name.trim_start();
        if !after_name.starts_with('=') {
            rest = after_name;
            continue;
        }
        let value = after_name[1..].trim_start();

        let quote = match value.chars().next() {
            Some(q) if q == '"' || q == '\'' => q,
            _ => {
                // Unquoted (or missing) value: step over it and keep scanning.
                let end = value.find(char::is_whitespace).unwrap_or(value.len());
                rest = &value[end..];
                continue;
            }
        };

        // Both quote characters are one byte wide, so slicing at 1 is safe.
        let body = &value[1..];
        let end = body.find(quote)?;
        if name == "id" && end > 0 {
            return Some(body[..end].to_string());
        }
        rest = &body[end + 1..];
    }
}
pub fn strip_html(input: &str) -> String {
let mut out = String::with_capacity(input.len());
let mut pos = 0;
let mut heading_level: Option<u32> = None;
let mut pending_anchor: Option<String> = None;
while pos < input.len() {
// Find next '<'
@@ -68,8 +91,14 @@ pub fn strip_html(input: &str) -> String {
if let Some(level) = heading_level {
out.push('\x01');
out.push(char::from_digit(level, 10).unwrap_or('1'));
if let Some(ref anchor) = pending_anchor {
out.push('\x03');
out.push_str(anchor);
out.push('\x04');
}
out.push_str(&text);
out.push('\x02');
pending_anchor = None;
} else {
out.push_str(&text);
}
@@ -101,6 +130,13 @@ pub fn strip_html(input: &str) -> String {
}
pos = tag_end + 1;
}
"a" => {
// Capture anchor id
if heading_level.is_none() {
pending_anchor = extract_id_from_tag(tag_content);
}
pos = tag_end + 1;
}
"br" => {
if !out.is_empty() {
out.push('\n');
@@ -124,13 +160,18 @@ pub fn strip_html(input: &str) -> String {
"/li" | "/dd" | "/dt" | "/ol" | "/ul" => {
pos = tag_end + 1;
}
"h1" => { heading_level = Some(1); pos = tag_end + 1; }
"h2" => { heading_level = Some(2); pos = tag_end + 1; }
"h3" => { heading_level = Some(3); pos = tag_end + 1; }
"h4" => { heading_level = Some(4); pos = tag_end + 1; }
"h5" => { heading_level = Some(5); pos = tag_end + 1; }
"h6" => { heading_level = Some(6); pos = tag_end + 1; }
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
let level = name[1..2].parse::<u32>().unwrap_or(1);
heading_level = Some(level);
if pending_anchor.is_none() {
pending_anchor = extract_id_from_tag(tag_content);
}
pos = tag_end + 1;
}
"p" | "div" | "blockquote" => {
if pending_anchor.is_none() {
pending_anchor = extract_id_from_tag(tag_content);
}
pos = tag_end + 1;
}
"/p" | "/div" | "/blockquote" => {
@@ -138,10 +179,12 @@ pub fn strip_html(input: &str) -> String {
out.push('\n');
}
out.push('\n');
pending_anchor = None;
pos = tag_end + 1;
}
"/h1" | "/h2" | "/h3" | "/h4" | "/h5" | "/h6" => {
heading_level = None;
pending_anchor = None;
if !out.is_empty() && !out.ends_with('\n') {
out.push('\n');
}
@@ -149,6 +192,9 @@ pub fn strip_html(input: &str) -> String {
pos = tag_end + 1;
}
_ => {
if pending_anchor.is_none() {
pending_anchor = extract_id_from_tag(tag_content);
}
pos = tag_end + 1;
}
}
@@ -177,6 +223,7 @@ pub fn strip_html(input: &str) -> String {
/// One node of the table-of-contents tree; nested entries live in `children`.
pub struct TocEntry {
/// Human-readable title of the entry.
pub label: String,
/// Section number this entry points at.
/// NOTE(review): appears to be an index derived from the spine in
/// `build_toc`; the lookup is not fully visible here — confirm.
pub section: usize,
/// Fragment identifier (the text after `#`) of the entry's target path, or
/// `None` when the path has no non-empty fragment.
pub anchor: Option<String>,
/// Sub-entries nested under this one.
pub children: Vec<TocEntry>,
}
@@ -184,6 +231,7 @@ pub struct TocEntry {
/// A piece of text together with its heading level and optional anchor.
pub struct ContentBlock {
/// Text of the block.
pub text: String,
/// Heading depth of the block: 0 for body text, 1-6 for `h1`-`h6`.
pub heading_level: u8, // 0 = body, 1-6 = h1-h6
/// Anchor id attached to this block, if any.
/// NOTE(review): presumably the element `id` captured by `strip_html`;
/// the code that fills this field is not visible here — confirm.
pub anchor: Option<String>,
}
#[derive(Debug, Clone)]
@@ -307,6 +355,19 @@ fn extract_filename(path: &str) -> &str {
path.rsplit('/').next().unwrap_or(path)
}
/// Returns the fragment of `path`: everything after the first `#`.
///
/// Yields `None` when `path` contains no `#` or when nothing follows it.
/// The fragment is returned verbatim (no percent-decoding), and any further
/// `#` characters are kept as part of it.
fn extract_fragment(path: &str) -> Option<String> {
    path.find('#')
        .map(|hash_at| &path[hash_at + 1..])
        .filter(|frag| !frag.is_empty())
        .map(str::to_string)
}
fn build_toc(
entries: &[epub::doc::NavPoint],
spine: &[String],
@@ -315,6 +376,7 @@ fn build_toc(
.iter()
.map(|e| {
let content_str = e.content.to_string_lossy();
let anchor = extract_fragment(&content_str);
let content_file = extract_filename(&content_str);
let section = spine
.iter()
@@ -328,6 +390,7 @@ fn build_toc(
TocEntry {
label: e.label.clone(),
section,
anchor,
children: build_toc(&e.children, spine),
}
})