Add anchor-based TOC navigation: parse #fragment anchors, find exact page within section
This commit is contained in:
75
src/book.rs
75
src/book.rs
@@ -43,10 +43,33 @@ fn tag_name_from(tag_content: &str) -> &str {
|
||||
.trim_end_matches('/')
|
||||
}
|
||||
|
||||
/// Extracts the value of the `id` attribute from the raw text of a tag.
///
/// `tag_content` is the tag's text including its attributes, for example
/// `a id="ch1" href="x"`. Both double-quoted and single-quoted values are
/// accepted.
///
/// Only a standalone `id` attribute is matched: the `id=` must sit at the
/// very start of `tag_content` or directly after whitespace. This keeps
/// attributes whose names merely end in `id` (`data-id`, `guid`, `xml:id`)
/// from being mistaken for the element id.
///
/// Returns `None` when there is no `id` attribute with a non-empty, properly
/// terminated quoted value.
fn extract_id_from_tag(tag_content: &str) -> Option<String> {
    for (pos, _) in tag_content.match_indices("id=") {
        // Reject matches that are the tail of a longer attribute name.
        let standalone = tag_content[..pos]
            .chars()
            .next_back()
            .map_or(true, char::is_whitespace);
        if !standalone {
            continue;
        }

        // "id=" is three ASCII bytes, so this slice starts on a char boundary.
        let rest = &tag_content[pos + 3..];
        let quote = match rest.chars().next() {
            Some(q) if q == '"' || q == '\'' => q,
            _ => continue,
        };

        // The opening quote is a single ASCII byte; the value runs up to the
        // next occurrence of the same quote character.
        let value = &rest[1..];
        if let Some(end) = value.find(quote) {
            if end > 0 {
                return Some(value[..end].to_string());
            }
        }
    }
    None
}
|
||||
|
||||
pub fn strip_html(input: &str) -> String {
|
||||
let mut out = String::with_capacity(input.len());
|
||||
let mut pos = 0;
|
||||
let mut heading_level: Option<u32> = None;
|
||||
let mut pending_anchor: Option<String> = None;
|
||||
|
||||
while pos < input.len() {
|
||||
// Find next '<'
|
||||
@@ -68,8 +91,14 @@ pub fn strip_html(input: &str) -> String {
|
||||
if let Some(level) = heading_level {
|
||||
out.push('\x01');
|
||||
out.push(char::from_digit(level, 10).unwrap_or('1'));
|
||||
if let Some(ref anchor) = pending_anchor {
|
||||
out.push('\x03');
|
||||
out.push_str(anchor);
|
||||
out.push('\x04');
|
||||
}
|
||||
out.push_str(&text);
|
||||
out.push('\x02');
|
||||
pending_anchor = None;
|
||||
} else {
|
||||
out.push_str(&text);
|
||||
}
|
||||
@@ -101,6 +130,13 @@ pub fn strip_html(input: &str) -> String {
|
||||
}
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
"a" => {
|
||||
// Capture anchor id
|
||||
if heading_level.is_none() {
|
||||
pending_anchor = extract_id_from_tag(tag_content);
|
||||
}
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
"br" => {
|
||||
if !out.is_empty() {
|
||||
out.push('\n');
|
||||
@@ -124,13 +160,18 @@ pub fn strip_html(input: &str) -> String {
|
||||
"/li" | "/dd" | "/dt" | "/ol" | "/ul" => {
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
"h1" => { heading_level = Some(1); pos = tag_end + 1; }
|
||||
"h2" => { heading_level = Some(2); pos = tag_end + 1; }
|
||||
"h3" => { heading_level = Some(3); pos = tag_end + 1; }
|
||||
"h4" => { heading_level = Some(4); pos = tag_end + 1; }
|
||||
"h5" => { heading_level = Some(5); pos = tag_end + 1; }
|
||||
"h6" => { heading_level = Some(6); pos = tag_end + 1; }
|
||||
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
|
||||
let level = name[1..2].parse::<u32>().unwrap_or(1);
|
||||
heading_level = Some(level);
|
||||
if pending_anchor.is_none() {
|
||||
pending_anchor = extract_id_from_tag(tag_content);
|
||||
}
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
"p" | "div" | "blockquote" => {
|
||||
if pending_anchor.is_none() {
|
||||
pending_anchor = extract_id_from_tag(tag_content);
|
||||
}
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
"/p" | "/div" | "/blockquote" => {
|
||||
@@ -138,10 +179,12 @@ pub fn strip_html(input: &str) -> String {
|
||||
out.push('\n');
|
||||
}
|
||||
out.push('\n');
|
||||
pending_anchor = None;
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
"/h1" | "/h2" | "/h3" | "/h4" | "/h5" | "/h6" => {
|
||||
heading_level = None;
|
||||
pending_anchor = None;
|
||||
if !out.is_empty() && !out.ends_with('\n') {
|
||||
out.push('\n');
|
||||
}
|
||||
@@ -149,6 +192,9 @@ pub fn strip_html(input: &str) -> String {
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
_ => {
|
||||
if pending_anchor.is_none() {
|
||||
pending_anchor = extract_id_from_tag(tag_content);
|
||||
}
|
||||
pos = tag_end + 1;
|
||||
}
|
||||
}
|
||||
@@ -177,6 +223,7 @@ pub fn strip_html(input: &str) -> String {
|
||||
/// One entry in the table-of-contents tree.
pub struct TocEntry {
    /// Label text of the entry.
    pub label: String,
    /// Section this entry points to.
    /// NOTE(review): presumably an index into the spine — confirm against `build_toc`.
    pub section: usize,
    /// Optional anchor within the section; `build_toc` fills this from the
    /// `#fragment` part of the nav point's content path, if there is one.
    pub anchor: Option<String>,
    /// Entries nested below this one.
    pub children: Vec<TocEntry>,
}
|
||||
|
||||
@@ -184,6 +231,7 @@ pub struct TocEntry {
|
||||
/// A block of text together with its heading level and an optional anchor.
pub struct ContentBlock {
    /// Text of the block.
    pub text: String,
    pub heading_level: u8, // 0 = body, 1-6 = h1-h6
    /// Optional anchor attached to this block.
    /// NOTE(review): presumably the element id captured while stripping HTML — confirm against the parser.
    pub anchor: Option<String>,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -307,6 +355,19 @@ fn extract_filename(path: &str) -> &str {
|
||||
path.rsplit('/').next().unwrap_or(path)
|
||||
}
|
||||
|
||||
/// Returns the fragment of `path`: everything after the first `#`.
///
/// Yields `None` when `path` contains no `#` or when nothing follows it.
fn extract_fragment(path: &str) -> Option<String> {
    let hash_pos = path.find('#')?;
    let fragment = &path[hash_pos + 1..];
    if fragment.is_empty() {
        return None;
    }
    Some(fragment.to_string())
}
|
||||
|
||||
fn build_toc(
|
||||
entries: &[epub::doc::NavPoint],
|
||||
spine: &[String],
|
||||
@@ -315,6 +376,7 @@ fn build_toc(
|
||||
.iter()
|
||||
.map(|e| {
|
||||
let content_str = e.content.to_string_lossy();
|
||||
let anchor = extract_fragment(&content_str);
|
||||
let content_file = extract_filename(&content_str);
|
||||
let section = spine
|
||||
.iter()
|
||||
@@ -328,6 +390,7 @@ fn build_toc(
|
||||
TocEntry {
|
||||
label: e.label.clone(),
|
||||
section,
|
||||
anchor,
|
||||
children: build_toc(&e.children, spine),
|
||||
}
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user