diff --git a/src/book.rs b/src/book.rs index 3d670b9..613419c 100644 --- a/src/book.rs +++ b/src/book.rs @@ -1,37 +1,155 @@ -pub fn strip_html(input: &str) -> String { - let mut result = String::with_capacity(input.len()); - let mut in_tag = false; - let mut in_entity = false; - let mut entity = String::new(); - - for c in input.chars() { - match c { - '<' => in_tag = true, - '>' if in_tag => in_tag = false, - '&' if !in_tag => { - in_entity = true; - entity.clear(); +fn decode_entities(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + let mut chars = text.chars(); + while let Some(c) = chars.next() { + if c == '&' { + let mut entity = String::new(); + for ec in &mut chars { + if ec == ';' { + break; + } + entity.push(ec); } - ';' if in_entity => { - in_entity = false; - let decoded = match entity.as_str() { - "amp" => "&", - "lt" => "<", - "gt" => ">", - "quot" => "\"", - "nbsp" => " ", - _ => "", - }; - result.push_str(decoded); - } - c if !in_tag && !in_entity => result.push(c), - c if in_entity => entity.push(c), - _ => {} + let decoded = match entity.as_str() { + "amp" => "&", + "lt" => "<", + "gt" => ">", + "quot" => "\"", + "nbsp" => " ", + "emsp" => " ", + "ensp" => " ", + "mdash" => "—", + "ndash" => "–", + "ldquo" => "\"", + "rdquo" => "\"", + "lsquo" => "'", + "rsquo" => "'", + "hellip" => "…", + _ => "", + }; + result.push_str(decoded); + } else { + result.push(c); } } result } +fn tag_name_from(tag_content: &str) -> &str { + tag_content + .split_whitespace() + .next() + .unwrap_or("") + .trim_end_matches('/') +} + +pub fn strip_html(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + let mut pos = 0; + + while pos < input.len() { + // Find next '<' + let remaining = &input[pos..]; + let tag_start = remaining.find('<'); + + let tag_start = match tag_start { + Some(s) => pos + s, + None => { + // No more tags, emit remaining text + out.push_str(&decode_entities(&input[pos..])); + break; + } + }; + + // Emit text before the tag + if tag_start > pos { + out.push_str(&decode_entities(&input[pos..tag_start])); + } + + // Find '>' to close the tag + let tag_end = match input[tag_start..].find('>') { + Some(i) => tag_start + i, + None => { + // Unclosed tag, emit rest as text + out.push_str(&decode_entities(&input[tag_start..])); + break; + } + }; + + let tag_content = &input[tag_start + 1..tag_end]; + let name = tag_name_from(tag_content); + + match name { + "script" | "style" => { + // Skip content until closing tag + let close_tag = format!("'); + if let Some(ce) = close_tag_end { + pos = tag_end + cs + ce + 1; + continue; + } + } + pos = tag_end + 1; + } + "br" => { + if !out.is_empty() { + out.push('\n'); + } + pos = tag_end + 1; + } + "hr" => { + if !out.is_empty() { + out.push_str("\n\n"); + } + out.push_str("---\n\n"); + pos = tag_end + 1; + } + "li" => { + if !out.is_empty() && !out.ends_with('\n') { + out.push('\n'); + } + out.push_str("- "); + pos = tag_end + 1; + } + "/li" | "/dd" | "/dt" | "/ol" | "/ul" => { + pos = tag_end + 1; + } + "p" | "div" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "blockquote" => { + pos = tag_end + 1; + } + "/p" | "/div" | "/blockquote" | "/h1" | "/h2" | "/h3" | "/h4" | "/h5" | "/h6" => { + if !out.is_empty() && !out.ends_with('\n') { + out.push('\n'); + } + out.push('\n'); + pos = tag_end + 1; + } + _ => { + pos = tag_end + 1; + } + } + } + + // Collapse 3+ consecutive newlines into 2 + let out = out.trim(); + let mut final_out = String::with_capacity(out.len()); + let mut nl_count = 0usize; + for c in out.chars() { + if c == '\n' { + nl_count += 1; + if nl_count <= 2 { + final_out.push(c); + } + } else { + nl_count = 0; + final_out.push(c); + } + } + + final_out +} + #[derive(Debug, Clone)] pub struct TocEntry { pub label: String, @@ -196,6 +314,61 @@ mod tests { assert_eq!(extract_title(""), None); } + #[test] + fn test_html_to_plain_paragraphs() { + let html = "

第一段

第二段

"; + let result = strip_html(html); + assert!(result.contains("第一段")); + assert!(result.contains("第二段")); + assert!(result.contains('\n')); + assert!(result.ends_with("第二段")); + } + + #[test] + fn test_html_to_plain_heading() { + let html = "

标题

正文

"; + let result = strip_html(html); + assert!(result.contains("标题")); + assert!(result.contains("正文")); + assert!(result.contains('\n')); + } + + #[test] + fn test_html_to_plain_list() { + let html = ""; + let result = strip_html(html); + assert!(result.starts_with("- ")); + assert!(result.contains("项目一")); + assert!(result.contains("项目二")); + } + + #[test] + fn test_html_to_plain_br() { + let html = "第一行
第二行"; + let result = strip_html(html); + assert_eq!(result, "第一行\n第二行"); + } + + #[test] + fn test_html_to_plain_skip_script() { + let html = "

正文

更多正文

"; + let result = strip_html(html); + assert!(result.contains("正文")); + assert!(result.contains("更多正文")); + assert!(!result.contains("var x=1")); + } + + #[test] + fn test_html_to_plain_line_break_collapse() { + let html = "

段一

段二

段三

"; + let result = strip_html(html); + let non_empty: Vec<&str> = result.lines().filter(|l| !l.is_empty()).collect(); + assert_eq!(non_empty.len(), 3); + assert_eq!(non_empty[0], "段一"); + assert_eq!(non_empty[1], "段二"); + assert_eq!(non_empty[2], "段三"); + } + #[test] fn test_build_toc_empty() { let toc = build_toc(&[], &[]); diff --git a/src/reader.rs b/src/reader.rs index 5c9a5e7..64bd57d 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -160,14 +160,22 @@ pub fn reading_view( // --- Center text area --- egui::CentralPanel::default().show_inside(ui, |ui| { - let (rect, response) = ui.allocate_at_least(ui.available_size(), egui::Sense::click()); + let available = ui.available_size(); + let (rect, response) = ui.allocate_at_least(available, egui::Sense::click()); + + // Add reading margins (inset) + let inset = 24.0; + let text_rect = egui::Rect::from_min_size( + egui::pos2(rect.min.x + inset, rect.min.y), + egui::vec2((rect.width() - inset * 2.0).max(100.0), rect.height()), + ); if let Some(section) = book.sections.get(*current_section) { if *current_page < section.pages.len().saturating_sub(1) { let start = section.pages[*current_page]; let end = section.pages[*current_page + 1]; let text: String = section.content.chars().skip(start).take(end - start).collect(); - ui.put(rect, |ui: &mut egui::Ui| { + ui.put(text_rect, |ui: &mut egui::Ui| { ui.add( egui::Label::new( egui::RichText::new(&text) @@ -242,15 +250,49 @@ pub fn calculate_pages(text: &str, chars_per_page: usize) -> Vec { return pages; } - let total_chars = text.chars().count(); + let chars: Vec = text.chars().collect(); + let total_chars = chars.len(); if total_chars <= chars_per_page { pages.push(total_chars); return pages; } - let mut pos = 0; + let mut pos: usize = 0; while pos < total_chars { - pos = (pos + chars_per_page).min(total_chars); + let next = pos + chars_per_page; + if next >= total_chars { + pages.push(total_chars); + break; + } + + // Search backward from next for paragraph (\n\n) or line (\n) breaks + let search_start = pos + chars_per_page / 2; + let search_end = (next + chars_per_page / 2).min(total_chars); + let mut split = next; + + // Prefer double newline (paragraph), then single newline + let mut found = false; + for i in (search_start..search_end).rev() { + if chars[i] == '\n' && i > 0 && chars[i - 1] == '\n' { + split = i - 1; + found = true; + break; + } + } + if !found { + for i in (search_start..search_end).rev() { + if chars[i] == '\n' { + split = i + 1; + break; + } + } + } + + if split <= pos { + split = next; + } + + pos = split.min(total_chars); pages.push(pos); }