2026-05-13 23:02:04 +08:00
|
|
|
/// Removes HTML markup from `input` and decodes character references,
/// returning the remaining plain text.
///
/// * Everything from a `<` up to the next `>` is treated as a tag and dropped.
/// * The named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
///   `&nbsp;` are decoded (`&nbsp;` becomes an ordinary space).
/// * Numeric references such as `&#39;` or `&#x27;` are decoded to the
///   corresponding character.
/// * References that cannot be decoded are copied through unchanged, and a
///   `&` that does not start a reference at all (as in `Tom & Jerry`) is kept
///   as literal text instead of swallowing the rest of the input.
///
/// This is a lightweight scanner, not an HTML parser: comments, `<script>`
/// bodies and a `>` inside an attribute value get no special treatment.
pub fn strip_html(input: &str) -> String {
    // Upper bound on the buffered reference name. Once exceeded, the `&` is
    // considered literal text so that a stray ampersand cannot buffer an
    // arbitrarily long run of alphanumeric characters.
    const MAX_ENTITY_LEN: usize = 32;

    let mut result = String::with_capacity(input.len());
    let mut in_tag = false;
    let mut in_entity = false;
    let mut entity = String::new();

    for c in input.chars() {
        if in_tag {
            // Tag contents are discarded; only the closing `>` matters.
            if c == '>' {
                in_tag = false;
            }
            continue;
        }

        if in_entity {
            if c == ';' {
                in_entity = false;
                match decode_entity(&entity) {
                    Some(decoded) => result.push(decoded),
                    None => {
                        // Unknown reference: keep the source text rather than
                        // silently losing content.
                        result.push('&');
                        result.push_str(&entity);
                        result.push(';');
                    }
                }
                continue;
            }

            let is_name_char = c.is_ascii_alphanumeric() || c == '#';
            if is_name_char && entity.len() < MAX_ENTITY_LEN {
                entity.push(c);
                continue;
            }

            // `c` cannot be part of a reference, so the `&` was literal text.
            // Emit what was buffered and let `c` be handled normally below.
            in_entity = false;
            result.push('&');
            result.push_str(&entity);
        }

        match c {
            '<' => in_tag = true,
            '&' => {
                in_entity = true;
                entity.clear();
            }
            _ => result.push(c),
        }
    }

    // Input ended in the middle of a would-be reference: keep it verbatim.
    if in_entity {
        result.push('&');
        result.push_str(&entity);
    }

    result
}

/// Decodes the text found between `&` and `;` into a single character.
///
/// Returns `None` when `name` is neither one of the supported named
/// references nor a valid decimal (`#NNN`) or hexadecimal (`#xHHH`) numeric
/// reference.
fn decode_entity(name: &str) -> Option<char> {
    match name {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some(' '),
        _ => {
            let digits = name.strip_prefix('#')?;
            let code = match digits
                .strip_prefix('x')
                .or_else(|| digits.strip_prefix('X'))
            {
                Some(hex) => u32::from_str_radix(hex, 16),
                None => digits.parse::<u32>(),
            }
            .ok()?;

            match char::from_u32(code)? {
                // NUL is never meaningful in extracted text.
                '\0' => None,
                // Keep numeric no-break spaces consistent with `&nbsp;`.
                '\u{a0}' => Some(' '),
                ch => Some(ch),
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// One node of a table-of-contents tree.
///
/// Entries nest through [`TocEntry::children`], so a whole table of contents
/// is a forest of these nodes.
#[derive(Debug, Clone)]
pub struct TocEntry {
    /// Text shown for this entry.
    pub label: String,
    /// Section number this entry refers to.
    // NOTE(review): presumably an index into `Book::sections` — confirm
    // against the code that builds the table of contents.
    pub section: usize,
    /// Entries nested directly beneath this one; empty for a leaf.
    pub children: Vec<TocEntry>,
}
|
|
|
|
|
|
|
|
|
|
/// A titled unit of book content.
#[derive(Debug, Clone)]
pub struct Section {
    /// Heading of the section.
    pub title: String,
    /// Body text of the section.
    // NOTE(review): whether this holds raw markup or already-stripped text is
    // not visible here — confirm against the producer.
    pub content: String,
    /// Page-related numbers associated with this section.
    // NOTE(review): looks like page offsets or page numbers; the exact meaning
    // is not established in this file — confirm against the paginator.
    pub pages: Vec<usize>,
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
pub struct Book {
|
|
|
|
|
pub title: String,
|
|
|
|
|
pub author: String,
|
|
|
|
|
pub cover: Option<Vec<u8>>,
|
|
|
|
|
pub sections: Vec<Section>,
|
|
|
|
|
pub toc: Vec<TocEntry>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without markup passes through unchanged.
    #[test]
    fn test_strip_html_plain_text() {
        assert_eq!(strip_html("Hello World"), "Hello World");
    }

    /// A single pair of tags is removed.
    #[test]
    fn test_strip_html_simple_tags() {
        assert_eq!(strip_html("<p>Hello</p>"), "Hello");
    }

    /// Nested tags are removed while the text between them is kept.
    #[test]
    fn test_strip_html_nested_tags() {
        assert_eq!(
            strip_html("<div><p>Hello <b>World</b></p></div>"),
            "Hello World"
        );
    }

    /// Tags carrying attributes are removed as a whole.
    #[test]
    fn test_strip_html_tag_with_attributes() {
        assert_eq!(strip_html("<a href=\"x.html\">link</a>"), "link");
    }

    /// Named character references are decoded. The inputs must contain real
    /// references (`&amp;`, `&nbsp;`, ...), otherwise nothing is exercised.
    #[test]
    fn test_strip_html_html_entities() {
        assert_eq!(strip_html("Hello &amp; World"), "Hello & World");
        assert_eq!(strip_html("Hello&nbsp;World"), "Hello World");
        assert_eq!(strip_html("1 &lt; 2 &gt; 0"), "1 < 2 > 0");
        assert_eq!(strip_html("&quot;hi&quot;"), "\"hi\"");
    }

    /// Empty input yields empty output.
    #[test]
    fn test_strip_html_empty() {
        assert_eq!(strip_html(""), "");
    }
}
|