- 主面板:阶段1扫描全盘 → 阶段2抽样 → 阶段3抽检,每阶段独立进度条/已用时/分类型 chips - 日志:按类型着色(命中红/未命中绿/警告黄/阶段青) - 主题:暗绿底 + 鲜绿/青色强调,圆角胶囊按钮(material::security_dark) - 抽检:SampleMode 枚举支持按份数/百分比/全部;设置页 C 组动态切换 - 抽检:XLSX 检查器(zip + quick-xml 解析 sharedStrings 与 sheet) - 扫描:walker 进度回调(已访问、命中候选、当前目录) - 兼容:quick-xml 0.36 使用 reader.config_mut().trim_text() - 仓库:新增 .gitignore 忽略 venv/pyc/target/构建产物
82 lines
3.6 KiB
Rust
82 lines
3.6 KiB
Rust
// DOCX 文本抽检:docx-rs 0.4 读段落,关键词匹配
|
||
use std::future::Future;
|
||
use std::path::Path;
|
||
use std::pin::Pin;
|
||
use std::sync::atomic::AtomicBool;
|
||
|
||
use docx_rs::{DocumentChild, ParagraphChild, RunChild, TableCellContent, TableChild, TableRowChild};
|
||
|
||
use crate::config::AppConfig;
|
||
use crate::inspect::{make_hit, Finding, Inspector};
|
||
use crate::matcher::keywords::{keywords_for, Matcher};
|
||
|
||
/// Stateless inspector for `.docx` files.
///
/// Its `Inspector` implementation parses the document with docx-rs, gathers the
/// text it finds and runs the "docx" keyword matcher over that text.
pub struct DocxInspector;
|
||
|
||
impl Inspector for DocxInspector {
|
||
fn inspect<'a>(
|
||
&'a self,
|
||
path: &'a Path,
|
||
cfg: &'a AppConfig,
|
||
_cancel: &'a AtomicBool,
|
||
log: &'a (dyn Fn(&str) + Send + Sync),
|
||
) -> Pin<Box<dyn Future<Output = anyhow::Result<Finding>> + Send + 'a>> {
|
||
Box::pin(async move {
|
||
log(" 解析 DOCX 文本……");
|
||
let bytes = std::fs::read(path)?;
|
||
// docx-rs 0.4:使用 read_docx 解析整个 zip
|
||
let doc = docx_rs::read_docx(&bytes).map_err(|e| anyhow::anyhow!("docx-rs 解析失败:{:?}", e))?;
|
||
|
||
let mut text = String::new();
|
||
for d in doc.document.children.iter() {
|
||
match d {
|
||
DocumentChild::Paragraph(p) => {
|
||
// p: &Box<Paragraph>
|
||
for pc in p.children.iter() {
|
||
if let ParagraphChild::Run(r) = pc {
|
||
for rc in r.children.iter() {
|
||
if let RunChild::Text(t) = rc {
|
||
text.push_str(&t.text);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
text.push('\n');
|
||
}
|
||
DocumentChild::Table(t) => {
|
||
// t: &Box<Table>;t.rows: Vec<TableChild>
|
||
for tc in t.rows.iter() {
|
||
if let TableChild::TableRow(row) = tc {
|
||
for rc in row.cells.iter() {
|
||
if let TableRowChild::TableCell(cell) = rc {
|
||
for cc in cell.children.iter() {
|
||
if let TableCellContent::Paragraph(p) = cc {
|
||
for pc in p.children.iter() {
|
||
if let ParagraphChild::Run(r) = pc {
|
||
for rcc in r.children.iter() {
|
||
if let RunChild::Text(t) = rcc {
|
||
text.push_str(&t.text);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
text.push('\n');
|
||
}
|
||
}
|
||
}
|
||
text.push('\t');
|
||
}
|
||
text.push('\n');
|
||
}
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
let kws = keywords_for("docx", &cfg.keyword);
|
||
let m = Matcher::new(kws, &cfg.keyword);
|
||
let hits = m.find(&text);
|
||
Ok(make_hit(path, "docx", hits, text, None))
|
||
})
|
||
}
|
||
}
|