Files
work-secretfile-selfcheck/src/inspect/docx_inspector.rs
xiaji 7e256c426f feat(ui): 安全软件风格主题 + 三阶段进度/日志 + XLSX 支持
- 主面板:阶段1扫描全盘 → 阶段2抽样 → 阶段3抽检,每阶段独立进度条/已用时/分类型 chips

- 日志:按类型着色(命中红/未命中绿/警告黄/阶段青)

- 主题:暗绿底 + 鲜绿/青色强调,圆角胶囊按钮(material::security_dark)

- 抽检:SampleMode 枚举支持按份数/百分比/全部;设置页 C 组动态切换

- 抽检:XLSX 检查器(zip + quick-xml 解析 sharedStrings 与 sheet)

- 扫描:walker 进度回调(已访问、命中候选、当前目录)

- 兼容:quick-xml 0.36 使用 reader.config_mut().trim_text()

- 仓库:新增 .gitignore 忽略 venv/pyc/target/构建产物
2026-06-10 12:20:25 +08:00

82 lines
3.6 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// DOCX text inspection: parse the document with docx-rs 0.4, collect paragraph text, then run keyword matching.
use std::future::Future;
use std::path::Path;
use std::pin::Pin;
use std::sync::atomic::AtomicBool;
use docx_rs::{DocumentChild, ParagraphChild, RunChild, TableCellContent, TableChild, TableRowChild};
use crate::config::AppConfig;
use crate::inspect::{make_hit, Finding, Inspector};
use crate::matcher::keywords::{keywords_for, Matcher};
/// Inspector for `.docx` files.
///
/// Stateless: its `Inspector` implementation reads the file, extracts text via docx-rs
/// and runs the configured keyword matcher over that text.
pub struct DocxInspector;
impl Inspector for DocxInspector {
fn inspect<'a>(
&'a self,
path: &'a Path,
cfg: &'a AppConfig,
_cancel: &'a AtomicBool,
log: &'a (dyn Fn(&str) + Send + Sync),
) -> Pin<Box<dyn Future<Output = anyhow::Result<Finding>> + Send + 'a>> {
Box::pin(async move {
log(" 解析 DOCX 文本……");
let bytes = std::fs::read(path)?;
// docx-rs 0.4:使用 read_docx 解析整个 zip
let doc = docx_rs::read_docx(&bytes).map_err(|e| anyhow::anyhow!("docx-rs 解析失败:{:?}", e))?;
let mut text = String::new();
for d in doc.document.children.iter() {
match d {
DocumentChild::Paragraph(p) => {
// p: &Box<Paragraph>
for pc in p.children.iter() {
if let ParagraphChild::Run(r) = pc {
for rc in r.children.iter() {
if let RunChild::Text(t) = rc {
text.push_str(&t.text);
}
}
}
}
text.push('\n');
}
DocumentChild::Table(t) => {
// t: &Box<Table>t.rows: Vec<TableChild>
for tc in t.rows.iter() {
if let TableChild::TableRow(row) = tc {
for rc in row.cells.iter() {
if let TableRowChild::TableCell(cell) = rc {
for cc in cell.children.iter() {
if let TableCellContent::Paragraph(p) = cc {
for pc in p.children.iter() {
if let ParagraphChild::Run(r) = pc {
for rcc in r.children.iter() {
if let RunChild::Text(t) = rcc {
text.push_str(&t.text);
}
}
}
}
text.push('\n');
}
}
}
text.push('\t');
}
text.push('\n');
}
}
}
_ => {}
}
}
let kws = keywords_for("docx", &cfg.keyword);
let m = Matcher::new(kws, &cfg.keyword);
let hits = m.find(&text);
Ok(make_hit(path, "docx", hits, text, None))
})
}
}