From 2f92826525bdb0b4556de2cd0921fe56adaa2965 Mon Sep 17 00:00:00 2001 From: xiaji Date: Wed, 10 Jun 2026 15:50:07 +0800 Subject: [PATCH] =?UTF-8?q?feat(scan):=20=E7=BB=AD=E6=89=AB=E6=94=AF?= =?UTF-8?q?=E6=8C=81=20+=20=E9=98=B6=E6=AE=B51=20ETA=20=E4=BC=B0=E7=AE=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 ScanProgress 持久化层(%APPDATA%/scan-progress.txt,append 模式) - walker 评估文件前 contains 检查,命中则跳过;评估后 record 加入 - 阶段1 启动时 load,若 enable_resume=false 则先清空旧进度 - 阶段1 启动时拿 sysinfo 磁盘总容量,按 1000/5000/10000/50000/100000 里程碑动态计算 ETA - settings 页加 '启用续扫' 开关 + '清空续扫进度' 按钮 - 修复 on_progress 双计 bug(fetch_add -> store) - 修复 ETA 显示及 skipped 计数器 --- src/app.rs | 6 +++ src/config/model.rs | 5 +- src/scan/mod.rs | 1 + src/scan/progress_store.rs | 94 ++++++++++++++++++++++++++++++++++++++ src/scan/runner.rs | 91 ++++++++++++++++++++++++++++++++++-- src/scan/walker.rs | 28 ++++++++---- src/ui/home.rs | 16 +++++++ src/ui/settings.rs | 10 +++- 8 files changed, 237 insertions(+), 14 deletions(-) create mode 100644 src/scan/progress_store.rs diff --git a/src/app.rs b/src/app.rs index 4c411a0..5f55920 100644 --- a/src/app.rs +++ b/src/app.rs @@ -35,6 +35,10 @@ pub struct App { pub scan_scanned: Arc, // 已访问文件数 pub scan_found: Arc, // 候选累计 pub scan_current_dir: Arc>, // 当前目录 + /// 已扫描字节数(用于估算) + pub scan_bytes: Arc, + /// 阶段 1 预估剩余时间(字符串,已格式化为"约 X 分 Y 秒") + pub scan_eta: Arc>, /// 当前正在处理的文件 pub current_file: Arc>>, pub current_step: Arc>, @@ -70,6 +74,8 @@ impl App { scan_scanned: Arc::new(AtomicUsize::new(0)), scan_found: Arc::new(AtomicUsize::new(0)), scan_current_dir: Arc::new(std::sync::Mutex::new(String::new())), + scan_bytes: Arc::new(AtomicU64::new(0)), + scan_eta: Arc::new(std::sync::Mutex::new(String::new())), current_file: Arc::new(std::sync::Mutex::new(None)), current_step: Arc::new(std::sync::Mutex::new(String::new())), elapsed_ms: Arc::new(AtomicU64::new(0)), diff --git a/src/config/model.rs b/src/config/model.rs index 1634f98..db26cd0 100644 --- a/src/config/model.rs +++ b/src/config/model.rs @@ -126,6 +126,8 @@ pub struct ScanSettings { pub max_depth: u32, pub extensions: Vec, pub scan_timeout_minutes: u32, + /// 是否启用续扫(跳过上次已访问的文件) + pub enable_resume: bool, } impl Default for ScanSettings { fn default() -> Self { @@ -143,6 +145,7 @@ impl Default for ScanSettings { max_depth: 0, extensions: vec!["doc".into(), "docx".into(), "pdf".into(), "xlsx".into()], scan_timeout_minutes: 0, + enable_resume: true, } } } @@ -223,7 +226,7 @@ impl Default for ViewerSettings { kill_timeout_ms: 3000, umi_ocr_url: "http://127.0.0.1:1224/api/ocr".into(), umi_ocr_exe: None, - umi_ocr_startup_wait_sec: 3, + umi_ocr_startup_wait_sec: 60, umi_ocr_call_timeout_sec: 30, ocr_language: "models/config_chinese.txt".into(), ocr_cls: false, diff --git a/src/scan/mod.rs b/src/scan/mod.rs index 36f692d..0b67378 100644 --- a/src/scan/mod.rs +++ b/src/scan/mod.rs @@ -1,5 +1,6 @@ // 扫描模块 pub mod filter; +pub mod progress_store; pub mod runner; pub mod sampler; pub mod walker; diff --git a/src/scan/progress_store.rs b/src/scan/progress_store.rs new file mode 100644 index 0000000..286ada8 --- /dev/null +++ b/src/scan/progress_store.rs @@ -0,0 +1,94 @@ +// 扫描进度持久化:把已扫过的文件路径集合存到磁盘,用于"续扫" +use std::collections::HashSet; +use std::fs::OpenOptions; +use std::io::{BufRead, BufReader, Write}; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use crate::utils::paths; + +/// 续扫进度文件:%APPDATA%\secret-file-selfcheck\scan-progress.txt +/// 每行一个已访问过的文件绝对路径 +pub fn progress_file() -> PathBuf { + paths::app_config_dir().join("scan-progress.txt") +} + +/// 续扫进度对象:内存 HashSet(查重)+ 待写盘队列(增量 append) +pub struct ScanProgress { + set: HashSet, + pending: Vec, + file: PathBuf, + /// flush 阈值:新增多少条后写一次盘 + flush_threshold: usize, + /// 本次扫描续扫跳过的文件数(仅会话内) + pub skipped: AtomicUsize, +} + +impl ScanProgress { + /// 启动时加载(不存在则返回空集合) + pub fn load() -> Self { + let file = progress_file(); + let mut set: HashSet = HashSet::new(); + if let Ok(f) = std::fs::File::open(&file) { + let reader = BufReader::new(f); + for line in reader.lines().map_while(Result::ok) { + let line = line.trim(); + if line.is_empty() { continue; } + set.insert(PathBuf::from(line)); + } + } + Self { set, pending: Vec::new(), file, flush_threshold: 2000, skipped: AtomicUsize::new(0) } + } + + pub fn contains(&self, p: &Path) -> bool { + if self.set.contains(p) { + self.skipped.fetch_add(1, Ordering::Relaxed); + true + } else { false } + } + pub fn is_empty(&self) -> bool { self.set.is_empty() } + pub fn len(&self) -> usize { self.set.len() } + + /// 记录一个已访问的文件(去重 + 累积待写盘) + pub fn record(&mut self, p: PathBuf) { + if self.set.insert(p.clone()) { + self.pending.push(p); + if self.pending.len() >= self.flush_threshold { + let _ = self.flush(); + } + } + } + + /// 把待写盘队列追加写入文件 + pub fn flush(&mut self) -> std::io::Result<()> { + if self.pending.is_empty() { return Ok(()); } + if let Some(parent) = self.file.parent() { + std::fs::create_dir_all(parent)?; + } + let mut f = OpenOptions::new() + .create(true) + .append(true) + .open(&self.file)?; + for p in &self.pending { + f.write_all(p.to_string_lossy().as_bytes())?; + f.write_all(b"\n")?; + } + f.flush()?; + self.pending.clear(); + Ok(()) + } + + /// 清空进度(删除文件 + 清空内存) + pub fn clear(&mut self) { + self.set.clear(); + self.pending.clear(); + self.skipped.store(0, Ordering::Relaxed); + let _ = std::fs::remove_file(&self.file); + } +} + +impl Drop for ScanProgress { + fn drop(&mut self) { + let _ = self.flush(); + } +} diff --git a/src/scan/runner.rs b/src/scan/runner.rs index ed77846..fa2955e 100644 --- a/src/scan/runner.rs +++ b/src/scan/runner.rs @@ -9,9 +9,13 @@ use crate::app::RunState; use crate::config::AppConfig; use crate::inspect::{self, Finding, Inspector}; use crate::report::model::Report; +use crate::scan::progress_store::ScanProgress; use crate::scan::sampler::{self, FileKind, SampleItem}; use crate::scan::walker; +/// ETA 估算的里程碑(已扫文件数达到下列任一阈值时刷新一次) +const ETA_MILESTONES: &[usize] = &[1000, 5000, 10_000, 50_000, 100_000, 500_000, 1_000_000]; + /// 调度入口 #[allow(clippy::too_many_arguments)] pub async fn run( @@ -23,6 +27,8 @@ pub async fn run( scan_scanned: Arc, scan_found: Arc, scan_current_dir: Arc>, + scan_bytes: Arc, + scan_eta: Arc>, current_file: Arc>>, current_step: Arc>, start_instant: Instant, @@ -68,29 +74,95 @@ pub async fn run( set_step("🔍 阶段 1/3:正在扫描全盘候选文件……".into()); push_log("═══ 阶段 1:扫描全盘所有候选文件 ═══".into()); let scan_started = Instant::now(); + if let Ok(mut e) = scan_eta.lock() { e.clear(); } + scan_bytes.store(0, Ordering::Relaxed); + + // 启动时拿一次磁盘总容量(用 sysinfo 枚举所有盘),用于 ETA 估算 + let total_disk_bytes: u64 = { + use sysinfo::Disks; + Disks::new_with_refreshed_list() + .iter() + .map(|d| d.total_space()) + .sum() + }; + push_log(format!(" 磁盘总容量:{} GB(用于 ETA 估算)", total_disk_bytes / 1024 / 1024 / 1024)); + + // 续扫进度:启动时加载;若用户关闭了续扫开关则先清空旧进度 + let mut scan_progress = ScanProgress::load(); + if !cfg.scan.enable_resume { + if !scan_progress.is_empty() { + scan_progress.clear(); + push_log("⚠ 已关闭续扫,已清空旧续扫进度".into()); + } + } else if !scan_progress.is_empty() { + push_log(format!("✔ 续扫模式:已加载 {} 条历史进度,将跳过这些文件", scan_progress.len())); + } // 进度回调:walker 每个目录+每个文件都会调 let scan_scanned_cb = Arc::clone(&scan_scanned); let scan_found_cb = Arc::clone(&scan_found); let scan_dir_cb = Arc::clone(&scan_current_dir); + let scan_bytes_cb = Arc::clone(&scan_bytes); + let scan_eta_cb = Arc::clone(&scan_eta); let cur_file_cb = Arc::clone(¤t_file); - let mut on_progress = |_scanned: usize, found: usize, dir: &std::path::Path, file: Option<&std::path::Path>| { - scan_scanned_cb.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + // 上一次 ETA 计算时的文件计数;保证只在里程碑触发 + let last_eta_at: Arc = Arc::new(AtomicUsize::new(0)); + let cancel_cb = Arc::clone(&cancel); + + let mut on_progress = |scanned: usize, found: usize, dir: &std::path::Path, file: Option<&std::path::Path>, bytes: u64| { + // 直接 store walker 传过来的真实值(避免双计) + scan_scanned_cb.store(scanned, std::sync::atomic::Ordering::Relaxed); scan_found_cb.store(found, std::sync::atomic::Ordering::Relaxed); + if bytes > 0 { + scan_bytes_cb.fetch_add(bytes, std::sync::atomic::Ordering::Relaxed); + } if let Ok(mut g) = scan_dir_cb.lock() { *g = dir.display().to_string(); } if let Some(f) = file { if let Ok(mut g) = cur_file_cb.lock() { *g = Some(f.display().to_string()); } } + + // 里程碑 ETA 计算 + if !cancel_cb.load(Ordering::Relaxed) { + let last = last_eta_at.load(Ordering::Relaxed); + if scanned > last { + // 取下一个目标里程碑 + let next = ETA_MILESTONES.iter().find(|&&m| m > last && m <= scanned).copied(); + if let Some(_hit) = next { + last_eta_at.store(scanned, Ordering::Relaxed); + let elapsed = scan_started.elapsed().as_secs_f64().max(0.001); + let total_bytes = scan_bytes_cb.load(Ordering::Relaxed) as f64; + if total_bytes > 0.0 && total_disk_bytes > 0 { + let rate = total_bytes / elapsed; // bytes / sec + let remaining = (total_disk_bytes as f64 - total_bytes).max(0.0); + let eta_sec = if rate > 0.0 { (remaining / rate) as u64 } else { 0 }; + let s = format_eta(eta_sec); + if let Ok(mut e) = scan_eta_cb.lock() { *e = s.clone(); } + push_log(format!(" ⏱ 已扫 {} 个文件(约 {} MB),速率 {:.1} MB/s,预估剩余:{}", + scanned, + (total_bytes / 1024.0 / 1024.0) as u64, + rate / 1024.0 / 1024.0, + s, + )); + } + } + } + } }; - let candidates = walker::walk(&cfg.scan, &cancel, |s| push_log(s.to_string()), &mut on_progress); + let candidates = walker::walk(&cfg.scan, &cancel, |s| push_log(s.to_string()), &mut on_progress, &mut scan_progress); + // 扫完落盘最后一批 + let _ = scan_progress.flush(); let scan_ms = scan_started.elapsed().as_millis(); let candidates_count = candidates.len(); + let skipped = scan_progress.skipped.load(Ordering::Relaxed); push_log(format!( - "✔ 扫描完成:访问 {} 个文件,命中候选 {} 份(用时 {} ms)", + "✔ 扫描完成:访问 {} 个文件,命中候选 {} 份,续扫跳过 {} 个(用时 {} ms)", scan_scanned.load(Ordering::Relaxed), candidates_count, + skipped, scan_ms )); + // 扫完清空 ETA + if let Ok(mut e) = scan_eta.lock() { e.clear(); } if candidates_count == 0 { push_log("⚠ 没有可抽检的文件,请检查扫描范围/白名单".into()); @@ -238,6 +310,17 @@ fn hostname() -> String { sysinfo::System::host_name().unwrap_or_else(|| "unknown".into()) } +/// 把秒数格式化为人类可读字符串 +fn format_eta(sec: u64) -> String { + if sec < 60 { + format!("约 {} 秒", sec) + } else if sec < 3600 { + format!("约 {} 分 {} 秒", sec / 60, sec % 60) + } else { + format!("约 {} 时 {} 分", sec / 3600, (sec % 3600) / 60) + } +} + // 供 Result/Err 引用避免 warning #[allow(dead_code)] fn _pathbuf_marker(_p: PathBuf) {} diff --git a/src/scan/walker.rs b/src/scan/walker.rs index eaf1562..aec2454 100644 --- a/src/scan/walker.rs +++ b/src/scan/walker.rs @@ -6,20 +6,24 @@ use walkdir::WalkDir; use crate::config::ScanSettings; use crate::scan::filter; +use crate::scan::progress_store::ScanProgress; /// 遍历过程中每次回调: /// - scanned_so_far: 已扫描的文件总数(仅候选类型) /// - found_so_far: 当前累计已收纳的候选数 /// - current_dir: 当前正在扫描的目录路径 /// - current_file: 当前正在评估的文件路径(若是目录则为 None) -pub type ProgressFn<'a> = &'a mut dyn FnMut(usize, usize, &Path, Option<&Path>); +/// - bytes: 当前文件的字节数(目录回调时为 0) +pub type ProgressFn<'a> = &'a mut dyn FnMut(usize, usize, &Path, Option<&Path>, u64); /// 遍历全盘,输出满足条件的文件路径 +/// - `progress`: 已扫过文件集合,命中则跳过;每条新评估的文件都会记录到其中 pub fn walk( s: &ScanSettings, cancel: &AtomicBool, log: impl Fn(&str), - progress: ProgressFn, + progress_fn: ProgressFn, + scan_progress: &mut ScanProgress, ) -> Vec { let mut out = Vec::new(); let scanned = AtomicUsize::new(0); // 已访问的文件数(仅作信息) @@ -55,11 +59,12 @@ pub fn walk( if let Ok(mut last) = last_dir_logged.lock() { *last = p.display().to_string(); } - progress( + progress_fn( scanned.load(Ordering::Relaxed), found.load(Ordering::Relaxed), p, None, + 0, ); } } @@ -67,13 +72,20 @@ pub fn walk( } scanned.fetch_add(1, Ordering::Relaxed); - // 先把当前文件(及所在目录)回调给 UI,便于实时显示 + // 续扫:若该文件上次已访问过,跳过整个评估过程 + if scan_progress.contains(p) { + continue; + } + + // 先把当前文件(及所在目录)回调给 UI,便于实时显示;顺带取大小 + let bytes = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0); let parent = p.parent().unwrap_or(p); - progress( + progress_fn( scanned.load(Ordering::Relaxed), found.load(Ordering::Relaxed), parent, Some(p), + bytes, ); if filter::is_whitelisted(p, &s.whitelist) { continue; } @@ -81,10 +93,10 @@ pub fn walk( if !s.include_system && filter::is_system(p) { continue; } if !filter::extension_allowed(p, &s.extensions) { continue; } if s.min_size_kb > 0 { - if let Ok(m) = std::fs::metadata(p) { - if m.len() < s.min_size_kb * 1024 { continue; } - } + if bytes < s.min_size_kb * 1024 { continue; } } + // 评估过的文件都进入续扫集合(无论是否命中候选) + scan_progress.record(p.to_path_buf()); found.fetch_add(1, Ordering::Relaxed); out.push(p.to_path_buf()); } diff --git a/src/ui/home.rs b/src/ui/home.rs index 599a5ea..83ad7af 100644 --- a/src/ui/home.rs +++ b/src/ui/home.rs @@ -106,6 +106,18 @@ pub fn draw(ui: &mut egui::Ui, app: &mut App) { } } } + // 预估剩余时间(达到里程碑后才有) + if is_scanning { + if let Ok(eta) = app.scan_eta.lock() { + if !eta.is_empty() { + ui.add_space(2.0); + ui.horizontal(|ui| { + ui.label("⏱ 预估剩余:"); + ui.label(egui::RichText::new(&*eta).strong().color(material::LIME)); + }); + } + } + } }); ui.add_space(6.0); @@ -264,6 +276,8 @@ fn start_inspection(app: &mut App) { let scan_scanned = Arc::clone(&app.scan_scanned); let scan_found = Arc::clone(&app.scan_found); let scan_dir = Arc::clone(&app.scan_current_dir); + let scan_bytes = Arc::clone(&app.scan_bytes); + let scan_eta = Arc::clone(&app.scan_eta); let cur_file = Arc::clone(&app.current_file); let cur_step = Arc::clone(&app.current_step); let elapsed_ms = Arc::clone(&app.elapsed_ms); @@ -295,6 +309,8 @@ fn start_inspection(app: &mut App) { scan_scanned, scan_found, scan_dir, + scan_bytes, + scan_eta, cur_file, cur_step, start_instant, diff --git a/src/ui/settings.rs b/src/ui/settings.rs index a44763b..39b661c 100644 --- a/src/ui/settings.rs +++ b/src/ui/settings.rs @@ -126,6 +126,14 @@ fn group_b_scan(ui: &mut egui::Ui, s: &mut crate::config::ScanSettings) { if ui.text_edit_singleline(&mut ext_str).changed() { s.extensions = ext_str.split(',').map(|x| x.trim().to_string()).filter(|x| !x.is_empty()).collect(); } + ui.checkbox(&mut s.enable_resume, "启用续扫(跳过上次已扫过的文件)"); + ui.horizontal(|ui| { + let pf = crate::scan::progress_store::progress_file(); + if ui.button("🗑 清空续扫进度").clicked() { + let _ = std::fs::remove_file(&pf); + } + ui.label(format!("进度文件:{}", pf.display())); + }); ui.horizontal(|ui| { ui.label("单次扫描超时(分钟,0=无限):"); ui.add(egui::DragValue::new(&mut s.scan_timeout_minutes).clamp_range(0..=24 * 60)); @@ -227,7 +235,7 @@ fn group_d_viewer(ui: &mut egui::Ui, v: &mut crate::config::ViewerSettings) { ui.label(egui::RichText::new("Umi-OCR").strong()); ui.horizontal(|ui| { ui.label("HTTP 地址:"); ui.text_edit_singleline(&mut v.umi_ocr_url); }); widgets::file_picker(ui, "Umi-OCR.exe 路径(默认 exe 同目录)", &mut v.umi_ocr_exe, &["exe"]); - ui.horizontal(|ui| { ui.label("启动后等待秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_startup_wait_sec).clamp_range(0..=60)); }); + ui.horizontal(|ui| { ui.label("启动后等待秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_startup_wait_sec).clamp_range(0..=600)); }); ui.horizontal(|ui| { ui.label("调用超时秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_call_timeout_sec).clamp_range(5..=600)); }); ui.horizontal(|ui| { ui.label("OCR 语言:"); ui.text_edit_singleline(&mut v.ocr_language); }); ui.checkbox(&mut v.ocr_cls, "启用文本方向校正");