feat(scan): 续扫支持 + 阶段1 ETA 估算
- 新增 ScanProgress 持久化层(%APPDATA%/scan-progress.txt,append 模式) - walker 评估文件前 contains 检查,命中则跳过;评估后 record 加入 - 阶段1 启动时 load,若 enable_resume=false 则先清空旧进度 - 阶段1 启动时拿 sysinfo 磁盘总容量,按 1000/5000/10000/50000/100000 里程碑动态计算 ETA - settings 页加 '启用续扫' 开关 + '清空续扫进度' 按钮 - 修复 on_progress 双计 bug(fetch_add -> store) - 修复 ETA 显示及 skipped 计数器
This commit is contained in:
@@ -35,6 +35,10 @@ pub struct App {
|
||||
pub scan_scanned: Arc<AtomicUsize>, // 已访问文件数
|
||||
pub scan_found: Arc<AtomicUsize>, // 候选累计
|
||||
pub scan_current_dir: Arc<std::sync::Mutex<String>>, // 当前目录
|
||||
/// 已扫描字节数(用于估算)
|
||||
pub scan_bytes: Arc<AtomicU64>,
|
||||
/// 阶段 1 预估剩余时间(字符串,已格式化为"约 X 分 Y 秒")
|
||||
pub scan_eta: Arc<std::sync::Mutex<String>>,
|
||||
/// 当前正在处理的文件
|
||||
pub current_file: Arc<std::sync::Mutex<Option<String>>>,
|
||||
pub current_step: Arc<std::sync::Mutex<String>>,
|
||||
@@ -70,6 +74,8 @@ impl App {
|
||||
scan_scanned: Arc::new(AtomicUsize::new(0)),
|
||||
scan_found: Arc::new(AtomicUsize::new(0)),
|
||||
scan_current_dir: Arc::new(std::sync::Mutex::new(String::new())),
|
||||
scan_bytes: Arc::new(AtomicU64::new(0)),
|
||||
scan_eta: Arc::new(std::sync::Mutex::new(String::new())),
|
||||
current_file: Arc::new(std::sync::Mutex::new(None)),
|
||||
current_step: Arc::new(std::sync::Mutex::new(String::new())),
|
||||
elapsed_ms: Arc::new(AtomicU64::new(0)),
|
||||
|
||||
@@ -126,6 +126,8 @@ pub struct ScanSettings {
|
||||
pub max_depth: u32,
|
||||
pub extensions: Vec<String>,
|
||||
pub scan_timeout_minutes: u32,
|
||||
/// 是否启用续扫(跳过上次已访问的文件)
|
||||
pub enable_resume: bool,
|
||||
}
|
||||
impl Default for ScanSettings {
|
||||
fn default() -> Self {
|
||||
@@ -143,6 +145,7 @@ impl Default for ScanSettings {
|
||||
max_depth: 0,
|
||||
extensions: vec!["doc".into(), "docx".into(), "pdf".into(), "xlsx".into()],
|
||||
scan_timeout_minutes: 0,
|
||||
enable_resume: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -223,7 +226,7 @@ impl Default for ViewerSettings {
|
||||
kill_timeout_ms: 3000,
|
||||
umi_ocr_url: "http://127.0.0.1:1224/api/ocr".into(),
|
||||
umi_ocr_exe: None,
|
||||
umi_ocr_startup_wait_sec: 3,
|
||||
umi_ocr_startup_wait_sec: 60,
|
||||
umi_ocr_call_timeout_sec: 30,
|
||||
ocr_language: "models/config_chinese.txt".into(),
|
||||
ocr_cls: false,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// 扫描模块
|
||||
pub mod filter;
|
||||
pub mod progress_store;
|
||||
pub mod runner;
|
||||
pub mod sampler;
|
||||
pub mod walker;
|
||||
|
||||
94
src/scan/progress_store.rs
Normal file
94
src/scan/progress_store.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
// 扫描进度持久化:把已扫过的文件路径集合存到磁盘,用于"续扫"
|
||||
use std::collections::HashSet;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use crate::utils::paths;
|
||||
|
||||
/// 续扫进度文件:%APPDATA%\secret-file-selfcheck\scan-progress.txt
|
||||
/// 每行一个已访问过的文件绝对路径
|
||||
pub fn progress_file() -> PathBuf {
|
||||
paths::app_config_dir().join("scan-progress.txt")
|
||||
}
|
||||
|
||||
/// Resume-progress state: an in-memory set used for membership checks plus
/// a queue of entries that still have to be appended to the progress file.
pub struct ScanProgress {
    /// Every path known to have been visited, whether loaded from disk or
    /// recorded during this session.
    set: HashSet<PathBuf>,
    /// Paths recorded since the last successful write, waiting to be
    /// appended to `file`.
    pending: Vec<PathBuf>,
    /// Location of the on-disk progress file.
    file: PathBuf,
    /// Flush threshold: how many queued entries trigger a write to disk.
    flush_threshold: usize,
    /// Number of files skipped by resume during this scan (session only,
    /// never persisted).
    pub skipped: AtomicUsize,
}
|
||||
|
||||
impl ScanProgress {
|
||||
/// 启动时加载(不存在则返回空集合)
|
||||
pub fn load() -> Self {
|
||||
let file = progress_file();
|
||||
let mut set: HashSet<PathBuf> = HashSet::new();
|
||||
if let Ok(f) = std::fs::File::open(&file) {
|
||||
let reader = BufReader::new(f);
|
||||
for line in reader.lines().map_while(Result::ok) {
|
||||
let line = line.trim();
|
||||
if line.is_empty() { continue; }
|
||||
set.insert(PathBuf::from(line));
|
||||
}
|
||||
}
|
||||
Self { set, pending: Vec::new(), file, flush_threshold: 2000, skipped: AtomicUsize::new(0) }
|
||||
}
|
||||
|
||||
pub fn contains(&self, p: &Path) -> bool {
|
||||
if self.set.contains(p) {
|
||||
self.skipped.fetch_add(1, Ordering::Relaxed);
|
||||
true
|
||||
} else { false }
|
||||
}
|
||||
pub fn is_empty(&self) -> bool { self.set.is_empty() }
|
||||
pub fn len(&self) -> usize { self.set.len() }
|
||||
|
||||
/// 记录一个已访问的文件(去重 + 累积待写盘)
|
||||
pub fn record(&mut self, p: PathBuf) {
|
||||
if self.set.insert(p.clone()) {
|
||||
self.pending.push(p);
|
||||
if self.pending.len() >= self.flush_threshold {
|
||||
let _ = self.flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 把待写盘队列追加写入文件
|
||||
pub fn flush(&mut self) -> std::io::Result<()> {
|
||||
if self.pending.is_empty() { return Ok(()); }
|
||||
if let Some(parent) = self.file.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let mut f = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&self.file)?;
|
||||
for p in &self.pending {
|
||||
f.write_all(p.to_string_lossy().as_bytes())?;
|
||||
f.write_all(b"\n")?;
|
||||
}
|
||||
f.flush()?;
|
||||
self.pending.clear();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 清空进度(删除文件 + 清空内存)
|
||||
pub fn clear(&mut self) {
|
||||
self.set.clear();
|
||||
self.pending.clear();
|
||||
self.skipped.store(0, Ordering::Relaxed);
|
||||
let _ = std::fs::remove_file(&self.file);
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ScanProgress {
|
||||
fn drop(&mut self) {
|
||||
let _ = self.flush();
|
||||
}
|
||||
}
|
||||
@@ -9,9 +9,13 @@ use crate::app::RunState;
|
||||
use crate::config::AppConfig;
|
||||
use crate::inspect::{self, Finding, Inspector};
|
||||
use crate::report::model::Report;
|
||||
use crate::scan::progress_store::ScanProgress;
|
||||
use crate::scan::sampler::{self, FileKind, SampleItem};
|
||||
use crate::scan::walker;
|
||||
|
||||
/// Milestones for the ETA estimate: the estimate is refreshed once when the scanned-file count reaches any of these thresholds.
|
||||
const ETA_MILESTONES: &[usize] = &[1000, 5000, 10_000, 50_000, 100_000, 500_000, 1_000_000];
|
||||
|
||||
/// 调度入口
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn run(
|
||||
@@ -23,6 +27,8 @@ pub async fn run(
|
||||
scan_scanned: Arc<AtomicUsize>,
|
||||
scan_found: Arc<AtomicUsize>,
|
||||
scan_current_dir: Arc<Mutex<String>>,
|
||||
scan_bytes: Arc<AtomicU64>,
|
||||
scan_eta: Arc<Mutex<String>>,
|
||||
current_file: Arc<Mutex<Option<String>>>,
|
||||
current_step: Arc<Mutex<String>>,
|
||||
start_instant: Instant,
|
||||
@@ -68,29 +74,95 @@ pub async fn run(
|
||||
set_step("🔍 阶段 1/3:正在扫描全盘候选文件……".into());
|
||||
push_log("═══ 阶段 1:扫描全盘所有候选文件 ═══".into());
|
||||
let scan_started = Instant::now();
|
||||
if let Ok(mut e) = scan_eta.lock() { e.clear(); }
|
||||
scan_bytes.store(0, Ordering::Relaxed);
|
||||
|
||||
// 启动时拿一次磁盘总容量(用 sysinfo 枚举所有盘),用于 ETA 估算
|
||||
let total_disk_bytes: u64 = {
|
||||
use sysinfo::Disks;
|
||||
Disks::new_with_refreshed_list()
|
||||
.iter()
|
||||
.map(|d| d.total_space())
|
||||
.sum()
|
||||
};
|
||||
push_log(format!(" 磁盘总容量:{} GB(用于 ETA 估算)", total_disk_bytes / 1024 / 1024 / 1024));
|
||||
|
||||
// 续扫进度:启动时加载;若用户关闭了续扫开关则先清空旧进度
|
||||
let mut scan_progress = ScanProgress::load();
|
||||
if !cfg.scan.enable_resume {
|
||||
if !scan_progress.is_empty() {
|
||||
scan_progress.clear();
|
||||
push_log("⚠ 已关闭续扫,已清空旧续扫进度".into());
|
||||
}
|
||||
} else if !scan_progress.is_empty() {
|
||||
push_log(format!("✔ 续扫模式:已加载 {} 条历史进度,将跳过这些文件", scan_progress.len()));
|
||||
}
|
||||
|
||||
// 进度回调:walker 每个目录+每个文件都会调
|
||||
let scan_scanned_cb = Arc::clone(&scan_scanned);
|
||||
let scan_found_cb = Arc::clone(&scan_found);
|
||||
let scan_dir_cb = Arc::clone(&scan_current_dir);
|
||||
let scan_bytes_cb = Arc::clone(&scan_bytes);
|
||||
let scan_eta_cb = Arc::clone(&scan_eta);
|
||||
let cur_file_cb = Arc::clone(¤t_file);
|
||||
let mut on_progress = |_scanned: usize, found: usize, dir: &std::path::Path, file: Option<&std::path::Path>| {
|
||||
scan_scanned_cb.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
// 上一次 ETA 计算时的文件计数;保证只在里程碑触发
|
||||
let last_eta_at: Arc<AtomicUsize> = Arc::new(AtomicUsize::new(0));
|
||||
let cancel_cb = Arc::clone(&cancel);
|
||||
|
||||
let mut on_progress = |scanned: usize, found: usize, dir: &std::path::Path, file: Option<&std::path::Path>, bytes: u64| {
|
||||
// 直接 store walker 传过来的真实值(避免双计)
|
||||
scan_scanned_cb.store(scanned, std::sync::atomic::Ordering::Relaxed);
|
||||
scan_found_cb.store(found, std::sync::atomic::Ordering::Relaxed);
|
||||
if bytes > 0 {
|
||||
scan_bytes_cb.fetch_add(bytes, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
if let Ok(mut g) = scan_dir_cb.lock() { *g = dir.display().to_string(); }
|
||||
if let Some(f) = file {
|
||||
if let Ok(mut g) = cur_file_cb.lock() { *g = Some(f.display().to_string()); }
|
||||
}
|
||||
|
||||
// 里程碑 ETA 计算
|
||||
if !cancel_cb.load(Ordering::Relaxed) {
|
||||
let last = last_eta_at.load(Ordering::Relaxed);
|
||||
if scanned > last {
|
||||
// 取下一个目标里程碑
|
||||
let next = ETA_MILESTONES.iter().find(|&&m| m > last && m <= scanned).copied();
|
||||
if let Some(_hit) = next {
|
||||
last_eta_at.store(scanned, Ordering::Relaxed);
|
||||
let elapsed = scan_started.elapsed().as_secs_f64().max(0.001);
|
||||
let total_bytes = scan_bytes_cb.load(Ordering::Relaxed) as f64;
|
||||
if total_bytes > 0.0 && total_disk_bytes > 0 {
|
||||
let rate = total_bytes / elapsed; // bytes / sec
|
||||
let remaining = (total_disk_bytes as f64 - total_bytes).max(0.0);
|
||||
let eta_sec = if rate > 0.0 { (remaining / rate) as u64 } else { 0 };
|
||||
let s = format_eta(eta_sec);
|
||||
if let Ok(mut e) = scan_eta_cb.lock() { *e = s.clone(); }
|
||||
push_log(format!(" ⏱ 已扫 {} 个文件(约 {} MB),速率 {:.1} MB/s,预估剩余:{}",
|
||||
scanned,
|
||||
(total_bytes / 1024.0 / 1024.0) as u64,
|
||||
rate / 1024.0 / 1024.0,
|
||||
s,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let candidates = walker::walk(&cfg.scan, &cancel, |s| push_log(s.to_string()), &mut on_progress);
|
||||
let candidates = walker::walk(&cfg.scan, &cancel, |s| push_log(s.to_string()), &mut on_progress, &mut scan_progress);
|
||||
// 扫完落盘最后一批
|
||||
let _ = scan_progress.flush();
|
||||
let scan_ms = scan_started.elapsed().as_millis();
|
||||
let candidates_count = candidates.len();
|
||||
let skipped = scan_progress.skipped.load(Ordering::Relaxed);
|
||||
push_log(format!(
|
||||
"✔ 扫描完成:访问 {} 个文件,命中候选 {} 份(用时 {} ms)",
|
||||
"✔ 扫描完成:访问 {} 个文件,命中候选 {} 份,续扫跳过 {} 个(用时 {} ms)",
|
||||
scan_scanned.load(Ordering::Relaxed),
|
||||
candidates_count,
|
||||
skipped,
|
||||
scan_ms
|
||||
));
|
||||
// 扫完清空 ETA
|
||||
if let Ok(mut e) = scan_eta.lock() { e.clear(); }
|
||||
|
||||
if candidates_count == 0 {
|
||||
push_log("⚠ 没有可抽检的文件,请检查扫描范围/白名单".into());
|
||||
@@ -238,6 +310,17 @@ fn hostname() -> String {
|
||||
sysinfo::System::host_name().unwrap_or_else(|| "unknown".into())
|
||||
}
|
||||
|
||||
/// Formats a number of seconds as a human-readable estimate.
///
/// - under one minute: seconds only
/// - under one hour: minutes and seconds
/// - otherwise: hours and minutes (leftover seconds are dropped)
fn format_eta(sec: u64) -> String {
    match sec {
        0..=59 => format!("约 {} 秒", sec),
        60..=3599 => {
            let minutes = sec / 60;
            let seconds = sec % 60;
            format!("约 {} 分 {} 秒", minutes, seconds)
        }
        _ => {
            let hours = sec / 3600;
            let minutes = (sec % 3600) / 60;
            format!("约 {} 时 {} 分", hours, minutes)
        }
    }
}
|
||||
|
||||
// Kept so that Result/Err-related code has something to reference, which
// avoids a compiler warning.
// NOTE(review): presumably this only keeps the `PathBuf` import in use —
// confirm and drop both if nothing else needs it.
#[allow(dead_code)]
fn _pathbuf_marker(_unused: PathBuf) {
    // Intentionally empty.
}
|
||||
|
||||
@@ -6,20 +6,24 @@ use walkdir::WalkDir;
|
||||
|
||||
use crate::config::ScanSettings;
|
||||
use crate::scan::filter;
|
||||
use crate::scan::progress_store::ScanProgress;
|
||||
|
||||
/// 遍历过程中每次回调:
|
||||
/// - scanned_so_far: 已扫描的文件总数(仅候选类型)
|
||||
/// - found_so_far: 当前累计已收纳的候选数
|
||||
/// - current_dir: 当前正在扫描的目录路径
|
||||
/// - current_file: 当前正在评估的文件路径(若是目录则为 None)
|
||||
pub type ProgressFn<'a> = &'a mut dyn FnMut(usize, usize, &Path, Option<&Path>);
|
||||
/// - bytes: 当前文件的字节数(目录回调时为 0)
|
||||
pub type ProgressFn<'a> = &'a mut dyn FnMut(usize, usize, &Path, Option<&Path>, u64);
|
||||
|
||||
/// 遍历全盘,输出满足条件的文件路径
|
||||
/// - `progress`: 已扫过文件集合,命中则跳过;每条新评估的文件都会记录到其中
|
||||
pub fn walk(
|
||||
s: &ScanSettings,
|
||||
cancel: &AtomicBool,
|
||||
log: impl Fn(&str),
|
||||
progress: ProgressFn,
|
||||
progress_fn: ProgressFn,
|
||||
scan_progress: &mut ScanProgress,
|
||||
) -> Vec<PathBuf> {
|
||||
let mut out = Vec::new();
|
||||
let scanned = AtomicUsize::new(0); // 已访问的文件数(仅作信息)
|
||||
@@ -55,11 +59,12 @@ pub fn walk(
|
||||
if let Ok(mut last) = last_dir_logged.lock() {
|
||||
*last = p.display().to_string();
|
||||
}
|
||||
progress(
|
||||
progress_fn(
|
||||
scanned.load(Ordering::Relaxed),
|
||||
found.load(Ordering::Relaxed),
|
||||
p,
|
||||
None,
|
||||
0,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -67,13 +72,20 @@ pub fn walk(
|
||||
}
|
||||
scanned.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
// 先把当前文件(及所在目录)回调给 UI,便于实时显示
|
||||
// 续扫:若该文件上次已访问过,跳过整个评估过程
|
||||
if scan_progress.contains(p) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 先把当前文件(及所在目录)回调给 UI,便于实时显示;顺带取大小
|
||||
let bytes = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
|
||||
let parent = p.parent().unwrap_or(p);
|
||||
progress(
|
||||
progress_fn(
|
||||
scanned.load(Ordering::Relaxed),
|
||||
found.load(Ordering::Relaxed),
|
||||
parent,
|
||||
Some(p),
|
||||
bytes,
|
||||
);
|
||||
|
||||
if filter::is_whitelisted(p, &s.whitelist) { continue; }
|
||||
@@ -81,10 +93,10 @@ pub fn walk(
|
||||
if !s.include_system && filter::is_system(p) { continue; }
|
||||
if !filter::extension_allowed(p, &s.extensions) { continue; }
|
||||
if s.min_size_kb > 0 {
|
||||
if let Ok(m) = std::fs::metadata(p) {
|
||||
if m.len() < s.min_size_kb * 1024 { continue; }
|
||||
}
|
||||
if bytes < s.min_size_kb * 1024 { continue; }
|
||||
}
|
||||
// 评估过的文件都进入续扫集合(无论是否命中候选)
|
||||
scan_progress.record(p.to_path_buf());
|
||||
found.fetch_add(1, Ordering::Relaxed);
|
||||
out.push(p.to_path_buf());
|
||||
}
|
||||
|
||||
@@ -106,6 +106,18 @@ pub fn draw(ui: &mut egui::Ui, app: &mut App) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// 预估剩余时间(达到里程碑后才有)
|
||||
if is_scanning {
|
||||
if let Ok(eta) = app.scan_eta.lock() {
|
||||
if !eta.is_empty() {
|
||||
ui.add_space(2.0);
|
||||
ui.horizontal(|ui| {
|
||||
ui.label("⏱ 预估剩余:");
|
||||
ui.label(egui::RichText::new(&*eta).strong().color(material::LIME));
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ui.add_space(6.0);
|
||||
@@ -264,6 +276,8 @@ fn start_inspection(app: &mut App) {
|
||||
let scan_scanned = Arc::clone(&app.scan_scanned);
|
||||
let scan_found = Arc::clone(&app.scan_found);
|
||||
let scan_dir = Arc::clone(&app.scan_current_dir);
|
||||
let scan_bytes = Arc::clone(&app.scan_bytes);
|
||||
let scan_eta = Arc::clone(&app.scan_eta);
|
||||
let cur_file = Arc::clone(&app.current_file);
|
||||
let cur_step = Arc::clone(&app.current_step);
|
||||
let elapsed_ms = Arc::clone(&app.elapsed_ms);
|
||||
@@ -295,6 +309,8 @@ fn start_inspection(app: &mut App) {
|
||||
scan_scanned,
|
||||
scan_found,
|
||||
scan_dir,
|
||||
scan_bytes,
|
||||
scan_eta,
|
||||
cur_file,
|
||||
cur_step,
|
||||
start_instant,
|
||||
|
||||
@@ -126,6 +126,14 @@ fn group_b_scan(ui: &mut egui::Ui, s: &mut crate::config::ScanSettings) {
|
||||
if ui.text_edit_singleline(&mut ext_str).changed() {
|
||||
s.extensions = ext_str.split(',').map(|x| x.trim().to_string()).filter(|x| !x.is_empty()).collect();
|
||||
}
|
||||
ui.checkbox(&mut s.enable_resume, "启用续扫(跳过上次已扫过的文件)");
|
||||
ui.horizontal(|ui| {
|
||||
let pf = crate::scan::progress_store::progress_file();
|
||||
if ui.button("🗑 清空续扫进度").clicked() {
|
||||
let _ = std::fs::remove_file(&pf);
|
||||
}
|
||||
ui.label(format!("进度文件:{}", pf.display()));
|
||||
});
|
||||
ui.horizontal(|ui| {
|
||||
ui.label("单次扫描超时(分钟,0=无限):");
|
||||
ui.add(egui::DragValue::new(&mut s.scan_timeout_minutes).clamp_range(0..=24 * 60));
|
||||
@@ -227,7 +235,7 @@ fn group_d_viewer(ui: &mut egui::Ui, v: &mut crate::config::ViewerSettings) {
|
||||
ui.label(egui::RichText::new("Umi-OCR").strong());
|
||||
ui.horizontal(|ui| { ui.label("HTTP 地址:"); ui.text_edit_singleline(&mut v.umi_ocr_url); });
|
||||
widgets::file_picker(ui, "Umi-OCR.exe 路径(默认 exe 同目录)", &mut v.umi_ocr_exe, &["exe"]);
|
||||
ui.horizontal(|ui| { ui.label("启动后等待秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_startup_wait_sec).clamp_range(0..=60)); });
|
||||
ui.horizontal(|ui| { ui.label("启动后等待秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_startup_wait_sec).clamp_range(0..=600)); });
|
||||
ui.horizontal(|ui| { ui.label("调用超时秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_call_timeout_sec).clamp_range(5..=600)); });
|
||||
ui.horizontal(|ui| { ui.label("OCR 语言:"); ui.text_edit_singleline(&mut v.ocr_language); });
|
||||
ui.checkbox(&mut v.ocr_cls, "启用文本方向校正");
|
||||
|
||||
Reference in New Issue
Block a user