feat(scan): 续扫支持 + 阶段1 ETA 估算

- 新增 ScanProgress 持久化层(%APPDATA%\secret-file-selfcheck\scan-progress.txt,append 模式)
- walker 评估文件前 contains 检查,命中则跳过;评估后 record 加入
- 阶段1 启动时 load,若 enable_resume=false 则先清空旧进度
- 阶段1 启动时拿 sysinfo 磁盘总容量,按 1000/5000/10000/50000/100000/500000/1000000 里程碑动态计算 ETA
- settings 页加 '启用续扫' 开关 + '清空续扫进度' 按钮
- 修复 on_progress 双计 bug(fetch_add -> store)
- 修复 ETA 显示及 skipped 计数器
This commit is contained in:
2026-06-10 15:50:07 +08:00
parent ce6c8b70f4
commit 2f92826525
8 changed files with 237 additions and 14 deletions

View File

@@ -35,6 +35,10 @@ pub struct App {
pub scan_scanned: Arc<AtomicUsize>, // 已访问文件数
pub scan_found: Arc<AtomicUsize>, // 候选累计
pub scan_current_dir: Arc<std::sync::Mutex<String>>, // 当前目录
/// 已扫描字节数(用于估算)
pub scan_bytes: Arc<AtomicU64>,
/// Phase-1 estimated time remaining (a string already formatted as "约 X 分 Y 秒")
pub scan_eta: Arc<std::sync::Mutex<String>>,
/// 当前正在处理的文件
pub current_file: Arc<std::sync::Mutex<Option<String>>>,
pub current_step: Arc<std::sync::Mutex<String>>,
@@ -70,6 +74,8 @@ impl App {
scan_scanned: Arc::new(AtomicUsize::new(0)),
scan_found: Arc::new(AtomicUsize::new(0)),
scan_current_dir: Arc::new(std::sync::Mutex::new(String::new())),
scan_bytes: Arc::new(AtomicU64::new(0)),
scan_eta: Arc::new(std::sync::Mutex::new(String::new())),
current_file: Arc::new(std::sync::Mutex::new(None)),
current_step: Arc::new(std::sync::Mutex::new(String::new())),
elapsed_ms: Arc::new(AtomicU64::new(0)),

View File

@@ -126,6 +126,8 @@ pub struct ScanSettings {
pub max_depth: u32,
pub extensions: Vec<String>,
pub scan_timeout_minutes: u32,
/// 是否启用续扫(跳过上次已访问的文件)
pub enable_resume: bool,
}
impl Default for ScanSettings {
fn default() -> Self {
@@ -143,6 +145,7 @@ impl Default for ScanSettings {
max_depth: 0,
extensions: vec!["doc".into(), "docx".into(), "pdf".into(), "xlsx".into()],
scan_timeout_minutes: 0,
enable_resume: true,
}
}
}
@@ -223,7 +226,7 @@ impl Default for ViewerSettings {
kill_timeout_ms: 3000,
umi_ocr_url: "http://127.0.0.1:1224/api/ocr".into(),
umi_ocr_exe: None,
umi_ocr_startup_wait_sec: 3,
umi_ocr_startup_wait_sec: 60,
umi_ocr_call_timeout_sec: 30,
ocr_language: "models/config_chinese.txt".into(),
ocr_cls: false,

View File

@@ -1,5 +1,6 @@
// 扫描模块
pub mod filter;
pub mod progress_store;
pub mod runner;
pub mod sampler;
pub mod walker;

View File

@@ -0,0 +1,94 @@
// 扫描进度持久化:把已扫过的文件路径集合存到磁盘,用于"续扫"
use std::collections::HashSet;
use std::fs::OpenOptions;
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::utils::paths;
/// Returns the path of the resume-progress file: `scan-progress.txt` inside the
/// directory returned by `paths::app_config_dir()`.
///
/// The original note gives the location as
/// `%APPDATA%\secret-file-selfcheck\scan-progress.txt`; the directory itself is chosen
/// by `app_config_dir`, which is defined elsewhere (confirm there).
/// The file stores one absolute path of an already-visited file per line.
pub fn progress_file() -> PathBuf {
    let config_dir = paths::app_config_dir();
    config_dir.join("scan-progress.txt")
}
/// Resume-scan progress: an in-memory `HashSet` used for duplicate checks plus a queue
/// of paths that still have to be appended to the on-disk progress file.
pub struct ScanProgress {
    /// Every known path: loaded from disk by `load` or added through `record`.
    set: HashSet<PathBuf>,
    /// Paths recorded but not yet appended to `file`.
    pending: Vec<PathBuf>,
    /// Location of the on-disk progress file.
    file: PathBuf,
    /// Flush threshold: how many newly recorded paths trigger a write to disk.
    flush_threshold: usize,
    /// Number of files skipped by resume during this scan (session-only, never persisted).
    pub skipped: AtomicUsize,
}

impl ScanProgress {
    /// Loads the progress file returned by `progress_file()`.
    ///
    /// If the file cannot be opened (for example because it does not exist) the result
    /// is an empty set. Each line is trimmed; blank lines are ignored.
    pub fn load() -> Self {
        let file = progress_file();
        let mut set: HashSet<PathBuf> = HashSet::new();
        if let Ok(f) = std::fs::File::open(&file) {
            // `map_while(Result::ok)` stops at the first line that fails to read
            // (I/O error or invalid UTF-8); everything read before that point is kept.
            for line in BufReader::new(f).lines().map_while(Result::ok) {
                let line = line.trim();
                if !line.is_empty() {
                    set.insert(PathBuf::from(line));
                }
            }
        }
        Self {
            set,
            pending: Vec::new(),
            file,
            flush_threshold: 2000,
            skipped: AtomicUsize::new(0),
        }
    }

    /// Returns `true` when `p` is already in the set.
    ///
    /// Not a pure query: every hit also increments `skipped`.
    pub fn contains(&self, p: &Path) -> bool {
        let hit = self.set.contains(p);
        if hit {
            self.skipped.fetch_add(1, Ordering::Relaxed);
        }
        hit
    }

    /// Returns `true` when no path is known.
    pub fn is_empty(&self) -> bool {
        self.set.is_empty()
    }

    /// Number of known paths.
    pub fn len(&self) -> usize {
        self.set.len()
    }

    /// Records a visited file: deduplicates against the set and queues new paths for
    /// the next write to disk.
    ///
    /// A flush is attempted each time the queue length reaches a multiple of
    /// `flush_threshold`. On success the queue is emptied, so this is the same as
    /// flushing every `flush_threshold` new paths. If a flush fails the queue is kept
    /// and the next attempt happens only after another `flush_threshold` paths, instead
    /// of re-opening the file on every single call.
    pub fn record(&mut self, p: PathBuf) {
        if !self.set.insert(p.clone()) {
            return;
        }
        self.pending.push(p);
        // `max(1)` keeps the modulo well-defined should the threshold ever be 0.
        let threshold = self.flush_threshold.max(1);
        if self.pending.len() % threshold == 0 {
            // Best effort: a failed write must not abort the scan.
            let _ = self.flush();
        }
    }

    /// Appends all queued paths to the progress file, one per line.
    ///
    /// Creates the parent directory and the file when missing. The queue is cleared
    /// only after the write succeeded; on error it is left intact so a later call can
    /// retry.
    ///
    /// # Errors
    /// Returns the underlying I/O error from creating the directory, opening the file
    /// or writing to it.
    pub fn flush(&mut self) -> std::io::Result<()> {
        if self.pending.is_empty() {
            return Ok(());
        }
        if let Some(parent) = self.file.parent() {
            std::fs::create_dir_all(parent)?;
        }
        // Assemble the whole batch in memory and hand it to the file in one
        // `write_all` instead of issuing two unbuffered writes per path.
        let mut batch: Vec<u8> = Vec::new();
        for p in &self.pending {
            // Paths that are not valid UTF-8 are stored in their lossy form.
            batch.extend_from_slice(p.to_string_lossy().as_bytes());
            batch.push(b'\n');
        }
        let mut f = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&self.file)?;
        f.write_all(&batch)?;
        f.flush()?;
        self.pending.clear();
        Ok(())
    }

    /// Clears all progress: empties the in-memory state, resets `skipped` and deletes
    /// the progress file (a failure to delete is ignored).
    pub fn clear(&mut self) {
        self.set.clear();
        self.pending.clear();
        self.skipped.store(0, Ordering::Relaxed);
        let _ = std::fs::remove_file(&self.file);
    }
}

impl Drop for ScanProgress {
    /// Writes any still-queued paths when the value goes out of scope; errors are ignored.
    fn drop(&mut self) {
        let _ = self.flush();
    }
}

View File

@@ -9,9 +9,13 @@ use crate::app::RunState;
use crate::config::AppConfig;
use crate::inspect::{self, Finding, Inspector};
use crate::report::model::Report;
use crate::scan::progress_store::ScanProgress;
use crate::scan::sampler::{self, FileKind, SampleItem};
use crate::scan::walker;
/// ETA 估算的里程碑(已扫文件数达到下列任一阈值时刷新一次)
const ETA_MILESTONES: &[usize] = &[1000, 5000, 10_000, 50_000, 100_000, 500_000, 1_000_000];
/// 调度入口
#[allow(clippy::too_many_arguments)]
pub async fn run(
@@ -23,6 +27,8 @@ pub async fn run(
scan_scanned: Arc<AtomicUsize>,
scan_found: Arc<AtomicUsize>,
scan_current_dir: Arc<Mutex<String>>,
scan_bytes: Arc<AtomicU64>,
scan_eta: Arc<Mutex<String>>,
current_file: Arc<Mutex<Option<String>>>,
current_step: Arc<Mutex<String>>,
start_instant: Instant,
@@ -68,29 +74,95 @@ pub async fn run(
set_step("🔍 阶段 1/3正在扫描全盘候选文件……".into());
push_log("═══ 阶段 1扫描全盘所有候选文件 ═══".into());
let scan_started = Instant::now();
if let Ok(mut e) = scan_eta.lock() { e.clear(); }
scan_bytes.store(0, Ordering::Relaxed);
// 启动时拿一次磁盘总容量(用 sysinfo 枚举所有盘),用于 ETA 估算
let total_disk_bytes: u64 = {
use sysinfo::Disks;
Disks::new_with_refreshed_list()
.iter()
.map(|d| d.total_space())
.sum()
};
push_log(format!(" 磁盘总容量:{} GB用于 ETA 估算)", total_disk_bytes / 1024 / 1024 / 1024));
// 续扫进度:启动时加载;若用户关闭了续扫开关则先清空旧进度
let mut scan_progress = ScanProgress::load();
if !cfg.scan.enable_resume {
if !scan_progress.is_empty() {
scan_progress.clear();
push_log("⚠ 已关闭续扫,已清空旧续扫进度".into());
}
} else if !scan_progress.is_empty() {
push_log(format!("✔ 续扫模式:已加载 {} 条历史进度,将跳过这些文件", scan_progress.len()));
}
// 进度回调walker 每个目录+每个文件都会调
let scan_scanned_cb = Arc::clone(&scan_scanned);
let scan_found_cb = Arc::clone(&scan_found);
let scan_dir_cb = Arc::clone(&scan_current_dir);
let scan_bytes_cb = Arc::clone(&scan_bytes);
let scan_eta_cb = Arc::clone(&scan_eta);
let cur_file_cb = Arc::clone(&current_file);
let mut on_progress = |_scanned: usize, found: usize, dir: &std::path::Path, file: Option<&std::path::Path>| {
scan_scanned_cb.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
// 上一次 ETA 计算时的文件计数;保证只在里程碑触发
let last_eta_at: Arc<AtomicUsize> = Arc::new(AtomicUsize::new(0));
let cancel_cb = Arc::clone(&cancel);
let mut on_progress = |scanned: usize, found: usize, dir: &std::path::Path, file: Option<&std::path::Path>, bytes: u64| {
// 直接 store walker 传过来的真实值(避免双计)
scan_scanned_cb.store(scanned, std::sync::atomic::Ordering::Relaxed);
scan_found_cb.store(found, std::sync::atomic::Ordering::Relaxed);
if bytes > 0 {
scan_bytes_cb.fetch_add(bytes, std::sync::atomic::Ordering::Relaxed);
}
if let Ok(mut g) = scan_dir_cb.lock() { *g = dir.display().to_string(); }
if let Some(f) = file {
if let Ok(mut g) = cur_file_cb.lock() { *g = Some(f.display().to_string()); }
}
// 里程碑 ETA 计算
if !cancel_cb.load(Ordering::Relaxed) {
let last = last_eta_at.load(Ordering::Relaxed);
if scanned > last {
// 取下一个目标里程碑
let next = ETA_MILESTONES.iter().find(|&&m| m > last && m <= scanned).copied();
if let Some(_hit) = next {
last_eta_at.store(scanned, Ordering::Relaxed);
let elapsed = scan_started.elapsed().as_secs_f64().max(0.001);
let total_bytes = scan_bytes_cb.load(Ordering::Relaxed) as f64;
if total_bytes > 0.0 && total_disk_bytes > 0 {
let rate = total_bytes / elapsed; // bytes / sec
let remaining = (total_disk_bytes as f64 - total_bytes).max(0.0);
let eta_sec = if rate > 0.0 { (remaining / rate) as u64 } else { 0 };
let s = format_eta(eta_sec);
if let Ok(mut e) = scan_eta_cb.lock() { *e = s.clone(); }
push_log(format!(" ⏱ 已扫 {} 个文件(约 {} MB速率 {:.1} MB/s预估剩余{}",
scanned,
(total_bytes / 1024.0 / 1024.0) as u64,
rate / 1024.0 / 1024.0,
s,
));
}
}
}
}
};
let candidates = walker::walk(&cfg.scan, &cancel, |s| push_log(s.to_string()), &mut on_progress);
let candidates = walker::walk(&cfg.scan, &cancel, |s| push_log(s.to_string()), &mut on_progress, &mut scan_progress);
// 扫完落盘最后一批
let _ = scan_progress.flush();
let scan_ms = scan_started.elapsed().as_millis();
let candidates_count = candidates.len();
let skipped = scan_progress.skipped.load(Ordering::Relaxed);
push_log(format!(
"✔ 扫描完成:访问 {} 个文件,命中候选 {} 份(用时 {} ms",
"✔ 扫描完成:访问 {} 个文件,命中候选 {},续扫跳过 {}(用时 {} ms",
scan_scanned.load(Ordering::Relaxed),
candidates_count,
skipped,
scan_ms
));
// 扫完清空 ETA
if let Ok(mut e) = scan_eta.lock() { e.clear(); }
if candidates_count == 0 {
push_log("⚠ 没有可抽检的文件,请检查扫描范围/白名单".into());
@@ -238,6 +310,17 @@ fn hostname() -> String {
sysinfo::System::host_name().unwrap_or_else(|| "unknown".into())
}
/// Formats a duration given in whole seconds as a short human-readable string.
///
/// Every number carries its unit so the result cannot be misread (without labels,
/// 1 min 30 s would render as the bare digits `130`):
/// - under one minute: `约 S 秒`
/// - under one hour: `约 M 分 S 秒`
/// - otherwise: `约 H 小时 M 分` (seconds are dropped)
fn format_eta(sec: u64) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    if sec < MINUTE {
        format!("约 {} 秒", sec)
    } else if sec < HOUR {
        format!("约 {} 分 {} 秒", sec / MINUTE, sec % MINUTE)
    } else {
        format!("约 {} 小时 {} 分", sec / HOUR, (sec % HOUR) / MINUTE)
    }
}
// No-op placeholder that takes a `PathBuf` and does nothing with it.
// Original note: kept "for Result/Err references, to avoid a warning" — presumably it
// keeps the `PathBuf` import in use; confirm before removing.
#[allow(dead_code)]
fn _pathbuf_marker(_p: PathBuf) {}

View File

@@ -6,20 +6,24 @@ use walkdir::WalkDir;
use crate::config::ScanSettings;
use crate::scan::filter;
use crate::scan::progress_store::ScanProgress;
/// 遍历过程中每次回调:
/// - scanned_so_far: 已扫描的文件总数(仅候选类型)
/// - found_so_far: 当前累计已收纳的候选数
/// - current_dir: 当前正在扫描的目录路径
/// - current_file: 当前正在评估的文件路径(若是目录则为 None
pub type ProgressFn<'a> = &'a mut dyn FnMut(usize, usize, &Path, Option<&Path>);
/// - bytes: 当前文件的字节数(目录回调时为 0
pub type ProgressFn<'a> = &'a mut dyn FnMut(usize, usize, &Path, Option<&Path>, u64);
/// 遍历全盘,输出满足条件的文件路径
/// - `scan_progress`: set of already-scanned files; a file found in it is skipped, and files that pass the filter checks are recorded into it
pub fn walk(
s: &ScanSettings,
cancel: &AtomicBool,
log: impl Fn(&str),
progress: ProgressFn,
progress_fn: ProgressFn,
scan_progress: &mut ScanProgress,
) -> Vec<PathBuf> {
let mut out = Vec::new();
let scanned = AtomicUsize::new(0); // 已访问的文件数(仅作信息)
@@ -55,11 +59,12 @@ pub fn walk(
if let Ok(mut last) = last_dir_logged.lock() {
*last = p.display().to_string();
}
progress(
progress_fn(
scanned.load(Ordering::Relaxed),
found.load(Ordering::Relaxed),
p,
None,
0,
);
}
}
@@ -67,13 +72,20 @@ pub fn walk(
}
scanned.fetch_add(1, Ordering::Relaxed);
// 先把当前文件(及所在目录)回调给 UI便于实时显示
// 续扫:若该文件上次已访问过,跳过整个评估过程
if scan_progress.contains(p) {
continue;
}
// 先把当前文件(及所在目录)回调给 UI便于实时显示顺带取大小
let bytes = std::fs::metadata(p).map(|m| m.len()).unwrap_or(0);
let parent = p.parent().unwrap_or(p);
progress(
progress_fn(
scanned.load(Ordering::Relaxed),
found.load(Ordering::Relaxed),
parent,
Some(p),
bytes,
);
if filter::is_whitelisted(p, &s.whitelist) { continue; }
@@ -81,10 +93,10 @@ pub fn walk(
if !s.include_system && filter::is_system(p) { continue; }
if !filter::extension_allowed(p, &s.extensions) { continue; }
if s.min_size_kb > 0 {
if let Ok(m) = std::fs::metadata(p) {
if m.len() < s.min_size_kb * 1024 { continue; }
}
if bytes < s.min_size_kb * 1024 { continue; }
}
// 评估过的文件都进入续扫集合(无论是否命中候选)
scan_progress.record(p.to_path_buf());
found.fetch_add(1, Ordering::Relaxed);
out.push(p.to_path_buf());
}

View File

@@ -106,6 +106,18 @@ pub fn draw(ui: &mut egui::Ui, app: &mut App) {
}
}
}
// 预估剩余时间(达到里程碑后才有)
if is_scanning {
if let Ok(eta) = app.scan_eta.lock() {
if !eta.is_empty() {
ui.add_space(2.0);
ui.horizontal(|ui| {
ui.label("⏱ 预估剩余:");
ui.label(egui::RichText::new(&*eta).strong().color(material::LIME));
});
}
}
}
});
ui.add_space(6.0);
@@ -264,6 +276,8 @@ fn start_inspection(app: &mut App) {
let scan_scanned = Arc::clone(&app.scan_scanned);
let scan_found = Arc::clone(&app.scan_found);
let scan_dir = Arc::clone(&app.scan_current_dir);
let scan_bytes = Arc::clone(&app.scan_bytes);
let scan_eta = Arc::clone(&app.scan_eta);
let cur_file = Arc::clone(&app.current_file);
let cur_step = Arc::clone(&app.current_step);
let elapsed_ms = Arc::clone(&app.elapsed_ms);
@@ -295,6 +309,8 @@ fn start_inspection(app: &mut App) {
scan_scanned,
scan_found,
scan_dir,
scan_bytes,
scan_eta,
cur_file,
cur_step,
start_instant,

View File

@@ -126,6 +126,14 @@ fn group_b_scan(ui: &mut egui::Ui, s: &mut crate::config::ScanSettings) {
if ui.text_edit_singleline(&mut ext_str).changed() {
s.extensions = ext_str.split(',').map(|x| x.trim().to_string()).filter(|x| !x.is_empty()).collect();
}
ui.checkbox(&mut s.enable_resume, "启用续扫(跳过上次已扫过的文件)");
ui.horizontal(|ui| {
let pf = crate::scan::progress_store::progress_file();
if ui.button("🗑 清空续扫进度").clicked() {
let _ = std::fs::remove_file(&pf);
}
ui.label(format!("进度文件:{}", pf.display()));
});
ui.horizontal(|ui| {
ui.label("单次扫描超时分钟0=无限):");
ui.add(egui::DragValue::new(&mut s.scan_timeout_minutes).clamp_range(0..=24 * 60));
@@ -227,7 +235,7 @@ fn group_d_viewer(ui: &mut egui::Ui, v: &mut crate::config::ViewerSettings) {
ui.label(egui::RichText::new("Umi-OCR").strong());
ui.horizontal(|ui| { ui.label("HTTP 地址:"); ui.text_edit_singleline(&mut v.umi_ocr_url); });
widgets::file_picker(ui, "Umi-OCR.exe 路径(默认 exe 同目录)", &mut v.umi_ocr_exe, &["exe"]);
ui.horizontal(|ui| { ui.label("启动后等待秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_startup_wait_sec).clamp_range(0..=60)); });
ui.horizontal(|ui| { ui.label("启动后等待秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_startup_wait_sec).clamp_range(0..=600)); });
ui.horizontal(|ui| { ui.label("调用超时秒:"); ui.add(egui::DragValue::new(&mut v.umi_ocr_call_timeout_sec).clamp_range(5..=600)); });
ui.horizontal(|ui| { ui.label("OCR 语言:"); ui.text_edit_singleline(&mut v.ocr_language); });
ui.checkbox(&mut v.ocr_cls, "启用文本方向校正");