fix(inspect): 修复 OCR 启动+窗口查找 bug,新增'再次抽样'工作流

- find_hwnd_by_pid 修复:原实现在每个窗口都覆盖 thread-local,导致几乎永远匹配不到目标 PID;改为按目标 PID 精确匹配,匹配成功立即返回 0 停止枚举
- Umi-OCR 启动从裸 CreateProcessW 改为 std::process::Command,并加 CREATE_NO_WINDOW 标志
- 新增 is_alive/health_url 辅助函数
- doc_inspector 在打开 doc 文件前先确保 OCR 正在运行(即便上次被关掉也会自动重启);日志分步骤 ①②③④ 标记打开→截图→OCR→关闭
- 候选文件列表持久化到 candidates.txt;阶段 1 完成后自动保存
- runner::run 新增 prescanned_candidates 参数:若提供则跳过全盘扫描,直接进入抽样/抽检
- 扫描控制区新增'📋 再次抽样'按钮,仅当候选文件列表存在时启用
This commit is contained in:
2026-06-10 17:42:48 +08:00
parent b634d3464e
commit ae2b5192c4
6 changed files with 280 additions and 144 deletions

View File

@@ -29,15 +29,25 @@ impl Inspector for DocInspector {
log: &'a (dyn Fn(&str) + Send + Sync),
) -> Pin<Box<dyn Future<Output = anyhow::Result<Finding>> + Send + 'a>> {
Box::pin(async move {
// 在打开 doc 文件前,先确保 OCR 软件可用:先健康检查,失败则启动 Umi-OCR.exe
// 这样无论上次启动后是否被关掉,本次都会重新拉起
if !umi_ocr::is_alive(&cfg.viewer) {
log(" ⚠ Umi-OCR 未运行,尝试启动……");
if let Err(e) = umi_ocr::ensure_started(&cfg.viewer) {
log(&format!(" ⚠ 启动 Umi-OCR 失败:{}", e));
}
}
let viewer = cfg.viewer.doc_viewer.clone()
.or_else(paths::detect_doclite)
.ok_or_else(|| anyhow::anyhow!("未找到 doclite.exe请在设置中指定或把它放到 exe 同目录"))?;
let args = cfg.viewer.doc_args.replace("{path}", &path.display().to_string());
log(&format!(" 启动 DOC 查看器:{} {}", viewer.display(), args));
log(&format!(" 启动 DOC 查看器:{} {}", viewer.display(), args));
let child = external::spawn(&viewer, &args)?;
let wait_ms = cfg.viewer.pre_capture_wait_ms;
tokio::time::sleep(std::time::Duration::from_millis(wait_ms)).await;
log(" ② 抓取窗口截图……");
let png = match cfg.viewer.screenshot_mode {
crate::config::ScreenshotMode::Manual => {
screenshot::capture_manual(
@@ -62,7 +72,7 @@ impl Inspector for DocInspector {
}
};
// OCR
log(" ③ 调用 Umi-OCR 识别截图……");
let ocr = umi_ocr::UmiOcrClient::new(&cfg.viewer.umi_ocr_url, std::time::Duration::from_secs(cfg.viewer.umi_ocr_call_timeout_sec));
let resp = ocr.recognize_png(&png, &cfg.viewer.ocr_language, cfg.viewer.ocr_cls, cfg.viewer.ocr_limit_side_len).await?;
let raw_text: String = resp.data.iter().map(|d| d.text.clone()).collect::<Vec<_>>().join("\n");
@@ -78,9 +88,12 @@ impl Inspector for DocInspector {
let m = Matcher::new(kws, &cfg.keyword);
let hits = m.find(&raw_text);
// 关闭查看器
// 关闭查看器OCR 完成后)
if cfg.viewer.auto_close_after {
log(" ④ 关闭 DOC 查看器……");
let _ = external::close(&child, &cfg.viewer);
} else {
log(" ④ 保留 DOC 查看器auto_close_after=false");
}
Ok(make_hit(path, "doc", hits, raw_text, Some(shot_path)))

View File

@@ -12,17 +12,21 @@ use windows_sys::Win32::UI::WindowsAndMessaging::{
use std::cell::RefCell;
// Thread-local scratch slots used by `find_hwnd_by_pid` and `enum_callback` to
// exchange data across the `EnumWindows` call.
// NOTE(review): this diff view lists the earlier `ENUM_RESULT` slot (and its
// `EnumResult` struct) alongside the two slots that appear to replace it.
thread_local! {
// Earlier slot: the last window handle / process id pair written by the callback.
static ENUM_RESULT: RefCell<EnumResult> = RefCell::new(EnumResult { hwnd: std::ptr::null_mut(), pid: 0 });
/// PID currently being searched for.
static ENUM_TARGET_PID: RefCell<u32> = RefCell::new(0);
/// First matching HWND; enumeration stops once it has been found.
static ENUM_RESULT_HWND: RefCell<HWND> = RefCell::new(std::ptr::null_mut());
}
// Window handle together with the id of the process that owns it (payload of `ENUM_RESULT`).
struct EnumResult { hwnd: HWND, pid: u32 }
/// Callback handed to `EnumWindows`; it receives one window handle per invocation.
///
/// Returns 0 to stop the enumeration and 1 to let it continue.
///
/// NOTE(review): this span interleaves the earlier body (unconditional write to
/// `ENUM_RESULT` followed by `1`) with the replacement body that comes after it.
unsafe extern "system" fn enum_callback(hwnd: HWND, _lparam: isize) -> i32 {
let mut proc_id: u32 = 0;
// Ask the OS which process owns this window; the id is written into `proc_id`.
GetWindowThreadProcessId(hwnd, &mut proc_id);
// Earlier body: overwrite the slot for every window, whatever its owning PID.
ENUM_RESULT.with(|r| {
*r.borrow_mut() = EnumResult { hwnd, pid: proc_id };
});
1 // continue
// Replacement body: compare against the PID stored in `ENUM_TARGET_PID`.
let target = ENUM_TARGET_PID.with(|r| *r.borrow());
if proc_id == target && !hwnd.is_null() {
ENUM_RESULT_HWND.with(|r| *r.borrow_mut() = hwnd);
return 0; // matching window found: stop enumerating immediately
}
1 // keep enumerating
}
/// 启动的子进程
@@ -44,14 +48,17 @@ pub fn spawn(exe: &Path, args: &str) -> anyhow::Result<Child> {
Ok(Child { pid: child.id(), _handle: Some(child) })
}
/// Looks up a window handle for the process with the given PID.
///
/// Fix note: the earlier implementation overwrote the thread-local for every
/// enumerated window, so the value read back was almost never the target PID's.
///
/// Returns `Some(hwnd)` when the enumeration callback recorded a non-null
/// handle, otherwise `None`.
///
/// NOTE(review): the callback accepts the first enumerated window owned by the
/// PID; nothing here checks visibility or ownership, so treating the result as
/// the "main" window is an assumption to confirm.
/// NOTE(review): this span interleaves the earlier read-back (`ENUM_RESULT`)
/// with the replacement read-back (`ENUM_RESULT_HWND`).
pub fn find_hwnd_by_pid(pid: u32) -> Option<HWND> {
// Reset the result slot and publish the target PID for the callback.
ENUM_RESULT_HWND.with(|r| *r.borrow_mut() = std::ptr::null_mut());
ENUM_TARGET_PID.with(|r| *r.borrow_mut() = pid);
unsafe {
// The return value is ignored; the outcome is read from the thread-local below.
let _ = EnumWindows(Some(enum_callback), 0);
}
// Earlier read-back: only valid if the last enumerated window belonged to `pid`.
let r = ENUM_RESULT.with(|r| r.borrow().hwnd);
let p = ENUM_RESULT.with(|r| r.borrow().pid);
if p == pid && !r.is_null() { Some(r) } else { None }
// Replacement read-back: the slot is non-null only when the callback matched `pid`.
let r = ENUM_RESULT_HWND.with(|r| *r.borrow());
if !r.is_null() { Some(r) } else { None }
}
/// 优雅关闭PostMessage WM_CLOSE 给主窗口 → 等待 → taskkill
@@ -61,6 +68,8 @@ pub fn close(child: &Child, cfg: &crate::config::ViewerSettings) -> anyhow::Resu
PostMessageW(hwnd, WM_CLOSE, 0, 0);
}
std::thread::sleep(std::time::Duration::from_millis(cfg.close_wait_ms));
} else {
tracing::warn!("未找到 PID={} 对应的主窗口,关闭将走强杀流程", child.pid);
}
if is_running(child.pid) {
kill(child.pid);

View File

@@ -1,11 +1,9 @@
// Umi-OCR HTTP 客户端
use std::os::windows::ffi::OsStrExt;
use std::path::PathBuf;
use std::time::Duration;
use base64::Engine;
use serde::{Deserialize, Serialize};
use windows_sys::Win32::System::Threading::{CreateProcessW, PROCESS_INFORMATION, STARTUPINFOW};
use crate::config::ViewerSettings;
use crate::utils::paths;
@@ -93,49 +91,54 @@ impl UmiOcrClient {
}
}
/// Builds the health-check address by substituting `/` for `/api/ocr` in the
/// configured Umi-OCR URL, so the probe targets the Umi-OCR home page.
fn health_url(cfg: &ViewerSettings) -> String {
    let api_url = &cfg.umi_ocr_url;
    api_url.replace("/api/ocr", "/")
}
/// Reports whether Umi-OCR is already running.
///
/// Returns `true` only when a GET request to the health URL completes with a
/// success status; a failed request or any other status yields `false`.
///
/// NOTE(review): the probe uses `reqwest::blocking`, which reqwest documents
/// as unsuitable for use inside an async runtime — confirm that async callers
/// move this call onto a blocking thread.
pub fn is_alive(cfg: &ViewerSettings) -> bool {
    match reqwest::blocking::get(health_url(cfg)) {
        Ok(resp) => resp.status().is_success(),
        Err(_) => false,
    }
}
/// Starts Umi-OCR.exe when it is not already running.
///
/// Returns `Ok(())` as soon as a health check succeeds, either immediately or
/// while polling after the executable has been launched.
///
/// # Errors
/// Fails when no executable is configured or detected, when launching it
/// fails, or when no health check succeeds within the startup wait window.
///
/// NOTE(review): this span interleaves the earlier inline health checks and
/// messages with their replacements (`is_alive` calls and the longer messages).
pub fn ensure_started(cfg: &ViewerSettings) -> anyhow::Result<()> {
// Nothing to do when the service already answers.
if let Ok(r) = reqwest::blocking::get(cfg.umi_ocr_url.replace("/api/ocr", "/")) {
if r.status().is_success() { return Ok(()); }
if is_alive(cfg) {
return Ok(());
}
// Prefer the configured executable, falling back to auto-detection.
let exe = cfg.umi_ocr_exe.clone()
.or_else(paths::detect_umi_ocr)
.ok_or_else(|| anyhow::anyhow!("未找到 Umi-OCR.exe"))?;
.ok_or_else(|| anyhow::anyhow!("未找到 Umi-OCR.exe;请在设置中指定或把它放到 exe 同目录"))?;
spawn(&exe)?;
let start = std::time::Instant::now();
// Wait at least one second, even when the configured value is zero.
let wait = std::time::Duration::from_secs(cfg.umi_ocr_startup_wait_sec.max(1));
// Poll every 500 ms until the service answers or the wait window elapses.
while start.elapsed() < wait {
std::thread::sleep(std::time::Duration::from_millis(500));
if let Ok(r) = reqwest::blocking::get(cfg.umi_ocr_url.replace("/api/ocr", "/")) {
if r.status().is_success() { return Ok(()); }
if is_alive(cfg) { return Ok(()); }
}
}
// No successful health check within the wait window.
Err(anyhow::anyhow!("Umi-OCR 健康检查超时"))
Err(anyhow::anyhow!("Umi-OCR 健康检查超时(等 {} 秒)", wait.as_secs()))
}
fn spawn(exe: &PathBuf) -> anyhow::Result<()> {
unsafe {
let mut cmd: Vec<u16> = exe.as_os_str().encode_wide().chain(std::iter::once(0)).collect();
let mut si: STARTUPINFOW = std::mem::zeroed();
si.cb = std::mem::size_of::<STARTUPINFOW>() as u32;
let mut pi: PROCESS_INFORMATION = std::mem::zeroed();
let ok = CreateProcessW(
cmd.as_mut_ptr(),
std::ptr::null_mut(),
std::ptr::null(),
std::ptr::null(),
0,
0,
std::ptr::null(),
std::ptr::null(),
&si,
&mut pi,
);
if ok == 0 {
return Err(anyhow::anyhow!("CreateProcessW 失败"));
}
windows_sys::Win32::Foundation::CloseHandle(pi.hProcess);
windows_sys::Win32::Foundation::CloseHandle(pi.hThread);
// 修复:原来用裸 CreateProcessW 且 flags=0会沿用父进程控制台/句柄,
// Umi-OCR 是 GUI 程序,可能被父进程退出影响。
// 改用 std::process::Command更可靠自动处理 stdio 继承与子进程脱离)。
tracing::info!("启动 Umi-OCR{}", exe.display());
let mut cmd = std::process::Command::new(exe);
#[cfg(windows)]
{
use std::os::windows::process::CommandExt;
// CREATE_NO_WINDOW不创建控制台窗口
cmd.creation_flags(0x00000008);
}
let _child = cmd
.stdin(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.spawn()
.map_err(|e| anyhow::anyhow!("启动 Umi-OCR 失败:{}exe={:?}", e, exe))?;
Ok(())
}

View File

@@ -92,3 +92,47 @@ impl Drop for ScanProgress {
let _ = self.flush();
}
}
// ====== 候选文件持久化(用于"再次抽样":跳过全盘扫描) ======
/// Path of the persisted candidate list: `candidates.txt` inside the
/// application config directory (`%APPDATA%\secret-file-selfcheck`).
/// The file holds one absolute candidate path per line.
pub fn candidates_file() -> PathBuf {
    let config_dir = paths::app_config_dir();
    config_dir.join("candidates.txt")
}
/// Writes the candidate list to the file returned by `candidates_file`,
/// one path per line.
///
/// The parent directory is created when missing and any existing file is
/// replaced. Each path is written through `to_string_lossy`, followed by `\n`.
///
/// # Errors
/// Returns the first I/O error hit while creating the directory, creating the
/// file, writing an entry, or flushing.
pub fn save_candidates(items: &[PathBuf]) -> std::io::Result<()> {
    use std::io::Write;

    let file = candidates_file();
    if let Some(parent) = file.parent() {
        std::fs::create_dir_all(parent)?;
    }
    // Buffer the output: writing straight to the `File` costs two write calls
    // per candidate, which adds up for large candidate lists.
    let mut out = std::io::BufWriter::new(std::fs::File::create(&file)?);
    for p in items {
        out.write_all(p.to_string_lossy().as_bytes())?;
        out.write_all(b"\n")?;
    }
    // Flush explicitly so a failure on the final write is reported to the
    // caller instead of being swallowed when the writer is dropped.
    out.flush()
}
/// Reads the persisted candidate list back from disk.
///
/// Every line is trimmed and blank lines are skipped. Reading stops at the
/// first line that cannot be read. When the file cannot be opened the result
/// is an empty list.
pub fn load_candidates() -> Vec<PathBuf> {
    match std::fs::File::open(candidates_file()) {
        Ok(handle) => BufReader::new(handle)
            .lines()
            .map_while(Result::ok)
            .filter_map(|raw| {
                let entry = raw.trim();
                (!entry.is_empty()).then(|| PathBuf::from(entry))
            })
            .collect(),
        Err(_) => Vec::new(),
    }
}
/// Deletes the persisted candidate list; a failure to remove the file is ignored.
pub fn clear_candidates() {
    let target = candidates_file();
    let _ = std::fs::remove_file(target);
}

View File

@@ -17,9 +17,14 @@ use crate::scan::walker;
const ETA_MILESTONES: &[usize] = &[1000, 5000, 10_000, 50_000, 100_000, 500_000, 1_000_000];
/// 调度入口
///
/// `prescanned_candidates`
/// - `None` 表示走完整流程(先全盘扫描)
/// - `Some(v)` 表示跳过全盘扫描,直接用提供的候选列表进入抽样/抽检
#[allow(clippy::too_many_arguments)]
pub async fn run(
cfg: AppConfig,
prescanned_candidates: Option<Vec<PathBuf>>,
progress: Arc<AtomicUsize>,
total: Arc<AtomicUsize>,
hit_count: Arc<AtomicUsize>,
@@ -69,6 +74,17 @@ pub async fn run(
push_log(format!("⚠ Umi-OCR 启动失败(可继续但 OCR 会失败):{}", e));
}
// 候选文件:若提供了预扫描的候选列表,则直接使用,跳过全盘扫描
let candidates: Vec<PathBuf> = if let Some(v) = prescanned_candidates {
set_state(RunState::Sampling);
push_log("═══ 阶段 1跳过使用上次保存的候选文件═══".into());
push_log(format!(" 加载候选文件:{}", v.len()));
// 阶段 1 跳过时,扫描统计项保持空即可
scan_scanned.store(0, Ordering::Relaxed);
scan_found.store(v.len(), Ordering::Relaxed);
if let Ok(mut d) = scan_current_dir.lock() { d.clear(); }
v
} else {
// —— 阶段 1扫描全盘所有候选文件 ——
set_state(RunState::Scanning);
set_step("🔍 阶段 1/3正在扫描全盘候选文件……".into());
@@ -173,12 +189,23 @@ pub async fn run(
return Ok(());
}
candidates
};
let candidates_count = candidates.len();
if candidates_count == 0 {
push_log("⚠ 没有可抽检的文件,请检查扫描范围/白名单".into());
set_state(RunState::Done);
return Ok(());
}
// 持久化候选文件列表(供"再次抽样"使用,跳过全盘扫描)
if let Err(e) = crate::scan::progress_store::save_candidates(&candidates) {
push_log(format!("⚠ 保存候选文件列表失败:{}", e));
} else {
push_log(format!("✔ 已保存候选文件列表(共 {} 份)→ {}", candidates_count, crate::scan::progress_store::candidates_file().display()));
}
// —— 阶段 2按配置抽样 ——
set_state(RunState::Sampling);
let mode_str = match cfg.inspect.sample_mode {

View File

@@ -246,7 +246,7 @@ fn classify_log(line: &str) -> (egui::Color32, &'static str) {
}
}
/// 扫描控制区:开始扫描 / 继续扫描 / 取消
/// 扫描控制区:开始扫描 / 继续扫描 / 再次抽样 / 取消
fn draw_scan_controls(ui: &mut egui::Ui, app: &mut App) {
material::group(ui, "扫描控制", |ui| {
// 状态判断
@@ -260,6 +260,9 @@ fn draw_scan_controls(ui: &mut egui::Ui, app: &mut App) {
// 续扫进度文件是否存在
let pf = crate::scan::progress_store::progress_file();
let has_progress = pf.exists();
// 候选文件列表是否存在
let cf = crate::scan::progress_store::candidates_file();
let has_candidates = cf.exists();
ui.horizontal(|ui| {
// ▶ 开始扫描:全新扫描(先清空旧续扫进度)
@@ -268,7 +271,7 @@ fn draw_scan_controls(ui: &mut egui::Ui, app: &mut App) {
.on_hover_text("从头开始扫描:先清空旧续扫进度,再启动新的扫描流程")
.clicked()
{
start_inspection(app, true);
start_inspection(app, true, None);
}
// ⏩ 继续扫描:使用已有续扫进度跳过已扫过的文件
@@ -277,7 +280,21 @@ fn draw_scan_controls(ui: &mut egui::Ui, app: &mut App) {
.on_hover_text("使用已有的续扫进度,跳过上次已扫过的文件")
.clicked()
{
start_inspection(app, false);
start_inspection(app, false, None);
}
// 📋 再次抽样:跳过全盘扫描,直接用上次保存的候选文件重新抽样+抽检
if ui
.add_enabled(can_start && has_candidates, material::primary_button("📋 再次抽样"))
.on_hover_text("跳过全盘扫描,直接使用上次保存的候选文件重新抽样+抽检(会保留续扫进度)")
.clicked()
{
let candidates = crate::scan::progress_store::load_candidates();
if candidates.is_empty() {
app.task_log("⚠ 候选文件列表为空,无法再次抽样");
} else {
start_inspection(app, false, Some(candidates));
}
}
// ⏸ 取消:中断当前正在运行的扫描/抽检
@@ -325,6 +342,21 @@ fn draw_scan_controls(ui: &mut egui::Ui, app: &mut App) {
);
}
// 候选文件列表提示
if has_candidates {
ui.add_space(2.0);
ui.horizontal(|ui| {
ui.label(
egui::RichText::new("📋 已保存候选文件列表 → 可点'再次抽样'")
.color(material::SUCCESS)
.size(12.0),
);
if ui.small_button("🗑 清空候选").clicked() {
crate::scan::progress_store::clear_candidates();
}
});
}
// 当前正在扫描/处理的文件(统一显示入口)
if running {
if let Ok(cur) = app.current_file.lock() {
@@ -353,7 +385,8 @@ fn draw_scan_controls(ui: &mut egui::Ui, app: &mut App) {
/// 启动后台抽检任务
///
/// - `clear_progress`true 表示全新扫描先清空续扫进度文件false 表示续扫(保留续扫进度)
fn start_inspection(app: &mut App, clear_progress: bool) {
/// - `prescanned`:若 Some跳过全盘扫描直接用提供的候选列表进入抽样/抽检
fn start_inspection(app: &mut App, clear_progress: bool, prescanned: Option<Vec<std::path::PathBuf>>) {
// 全新扫描:先删掉续扫进度文件,确保不会跳过任何文件
if clear_progress {
let pf = crate::scan::progress_store::progress_file();
@@ -403,9 +436,15 @@ fn start_inspection(app: &mut App, clear_progress: bool) {
let state_slot_for_task = Arc::clone(&state_slot);
let start_instant = Instant::now();
let cfg = app.config.clone();
let log_msg = if prescanned.is_some() {
"再次抽样(使用已有候选文件)……"
} else if clear_progress {
"开始抽检(全新扫描)……"
} else {
"开始抽检(续扫)……"
};
app.task_state = Some(state_slot.clone());
app.task_log("开始抽检……");
app.task_log(log_msg);
std::thread::spawn(move || {
let rt = tokio::runtime::Builder::new_multi_thread()
@@ -415,6 +454,7 @@ fn start_inspection(app: &mut App, clear_progress: bool) {
let outcome = rt.block_on(async move {
crate::scan::runner::run(
cfg,
prescanned,
progress,
total,
hit_count,