feat(bins): multi-library populate_knowledge + progress UX

populate_knowledge now loads real libraries from the DB instead of
fabricating a single library_id=1 row from BASE_PATH. Adds --library
<id|name> to restrict the walk and validates --path against the selected
library roots. The full library set is still passed to InsightGenerator so
resolve_full_path can probe every root when an insight resolves to a
different library than the one being walked.

Adds indicatif progress bars across the long-running utility binaries via
a shared src/bin_progress.rs helper (determinate bar + open-ended spinner
with consistent styling). Per-batch info! noise is replaced by the bar's
throughput/ETA; warnings and errors route through pb.println so they
scroll above the bar instead of fighting with it.

  populate_knowledge   spinner during scan, determinate bar over all libs
  backfill_hashes      spinner with running hashed/missing/errors counts
  import_calendar      determinate bar; embedding/store failures inline
  import_location_*    determinate bar advancing by chunk size
  import_search_*      determinate bar; pb cloned into the spawn task
  cleanup_files P1     determinate bar over DB paths
  cleanup_files P2     determinate bar; pb.suspend() around y/n/a/s prompt

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-26 23:55:33 -04:00
parent d5f944c7b6
commit b9d5578653
11 changed files with 362 additions and 149 deletions

View File

@@ -10,8 +10,10 @@ use std::sync::{Arc, Mutex};
use std::time::Instant;
use clap::Parser;
use log::{error, warn};
use rayon::prelude::*;
use image_api::bin_progress;
use image_api::content_hash;
use image_api::database::{ExifDao, SqliteExifDao, connect};
use image_api::libraries::{self, Library};
@@ -76,6 +78,8 @@ fn main() -> anyhow::Result<()> {
let mut total_errors = 0u64;
let start = Instant::now();
let pb = bin_progress::spinner("hashing");
loop {
let rows = {
let mut guard = dao.lock().expect("Unable to lock ExifDao");
@@ -86,7 +90,11 @@ fn main() -> anyhow::Result<()> {
if rows.is_empty() {
break;
}
println!("Processing batch of {} rows", rows.len());
let batch_size = rows.len();
pb.set_message(format!(
"batch of {} (hashed={} missing={} errors={})",
batch_size, total_hashed, total_missing, total_errors
));
// Compute hashes in parallel (I/O-bound; rayon helps on local disks,
// throttled by network on SMB mounts — use --parallelism to tune).
@@ -100,13 +108,13 @@ fn main() -> anyhow::Result<()> {
Some(abs_path) if abs_path.exists() => match content_hash::compute(&abs_path) {
Ok(id) => (library_id, rel_path, Some(id)),
Err(e) => {
eprintln!("hash error for {}: {:?}", abs_path.display(), e);
error!("hash error for {}: {:?}", abs_path.display(), e);
(library_id, rel_path, None)
}
},
Some(_) => (library_id, rel_path, None), // file missing on disk
None => {
eprintln!("Row refers to unknown library_id {}", library_id);
warn!("Row refers to unknown library_id {}", library_id);
(library_id, rel_path, None)
}
}
@@ -126,9 +134,12 @@ fn main() -> anyhow::Result<()> {
&id.content_hash,
id.size_bytes,
) {
Ok(_) => total_hashed += 1,
Ok(_) => {
total_hashed += 1;
pb.inc(1);
}
Err(e) => {
eprintln!("persist error for {}: {:?}", rel_path, e);
pb.println(format!("persist error for {}: {:?}", rel_path, e));
total_errors += 1;
}
}
@@ -142,34 +153,28 @@ fn main() -> anyhow::Result<()> {
for (_, rel_path, ident) in &results {
match ident {
Some(id) => {
println!(
pb.println(format!(
"[dry-run] {} -> {} ({} bytes)",
rel_path, id.content_hash, id.size_bytes
);
));
total_hashed += 1;
pb.inc(1);
}
None => {
total_missing += 1;
}
}
}
println!(
pb.println(format!(
"[dry-run] processed one batch of {}. Stopping — a real run would continue \
until no NULL content_hash rows remain.",
results.len()
);
));
break;
}
let elapsed = start.elapsed().as_secs_f64().max(0.001);
let rate = total_hashed as f64 / elapsed;
println!(
" hashed={} missing={} errors={} ({:.1} files/sec)",
total_hashed, total_missing, total_errors, rate
);
}
println!();
pb.finish_and_clear();
println!(
"Done. hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
total_hashed,