feat(bins): multi-library populate_knowledge + progress UX
populate_knowledge now loads real libraries from the DB instead of fabricating a single library_id=1 row from BASE_PATH. Adds --library <id|name> to restrict the walk and validates --path against the selected library roots. The full library set is still passed to InsightGenerator so resolve_full_path can probe every root when an insight resolves to a different library than the one being walked.

Adds indicatif progress bars across the long-running utility binaries via a shared src/bin_progress.rs helper (determinate bar + open-ended spinner with consistent styling). Per-batch info! noise is replaced by the bar's throughput/ETA; warnings and errors route through pb.println so they scroll above the bar instead of fighting with it.

- populate_knowledge: spinner during scan, determinate bar over all libs
- backfill_hashes: spinner with running hashed/missing/errors counts
- import_calendar: determinate bar; embedding/store failures inline
- import_location_*: determinate bar advancing by chunk size
- import_search_*: determinate bar; pb cloned into the spawn task
- cleanup_files P1: determinate bar over DB paths
- cleanup_files P2: determinate bar; pb.suspend() around y/n/a/s prompt

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,8 +10,10 @@ use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use clap::Parser;
|
||||
use log::{error, warn};
|
||||
use rayon::prelude::*;
|
||||
|
||||
use image_api::bin_progress;
|
||||
use image_api::content_hash;
|
||||
use image_api::database::{ExifDao, SqliteExifDao, connect};
|
||||
use image_api::libraries::{self, Library};
|
||||
@@ -76,6 +78,8 @@ fn main() -> anyhow::Result<()> {
|
||||
let mut total_errors = 0u64;
|
||||
let start = Instant::now();
|
||||
|
||||
let pb = bin_progress::spinner("hashing");
|
||||
|
||||
loop {
|
||||
let rows = {
|
||||
let mut guard = dao.lock().expect("Unable to lock ExifDao");
|
||||
@@ -86,7 +90,11 @@ fn main() -> anyhow::Result<()> {
|
||||
if rows.is_empty() {
|
||||
break;
|
||||
}
|
||||
println!("Processing batch of {} rows", rows.len());
|
||||
let batch_size = rows.len();
|
||||
pb.set_message(format!(
|
||||
"batch of {} (hashed={} missing={} errors={})",
|
||||
batch_size, total_hashed, total_missing, total_errors
|
||||
));
|
||||
|
||||
// Compute hashes in parallel (I/O-bound; rayon helps on local disks,
|
||||
// throttled by network on SMB mounts — use --parallelism to tune).
|
||||
@@ -100,13 +108,13 @@ fn main() -> anyhow::Result<()> {
|
||||
Some(abs_path) if abs_path.exists() => match content_hash::compute(&abs_path) {
|
||||
Ok(id) => (library_id, rel_path, Some(id)),
|
||||
Err(e) => {
|
||||
eprintln!("hash error for {}: {:?}", abs_path.display(), e);
|
||||
error!("hash error for {}: {:?}", abs_path.display(), e);
|
||||
(library_id, rel_path, None)
|
||||
}
|
||||
},
|
||||
Some(_) => (library_id, rel_path, None), // file missing on disk
|
||||
None => {
|
||||
eprintln!("Row refers to unknown library_id {}", library_id);
|
||||
warn!("Row refers to unknown library_id {}", library_id);
|
||||
(library_id, rel_path, None)
|
||||
}
|
||||
}
|
||||
@@ -126,9 +134,12 @@ fn main() -> anyhow::Result<()> {
|
||||
&id.content_hash,
|
||||
id.size_bytes,
|
||||
) {
|
||||
Ok(_) => total_hashed += 1,
|
||||
Ok(_) => {
|
||||
total_hashed += 1;
|
||||
pb.inc(1);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("persist error for {}: {:?}", rel_path, e);
|
||||
pb.println(format!("persist error for {}: {:?}", rel_path, e));
|
||||
total_errors += 1;
|
||||
}
|
||||
}
|
||||
@@ -142,34 +153,28 @@ fn main() -> anyhow::Result<()> {
|
||||
for (_, rel_path, ident) in &results {
|
||||
match ident {
|
||||
Some(id) => {
|
||||
println!(
|
||||
pb.println(format!(
|
||||
"[dry-run] {} -> {} ({} bytes)",
|
||||
rel_path, id.content_hash, id.size_bytes
|
||||
);
|
||||
));
|
||||
total_hashed += 1;
|
||||
pb.inc(1);
|
||||
}
|
||||
None => {
|
||||
total_missing += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
println!(
|
||||
pb.println(format!(
|
||||
"[dry-run] processed one batch of {}. Stopping — a real run would continue \
|
||||
until no NULL content_hash rows remain.",
|
||||
results.len()
|
||||
);
|
||||
));
|
||||
break;
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed().as_secs_f64().max(0.001);
|
||||
let rate = total_hashed as f64 / elapsed;
|
||||
println!(
|
||||
" hashed={} missing={} errors={} ({:.1} files/sec)",
|
||||
total_hashed, total_missing, total_errors, rate
|
||||
);
|
||||
}
|
||||
|
||||
println!();
|
||||
pb.finish_and_clear();
|
||||
println!(
|
||||
"Done. hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
|
||||
total_hashed,
|
||||
|
||||
Reference in New Issue
Block a user