feat(bins): multi-library populate_knowledge + progress UX

populate_knowledge now loads real libraries from the DB instead of
fabricating a single library_id=1 row from BASE_PATH. Adds --library
<id|name> to restrict the walk and validates --path against the selected
library roots. The full library set is still passed to InsightGenerator so
resolve_full_path can probe every root when an insight resolves to a
different library than the one being walked.
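
A minimal sketch of the selection shape (illustrative only: the Library
struct, field names, and error strings are assumptions, not the actual
code):

    // Hypothetical sketch of --library <id|name> resolution and --path
    // validation against the selected roots.
    struct Library {
        id: i64,
        name: String,
        root: std::path::PathBuf,
    }

    fn select_library<'a>(libs: &'a [Library], sel: &str) -> anyhow::Result<&'a Library> {
        // A numeric selector is treated as an id; anything else matches by name.
        if let Ok(id) = sel.parse::<i64>() {
            return libs
                .iter()
                .find(|l| l.id == id)
                .ok_or_else(|| anyhow::anyhow!("no library with id {}", id));
        }
        libs.iter()
            .find(|l| l.name == sel)
            .ok_or_else(|| anyhow::anyhow!("no library named '{}'", sel))
    }

    // --path is accepted only if it falls under one of the selected roots.
    fn path_in_roots(path: &std::path::Path, roots: &[&std::path::Path]) -> bool {
        roots.iter().any(|root| path.starts_with(root))
    }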

Adds indicatif progress bars across the long-running utility binaries via
a shared src/bin_progress.rs helper (determinate bar + open-ended spinner
with consistent styling). Per-batch info! noise is replaced by the bar's
throughput/ETA; warnings and errors route through pb.println so they
scroll above the bar instead of fighting with it.
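
For reference, a sketch of roughly what the helper provides (the
template strings and the spinner function name are my guesses; only
`determinate` appears in the diff below; assumes indicatif 0.17):

    // src/bin_progress.rs, sketched.
    use indicatif::{ProgressBar, ProgressStyle};
    use std::time::Duration;

    /// Determinate bar: position/length plus throughput and ETA.
    pub fn determinate(len: u64, msg: &'static str) -> ProgressBar {
        let pb = ProgressBar::new(len);
        pb.set_style(
            ProgressStyle::with_template(
                "{msg:20} [{bar:40}] {pos}/{len} ({per_sec}, eta {eta})",
            )
            .expect("static template is valid"),
        );
        pb.set_message(msg);
        pb
    }

    /// Open-ended spinner for phases with no known total (e.g. the scan).
    pub fn spinner(msg: &'static str) -> ProgressBar {
        let pb = ProgressBar::new_spinner();
        pb.enable_steady_tick(Duration::from_millis(100));
        pb.set_message(msg);
        pb
    }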

  populate_knowledge   spinner during scan, determinate bar over all libs
  backfill_hashes      spinner with running hashed/missing/errors counts
  import_calendar      determinate bar; embedding/store failures inline
  import_location_*    determinate bar advancing by chunk size
  import_search_*      determinate bar; pb cloned into the spawn task
  cleanup_files P1     determinate bar over DB paths
  cleanup_files P2     determinate bar; pb.suspend() around y/n/a/s prompt
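
The cleanup_files prompt is the one non-obvious piece: indicatif's
ProgressBar::suspend clears the bar, runs a closure, then redraws, so
reading the answer doesn't tear the display. Roughly (the prompt wording
and return type here are illustrative):

    // Hypothetical shape of the phase-2 confirmation prompt.
    fn confirm(pb: &indicatif::ProgressBar, path: &str) -> std::io::Result<String> {
        pb.suspend(|| {
            // Bar is hidden while this closure runs, then redrawn.
            println!("delete {}? [y/n/a/s]", path);
            let mut answer = String::new();
            std::io::stdin().read_line(&mut answer)?;
            Ok(answer.trim().to_lowercase())
        })
    }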

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
commit b9d5578653 (parent d5f944c7b6)
Author: Cameron
Date:   2026-04-26 23:55:33 -04:00
11 changed files with 362 additions and 149 deletions


@@ -2,9 +2,10 @@ use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
 use image_api::ai::ollama::OllamaClient;
+use image_api::bin_progress;
 use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
 use image_api::parsers::search_html_parser::parse_search_html;
-use log::{error, info, warn};
+use log::{error, info};
 // Import the trait to use its methods
 use image_api::database::SearchHistoryDao;
@@ -49,24 +50,22 @@ async fn main() -> Result<()> {
     let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
     let context = opentelemetry::Context::current();
-    let mut inserted_count = 0;
-    let mut skipped_count = 0;
-    let mut error_count = 0;
+    let mut inserted_count = 0usize;
+    let mut skipped_count = 0usize;
+    let mut error_count = 0usize;
     let mut dao_instance = SqliteSearchHistoryDao::new();
     let created_at = Utc::now().timestamp();
+    let pb = bin_progress::determinate(searches.len() as u64, "importing");
+    let total_batches = searches.len().div_ceil(args.batch_size);
     // Process searches in batches (embeddings are REQUIRED for searches)
     for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
-        info!(
-            "Processing batch {} ({} searches)...",
-            batch_idx + 1,
-            chunk.len()
-        );
         // Generate embeddings for this batch
         let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();
+        let pb_for_warn = pb.clone();
         let embeddings_result = tokio::task::spawn({
             let ollama_client = ollama.clone();
             async move {
@@ -76,7 +75,7 @@ async fn main() -> Result<()> {
                 match ollama_client.generate_embedding(query).await {
                     Ok(emb) => embeddings.push(Some(emb)),
                     Err(e) => {
-                        warn!("Failed to generate embedding for query '{}': {}", query, e);
+                        pb_for_warn.println(format!("embedding failed for '{}': {}", query, e));
                         embeddings.push(None);
                     }
                 }
@@ -112,10 +111,7 @@ async fn main() -> Result<()> {
                     source_file: Some(args.path.clone()),
                 });
             } else {
-                error!(
-                    "Skipping search '{}' due to missing embedding",
-                    search.query
-                );
+                pb.println(format!("skipping '{}' — missing embedding", search.query));
                 error_count += 1;
             }
         }
@@ -123,30 +119,41 @@ async fn main() -> Result<()> {
         // Batch insert entire chunk in single transaction
         if !batch_inserts.is_empty() {
             match dao_instance.store_searches_batch(&context, batch_inserts) {
-                Ok(count) => {
-                    inserted_count += count;
-                    info!("Imported {} searches (total: {})...", count, inserted_count);
-                }
+                Ok(count) => inserted_count += count,
                 Err(e) => {
-                    error!("Failed to store batch: {:?}", e);
+                    pb.println(format!("batch insert failed: {:?}", e));
                     error_count += chunk.len();
                 }
             }
         }
+        pb.set_message(format!(
+            "inserted={} skipped={} errors={}",
+            inserted_count, skipped_count, error_count
+        ));
+        pb.inc(chunk.len() as u64);
         // Rate limiting between batches
-        if batch_idx < searches.len() / args.batch_size {
-            info!("Waiting 500ms before next batch...");
+        if batch_idx + 1 < total_batches {
             tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
         }
     }
-    info!("\n=== Import Summary ===");
+    pb.finish_and_clear();
+    info!("=== Import Summary ===");
     info!("Total searches found: {}", searches.len());
     info!("Successfully inserted: {}", inserted_count);
     info!("Skipped (already exist): {}", skipped_count);
     info!("Errors: {}", error_count);
     info!("All imported searches have embeddings for semantic search");
+    if error_count > 0 {
+        error!(
+            "Completed with {} errors — review log output above",
+            error_count
+        );
+    }
     Ok(())
 }
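
Aside on the last hunk: the rate-limit guard change is a behavior fix,
not a rename. With the old truncating division, the guard stayed true
on the final batch whenever searches.len() was an exact multiple of
batch_size, adding a pointless trailing 500ms sleep. A standalone check
of the arithmetic (my example values):

    fn main() {
        let (len, batch_size) = (100usize, 10usize);
        let total_batches = len.div_ceil(batch_size); // 10 batches, indices 0..=9
        let last_idx = total_batches - 1;
        // Old guard: still true on the final batch, hence the extra sleep.
        assert!(last_idx < len / batch_size);
        // New guard: false on the final batch, so no trailing sleep.
        assert!(!(last_idx + 1 < total_batches));
    }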