feat(bins): multi-library populate_knowledge + progress UX

populate_knowledge now loads real libraries from the DB instead of
fabricating a single library_id=1 row from BASE_PATH. Adds --library
<id|name> to restrict the walk and validates --path against the selected
library roots. The full library set is still passed to InsightGenerator so
resolve_full_path can probe every root when an insight resolves to a
different library than the one being walked.
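
A minimal sketch of the selection shape (illustrative only: the Library
struct, field names, and error strings are assumptions, not the actual
code):

    // Hypothetical sketch of --library <id|name> resolution and --path
    // validation against the selected roots.
    struct Library {
        id: i64,
        name: String,
        root: std::path::PathBuf,
    }

    fn select_library<'a>(libs: &'a [Library], sel: &str) -> anyhow::Result<&'a Library> {
        // A numeric selector is treated as an id; anything else matches by name.
        if let Ok(id) = sel.parse::<i64>() {
            return libs
                .iter()
                .find(|l| l.id == id)
                .ok_or_else(|| anyhow::anyhow!("no library with id {}", id));
        }
        libs.iter()
            .find(|l| l.name == sel)
            .ok_or_else(|| anyhow::anyhow!("no library named '{}'", sel))
    }

    // --path is accepted only if it falls under one of the selected roots.
    fn path_in_roots(path: &std::path::Path, roots: &[&std::path::Path]) -> bool {
        roots.iter().any(|root| path.starts_with(root))
    }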

Adds indicatif progress bars across the long-running utility binaries via
a shared src/bin_progress.rs helper (determinate bar + open-ended spinner
with consistent styling). Per-batch info! noise is replaced by the bar's
throughput/ETA; warnings and errors route through pb.println so they
scroll above the bar instead of fighting with it.
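
For reference, a sketch of roughly what the helper provides (the
template strings and the spinner function name are my guesses; only
`determinate` appears in the diff below; assumes indicatif 0.17):

    // src/bin_progress.rs, sketched.
    use indicatif::{ProgressBar, ProgressStyle};
    use std::time::Duration;

    /// Determinate bar: position/length plus throughput and ETA.
    pub fn determinate(len: u64, msg: &'static str) -> ProgressBar {
        let pb = ProgressBar::new(len);
        pb.set_style(
            ProgressStyle::with_template(
                "{msg:20} [{bar:40}] {pos}/{len} ({per_sec}, eta {eta})",
            )
            .expect("static template is valid"),
        );
        pb.set_message(msg);
        pb
    }

    /// Open-ended spinner for phases with no known total (e.g. the scan).
    pub fn spinner(msg: &'static str) -> ProgressBar {
        let pb = ProgressBar::new_spinner();
        pb.enable_steady_tick(Duration::from_millis(100));
        pb.set_message(msg);
        pb
    }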

  populate_knowledge   spinner during scan, determinate bar over all libs
  backfill_hashes      spinner with running hashed/missing/errors counts
  import_calendar      determinate bar; embedding/store failures inline
  import_location_*    determinate bar advancing by chunk size
  import_search_*      determinate bar; pb cloned into the spawn task
  cleanup_files P1     determinate bar over DB paths
  cleanup_files P2     determinate bar; pb.suspend() around y/n/a/s prompt
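
The cleanup_files prompt is the one non-obvious piece: indicatif's
ProgressBar::suspend clears the bar, runs a closure, then redraws, so
reading the answer doesn't tear the display. Roughly (the prompt wording
and return type here are illustrative):

    // Hypothetical shape of the phase-2 confirmation prompt.
    fn confirm(pb: &indicatif::ProgressBar, path: &str) -> std::io::Result<String> {
        pb.suspend(|| {
            // Bar is hidden while this closure runs, then redrawn.
            println!("delete {}? [y/n/a/s]", path);
            let mut answer = String::new();
            std::io::stdin().read_line(&mut answer)?;
            Ok(answer.trim().to_lowercase())
        })
    }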

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
commit b9d5578653 (parent d5f944c7b6)
Author: Cameron
Date:   2026-04-26 23:55:33 -04:00
11 changed files with 362 additions and 149 deletions


@@ -2,9 +2,10 @@ use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
 use image_api::ai::ollama::OllamaClient;
+use image_api::bin_progress;
 use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
 use image_api::parsers::search_html_parser::parse_search_html;
-use log::{error, info, warn};
+use log::{error, info};
 // Import the trait to use its methods
 use image_api::database::SearchHistoryDao;
@@ -49,24 +50,22 @@ async fn main() -> Result<()> {
     let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
     let context = opentelemetry::Context::current();
-    let mut inserted_count = 0;
-    let mut skipped_count = 0;
-    let mut error_count = 0;
+    let mut inserted_count = 0usize;
+    let mut skipped_count = 0usize;
+    let mut error_count = 0usize;
     let mut dao_instance = SqliteSearchHistoryDao::new();
     let created_at = Utc::now().timestamp();
+    let pb = bin_progress::determinate(searches.len() as u64, "importing");
+    let total_batches = searches.len().div_ceil(args.batch_size);
     // Process searches in batches (embeddings are REQUIRED for searches)
     for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
-        info!(
-            "Processing batch {} ({} searches)...",
-            batch_idx + 1,
-            chunk.len()
-        );
         // Generate embeddings for this batch
         let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();
+        let pb_for_warn = pb.clone();
         let embeddings_result = tokio::task::spawn({
             let ollama_client = ollama.clone();
             async move {
@@ -76,7 +75,7 @@ async fn main() -> Result<()> {
                 match ollama_client.generate_embedding(query).await {
                     Ok(emb) => embeddings.push(Some(emb)),
                     Err(e) => {
-                        warn!("Failed to generate embedding for query '{}': {}", query, e);
+                        pb_for_warn.println(format!("embedding failed for '{}': {}", query, e));
                         embeddings.push(None);
                     }
                 }
@@ -112,10 +111,7 @@ async fn main() -> Result<()> {
                     source_file: Some(args.path.clone()),
                 });
             } else {
-                error!(
-                    "Skipping search '{}' due to missing embedding",
-                    search.query
-                );
+                pb.println(format!("skipping '{}' — missing embedding", search.query));
                 error_count += 1;
             }
         }
@@ -123,30 +119,41 @@ async fn main() -> Result<()> {
         // Batch insert entire chunk in single transaction
         if !batch_inserts.is_empty() {
             match dao_instance.store_searches_batch(&context, batch_inserts) {
-                Ok(count) => {
-                    inserted_count += count;
-                    info!("Imported {} searches (total: {})...", count, inserted_count);
-                }
+                Ok(count) => inserted_count += count,
                 Err(e) => {
-                    error!("Failed to store batch: {:?}", e);
+                    pb.println(format!("batch insert failed: {:?}", e));
                     error_count += chunk.len();
                 }
             }
         }
+        pb.set_message(format!(
+            "inserted={} skipped={} errors={}",
+            inserted_count, skipped_count, error_count
+        ));
+        pb.inc(chunk.len() as u64);
         // Rate limiting between batches
-        if batch_idx < searches.len() / args.batch_size {
-            info!("Waiting 500ms before next batch...");
+        if batch_idx + 1 < total_batches {
             tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
         }
     }
-    info!("\n=== Import Summary ===");
+    pb.finish_and_clear();
+    info!("=== Import Summary ===");
     info!("Total searches found: {}", searches.len());
     info!("Successfully inserted: {}", inserted_count);
     info!("Skipped (already exist): {}", skipped_count);
     info!("Errors: {}", error_count);
     info!("All imported searches have embeddings for semantic search");
+    if error_count > 0 {
+        error!(
+            "Completed with {} errors — review log output above",
+            error_count
+        );
+    }
     Ok(())
 }
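
Aside on the last hunk: the rate-limit guard change is a behavior fix,
not a rename. With the old truncating division, the guard stayed true
on the final batch whenever searches.len() was an exact multiple of
batch_size, adding a pointless trailing 500ms sleep. A standalone check
of the arithmetic (my example values):

    fn main() {
        let (len, batch_size) = (100usize, 10usize);
        let total_batches = len.div_ceil(batch_size); // 10 batches, indices 0..=9
        let last_idx = total_batches - 1;
        // Old guard: still true on the final batch, hence the extra sleep.
        assert!(last_idx < len / batch_size);
        // New guard: false on the final batch, so no trailing sleep.
        assert!(!(last_idx + 1 < total_batches));
    }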