feat(bins): multi-library populate_knowledge + progress UX
populate_knowledge now loads real libraries from the DB instead of fabricating a single library_id=1 row from BASE_PATH. Adds --library <id|name> to restrict the walk and validates --path against the selected library roots. The full library set is still passed to InsightGenerator so resolve_full_path can probe every root when an insight resolves to a different library than the one being walked.

Adds indicatif progress bars across the long-running utility binaries via a shared src/bin_progress.rs helper (determinate bar + open-ended spinner with consistent styling). Per-batch info! noise is replaced by the bar's throughput/ETA; warnings and errors route through pb.println so they scroll above the bar instead of fighting with it.

- populate_knowledge: spinner during scan, determinate bar over all libs
- backfill_hashes: spinner with running hashed/missing/errors counts
- import_calendar: determinate bar; embedding/store failures inline
- import_location_*: determinate bar advancing by chunk size
- import_search_*: determinate bar; pb cloned into the spawn task
- cleanup_files P1: determinate bar over DB paths
- cleanup_files P2: determinate bar; pb.suspend() around y/n/a/s prompt

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
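The populate_knowledge argument parsing is not part of the diff below. A minimal clap sketch of the described flag surface, assuming clap's derive API; the struct and field names are illustrative only:

use clap::Parser;

/// Hypothetical flag surface for populate_knowledge; field names and
/// types are assumptions, only the flag semantics come from the
/// commit message above.
#[derive(Parser)]
struct Args {
    /// Restrict the walk to a single library, by numeric id or by name.
    #[arg(long)]
    library: Option<String>,

    /// Optional subtree to walk; rejected unless it falls under a
    /// selected library's root.
    #[arg(long)]
    path: Option<String>,
}

fn main() {
    let args = Args::parse();
    // Resolving "<id|name>" would happen against the DB's library
    // table: try a numeric id first, fall back to a name lookup (assumed).
    let _ = (args.library, args.path);
}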
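src/bin_progress.rs itself is outside this file's diff. A minimal sketch of what the helper could look like, inferred from the bin_progress::determinate(len, label) call below and the spinner described above; the template strings, the spinner function name, and the &'static str parameter are assumptions, not the committed code:

use std::time::Duration;

use indicatif::{ProgressBar, ProgressStyle};

/// Determinate bar: known item count, throughput and ETA in the template.
pub fn determinate(len: u64, label: &'static str) -> ProgressBar {
    let pb = ProgressBar::new(len);
    pb.set_style(
        ProgressStyle::with_template(
            "{prefix} [{bar:40}] {pos}/{len} ({per_sec}, eta {eta}) {msg}",
        )
        .expect("static template is valid"),
    );
    // The label goes in {prefix} so callers can keep rewriting {msg}
    // with running counts without losing it.
    pb.set_prefix(label);
    pb
}

/// Open-ended spinner for phases with no known total (e.g. the scan).
pub fn spinner(label: &'static str) -> ProgressBar {
    let pb = ProgressBar::new_spinner();
    pb.set_style(
        ProgressStyle::with_template("{spinner} {prefix} {msg}")
            .expect("static template is valid"),
    );
    pb.set_prefix(label);
    pb.enable_steady_tick(Duration::from_millis(100));
    pb
}

Putting the label in the prefix rather than the message matters for the import loop below, which overwrites the message every batch with running inserted/skipped/error counts.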
@@ -2,9 +2,10 @@ use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
 use image_api::ai::ollama::OllamaClient;
+use image_api::bin_progress;
 use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
 use image_api::parsers::search_html_parser::parse_search_html;
-use log::{error, info, warn};
+use log::{error, info};
 
 // Import the trait to use its methods
 use image_api::database::SearchHistoryDao;
@@ -49,24 +50,22 @@ async fn main() -> Result<()> {
     let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
     let context = opentelemetry::Context::current();
 
-    let mut inserted_count = 0;
-    let mut skipped_count = 0;
-    let mut error_count = 0;
+    let mut inserted_count = 0usize;
+    let mut skipped_count = 0usize;
+    let mut error_count = 0usize;
 
     let mut dao_instance = SqliteSearchHistoryDao::new();
     let created_at = Utc::now().timestamp();
 
+    let pb = bin_progress::determinate(searches.len() as u64, "importing");
+    let total_batches = searches.len().div_ceil(args.batch_size);
+
     // Process searches in batches (embeddings are REQUIRED for searches)
     for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
-        info!(
-            "Processing batch {} ({} searches)...",
-            batch_idx + 1,
-            chunk.len()
-        );
-
         // Generate embeddings for this batch
         let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();
 
+        let pb_for_warn = pb.clone();
         let embeddings_result = tokio::task::spawn({
             let ollama_client = ollama.clone();
             async move {
@@ -76,7 +75,7 @@ async fn main() -> Result<()> {
                 match ollama_client.generate_embedding(query).await {
                     Ok(emb) => embeddings.push(Some(emb)),
                     Err(e) => {
-                        warn!("Failed to generate embedding for query '{}': {}", query, e);
+                        pb_for_warn.println(format!("embedding failed for '{}': {}", query, e));
                         embeddings.push(None);
                     }
                 }
@@ -112,10 +111,7 @@ async fn main() -> Result<()> {
                     source_file: Some(args.path.clone()),
                 });
             } else {
-                error!(
-                    "Skipping search '{}' due to missing embedding",
-                    search.query
-                );
+                pb.println(format!("skipping '{}' — missing embedding", search.query));
                 error_count += 1;
             }
         }
@@ -123,30 +119,41 @@ async fn main() -> Result<()> {
         // Batch insert entire chunk in single transaction
         if !batch_inserts.is_empty() {
            match dao_instance.store_searches_batch(&context, batch_inserts) {
-                Ok(count) => {
-                    inserted_count += count;
-                    info!("Imported {} searches (total: {})...", count, inserted_count);
-                }
+                Ok(count) => inserted_count += count,
                 Err(e) => {
-                    error!("Failed to store batch: {:?}", e);
+                    pb.println(format!("batch insert failed: {:?}", e));
                     error_count += chunk.len();
                 }
             }
         }
 
+        pb.set_message(format!(
+            "inserted={} skipped={} errors={}",
+            inserted_count, skipped_count, error_count
+        ));
+        pb.inc(chunk.len() as u64);
+
         // Rate limiting between batches
-        if batch_idx < searches.len() / args.batch_size {
-            info!("Waiting 500ms before next batch...");
+        if batch_idx + 1 < total_batches {
             tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
         }
     }
 
-    info!("\n=== Import Summary ===");
+    pb.finish_and_clear();
+
+    info!("=== Import Summary ===");
     info!("Total searches found: {}", searches.len());
     info!("Successfully inserted: {}", inserted_count);
     info!("Skipped (already exist): {}", skipped_count);
     info!("Errors: {}", error_count);
     info!("All imported searches have embeddings for semantic search");
 
+    if error_count > 0 {
+        error!(
+            "Completed with {} errors — review log output above",
+            error_count
+        );
+    }
+
     Ok(())
 }
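cleanup_files is also outside this diff. A minimal sketch of the P2 pattern named in the message, assuming indicatif's ProgressBar::suspend; the confirm_delete helper and the prompt wording are illustrative:

use std::io::{self, Write};

use indicatif::ProgressBar;

// Hypothetical helper: suspend() hides the bar, runs the closure, then
// redraws the bar, so the y/n/a/s prompt is never overdrawn by ticks.
fn confirm_delete(pb: &ProgressBar, path: &str) -> io::Result<String> {
    pb.suspend(|| {
        print!("delete {}? [y/n/a/s] ", path);
        io::stdout().flush()?;
        let mut answer = String::new();
        io::stdin().read_line(&mut answer)?;
        Ok(answer.trim().to_lowercase())
    })
}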