populate_knowledge now loads real libraries from the DB instead of fabricating a single library_id=1 row from BASE_PATH. Adds --library <id|name> to restrict the walk and validates --path against the selected library roots. The full library set is still passed to InsightGenerator so resolve_full_path can probe every root when an insight resolves to a different library than the one being walked.

Adds indicatif progress bars across the long-running utility binaries via a shared src/bin_progress.rs helper (determinate bar + open-ended spinner with consistent styling). Per-batch info! noise is replaced by the bar's throughput/ETA; warnings and errors route through pb.println so they scroll above the bar instead of fighting with it.

- populate_knowledge: spinner during scan, determinate bar over all libs
- backfill_hashes: spinner with running hashed/missing/errors counts
- import_calendar: determinate bar; embedding/store failures inline
- import_location_*: determinate bar advancing by chunk size
- import_search_*: determinate bar; pb cloned into the spawn task
- cleanup_files P1: determinate bar over DB paths
- cleanup_files P2: determinate bar; pb.suspend() around y/n/a/s prompt

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
172 lines
5.4 KiB
Rust
172 lines
5.4 KiB
Rust
use anyhow::{Context, Result};
|
|
use chrono::Utc;
|
|
use clap::Parser;
|
|
use image_api::ai::ollama::OllamaClient;
|
|
use image_api::bin_progress;
|
|
use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
|
|
use image_api::parsers::ical_parser::parse_ics_file;
|
|
use log::{error, info};
|
|
|
|
// Import the trait to use its methods
|
|
use image_api::database::CalendarEventDao;
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(author, version, about = "Import Google Takeout Calendar data", long_about = None)]
|
|
struct Args {
|
|
/// Path to the .ics calendar file
|
|
#[arg(short, long)]
|
|
path: String,
|
|
|
|
/// Generate embeddings for calendar events (slower but enables semantic search)
|
|
#[arg(long, default_value = "false")]
|
|
generate_embeddings: bool,
|
|
|
|
/// Skip events that already exist in the database
|
|
#[arg(long, default_value = "true")]
|
|
skip_existing: bool,
|
|
|
|
/// Batch size for embedding generation
|
|
#[arg(long, default_value = "128")]
|
|
batch_size: usize,
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
dotenv::dotenv().ok();
|
|
env_logger::init();
|
|
|
|
let args = Args::parse();
|
|
|
|
info!("Parsing calendar file: {}", args.path);
|
|
let events = parse_ics_file(&args.path).context("Failed to parse .ics file")?;
|
|
|
|
info!("Found {} calendar events", events.len());
|
|
|
|
let context = opentelemetry::Context::current();
|
|
|
|
let ollama = if args.generate_embeddings {
|
|
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
|
|
.or_else(|_| dotenv::var("OLLAMA_URL"))
|
|
.unwrap_or_else(|_| "http://localhost:11434".to_string());
|
|
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
|
|
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
|
|
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
|
|
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
|
|
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
|
|
|
|
Some(OllamaClient::new(
|
|
primary_url,
|
|
fallback_url,
|
|
primary_model,
|
|
fallback_model,
|
|
))
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let mut inserted_count = 0usize;
|
|
let mut skipped_count = 0usize;
|
|
let mut error_count = 0usize;
|
|
|
|
let pb = bin_progress::determinate(events.len() as u64, "importing");
|
|
|
|
// Process events in batches
|
|
// Can't use rayon with async, so process sequentially
|
|
for event in &events {
|
|
let mut dao_instance = SqliteCalendarEventDao::new();
|
|
|
|
// Check if event exists
|
|
if args.skip_existing
|
|
&& let Ok(exists) = dao_instance.event_exists(
|
|
&context,
|
|
event.event_uid.as_deref().unwrap_or(""),
|
|
event.start_time,
|
|
)
|
|
&& exists
|
|
{
|
|
skipped_count += 1;
|
|
pb.inc(1);
|
|
continue;
|
|
}
|
|
|
|
// Generate embedding if requested (blocking call)
|
|
let embedding = if let Some(ref ollama_client) = ollama {
|
|
let text = format!(
|
|
"{} {} {}",
|
|
event.summary,
|
|
event.description.as_deref().unwrap_or(""),
|
|
event.location.as_deref().unwrap_or("")
|
|
);
|
|
|
|
match tokio::task::block_in_place(|| {
|
|
tokio::runtime::Handle::current()
|
|
.block_on(async { ollama_client.generate_embedding(&text).await })
|
|
}) {
|
|
Ok(emb) => Some(emb),
|
|
Err(e) => {
|
|
pb.println(format!("embedding failed for '{}': {}", event.summary, e));
|
|
None
|
|
}
|
|
}
|
|
} else {
|
|
None
|
|
};
|
|
|
|
// Insert into database
|
|
let insert_event = InsertCalendarEvent {
|
|
event_uid: event.event_uid.clone(),
|
|
summary: event.summary.clone(),
|
|
description: event.description.clone(),
|
|
location: event.location.clone(),
|
|
start_time: event.start_time,
|
|
end_time: event.end_time,
|
|
all_day: event.all_day,
|
|
organizer: event.organizer.clone(),
|
|
attendees: if event.attendees.is_empty() {
|
|
None
|
|
} else {
|
|
Some(serde_json::to_string(&event.attendees).unwrap_or_default())
|
|
},
|
|
embedding,
|
|
created_at: Utc::now().timestamp(),
|
|
source_file: Some(args.path.clone()),
|
|
};
|
|
|
|
match dao_instance.store_event(&context, insert_event) {
|
|
Ok(_) => inserted_count += 1,
|
|
Err(e) => {
|
|
pb.println(format!("store failed for '{}': {:?}", event.summary, e));
|
|
error_count += 1;
|
|
}
|
|
}
|
|
pb.set_message(format!(
|
|
"inserted={} skipped={} errors={}",
|
|
inserted_count, skipped_count, error_count
|
|
));
|
|
pb.inc(1);
|
|
}
|
|
|
|
pb.finish_and_clear();
|
|
|
|
info!("=== Import Summary ===");
|
|
info!("Total events found: {}", events.len());
|
|
info!("Successfully inserted: {}", inserted_count);
|
|
info!("Skipped (already exist): {}", skipped_count);
|
|
info!("Errors: {}", error_count);
|
|
|
|
if args.generate_embeddings {
|
|
info!("Embeddings were generated for semantic search");
|
|
} else {
|
|
info!("No embeddings generated (use --generate-embeddings to enable semantic search)");
|
|
}
|
|
|
|
if error_count > 0 {
|
|
error!(
|
|
"Completed with {} errors — review log output above",
|
|
error_count
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|