feat(bins): multi-library populate_knowledge + progress UX
populate_knowledge now loads real libraries from the DB instead of fabricating a single library_id=1 row from BASE_PATH. Adds --library <id|name> to restrict the walk and validates --path against the selected library roots. The full library set is still passed to InsightGenerator so resolve_full_path can probe every root when an insight resolves to a different library than the one being walked.

Adds indicatif progress bars across the long-running utility binaries via a shared src/bin_progress.rs helper (determinate bar + open-ended spinner with consistent styling). Per-batch info! noise is replaced by the bar's throughput/ETA; warnings and errors route through pb.println so they scroll above the bar instead of fighting with it.

- populate_knowledge: spinner during scan, determinate bar over all libs
- backfill_hashes: spinner with running hashed/missing/errors counts
- import_calendar: determinate bar; embedding/store failures inline
- import_location_*: determinate bar advancing by chunk size
- import_search_*: determinate bar; pb cloned into the spawn task
- cleanup_files P1: determinate bar over DB paths
- cleanup_files P2: determinate bar; pb.suspend() around y/n/a/s prompt

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,14 +1,17 @@
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use clap::Parser;
|
||||
use log::warn;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use image_api::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
||||
use image_api::bin_progress;
|
||||
use image_api::database::{
|
||||
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
|
||||
SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao,
|
||||
SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
|
||||
connect,
|
||||
};
|
||||
use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
|
||||
use image_api::libraries::{self, Library};
|
||||
@@ -20,7 +23,13 @@ use image_api::tags::{SqliteTagDao, TagDao};
|
||||
about = "Batch populate the knowledge base by running the agentic insight loop over a folder"
|
||||
)]
|
||||
struct Args {
|
||||
/// Directory to scan. Defaults to BASE_PATH from .env
|
||||
/// Restrict to a single library by numeric id or name. Defaults to all
|
||||
/// configured libraries.
|
||||
#[arg(long)]
|
||||
library: Option<String>,
|
||||
|
||||
/// Optional subdirectory to scan instead of full library roots. Must be
|
||||
/// an absolute path under one of the selected libraries.
|
||||
#[arg(long)]
|
||||
path: Option<String>,
|
||||
|
||||
@@ -68,10 +77,57 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
let base_path = dotenv::var("BASE_PATH")?;
|
||||
let scan_path = args.path.as_deref().unwrap_or(&base_path).to_string();
|
||||
// Load libraries from the DB. Patch the placeholder row from BASE_PATH
|
||||
// first when present so a fresh install still gets a valid root.
|
||||
let env_base_path = dotenv::var("BASE_PATH").ok();
|
||||
let mut seed_conn = connect();
|
||||
if let Some(base) = env_base_path.as_deref() {
|
||||
libraries::seed_or_patch_from_env(&mut seed_conn, base);
|
||||
}
|
||||
let all_libs = libraries::load_all(&mut seed_conn);
|
||||
drop(seed_conn);
|
||||
if all_libs.is_empty() {
|
||||
anyhow::bail!("No libraries configured");
|
||||
}
|
||||
|
||||
// Ollama config from env with CLI overrides
|
||||
// Resolve --library to a concrete subset.
|
||||
let selected_libs: Vec<Library> = match args.library.as_deref() {
|
||||
None => all_libs.clone(),
|
||||
Some(raw) => {
|
||||
let raw = raw.trim();
|
||||
let matched = if let Ok(id) = raw.parse::<i32>() {
|
||||
all_libs.iter().find(|l| l.id == id).cloned()
|
||||
} else {
|
||||
all_libs.iter().find(|l| l.name == raw).cloned()
|
||||
};
|
||||
match matched {
|
||||
Some(lib) => vec![lib],
|
||||
None => anyhow::bail!("Unknown library: {}", raw),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Resolve --path to (target_library, walk_root). When provided, the path
|
||||
// must live under exactly one of the selected libraries.
|
||||
let scan_targets: Vec<(Library, PathBuf)> = match args.path.as_deref() {
|
||||
None => selected_libs
|
||||
.iter()
|
||||
.map(|lib| (lib.clone(), PathBuf::from(&lib.root_path)))
|
||||
.collect(),
|
||||
Some(raw) => {
|
||||
let abs = PathBuf::from(raw);
|
||||
let matched = selected_libs
|
||||
.iter()
|
||||
.find(|lib| abs.starts_with(&lib.root_path))
|
||||
.cloned();
|
||||
match matched {
|
||||
Some(lib) => vec![(lib, abs)],
|
||||
None => anyhow::bail!("--path {} is not under any selected library root", raw),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Ollama config from env with CLI overrides.
|
||||
let primary_url = std::env::var("OLLAMA_PRIMARY_URL")
|
||||
.or_else(|_| std::env::var("OLLAMA_URL"))
|
||||
.unwrap_or_else(|_| "http://localhost:11434".to_string());
|
||||
@@ -108,7 +164,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
let sms_api_token = std::env::var("SMS_API_TOKEN").ok();
|
||||
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
|
||||
|
||||
// Wire up all DAOs
|
||||
let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
|
||||
let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
|
||||
@@ -126,12 +181,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
|
||||
|
||||
let populate_lib = Library {
|
||||
id: libraries::PRIMARY_LIBRARY_ID,
|
||||
name: "main".to_string(),
|
||||
root_path: base_path.clone(),
|
||||
};
|
||||
|
||||
// Pass the full library set so `resolve_full_path` probes every root,
|
||||
// even when --library restricts the walk. A rel_path shared across
|
||||
// libraries will resolve against the first existing match.
|
||||
let generator = InsightGenerator::new(
|
||||
ollama,
|
||||
None,
|
||||
@@ -144,12 +196,15 @@ async fn main() -> anyhow::Result<()> {
|
||||
search_dao,
|
||||
tag_dao,
|
||||
knowledge_dao,
|
||||
vec![populate_lib],
|
||||
all_libs.clone(),
|
||||
);
|
||||
|
||||
println!("Knowledge Base Population");
|
||||
println!("=========================");
|
||||
println!("Scan path: {}", scan_path);
|
||||
for (lib, root) in &scan_targets {
|
||||
println!("Library: {} (id={})", lib.name, lib.id);
|
||||
println!("Scan root: {}", root.display());
|
||||
}
|
||||
println!("Model: {}", primary_model);
|
||||
println!("Max iterations: {}", args.max_iterations);
|
||||
println!("Timeout: {}s", args.timeout_secs);
|
||||
@@ -178,30 +233,56 @@ async fn main() -> anyhow::Result<()> {
|
||||
);
|
||||
println!();
|
||||
|
||||
// Collect all image and video files
|
||||
let all_extensions: Vec<&str> = IMAGE_EXTENSIONS
|
||||
.iter()
|
||||
.chain(VIDEO_EXTENSIONS.iter())
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
println!("Scanning {}...", scan_path);
|
||||
let files: Vec<PathBuf> = WalkDir::new(&scan_path)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.file_type().is_file())
|
||||
.filter(|e| {
|
||||
e.path()
|
||||
// Collect (library, abs_path, rel_path) for every media file across all
|
||||
// scan targets so the progress counter spans the full job.
|
||||
let mut files: Vec<(Library, PathBuf, String)> = Vec::new();
|
||||
for (lib, walk_root) in &scan_targets {
|
||||
let lib_root = Path::new(&lib.root_path);
|
||||
let scan_pb = bin_progress::spinner(format!("scanning {}", walk_root.display()));
|
||||
let count_before = files.len();
|
||||
for entry in WalkDir::new(walk_root).into_iter().filter_map(|e| e.ok()) {
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
let abs_path = entry.path().to_path_buf();
|
||||
let ext_ok = abs_path
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.map(|ext| all_extensions.contains(&ext.to_lowercase().as_str()))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.map(|e| e.path().to_path_buf())
|
||||
.collect();
|
||||
.unwrap_or(false);
|
||||
if !ext_ok {
|
||||
continue;
|
||||
}
|
||||
let rel = match abs_path.strip_prefix(lib_root) {
|
||||
Ok(p) => p.to_string_lossy().replace('\\', "/"),
|
||||
Err(_) => {
|
||||
warn!(
|
||||
"{} is not under library root {}; skipping",
|
||||
abs_path.display(),
|
||||
lib_root.display()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
files.push((lib.clone(), abs_path, rel));
|
||||
scan_pb.inc(1);
|
||||
}
|
||||
let added = files.len() - count_before;
|
||||
scan_pb.finish_with_message(format!(
|
||||
"scanned {} ({} media files)",
|
||||
walk_root.display(),
|
||||
added
|
||||
));
|
||||
}
|
||||
|
||||
let total = files.len();
|
||||
println!("Found {} files\n", total);
|
||||
println!("\nTotal files to consider: {}\n", total);
|
||||
|
||||
if total == 0 {
|
||||
println!("Nothing to process.");
|
||||
@@ -213,35 +294,29 @@ async fn main() -> anyhow::Result<()> {
|
||||
let mut skipped = 0usize;
|
||||
let mut errors = 0usize;
|
||||
|
||||
for (i, path) in files.iter().enumerate() {
|
||||
let relative = match path.strip_prefix(&base_path) {
|
||||
Ok(p) => p.to_string_lossy().replace('\\', "/"),
|
||||
Err(_) => path.to_string_lossy().replace('\\', "/"),
|
||||
};
|
||||
let pb = bin_progress::determinate(total as u64, "");
|
||||
|
||||
let prefix = format!("[{}/{}]", i + 1, total);
|
||||
for (lib, _abs_path, relative) in files.iter() {
|
||||
pb.set_message(format!("{}: {}", lib.name, relative));
|
||||
|
||||
// Check for existing insight unless --reprocess
|
||||
if !args.reprocess {
|
||||
let has_insight = insight_dao
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get_insight(&cx, &relative)
|
||||
.get_insight(&cx, relative)
|
||||
.unwrap_or(None)
|
||||
.is_some();
|
||||
|
||||
if has_insight {
|
||||
println!("{} skip {}", prefix, relative);
|
||||
skipped += 1;
|
||||
pb.inc(1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
println!("{} start {}", prefix, relative);
|
||||
|
||||
match generator
|
||||
.generate_agentic_insight_for_photo(
|
||||
&relative,
|
||||
relative,
|
||||
args.model.clone(),
|
||||
None,
|
||||
args.num_ctx,
|
||||
@@ -256,17 +331,17 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
println!("{} done {}", prefix, relative);
|
||||
processed += 1;
|
||||
}
|
||||
Ok(_) => processed += 1,
|
||||
Err(e) => {
|
||||
eprintln!("{} error {} — {:?}", prefix, relative, e);
|
||||
pb.println(format!("error {}: {} — {:?}", lib.name, relative, e));
|
||||
errors += 1;
|
||||
}
|
||||
}
|
||||
pb.inc(1);
|
||||
}
|
||||
|
||||
pb.finish_and_clear();
|
||||
|
||||
println!();
|
||||
println!("=========================");
|
||||
println!("Complete");
|
||||
|
||||
Reference in New Issue
Block a user