Files
ImageApi/src/cleanup/phase1.rs
T
Cameron b9d5578653 feat(bins): multi-library populate_knowledge + progress UX
populate_knowledge now loads real libraries from the DB instead of
fabricating a single library_id=1 row from BASE_PATH. Adds --library
<id|name> to restrict the walk and validates --path against the selected
library roots. The full library set is still passed to InsightGenerator so
resolve_full_path can probe every root when an insight resolves to a
different library than the one being walked.

Adds indicatif progress bars across the long-running utility binaries via
a shared src/bin_progress.rs helper (determinate bar + open-ended spinner
with consistent styling). Per-batch info! noise is replaced by the bar's
throughput/ETA; warnings and errors route through pb.println so they
scroll above the bar instead of fighting with it.

  populate_knowledge   spinner during scan, determinate bar over all libs
  backfill_hashes      spinner with running hashed/missing/errors counts
  import_calendar      determinate bar; embedding/store failures inline
  import_location_*    determinate bar advancing by chunk size
  import_search_*      determinate bar; pb cloned into the spawn task
  cleanup_files P1     determinate bar over DB paths
  cleanup_files P2     determinate bar; pb.suspend() around y/n/a/s prompt

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 23:55:33 -04:00

158 lines
4.8 KiB
Rust

use crate::bin_progress;
use crate::cleanup::database_updater::DatabaseUpdater;
use crate::cleanup::types::{CleanupConfig, CleanupStats};
use crate::file_types::IMAGE_EXTENSIONS;
use anyhow::Result;
use log::error;
use std::path::PathBuf;
/// Extensions tried, in order, when searching for a replacement for a missing
/// file. This is an alias for the crate-wide `IMAGE_EXTENSIONS` list.
const SUPPORTED_EXTENSIONS: &[&str] = IMAGE_EXTENSIONS;
/// Phase 1: Resolve missing files by searching for alternative extensions
pub fn resolve_missing_files(
config: &CleanupConfig,
db_updater: &mut DatabaseUpdater,
) -> Result<CleanupStats> {
let mut stats = CleanupStats::new();
println!("\nPhase 1: Missing File Resolution");
println!("---------------------------------");
// Get all file paths from database
println!("Scanning database for file references...");
let all_paths = db_updater.get_all_file_paths()?;
println!("Found {} unique file paths\n", all_paths.len());
stats.files_checked = all_paths.len();
let mut missing_count = 0;
let mut resolved_count = 0;
let pb = bin_progress::determinate(stats.files_checked as u64, "checking");
for path_str in all_paths {
let full_path = config.base_path.join(&path_str);
// Check if file exists
if full_path.exists() {
pb.inc(1);
continue;
}
missing_count += 1;
stats.issues_found += 1;
// Try to find the file with different extensions
match find_file_with_alternative_extension(&config.base_path, &path_str) {
Some(new_path_str) => {
pb.println(format!(
"{} → found as {}{}",
path_str,
new_path_str,
if config.dry_run {
" (dry-run, not updated)"
} else {
""
}
));
if !config.dry_run {
// Update database
match db_updater.update_file_path(&path_str, &new_path_str) {
Ok(_) => {
resolved_count += 1;
stats.issues_fixed += 1;
}
Err(e) => {
error!("Failed to update database for {}: {:?}", path_str, e);
stats.add_error(format!("DB update failed for {}: {}", path_str, e));
}
}
} else {
resolved_count += 1;
}
}
None => {
pb.println(format!("{} — not found with any extension", path_str));
}
}
pb.set_message(format!(
"missing={} resolved={}",
missing_count, resolved_count
));
pb.inc(1);
}
pb.finish_and_clear();
println!("\nResults:");
println!("- Files checked: {}", stats.files_checked);
println!("- Missing files: {}", missing_count);
println!("- Resolved: {}", resolved_count);
println!(
"- Still missing: {}",
missing_count - if config.dry_run { 0 } else { resolved_count }
);
if !stats.errors.is_empty() {
println!("- Errors: {}", stats.errors.len());
}
Ok(stats)
}
/// Find a file with an alternative extension
/// Returns the relative path with the new extension if found
fn find_file_with_alternative_extension(
base_path: &PathBuf,
relative_path: &str,
) -> Option<String> {
let full_path = base_path.join(relative_path);
// Get the parent directory and file stem (name without extension)
let parent = full_path.parent()?;
let stem = full_path.file_stem()?.to_str()?;
// Try each supported extension
for ext in SUPPORTED_EXTENSIONS {
let test_path = parent.join(format!("{}.{}", stem, ext));
if test_path.exists() {
// Convert back to relative path
if let Ok(rel) = test_path.strip_prefix(base_path)
&& let Some(rel_str) = rel.to_str()
{
return Some(rel_str.to_string());
}
}
}
None
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A file stored as `.jpeg` is found when asked for as `.jpg`, and a stem
    /// with no file on disk yields `None`. The positive case relies on `jpeg`
    /// being one of the supported extensions.
    #[test]
    fn test_find_file_with_alternative_extension() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path().to_path_buf();

        // Only the `.jpeg` variant exists on disk.
        fs::write(root.join("test.jpeg"), b"test").unwrap();

        // Asking for the `.jpg` name resolves to the `.jpeg` file.
        let hit = find_file_with_alternative_extension(&root, "test.jpg");
        assert!(hit.is_some());
        assert_eq!(hit.as_deref(), Some("test.jpeg"));

        // A stem with no file under any extension is not resolved.
        let miss = find_file_with_alternative_extension(&root, "nonexistent.jpg");
        assert!(miss.is_none());
    }
}