From aaf9cc64bee0023bc0242967c2e7dc57323edcab Mon Sep 17 00:00:00 2001 From: Cameron Date: Thu, 18 Dec 2025 16:02:15 -0500 Subject: [PATCH] Add Cleanup binary for fixing broken DB/file relations --- Cargo.lock | 117 ++++++++++++++ Cargo.toml | 3 + src/bin/cleanup_files.rs | 143 ++++++++++++++++ src/cleanup/database_updater.rs | 157 ++++++++++++++++++ src/cleanup/file_type_detector.rs | 103 ++++++++++++ src/cleanup/mod.rs | 11 ++ src/cleanup/phase1.rs | 145 +++++++++++++++++ src/cleanup/phase2.rs | 261 ++++++++++++++++++++++++++++++ src/cleanup/types.rs | 39 +++++ src/database/mod.rs | 52 ++++++ src/lib.rs | 35 ++++ src/tags.rs | 43 +++++ 12 files changed, 1109 insertions(+) create mode 100644 src/bin/cleanup_files.rs create mode 100644 src/cleanup/database_updater.rs create mode 100644 src/cleanup/file_type_detector.rs create mode 100644 src/cleanup/mod.rs create mode 100644 src/cleanup/phase1.rs create mode 100644 src/cleanup/phase2.rs create mode 100644 src/cleanup/types.rs diff --git a/Cargo.lock b/Cargo.lock index 6c69ac7..9228d68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -650,6 +650,17 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + [[package]] name = "cfg-expr" version = "0.15.8" @@ -690,12 +701,65 @@ dependencies = [ "inout", ] +[[package]] +name = "clap" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + [[package]] name = "colorchoice" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "convert_case" version = "0.4.0" @@ -859,6 +923,19 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "dialoguer" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de" +dependencies = [ + "console", + "shell-words", + "tempfile", + "thiserror 1.0.69", + "zeroize", +] + [[package]] name = "diesel" version = "2.2.12" @@ -950,6 +1027,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -1561,12 +1644,15 @@ dependencies = [ "anyhow", "bcrypt", "chrono", + "clap", + "dialoguer", "diesel", "diesel_migrations", "dotenv", "env_logger", "futures", "image", + "infer", "jsonwebtoken", "kamadak-exif", "lazy_static", @@ -1611,6 +1697,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "infer" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc150e5ce2330295b8616ce0e3f53250e53af31759a9dbedad1621ba29151847" +dependencies = [ + "cfb", +] + [[package]] name = "inotify" version = "0.9.6" @@ -2854,6 +2949,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + [[package]] name = "shlex" version = "1.3.0" @@ -3342,6 +3443,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -3378,6 +3485,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "v_frame" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 5083e2d..c5f2406 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,9 +24,12 @@ serde_json = "1" diesel = { version = "2.2.10", features = ["sqlite"] } diesel_migrations = "2.2.0" chrono = "0.4" +clap = { version = "4.5", features = ["derive"] } +dialoguer = "0.11" dotenv = "0.15" bcrypt = "0.16.0" image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon"] } +infer = "0.16" walkdir = "2.4.0" rayon = "1.5" notify = "6.1.1" diff --git a/src/bin/cleanup_files.rs b/src/bin/cleanup_files.rs new file mode 100644 index 0000000..5b938d4 --- /dev/null +++ b/src/bin/cleanup_files.rs @@ -0,0 +1,143 @@ +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; + +use clap::Parser; + +use image_api::cleanup::{ + resolve_missing_files, validate_file_types, CleanupConfig, DatabaseUpdater, +}; +use image_api::database::{SqliteExifDao, SqliteFavoriteDao}; +use image_api::tags::SqliteTagDao; + +#[derive(Parser, Debug)] +#[command(name = "cleanup_files")] +#[command(about = "File cleanup and fix utility for ImageApi", long_about = None)] +struct Args { + #[arg(long, help = "Preview changes without making them")] + dry_run: bool, + + #[arg(long, help = "Auto-fix all issues without prompting")] + auto_fix: bool, + + #[arg(long, help = "Skip phase 1 (missing file resolution)")] + skip_phase1: bool, + + #[arg(long, help = "Skip phase 2 (file type validation)")] + skip_phase2: bool, +} + 
+fn main() -> anyhow::Result<()> { + // Initialize logging + env_logger::init(); + + // Load environment variables + dotenv::dotenv()?; + + // Parse CLI arguments + let args = Args::parse(); + + // Get base path from environment + let base_path = dotenv::var("BASE_PATH")?; + let base = PathBuf::from(&base_path); + + println!("File Cleanup and Fix Utility"); + println!("============================"); + println!("Base path: {}", base.display()); + println!("Dry run: {}", args.dry_run); + println!("Auto fix: {}", args.auto_fix); + println!(); + + // Pre-flight checks + if !base.exists() { + eprintln!("Error: Base path does not exist: {}", base.display()); + std::process::exit(1); + } + + if !base.is_dir() { + eprintln!("Error: Base path is not a directory: {}", base.display()); + std::process::exit(1); + } + + // Create configuration + let config = CleanupConfig { + base_path: base, + dry_run: args.dry_run, + auto_fix: args.auto_fix, + }; + + // Create DAOs + println!("Connecting to database..."); + let tag_dao: Arc<Mutex<SqliteTagDao>> = + Arc::new(Mutex::new(SqliteTagDao::default())); + let exif_dao: Arc<Mutex<SqliteExifDao>> = + Arc::new(Mutex::new(SqliteExifDao::new())); + let favorites_dao: Arc<Mutex<SqliteFavoriteDao>> = + Arc::new(Mutex::new(SqliteFavoriteDao::new())); + + // Create database updater + let mut db_updater = DatabaseUpdater::new(tag_dao, exif_dao, favorites_dao); + + println!("✓ Database connected\n"); + + // Track overall statistics + let mut total_issues_found = 0; + let mut total_issues_fixed = 0; + let mut total_errors = Vec::new(); + + // Phase 1: Missing file resolution + if !args.skip_phase1 { + match resolve_missing_files(&config, &mut db_updater) { + Ok(stats) => { + total_issues_found += stats.issues_found; + total_issues_fixed += stats.issues_fixed; + total_errors.extend(stats.errors); + } + Err(e) => { + eprintln!("Phase 1 failed: {:?}", e); + total_errors.push(format!("Phase 1 error: {}", e)); + } + } + } else { + println!("Phase 1: Skipped (--skip-phase1)"); + } + + // Phase 2: File type validation + if !args.skip_phase2 { + match validate_file_types(&config, &mut db_updater) { + Ok(stats) => { + total_issues_found += stats.issues_found; + total_issues_fixed += stats.issues_fixed; + total_errors.extend(stats.errors); + } + Err(e) => { + eprintln!("Phase 2 failed: {:?}", e); + total_errors.push(format!("Phase 2 error: {}", e)); + } + } + } else { + println!("\nPhase 2: Skipped (--skip-phase2)"); + } + + // Final summary + println!("\n============================"); + println!("Cleanup Complete!"); + println!("============================"); + println!("Total issues found: {}", total_issues_found); + if config.dry_run { + println!("Total issues that would be fixed: {}", total_issues_found); + } else { + println!("Total issues fixed: {}", total_issues_fixed); + } + + if !total_errors.is_empty() { + println!("\nErrors encountered:"); + for (i, error) in total_errors.iter().enumerate() { + println!(" {}. {}", i + 1, error); + } + println!("\nSome operations failed. 
Review errors above."); + } else { + println!("\n✓ No errors encountered"); + } + + Ok(()) +} diff --git a/src/cleanup/database_updater.rs b/src/cleanup/database_updater.rs new file mode 100644 index 0000000..4f6c79d --- /dev/null +++ b/src/cleanup/database_updater.rs @@ -0,0 +1,157 @@ +use crate::database::{ExifDao, FavoriteDao}; +use crate::tags::TagDao; +use anyhow::{Context, Result}; +use log::{error, info}; +use opentelemetry; +use std::sync::{Arc, Mutex}; + +pub struct DatabaseUpdater { + tag_dao: Arc>, + exif_dao: Arc>, + favorites_dao: Arc>, +} + +impl DatabaseUpdater { + pub fn new( + tag_dao: Arc>, + exif_dao: Arc>, + favorites_dao: Arc>, + ) -> Self { + Self { + tag_dao, + exif_dao, + favorites_dao, + } + } + + /// Update file path across all three database tables + /// Returns Ok(()) if successful, continues on partial failures but logs errors + pub fn update_file_path(&mut self, old_path: &str, new_path: &str) -> Result<()> { + let context = opentelemetry::Context::current(); + let mut success_count = 0; + let mut error_count = 0; + + // Update tagged_photo table + if let Ok(mut dao) = self.tag_dao.lock() { + match dao.update_photo_name(old_path, new_path, &context) { + Ok(_) => { + info!("Updated tagged_photo: {} -> {}", old_path, new_path); + success_count += 1; + } + Err(e) => { + error!( + "Failed to update tagged_photo for {}: {:?}", + old_path, e + ); + error_count += 1; + } + } + } else { + error!("Failed to acquire lock on TagDao"); + error_count += 1; + } + + // Update image_exif table + if let Ok(mut dao) = self.exif_dao.lock() { + match dao.update_file_path(old_path, new_path) { + Ok(_) => { + info!("Updated image_exif: {} -> {}", old_path, new_path); + success_count += 1; + } + Err(e) => { + error!("Failed to update image_exif for {}: {:?}", old_path, e); + error_count += 1; + } + } + } else { + error!("Failed to acquire lock on ExifDao"); + error_count += 1; + } + + // Update favorites table + if let Ok(mut dao) = self.favorites_dao.lock() { + match dao.update_path(old_path, new_path) { + Ok(_) => { + info!("Updated favorites: {} -> {}", old_path, new_path); + success_count += 1; + } + Err(e) => { + error!("Failed to update favorites for {}: {:?}", old_path, e); + error_count += 1; + } + } + } else { + error!("Failed to acquire lock on FavoriteDao"); + error_count += 1; + } + + if success_count > 0 { + info!( + "Updated {}/{} tables for {} -> {}", + success_count, + success_count + error_count, + old_path, + new_path + ); + Ok(()) + } else { + Err(anyhow::anyhow!( + "Failed to update any tables for {} -> {}", + old_path, + new_path + )) + } + } + + /// Get all file paths from all three database tables + pub fn get_all_file_paths(&mut self) -> Result> { + let context = opentelemetry::Context::current(); + let mut all_paths = Vec::new(); + + // Get from tagged_photo + if let Ok(mut dao) = self.tag_dao.lock() { + match dao.get_all_photo_names(&context) { + Ok(paths) => { + info!("Found {} paths in tagged_photo", paths.len()); + all_paths.extend(paths); + } + Err(e) => { + error!("Failed to get paths from tagged_photo: {:?}", e); + } + } + } + + // Get from image_exif + if let Ok(mut dao) = self.exif_dao.lock() { + match dao.get_all_file_paths() { + Ok(paths) => { + info!("Found {} paths in image_exif", paths.len()); + all_paths.extend(paths); + } + Err(e) => { + error!("Failed to get paths from image_exif: {:?}", e); + } + } + } + + // Get from favorites + if let Ok(mut dao) = self.favorites_dao.lock() { + match dao.get_all_paths() { + Ok(paths) => { + info!("Found 
{} paths in favorites", paths.len()); + all_paths.extend(paths); + } + Err(e) => { + error!("Failed to get paths from favorites: {:?}", e); + } + } + } + + // Deduplicate + all_paths.sort(); + all_paths.dedup(); + + info!("Total unique paths across all tables: {}", all_paths.len()); + Ok(all_paths) + } +} diff --git a/src/cleanup/file_type_detector.rs b/src/cleanup/file_type_detector.rs new file mode 100644 index 0000000..caa76fb --- /dev/null +++ b/src/cleanup/file_type_detector.rs @@ -0,0 +1,103 @@ +use anyhow::{Context, Result}; +use std::fs::File; +use std::io::Read; +use std::path::Path; + +/// Detect the actual file type by reading the magic number (file header) +/// Returns the canonical extension for the detected type, or None if unknown +pub fn detect_file_type(path: &Path) -> Result> { + let mut file = File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?; + + // Read first 512 bytes for magic number detection + let mut buffer = vec![0; 512]; + let bytes_read = file + .read(&mut buffer) + .with_context(|| format!("Failed to read file: {:?}", path))?; + buffer.truncate(bytes_read); + + // Detect type using infer crate + let detected_type = infer::get(&buffer); + + Ok(detected_type.map(|t| get_canonical_extension(t.mime_type()))) +} + +/// Map MIME type to canonical file extension +pub fn get_canonical_extension(mime_type: &str) -> String { + match mime_type { + // Images + "image/jpeg" => "jpg", + "image/png" => "png", + "image/webp" => "webp", + "image/tiff" => "tiff", + "image/heif" | "image/heic" => "heic", + "image/avif" => "avif", + + // Videos + "video/mp4" => "mp4", + "video/quicktime" => "mov", + + // Fallback: use the last part of MIME type + _ => mime_type.split('/').last().unwrap_or("unknown"), + } + .to_string() +} + +/// Check if a file should be renamed based on current vs detected extension +/// Handles aliases (jpg/jpeg are equivalent) +pub fn should_rename(current_ext: &str, detected_ext: &str) -> bool { + let current = current_ext.to_lowercase(); + let detected = detected_ext.to_lowercase(); + + // Direct match + if current == detected { + return false; + } + + // Handle JPEG aliases (jpg and jpeg are equivalent) + if (current == "jpg" || current == "jpeg") && (detected == "jpg" || detected == "jpeg") { + return false; + } + + // Handle TIFF aliases (tiff and tif are equivalent) + if (current == "tiff" || current == "tif") && (detected == "tiff" || detected == "tif") { + return false; + } + + // Extensions differ and are not aliases + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_canonical_extension() { + assert_eq!(get_canonical_extension("image/jpeg"), "jpg"); + assert_eq!(get_canonical_extension("image/png"), "png"); + assert_eq!(get_canonical_extension("image/webp"), "webp"); + assert_eq!(get_canonical_extension("video/mp4"), "mp4"); + assert_eq!(get_canonical_extension("video/quicktime"), "mov"); + } + + #[test] + fn test_should_rename() { + // Same extension - no rename + assert!(!should_rename("jpg", "jpg")); + assert!(!should_rename("png", "png")); + + // JPEG aliases - no rename + assert!(!should_rename("jpg", "jpeg")); + assert!(!should_rename("jpeg", "jpg")); + assert!(!should_rename("JPG", "jpeg")); + + // TIFF aliases - no rename + assert!(!should_rename("tiff", "tif")); + assert!(!should_rename("tif", "tiff")); + + // Different types - should rename + assert!(should_rename("png", "jpg")); + assert!(should_rename("jpg", "png")); + assert!(should_rename("webp", "png")); + } +} diff --git 
a/src/cleanup/mod.rs b/src/cleanup/mod.rs new file mode 100644 index 0000000..8c8ca72 --- /dev/null +++ b/src/cleanup/mod.rs @@ -0,0 +1,11 @@ +pub mod database_updater; +pub mod file_type_detector; +pub mod phase1; +pub mod phase2; +pub mod types; + +pub use database_updater::DatabaseUpdater; +pub use file_type_detector::{detect_file_type, get_canonical_extension, should_rename}; +pub use phase1::resolve_missing_files; +pub use phase2::validate_file_types; +pub use types::{CleanupConfig, CleanupStats, FileIssue, IssueType}; diff --git a/src/cleanup/phase1.rs b/src/cleanup/phase1.rs new file mode 100644 index 0000000..910df4d --- /dev/null +++ b/src/cleanup/phase1.rs @@ -0,0 +1,145 @@ +use crate::cleanup::database_updater::DatabaseUpdater; +use crate::cleanup::types::{CleanupConfig, CleanupStats}; +use anyhow::Result; +use log::{error, info, warn}; +use std::path::PathBuf; + +// All supported image extensions to try +const SUPPORTED_EXTENSIONS: &[&str] = &[ + "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef", +]; + +/// Phase 1: Resolve missing files by searching for alternative extensions +pub fn resolve_missing_files( + config: &CleanupConfig, + db_updater: &mut DatabaseUpdater, +) -> Result { + let mut stats = CleanupStats::new(); + + println!("\nPhase 1: Missing File Resolution"); + println!("---------------------------------"); + + // Get all file paths from database + println!("Scanning database for file references..."); + let all_paths = db_updater.get_all_file_paths()?; + println!("Found {} unique file paths\n", all_paths.len()); + + stats.files_checked = all_paths.len(); + + println!("Checking file existence..."); + let mut missing_count = 0; + let mut resolved_count = 0; + + for path_str in all_paths { + let full_path = config.base_path.join(&path_str); + + // Check if file exists + if full_path.exists() { + continue; + } + + missing_count += 1; + stats.issues_found += 1; + + // Try to find the file with different extensions + match find_file_with_alternative_extension(&config.base_path, &path_str) { + Some(new_path_str) => { + println!( + "✓ {} → found as {} {}", + path_str, + new_path_str, + if config.dry_run { + "(dry-run, not updated)" + } else { + "" + } + ); + + if !config.dry_run { + // Update database + match db_updater.update_file_path(&path_str, &new_path_str) { + Ok(_) => { + resolved_count += 1; + stats.issues_fixed += 1; + } + Err(e) => { + error!("Failed to update database for {}: {:?}", path_str, e); + stats.add_error(format!("DB update failed for {}: {}", path_str, e)); + } + } + } else { + resolved_count += 1; + } + } + None => { + warn!("✗ {} → not found with any extension", path_str); + } + } + } + + println!("\nResults:"); + println!("- Files checked: {}", stats.files_checked); + println!("- Missing files: {}", missing_count); + println!("- Resolved: {}", resolved_count); + println!( + "- Still missing: {}", + missing_count - if config.dry_run { 0 } else { resolved_count } + ); + + if !stats.errors.is_empty() { + println!("- Errors: {}", stats.errors.len()); + } + + Ok(stats) +} + +/// Find a file with an alternative extension +/// Returns the relative path with the new extension if found +fn find_file_with_alternative_extension(base_path: &PathBuf, relative_path: &str) -> Option { + let full_path = base_path.join(relative_path); + + // Get the parent directory and file stem (name without extension) + let parent = full_path.parent()?; + let stem = full_path.file_stem()?.to_str()?; + + // Try each supported extension + for ext in 
SUPPORTED_EXTENSIONS { + let test_path = parent.join(format!("{}.{}", stem, ext)); + if test_path.exists() { + // Convert back to relative path + if let Ok(rel) = test_path.strip_prefix(base_path) { + if let Some(rel_str) = rel.to_str() { + return Some(rel_str.to_string()); + } + } + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + #[test] + fn test_find_file_with_alternative_extension() { + let temp_dir = TempDir::new().unwrap(); + let base_path = temp_dir.path().to_path_buf(); + + // Create a test file with .jpeg extension + let test_file = base_path.join("test.jpeg"); + fs::write(&test_file, b"test").unwrap(); + + // Try to find it as .jpg + let result = find_file_with_alternative_extension(&base_path, "test.jpg"); + assert!(result.is_some()); + assert_eq!(result.unwrap(), "test.jpeg"); + + // Try to find non-existent file + let result = find_file_with_alternative_extension(&base_path, "nonexistent.jpg"); + assert!(result.is_none()); + } +} diff --git a/src/cleanup/phase2.rs b/src/cleanup/phase2.rs new file mode 100644 index 0000000..a9311af --- /dev/null +++ b/src/cleanup/phase2.rs @@ -0,0 +1,261 @@ +use crate::cleanup::database_updater::DatabaseUpdater; +use crate::cleanup::file_type_detector::{detect_file_type, should_rename}; +use crate::cleanup::types::{CleanupConfig, CleanupStats}; +use anyhow::{Context, Result}; +use dialoguer::Confirm; +use log::{error, info, warn}; +use std::fs; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; + +/// Phase 2: Validate file types and rename mismatches +pub fn validate_file_types( + config: &CleanupConfig, + db_updater: &mut DatabaseUpdater, +) -> Result { + let mut stats = CleanupStats::new(); + let mut auto_fix_all = config.auto_fix; + let mut skip_all = false; + + println!("\nPhase 2: File Type Validation"); + println!("------------------------------"); + + // Walk the filesystem + println!("Scanning filesystem..."); + let files: Vec = WalkDir::new(&config.base_path) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + .filter(|e| is_supported_media_file(e.path())) + .map(|e| e.path().to_path_buf()) + .collect(); + + println!("Files found: {}\n", files.len()); + stats.files_checked = files.len(); + + println!("Detecting file types..."); + let mut mismatches_found = 0; + let mut files_renamed = 0; + let mut user_skipped = 0; + + for file_path in files { + // Get current extension + let current_ext = match file_path.extension() { + Some(ext) => ext.to_str().unwrap_or(""), + None => continue, // Skip files without extensions + }; + + // Detect actual file type + match detect_file_type(&file_path) { + Ok(Some(detected_ext)) => { + // Check if we should rename + if should_rename(current_ext, &detected_ext) { + mismatches_found += 1; + stats.issues_found += 1; + + // Get relative path for display and database + let relative_path = match file_path.strip_prefix(&config.base_path) { + Ok(rel) => rel.to_str().unwrap_or(""), + Err(_) => { + error!("Failed to get relative path for {:?}", file_path); + continue; + } + }; + + println!("\nFile type mismatch:"); + println!(" Path: {}", relative_path); + println!(" Current: .{}", current_ext); + println!(" Actual: .{}", detected_ext); + + // Calculate new path + let new_file_path = file_path.with_extension(&detected_ext); + let new_relative_path = match new_file_path.strip_prefix(&config.base_path) { + Ok(rel) => rel.to_str().unwrap_or(""), + Err(_) => { + error!("Failed to get new relative path for {:?}", 
new_file_path); + continue; + } + }; + + // Check if destination already exists + if new_file_path.exists() { + warn!( + "✗ Destination already exists: {}", + new_relative_path + ); + stats.add_error(format!( + "Destination exists for {}: {}", + relative_path, new_relative_path + )); + continue; + } + + // Determine if we should proceed + let should_proceed = if config.dry_run { + println!(" (dry-run mode - would rename to {})", new_relative_path); + false + } else if skip_all { + println!(" Skipped (skip all)"); + user_skipped += 1; + false + } else if auto_fix_all { + true + } else { + // Interactive prompt + match prompt_for_rename(&new_relative_path) { + RenameDecision::Yes => true, + RenameDecision::No => { + user_skipped += 1; + false + } + RenameDecision::All => { + auto_fix_all = true; + true + } + RenameDecision::SkipAll => { + skip_all = true; + user_skipped += 1; + false + } + } + }; + + if should_proceed { + // Rename the file + match fs::rename(&file_path, &new_file_path) { + Ok(_) => { + println!("✓ Renamed file"); + + // Update database + match db_updater + .update_file_path(relative_path, new_relative_path) + { + Ok(_) => { + files_renamed += 1; + stats.issues_fixed += 1; + } + Err(e) => { + error!( + "File renamed but DB update failed for {}: {:?}", + relative_path, e + ); + stats.add_error(format!( + "DB update failed for {}: {}", + relative_path, e + )); + } + } + } + Err(e) => { + error!("✗ Failed to rename file: {:?}", e); + stats.add_error(format!("Rename failed for {}: {}", relative_path, e)); + } + } + } + } + } + Ok(None) => { + // Could not detect file type - skip + // This is normal for some RAW formats or corrupted files + } + Err(e) => { + warn!("Failed to detect type for {:?}: {:?}", file_path, e); + } + } + } + + println!("\nResults:"); + println!("- Files scanned: {}", stats.files_checked); + println!("- Mismatches found: {}", mismatches_found); + if config.dry_run { + println!("- Would rename: {}", mismatches_found); + } else { + println!("- Files renamed: {}", files_renamed); + if user_skipped > 0 { + println!("- User skipped: {}", user_skipped); + } + } + + if !stats.errors.is_empty() { + println!("- Errors: {}", stats.errors.len()); + } + + Ok(stats) +} + +/// Check if a file is a supported media file based on extension +fn is_supported_media_file(path: &Path) -> bool { + if let Some(ext) = path.extension() { + if let Some(ext_str) = ext.to_str() { + let ext_lower = ext_str.to_lowercase(); + return matches!( + ext_lower.as_str(), + "jpg" | "jpeg" + | "png" + | "webp" + | "tiff" + | "tif" + | "heif" + | "heic" + | "avif" + | "nef" + | "mp4" + | "mov" + ); + } + } + false +} + +#[derive(Debug)] +enum RenameDecision { + Yes, + No, + All, + SkipAll, +} + +/// Prompt the user for rename decision +fn prompt_for_rename(new_path: &str) -> RenameDecision { + println!("\nRename to {}?", new_path); + println!(" [y] Yes"); + println!(" [n] No (default)"); + println!(" [a] Yes to all"); + println!(" [s] Skip all remaining"); + print!("Choice: "); + + // Force flush stdout + use std::io::{self, Write}; + let _ = io::stdout().flush(); + + let mut input = String::new(); + match io::stdin().read_line(&mut input) { + Ok(_) => { + let choice = input.trim().to_lowercase(); + match choice.as_str() { + "y" | "yes" => RenameDecision::Yes, + "a" | "all" => RenameDecision::All, + "s" | "skip" => RenameDecision::SkipAll, + _ => RenameDecision::No, + } + } + Err(_) => RenameDecision::No, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_is_supported_media_file() { + assert!(is_supported_media_file(Path::new("test.jpg"))); + assert!(is_supported_media_file(Path::new("test.JPG"))); + assert!(is_supported_media_file(Path::new("test.png"))); + assert!(is_supported_media_file(Path::new("test.webp"))); + assert!(is_supported_media_file(Path::new("test.mp4"))); + assert!(is_supported_media_file(Path::new("test.mov"))); + assert!(!is_supported_media_file(Path::new("test.txt"))); + assert!(!is_supported_media_file(Path::new("test"))); + } +} diff --git a/src/cleanup/types.rs b/src/cleanup/types.rs new file mode 100644 index 0000000..472ec88 --- /dev/null +++ b/src/cleanup/types.rs @@ -0,0 +1,39 @@ +use std::path::PathBuf; + +#[derive(Debug, Clone)] +pub struct CleanupConfig { + pub base_path: PathBuf, + pub dry_run: bool, + pub auto_fix: bool, +} + +#[derive(Debug, Clone)] +pub struct FileIssue { + pub current_path: String, + pub issue_type: IssueType, + pub suggested_path: Option, +} + +#[derive(Debug, Clone)] +pub enum IssueType { + MissingFile, + ExtensionMismatch { current: String, actual: String }, +} + +#[derive(Debug, Clone, Default)] +pub struct CleanupStats { + pub files_checked: usize, + pub issues_found: usize, + pub issues_fixed: usize, + pub errors: Vec, +} + +impl CleanupStats { + pub fn new() -> Self { + Self::default() + } + + pub fn add_error(&mut self, error: String) { + self.errors.push(error); + } +} diff --git a/src/database/mod.rs b/src/database/mod.rs index 018a0ea..305e31d 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -123,12 +123,15 @@ pub enum DbErrorKind { AlreadyExists, InsertError, QueryError, + UpdateError, } pub trait FavoriteDao: Sync + Send { fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result; fn remove_favorite(&mut self, user_id: i32, favorite_path: String); fn get_favorites(&mut self, user_id: i32) -> Result, DbError>; + fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>; + fn get_all_paths(&mut self) -> Result, DbError>; } pub struct SqliteFavoriteDao { @@ -183,6 +186,26 @@ impl FavoriteDao for SqliteFavoriteDao { .load::(self.connection.lock().unwrap().deref_mut()) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> { + use schema::favorites::dsl::*; + + diesel::update(favorites.filter(path.eq(old_path))) + .set(path.eq(new_path)) + .execute(self.connection.lock().unwrap().deref_mut()) + .map_err(|_| DbError::new(DbErrorKind::UpdateError))?; + Ok(()) + } + + fn get_all_paths(&mut self) -> Result, DbError> { + use schema::favorites::dsl::*; + + favorites + .select(path) + .distinct() + .load(self.connection.lock().unwrap().deref_mut()) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } pub trait ExifDao: Sync + Send { @@ -208,6 +231,12 @@ pub trait ExifDao: Sync + Send { /// Get distinct camera makes with counts fn get_camera_makes(&mut self) -> Result, DbError>; + + /// Update file path in EXIF database + fn update_file_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>; + + /// Get all file paths from EXIF database + fn get_all_file_paths(&mut self) -> Result, DbError>; } pub struct SqliteExifDao { @@ -398,4 +427,27 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn update_file_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get 
ExifDao"); + + diesel::update(image_exif.filter(file_path.eq(old_path))) + .set(file_path.eq(new_path)) + .execute(connection.deref_mut()) + .map_err(|_| DbError::new(DbErrorKind::UpdateError))?; + Ok(()) + } + + fn get_all_file_paths(&mut self) -> Result, DbError> { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .select(file_path) + .load(connection.deref_mut()) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } diff --git a/src/lib.rs b/src/lib.rs index 627a1e6..6933068 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,40 @@ #[macro_use] extern crate diesel; +pub mod auth; +pub mod cleanup; +pub mod data; pub mod database; +pub mod error; pub mod exif; +pub mod files; +pub mod geo; +pub mod memories; +pub mod otel; +pub mod service; +pub mod state; +pub mod tags; +pub mod video; + +// Re-export commonly used types +pub use data::{Claims, ThumbnailRequest}; +pub use database::{connect, schema}; +pub use state::AppState; + +// Stub functions for modules that reference main.rs +// These are not used by cleanup_files binary +use std::path::Path; +use walkdir::DirEntry; + +pub fn create_thumbnails() { + // Stub - implemented in main.rs +} + +pub fn update_media_counts(_media_dir: &Path) { + // Stub - implemented in main.rs +} + +pub fn is_video(_entry: &DirEntry) -> bool { + // Stub - implemented in main.rs + false +} diff --git a/src/tags.rs b/src/tags.rs index 5cf0ed2..fa83f23 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -303,6 +303,14 @@ pub trait TagDao { exclude_tag_ids: Vec, context: &opentelemetry::Context, ) -> anyhow::Result>; + fn update_photo_name( + &mut self, + old_name: &str, + new_name: &str, + context: &opentelemetry::Context, + ) -> anyhow::Result<()>; + fn get_all_photo_names(&mut self, context: &opentelemetry::Context) + -> anyhow::Result>; } pub struct SqliteTagDao { @@ -576,6 +584,33 @@ impl TagDao for SqliteTagDao { .with_context(|| "Unable to get tagged photos") }) } + + fn update_photo_name( + &mut self, + old_name: &str, + new_name: &str, + context: &opentelemetry::Context, + ) -> anyhow::Result<()> { + use crate::database::schema::tagged_photo::dsl::*; + + diesel::update(tagged_photo.filter(photo_name.eq(old_name))) + .set(photo_name.eq(new_name)) + .execute(&mut self.connection)?; + Ok(()) + } + + fn get_all_photo_names( + &mut self, + context: &opentelemetry::Context, + ) -> anyhow::Result> { + use crate::database::schema::tagged_photo::dsl::*; + + tagged_photo + .select(photo_name) + .distinct() + .load(&mut self.connection) + .with_context(|| "Unable to get photo names") + } } #[cfg(test)] @@ -736,6 +771,14 @@ mod tests { ) -> anyhow::Result> { todo!() } + + fn update_photo_name(&mut self, old_name: &str, new_name: &str, context: &opentelemetry::Context) -> anyhow::Result<()> { + todo!() + } + + fn get_all_photo_names(&mut self, context: &opentelemetry::Context) -> anyhow::Result> { + todo!() + } } #[actix_rt::test]