diff --git a/Cargo.lock b/Cargo.lock index 3e1e750..4f04521 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -474,6 +474,12 @@ dependencies = [ "syn", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -572,6 +578,20 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" +[[package]] +name = "blake3" +version = "1.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures 0.3.0", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -766,6 +786,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "convert_case" version = "0.4.0" @@ -808,6 +834,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -1797,7 +1832,7 @@ dependencies = [ [[package]] name = "image-api" -version = "0.5.2" +version = "1.0.0" dependencies = [ "actix", "actix-cors", @@ -1810,6 +1845,7 @@ dependencies = [ "anyhow", "base64", "bcrypt", + "blake3", "chrono", "clap", "diesel", @@ -3365,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] diff --git a/Cargo.toml b/Cargo.toml index 88b9f09..1e606b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "0.5.2" +version = "1.0.0" authors = ["Cameron Cordes "] edition = "2024" @@ -55,3 +55,4 @@ zerocopy = "0.8" ical = "0.11" scraper = "0.20" base64 = "0.22" +blake3 = "1.5" diff --git a/migrations/2026-04-17-000000_multi_library/down.sql b/migrations/2026-04-17-000000_multi_library/down.sql new file mode 100644 index 0000000..9dcb5c7 --- /dev/null +++ b/migrations/2026-04-17-000000_multi_library/down.sql @@ -0,0 +1,155 @@ +-- Revert multi-library support. +-- Drops library_id/content_hash/size_bytes, renames rel_path back to the +-- original column names, and drops the libraries table. Rows originally +-- from non-primary libraries (id > 1) would be orphaned, so the rollback +-- keeps only rows from library_id=1. + +PRAGMA foreign_keys=OFF; + +-- tagged_photo: rel_path → photo_name. +DROP INDEX IF EXISTS idx_tagged_photo_relpath_tag; +DROP INDEX IF EXISTS idx_tagged_photo_rel_path; +ALTER TABLE tagged_photo RENAME COLUMN rel_path TO photo_name; +CREATE INDEX IF NOT EXISTS idx_tagged_photo_photo_name ON tagged_photo(photo_name); +CREATE INDEX IF NOT EXISTS idx_tagged_photo_count ON tagged_photo(photo_name, tag_id); + +-- favorites: rel_path → path. 
+DROP INDEX IF EXISTS idx_favorites_unique; +DROP INDEX IF EXISTS idx_favorites_rel_path; +ALTER TABLE favorites RENAME COLUMN rel_path TO path; +CREATE INDEX IF NOT EXISTS idx_favorites_path ON favorites(path); +CREATE UNIQUE INDEX IF NOT EXISTS idx_favorites_unique ON favorites(userid, path); + +-- video_preview_clips: drop library_id, rel_path → file_path. +CREATE TABLE video_preview_clips_old ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + file_path TEXT NOT NULL UNIQUE, + status TEXT NOT NULL DEFAULT 'pending', + duration_seconds REAL, + file_size_bytes INTEGER, + error_message TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +INSERT INTO video_preview_clips_old ( + id, file_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +) +SELECT + id, rel_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +FROM video_preview_clips +WHERE library_id = 1; + +DROP TABLE video_preview_clips; +ALTER TABLE video_preview_clips_old RENAME TO video_preview_clips; + +CREATE INDEX idx_preview_clips_file_path ON video_preview_clips(file_path); +CREATE INDEX idx_preview_clips_status ON video_preview_clips(status); + +-- entity_photo_links: drop library_id, rel_path → file_path. +CREATE TABLE entity_photo_links_old ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + entity_id INTEGER NOT NULL, + file_path TEXT NOT NULL, + role TEXT NOT NULL, + CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE, + UNIQUE(entity_id, file_path, role) +); + +INSERT INTO entity_photo_links_old (id, entity_id, file_path, role) +SELECT id, entity_id, rel_path, role +FROM entity_photo_links +WHERE library_id = 1; + +DROP TABLE entity_photo_links; +ALTER TABLE entity_photo_links_old RENAME TO entity_photo_links; + +CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id); +CREATE INDEX idx_entity_photo_links_photo ON entity_photo_links(file_path); + +-- photo_insights: drop library_id, rel_path → file_path. +CREATE TABLE photo_insights_old ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + file_path TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + generated_at BIGINT NOT NULL, + model_version TEXT NOT NULL, + is_current BOOLEAN NOT NULL DEFAULT 0, + training_messages TEXT, + approved BOOLEAN +); + +INSERT INTO photo_insights_old ( + id, file_path, title, summary, generated_at, model_version, is_current, + training_messages, approved +) +SELECT + id, rel_path, title, summary, generated_at, model_version, is_current, + training_messages, approved +FROM photo_insights +WHERE library_id = 1; + +DROP TABLE photo_insights; +ALTER TABLE photo_insights_old RENAME TO photo_insights; + +CREATE INDEX idx_photo_insights_file_path ON photo_insights(file_path); +CREATE INDEX idx_photo_insights_current ON photo_insights(file_path, is_current); + +-- image_exif: drop library_id/content_hash/size_bytes, rel_path → file_path. 
+CREATE TABLE image_exif_old ( + id INTEGER PRIMARY KEY NOT NULL, + file_path TEXT NOT NULL UNIQUE, + camera_make TEXT, + camera_model TEXT, + lens_model TEXT, + width INTEGER, + height INTEGER, + orientation INTEGER, + gps_latitude REAL, + gps_longitude REAL, + gps_altitude REAL, + focal_length REAL, + aperture REAL, + shutter_speed TEXT, + iso INTEGER, + date_taken BIGINT, + created_time BIGINT NOT NULL, + last_modified BIGINT NOT NULL +); + +INSERT INTO image_exif_old ( + id, file_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +) +SELECT + id, rel_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +FROM image_exif +WHERE library_id = 1; + +DROP TABLE image_exif; +ALTER TABLE image_exif_old RENAME TO image_exif; + +CREATE INDEX idx_image_exif_file_path ON image_exif(file_path); +CREATE INDEX idx_image_exif_camera ON image_exif(camera_make, camera_model); +CREATE INDEX idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude); +CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken); +CREATE INDEX idx_image_exif_date_path ON image_exif(date_taken DESC, file_path); + +-- Finally, drop the libraries registry. +DROP TABLE libraries; + +PRAGMA foreign_keys=ON; + +ANALYZE; diff --git a/migrations/2026-04-17-000000_multi_library/up.sql b/migrations/2026-04-17-000000_multi_library/up.sql new file mode 100644 index 0000000..7b32c31 --- /dev/null +++ b/migrations/2026-04-17-000000_multi_library/up.sql @@ -0,0 +1,216 @@ +-- Multi-library support. +-- Adds `libraries` registry table and a `library_id` column on per-instance +-- metadata tables. Renames `file_path` / `photo_name` to `rel_path` for +-- semantic clarity (values already stored relative to BASE_PATH). +-- Adds `content_hash` + `size_bytes` to `image_exif` to support +-- content-based dedup of thumbnails and HLS output across libraries. +-- +-- SQLite cannot alter column constraints in place, so per-instance tables +-- are recreated following the idiom established in +-- 2026-04-02-000000_photo_insights_history/up.sql. Existing row `id`s are +-- preserved so foreign keys (entity_facts.source_insight_id, etc.) remain +-- valid after migration. + +PRAGMA foreign_keys=OFF; + +-- --------------------------------------------------------------------------- +-- 1. Libraries registry. +-- Seeded with a placeholder for the primary library; AppState patches +-- `root_path` from the BASE_PATH env var on first boot. Subsequent +-- prod-to-dev DB syncs update this row via a single SQL UPDATE. +-- --------------------------------------------------------------------------- +CREATE TABLE libraries ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL UNIQUE, + root_path TEXT NOT NULL, + created_at BIGINT NOT NULL +); + +INSERT INTO libraries (id, name, root_path, created_at) +VALUES (1, 'main', 'BASE_PATH_PLACEHOLDER', strftime('%s','now')); + +-- --------------------------------------------------------------------------- +-- 2. image_exif: + library_id, file_path → rel_path, + content_hash/size_bytes. 
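+--    e.g. after this step the same photo indexed under two roots yields two
+--    rows that differ only in library_id, both allowed by UNIQUE(library_id,
+--    rel_path); a shared content_hash later identifies them as one file
+--    (example values, not real data).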
+-- --------------------------------------------------------------------------- +CREATE TABLE image_exif_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + + -- Camera information + camera_make TEXT, + camera_model TEXT, + lens_model TEXT, + + -- Image properties + width INTEGER, + height INTEGER, + orientation INTEGER, + + -- GPS + gps_latitude REAL, + gps_longitude REAL, + gps_altitude REAL, + + -- Capture settings + focal_length REAL, + aperture REAL, + shutter_speed TEXT, + iso INTEGER, + date_taken BIGINT, + + -- Housekeeping + created_time BIGINT NOT NULL, + last_modified BIGINT NOT NULL, + + -- Content identity (backfilled by the `backfill_hashes` binary and by the watcher for new files) + content_hash TEXT, + size_bytes BIGINT, + + UNIQUE(library_id, rel_path) +); + +INSERT INTO image_exif_new ( + id, library_id, rel_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +) +SELECT + id, 1, file_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +FROM image_exif; + +DROP TABLE image_exif; +ALTER TABLE image_exif_new RENAME TO image_exif; + +CREATE INDEX idx_image_exif_rel_path ON image_exif(rel_path); +CREATE INDEX idx_image_exif_camera ON image_exif(camera_make, camera_model); +CREATE INDEX idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude); +CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken); +CREATE INDEX idx_image_exif_date_path ON image_exif(date_taken DESC, rel_path); +CREATE INDEX idx_image_exif_lib_date ON image_exif(library_id, date_taken); +CREATE INDEX idx_image_exif_content_hash ON image_exif(content_hash); + +-- --------------------------------------------------------------------------- +-- 3. photo_insights: + library_id, file_path → rel_path. +-- Preserve `id` so entity_facts.source_insight_id FKs remain valid. +-- --------------------------------------------------------------------------- +CREATE TABLE photo_insights_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + generated_at BIGINT NOT NULL, + model_version TEXT NOT NULL, + is_current BOOLEAN NOT NULL DEFAULT 0, + training_messages TEXT, + approved BOOLEAN +); + +INSERT INTO photo_insights_new ( + id, library_id, rel_path, title, summary, generated_at, model_version, + is_current, training_messages, approved +) +SELECT + id, 1, file_path, title, summary, generated_at, model_version, + is_current, training_messages, approved +FROM photo_insights; + +DROP TABLE photo_insights; +ALTER TABLE photo_insights_new RENAME TO photo_insights; + +CREATE INDEX idx_photo_insights_rel_path ON photo_insights(rel_path); +CREATE INDEX idx_photo_insights_current ON photo_insights(library_id, rel_path, is_current); + +-- --------------------------------------------------------------------------- +-- 4. entity_photo_links: + library_id, file_path → rel_path. +-- Preserves entity FK; UNIQUE now includes library_id to allow the same +-- rel_path to link entities in multiple libraries independently. 
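+--    e.g. entity 7 linked to 'trips/beach.jpg' may now carry one row for
+--    library 1 and another for library 2 (entity id and path illustrative).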
+-- --------------------------------------------------------------------------- +CREATE TABLE entity_photo_links_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + entity_id INTEGER NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + role TEXT NOT NULL, + CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE, + UNIQUE(entity_id, library_id, rel_path, role) +); + +INSERT INTO entity_photo_links_new (id, entity_id, library_id, rel_path, role) +SELECT id, entity_id, 1, file_path, role FROM entity_photo_links; + +DROP TABLE entity_photo_links; +ALTER TABLE entity_photo_links_new RENAME TO entity_photo_links; + +CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id); +CREATE INDEX idx_entity_photo_links_photo ON entity_photo_links(library_id, rel_path); + +-- --------------------------------------------------------------------------- +-- 5. video_preview_clips: + library_id, file_path → rel_path. +-- --------------------------------------------------------------------------- +CREATE TABLE video_preview_clips_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + duration_seconds REAL, + file_size_bytes INTEGER, + error_message TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(library_id, rel_path) +); + +INSERT INTO video_preview_clips_new ( + id, library_id, rel_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +) +SELECT + id, 1, file_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +FROM video_preview_clips; + +DROP TABLE video_preview_clips; +ALTER TABLE video_preview_clips_new RENAME TO video_preview_clips; + +CREATE INDEX idx_preview_clips_rel_path ON video_preview_clips(rel_path); +CREATE INDEX idx_preview_clips_status ON video_preview_clips(status); + +-- --------------------------------------------------------------------------- +-- 6. favorites: path → rel_path. Library-agnostic (cross-library sharing). +-- --------------------------------------------------------------------------- +ALTER TABLE favorites RENAME COLUMN path TO rel_path; + +DROP INDEX IF EXISTS idx_favorites_path; +DROP INDEX IF EXISTS idx_favorites_unique; +CREATE INDEX idx_favorites_rel_path ON favorites(rel_path); +CREATE UNIQUE INDEX idx_favorites_unique ON favorites(userid, rel_path); + +-- --------------------------------------------------------------------------- +-- 7. tagged_photo: photo_name → rel_path. Library-agnostic. +-- Dedup first so the (rel_path, tag_id) unique index can be created safely. 
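+--    e.g. if rows id=3 and id=9 both pair 'a/b.jpg' with tag_id=2, only
+--    id=3 (the MIN) survives the DELETE below (ids and path illustrative).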
+-- --------------------------------------------------------------------------- +ALTER TABLE tagged_photo RENAME COLUMN photo_name TO rel_path; + +DELETE FROM tagged_photo +WHERE id NOT IN ( + SELECT MIN(id) FROM tagged_photo GROUP BY rel_path, tag_id +); + +DROP INDEX IF EXISTS idx_tagged_photo_photo_name; +DROP INDEX IF EXISTS idx_tagged_photo_count; +CREATE INDEX idx_tagged_photo_rel_path ON tagged_photo(rel_path); +CREATE UNIQUE INDEX idx_tagged_photo_relpath_tag ON tagged_photo(rel_path, tag_id); + +PRAGMA foreign_keys=ON; + +ANALYZE; diff --git a/migrations/2026-04-17-000100_normalize_path_separators/down.sql b/migrations/2026-04-17-000100_normalize_path_separators/down.sql new file mode 100644 index 0000000..4f3169c --- /dev/null +++ b/migrations/2026-04-17-000100_normalize_path_separators/down.sql @@ -0,0 +1,4 @@ +-- No-op: there's no sensible way to recover which rows originally used +-- backslashes, and there's no reason to want backslashes back. The +-- deleted duplicates are also gone. +SELECT 1; diff --git a/migrations/2026-04-17-000100_normalize_path_separators/up.sql b/migrations/2026-04-17-000100_normalize_path_separators/up.sql new file mode 100644 index 0000000..fc3bcdf --- /dev/null +++ b/migrations/2026-04-17-000100_normalize_path_separators/up.sql @@ -0,0 +1,85 @@ +-- Normalize `rel_path` columns to forward slashes. Windows ingest +-- historically produced a mix of `\` and `/`, which broke lookups and +-- caused spurious UNIQUE-constraint violations on re-registration. +-- +-- SQLite enforces UNIQUE per-row during UPDATE, so we have to drop +-- losing duplicates BEFORE normalizing. For each table that has a +-- UNIQUE on rel_path, we delete rows whose normalized form already +-- exists in canonical (forward-slash) form — keeping the existing +-- forward-slash row as the survivor. Then a flat UPDATE finishes the +-- job for remaining backslash rows. 
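+--
+-- Illustrative walk-through (paths hypothetical): given id=1 'a/b.jpg' and
+-- id=2 'a\b.jpg', the DELETE drops id=2 because its normalized form already
+-- exists, and the UPDATE then rewrites a remaining 'a\c.jpg' to 'a/c.jpg'.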
+ +-- image_exif: UNIQUE(library_id, rel_path) +DELETE FROM image_exif + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM image_exif AS other + WHERE other.library_id = image_exif.library_id + AND other.rel_path = REPLACE(image_exif.rel_path, '\', '/') + AND other.id != image_exif.id + ); +UPDATE image_exif + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- favorites: UNIQUE(userid, rel_path) +DELETE FROM favorites + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM favorites AS other + WHERE other.userid = favorites.userid + AND other.rel_path = REPLACE(favorites.rel_path, '\', '/') + AND other.id != favorites.id + ); +UPDATE favorites + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- tagged_photo: UNIQUE(rel_path, tag_id) +DELETE FROM tagged_photo + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM tagged_photo AS other + WHERE other.tag_id = tagged_photo.tag_id + AND other.rel_path = REPLACE(tagged_photo.rel_path, '\', '/') + AND other.id != tagged_photo.id + ); +UPDATE tagged_photo + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- entity_photo_links: UNIQUE(entity_id, library_id, rel_path, role) +DELETE FROM entity_photo_links + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM entity_photo_links AS other + WHERE other.entity_id = entity_photo_links.entity_id + AND other.library_id = entity_photo_links.library_id + AND other.role = entity_photo_links.role + AND other.rel_path = REPLACE(entity_photo_links.rel_path, '\', '/') + AND other.id != entity_photo_links.id + ); +UPDATE entity_photo_links + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- video_preview_clips: UNIQUE(library_id, rel_path) +DELETE FROM video_preview_clips + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM video_preview_clips AS other + WHERE other.library_id = video_preview_clips.library_id + AND other.rel_path = REPLACE(video_preview_clips.rel_path, '\', '/') + AND other.id != video_preview_clips.id + ); +UPDATE video_preview_clips + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- photo_insights has no UNIQUE on rel_path (history table), so a plain +-- normalize is safe. +UPDATE photo_insights + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +ANALYZE; diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cf7fd5b..abf2369 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -5,8 +5,10 @@ use serde::{Deserialize, Serialize}; use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient}; use crate::data::Claims; -use crate::database::InsightDao; +use crate::database::{ExifDao, InsightDao}; +use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer}; +use crate::state::AppState; use crate::utils::normalize_path; #[derive(Debug, Deserialize)] @@ -31,6 +33,10 @@ pub struct GeneratePhotoInsightRequest { #[derive(Debug, Deserialize)] pub struct GetPhotoInsightQuery { pub path: String, + /// Library context for this lookup. Used to pick the right content + /// hash when the same rel_path exists under multiple roots. 
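+    /// Illustrative query (route and values assumed, not part of this diff):
+    /// `GET /photo-insight?path=2024/IMG_0001.jpg&library=main`.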
+    #[serde(default)]
+    pub library: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -146,15 +152,30 @@
 pub async fn get_insight_handler(
     _claims: Claims,
     query: web::Query<GetPhotoInsightQuery>,
+    app_state: web::Data<AppState>,
     insight_dao: web::Data<Mutex<Box<dyn InsightDao>>>,
+    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
 ) -> impl Responder {
     let normalized_path = normalize_path(&query.path);
     log::debug!("Fetching insight for {}", normalized_path);
 
     let otel_context = opentelemetry::Context::new();
+
+    // Expand to rel_paths sharing content so an insight generated under
+    // library 1 still shows when the same photo is viewed from library 2.
+    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+        .ok()
+        .flatten()
+        .unwrap_or_else(|| app_state.primary_library());
+    let sibling_paths = {
+        let mut exif = exif_dao.lock().expect("Unable to lock ExifDao");
+        exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path)
+            .unwrap_or_else(|_| vec![normalized_path.clone()])
+    };
+
     let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
 
-    match dao.get_insight(&otel_context, &normalized_path) {
+    match dao.get_insight_for_paths(&otel_context, &sibling_paths) {
         Ok(Some(insight)) => {
             let response = PhotoInsightResponse {
                 id: insight.id,
@@ -482,7 +503,10 @@ pub async fn export_training_data_handler(
 
             HttpResponse::Ok()
                 .content_type("application/jsonl")
-                .insert_header(("Content-Disposition", "attachment; filename=\"training_data.jsonl\""))
+                .insert_header((
+                    "Content-Disposition",
+                    "attachment; filename=\"training_data.jsonl\"",
+                ))
                 .body(jsonl)
         }
         Err(e) => {
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 2ef503d..18e50c7 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -16,6 +16,7 @@ use crate::database::{
     CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
     SearchHistoryDao,
 };
+use crate::libraries::Library;
 use crate::memories::extract_date_from_filename;
 use crate::otel::global_tracer;
 use crate::tags::TagDao;
@@ -52,7 +53,7 @@ pub struct InsightGenerator {
     // Knowledge memory
     knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
 
-    base_path: String,
+    libraries: Vec<Library>,
 }
 
 impl InsightGenerator {
@@ -67,7 +68,7 @@ impl InsightGenerator {
         search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
         tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
         knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
-        base_path: String,
+        libraries: Vec<Library>,
     ) -> Self {
         Self {
             ollama,
@@ -80,10 +81,25 @@
             search_dao,
             tag_dao,
             knowledge_dao,
-            base_path,
+            libraries,
         }
     }
 
+    /// Resolve `rel_path` against the configured libraries, returning the
+    /// first root under which the file exists. Insights may be generated
+    /// for any library — the generator itself doesn't know which — so we
+    /// probe each root rather than trust a single `base_path`.
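+    /// e.g. with roots `/mnt/photos` and `/mnt/archive` (paths illustrative),
+    /// `resolve_full_path("2024/IMG_0001.jpg")` yields whichever of the two
+    /// joined candidates exists on disk first, or `None` if neither does.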
+    fn resolve_full_path(&self, rel_path: &str) -> Option<std::path::PathBuf> {
+        use std::path::Path;
+        for lib in &self.libraries {
+            let candidate = Path::new(&lib.root_path).join(rel_path);
+            if candidate.exists() {
+                return Some(candidate);
+            }
+        }
+        None
+    }
+
     /// Extract contact name from file path
     /// e.g., "Sarah/img.jpeg" -> Some("Sarah")
     /// e.g., "img.jpeg" -> None
@@ -108,9 +124,13 @@
     /// Resizes to max 1024px on longest edge to reduce context usage
     fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
         use image::imageops::FilterType;
-        use std::path::Path;
 
-        let full_path = Path::new(&self.base_path).join(file_path);
+        let full_path = self.resolve_full_path(file_path).ok_or_else(|| {
+            anyhow::anyhow!(
+                "File '{}' not found under any configured library",
+                file_path
+            )
+        })?;
 
         log::debug!("Loading image for vision model: {:?}", full_path);
@@ -420,7 +440,11 @@
             .iter()
             .map(|e| {
                 let date = DateTime::from_timestamp(e.start_time, 0)
-                    .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                    .map(|dt| {
+                        dt.with_timezone(&Local)
+                            .format("%Y-%m-%d %H:%M")
+                            .to_string()
+                    })
                     .unwrap_or_else(|| "unknown".to_string());
 
                 let attendees = e
@@ -725,8 +749,7 @@
         extract_date_from_filename(&file_path)
             .map(|dt| dt.timestamp())
             .or_else(|| {
-                // Combine base_path with file_path to get full path
-                let full_path = std::path::Path::new(&self.base_path).join(&file_path);
+                let full_path = self.resolve_full_path(&file_path)?;
                 File::open(&full_path)
                     .and_then(|f| f.metadata())
                     .and_then(|m| m.created().or(m.modified()))
@@ -1187,6 +1210,7 @@
         // 11. Store in database
         let insight = InsertPhotoInsight {
+            library_id: crate::libraries::PRIMARY_LIBRARY_ID,
             file_path: file_path.to_string(),
             title,
             summary,
@@ -1334,7 +1358,11 @@ Return ONLY the summary, nothing else."#,
             .map(|m| {
                 let sender = if m.is_sent { "Me" } else { &m.contact };
                 let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0)
-                    .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                    .map(|dt| {
+                        dt.with_timezone(&Local)
+                            .format("%Y-%m-%d %H:%M")
+                            .to_string()
+                    })
                     .unwrap_or_else(|| "unknown time".to_string());
                 format!("[{}] {}: {}", timestamp, sender, m.body)
             })
@@ -1429,16 +1457,22 @@ Return ONLY the summary, nothing else."#,
             .get("contact")
             .and_then(|v| v.as_str())
             .map(|s| s.to_string());
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(10)
+            .clamp(1, 25) as usize;
 
         log::info!(
-            "tool_search_rag: query='{}', date={}, contact={:?}",
+            "tool_search_rag: query='{}', date={}, contact={:?}, limit={}",
             query,
             date,
-            contact
+            contact,
+            limit
         );
 
         match self
-            .find_relevant_messages_rag(date, None, contact.as_deref(), None, 5, Some(&query))
+            .find_relevant_messages_rag(date, None, contact.as_deref(), None, limit, Some(&query))
             .await
         {
             Ok(results) if !results.is_empty() => results.join("\n\n"),
@@ -1465,6 +1499,11 @@
             .get("days_radius")
             .and_then(|v| v.as_i64())
             .unwrap_or(4);
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(60)
+            .clamp(1, 150) as usize;
 
         let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
             Ok(d) => d,
 
         let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp();
 
         log::info!(
-            "tool_get_sms_messages: date={}, contact={:?}, days_radius={}",
+            "tool_get_sms_messages: date={}, contact={:?}, days_radius={}, limit={}",
             date,
             contact,
-            days_radius
+            days_radius,
+            limit
         );
 
         match self
@@ -1487,11 +1527,15 @@ Return ONLY the summary, nothing else."#,
             Ok(messages) if !messages.is_empty() => {
                 let formatted: Vec<String> = messages
                     .iter()
-                    .take(30)
+                    .take(limit)
                     .map(|m| {
                         let sender = if m.is_sent { "Me" } else { &m.contact };
                         let ts = DateTime::from_timestamp(m.timestamp, 0)
-                            .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                            .map(|dt| {
+                                dt.with_timezone(&Local)
+                                    .format("%Y-%m-%d %H:%M")
+                                    .to_string()
+                            })
                             .unwrap_or_else(|| "unknown".to_string());
                         format!("[{}] {}: {}", ts, sender, m.body)
                     })
@@ -1524,6 +1568,11 @@
             .get("days_radius")
             .and_then(|v| v.as_i64())
             .unwrap_or(7);
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(20)
+            .clamp(1, 50) as usize;
 
         let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
             Ok(d) => d,
 
         let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp();
 
         log::info!(
-            "tool_get_calendar_events: date={}, days_radius={}",
+            "tool_get_calendar_events: date={}, days_radius={}, limit={}",
             date,
-            days_radius
+            days_radius,
+            limit
         );
 
         let events = {
             let mut dao = self
                 .calendar_dao
                 .lock()
                 .expect("Unable to lock CalendarEventDao");
-            dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, 10)
+            dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, limit)
                 .ok()
         };
@@ -1552,7 +1602,11 @@
                 .iter()
                 .map(|e| {
                     let dt = DateTime::from_timestamp(e.start_time, 0)
-                        .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                        .map(|dt| {
+                            dt.with_timezone(&Local)
+                                .format("%Y-%m-%d %H:%M")
+                                .to_string()
+                        })
                         .unwrap_or_else(|| "unknown".to_string());
                     let loc = e
                         .location
@@ -1624,7 +1678,11 @@
                 .take(20)
                 .map(|loc| {
                     let dt = DateTime::from_timestamp(loc.timestamp, 0)
-                        .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                        .map(|dt| {
+                            dt.with_timezone(&Local)
+                                .format("%Y-%m-%d %H:%M")
+                                .to_string()
+                        })
                         .unwrap_or_else(|| "unknown".to_string());
                     let activity = loc
                         .activity
@@ -1733,7 +1791,11 @@
             .get("entity_type")
             .and_then(|v| v.as_str())
             .map(|s| s.to_string());
-        let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10);
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(20)
+            .clamp(1, 50);
 
         log::info!(
             "tool_recall_entities: name={:?}, type={:?}, limit={}",
@@ -1807,32 +1869,32 @@
         // For each linked entity, fetch its facts
         for entity_id in entity_ids {
-            if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id) {
-                if let Some(e) = entity {
-                    let role = links
-                        .iter()
-                        .find(|l| l.entity_id == entity_id)
-                        .map(|l| l.role.as_str())
-                        .unwrap_or("subject");
-                    output_lines.push(format!(
-                        "Entity: {} ({}, role: {})",
-                        e.name, e.entity_type, role
-                    ));
-                    if let Ok(facts) = kdao.get_facts_for_entity(cx, entity_id) {
-                        for f in facts.iter().filter(|f| f.status == "active") {
-                            let obj = if let Some(ref v) = f.object_value {
-                                v.clone()
-                            } else if let Some(oid) = f.object_entity_id {
-                                kdao.get_entity_by_id(cx, oid)
-                                    .ok()
-                                    .flatten()
-                                    .map(|e| format!("{} (entity ID: {})", e.name, e.id))
-                                    .unwrap_or_else(|| format!("entity:{}", oid))
-                            } else {
-                                "(unknown)".to_string()
-                            };
-                            output_lines.push(format!(" - {} {}", f.predicate, obj));
-                        }
-                    }
-                }
-            }
+            if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id)
+                && let Some(e) = entity
+            {
+                let role = links
+                    .iter()
+                    .find(|l| l.entity_id == entity_id)
+                    .map(|l| l.role.as_str())
+                    .unwrap_or("subject");
+                output_lines.push(format!(
+                    "Entity: {} ({}, role: {})",
+                    e.name, e.entity_type, role
+                ));
+                if let Ok(facts) = kdao.get_facts_for_entity(cx, entity_id) {
+                    for f in facts.iter().filter(|f| f.status == "active") {
+                        let obj = if let Some(ref v) = f.object_value {
+                            v.clone()
+                        } else if let Some(oid) = f.object_entity_id {
+                            kdao.get_entity_by_id(cx, oid)
+                                .ok()
+                                .flatten()
+                                .map(|e| format!("{} (entity ID: {})", e.name, e.id))
+                                .unwrap_or_else(|| format!("entity:{}", oid))
+                        } else {
+                            "(unknown)".to_string()
+                        };
+                        output_lines.push(format!(" - {} {}", f.predicate, obj));
+                    }
+                }
+            }
         }
@@ -1882,14 +1944,10 @@
         // those already). Results are appended to the tool response so the
         // model can choose to use an existing entity's ID instead.
         let similar_entities: Vec<String> = {
-            use crate::database::{EntityFilter, KnowledgeDao};
+            use crate::database::EntityFilter;
             use crate::database::knowledge_dao::normalize_entity_type;
             let normalised_type = normalize_entity_type(&entity_type);
-            let first_token = name
-                .split_whitespace()
-                .next()
-                .unwrap_or(&name)
-                .to_string();
+            let first_token = name.split_whitespace().next().unwrap_or(&name).to_string();
             let filter = EntityFilter {
                 entity_type: None, // search all types, filter client-side to avoid case issues
                 status: Some("active".to_string()),
+
                 limit: 10,
                 offset: 0,
             };
-            let mut kdao = self.knowledge_dao.lock().expect("Unable to lock KnowledgeDao");
+            let mut kdao = self
+                .knowledge_dao
+                .lock()
+                .expect("Unable to lock KnowledgeDao");
             kdao.list_entities(cx, filter)
                 .unwrap_or_default()
                 .0
@@ -2031,6 +2092,7 @@
         // Upsert a photo link so this entity is associated with this photo
         let link = InsertEntityPhotoLink {
             entity_id: subject_entity_id,
+            library_id: crate::libraries::PRIMARY_LIBRARY_ID,
             file_path: file_path.to_string(),
             role: photo_role,
         };
@@ -2086,6 +2148,10 @@
                     "contact": {
                         "type": "string",
                         "description": "Optional contact name to filter results"
+                    },
+                    "limit": {
+                        "type": "integer",
+                        "description": "Maximum number of results to return (default: 10, max: 25)"
                     }
                 }
             }),
@@ -2108,6 +2174,10 @@
                     "days_radius": {
                         "type": "integer",
                         "description": "Number of days before and after the date to search (default: 4)"
+                    },
+                    "limit": {
+                        "type": "integer",
+                        "description": "Maximum number of messages to return (default: 60, max: 150)"
                     }
                 }
             }),
@@ -2126,6 +2196,10 @@
                     "days_radius": {
                         "type": "integer",
                         "description": "Number of days before and after the date to search (default: 7)"
+                    },
+                    "limit": {
+                        "type": "integer",
+                        "description": "Maximum number of events to return (default: 20, max: 50)"
                    }
                 }
             }),
@@ -2201,7 +2275,7 @@
                     },
                     "limit": {
                         "type": "integer",
-                        "description": "Maximum number of results to return (default: 10)"
+                        "description": "Maximum number of results to return (default: 20, max: 50)"
                     }
                 }
             }),
@@ -2453,7 +2527,7 @@
         extract_date_from_filename(&file_path)
             .map(|dt| dt.timestamp())
             .or_else(|| {
-                let full_path = std::path::Path::new(&self.base_path).join(&file_path);
+                let full_path = self.resolve_full_path(&file_path)?;
File::open(&full_path) .and_then(|f| f.metadata()) .and_then(|m| m.created().or(m.modified())) @@ -2704,10 +2778,9 @@ Return ONLY the summary, nothing else."#, messages.push(ChatMessage::user( "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.", )); - let (final_response, prompt_tokens, eval_tokens) = - ollama_client - .chat_with_tools(messages.clone(), vec![]) - .await?; + let (final_response, prompt_tokens, eval_tokens) = ollama_client + .chat_with_tools(messages.clone(), vec![]) + .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; final_content = final_response.content.clone(); @@ -2742,6 +2815,7 @@ Return ONLY the summary, nothing else."#, // 15. Store insight (returns the persisted row including its new id) let insight = InsertPhotoInsight { + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary: final_content, diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs index 1f42b6c..184bc61 100644 --- a/src/ai/ollama.rs +++ b/src/ai/ollama.rs @@ -120,6 +120,7 @@ impl OllamaClient { /// Replace the HTTP client with one using a custom request timeout. /// Useful for slow models where the default 120s may be insufficient. + #[allow(dead_code)] pub fn with_request_timeout(mut self, secs: u64) -> Self { self.client = Client::builder() .connect_timeout(Duration::from_secs(5)) @@ -174,6 +175,7 @@ impl OllamaClient { } /// Clear the model list cache for a specific URL or all URLs + #[allow(dead_code)] pub fn clear_model_cache(url: Option<&str>) { let mut cache = MODEL_LIST_CACHE.lock().unwrap(); if let Some(url) = url { @@ -186,6 +188,7 @@ impl OllamaClient { } /// Clear the model capabilities cache for a specific URL or all URLs + #[allow(dead_code)] pub fn clear_capabilities_cache(url: Option<&str>) { let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap(); if let Some(url) = url { @@ -992,7 +995,6 @@ struct OllamaEmbedResponse { #[cfg(test)] mod tests { - use super::*; #[test] fn generate_photo_description_prompt_is_concise() { diff --git a/src/bin/backfill_hashes.rs b/src/bin/backfill_hashes.rs new file mode 100644 index 0000000..ad9b20f --- /dev/null +++ b/src/bin/backfill_hashes.rs @@ -0,0 +1,181 @@ +//! Backfill `image_exif.content_hash` + `size_bytes` for rows that were +//! ingested before hash computation was wired into the watcher. +//! +//! The watcher computes hashes for new files as they're ingested, so this +//! binary is a one-shot tool for the historical backlog. Safe to re-run; +//! only rows with NULL content_hash are processed. + +use std::path::Path; +use std::sync::{Arc, Mutex}; +use std::time::Instant; + +use clap::Parser; +use rayon::prelude::*; + +use image_api::content_hash; +use image_api::database::{ExifDao, SqliteExifDao, connect}; +use image_api::libraries::{self, Library}; + +#[derive(Parser, Debug)] +#[command(name = "backfill_hashes")] +#[command(about = "Compute content_hash for image_exif rows missing one")] +struct Args { + /// Max rows to hash per batch. The process loops until no rows remain. + #[arg(long, default_value_t = 500)] + batch_size: i64, + + /// Rayon parallelism override. 0 uses the default thread pool size. + #[arg(long, default_value_t = 0)] + parallelism: usize, + + /// Dry-run: log what would be hashed without writing to the DB. 
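+    /// e.g. `backfill_hashes --dry-run --batch-size 100` walks a single
+    /// batch and prints each would-be hash (invocation illustrative).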
+    #[arg(long)]
+    dry_run: bool,
+}
+
+fn main() -> anyhow::Result<()> {
+    env_logger::init();
+    dotenv::dotenv().ok();
+
+    let args = Args::parse();
+    if args.parallelism > 0 {
+        rayon::ThreadPoolBuilder::new()
+            .num_threads(args.parallelism)
+            .build_global()
+            .expect("Unable to configure rayon thread pool");
+    }
+
+    // Resolve libraries (patch placeholder if still unset) so we can map
+    // library_id back to a root_path on disk.
+    let base_path = dotenv::var("BASE_PATH").ok();
+    let mut seed_conn = connect();
+    if let Some(base) = base_path.as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    if libs.is_empty() {
+        anyhow::bail!("No libraries configured; cannot backfill hashes");
+    }
+    let libs_by_id: std::collections::HashMap<i32, Library> =
+        libs.into_iter().map(|lib| (lib.id, lib)).collect();
+    println!(
+        "Configured libraries: {}",
+        libs_by_id
+            .values()
+            .map(|l| format!("{} -> {}", l.name, l.root_path))
+            .collect::<Vec<_>>()
+            .join(", ")
+    );
+
+    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
+    let ctx = opentelemetry::Context::new();
+
+    let mut total_hashed = 0u64;
+    let mut total_missing = 0u64;
+    let mut total_errors = 0u64;
+    let start = Instant::now();
+
+    loop {
+        let rows = {
+            let mut guard = dao.lock().expect("Unable to lock ExifDao");
+            guard
+                .get_rows_missing_hash(&ctx, args.batch_size)
+                .map_err(|e| anyhow::anyhow!("DB error: {:?}", e))?
+        };
+        if rows.is_empty() {
+            break;
+        }
+        println!("Processing batch of {} rows", rows.len());
+
+        // Compute hashes in parallel (I/O-bound; rayon helps on local disks,
+        // throttled by network on SMB mounts — use --parallelism to tune).
+        let results: Vec<(i32, String, Option<FileIdentity>)> = rows
+            .into_par_iter()
+            .map(|(library_id, rel_path)| {
+                let abs = libs_by_id
+                    .get(&library_id)
+                    .map(|lib| Path::new(&lib.root_path).join(&rel_path));
+                match abs {
+                    Some(abs_path) if abs_path.exists() => match content_hash::compute(&abs_path) {
+                        Ok(id) => (library_id, rel_path, Some(id)),
+                        Err(e) => {
+                            eprintln!("hash error for {}: {:?}", abs_path.display(), e);
+                            (library_id, rel_path, None)
+                        }
+                    },
+                    Some(_) => (library_id, rel_path, None), // file missing on disk
+                    None => {
+                        eprintln!("Row refers to unknown library_id {}", library_id);
+                        (library_id, rel_path, None)
+                    }
+                }
+            })
+            .collect();
+
+        // Persist sequentially — SQLite writes serialize anyway.
+        if !args.dry_run {
+            let mut guard = dao.lock().expect("Unable to lock ExifDao");
+            for (library_id, rel_path, ident) in &results {
+                match ident {
+                    Some(id) => {
+                        match guard.backfill_content_hash(
+                            &ctx,
+                            *library_id,
+                            rel_path,
+                            &id.content_hash,
+                            id.size_bytes,
+                        ) {
+                            Ok(_) => total_hashed += 1,
+                            Err(e) => {
+                                eprintln!("persist error for {}: {:?}", rel_path, e);
+                                total_errors += 1;
+                            }
+                        }
+                    }
+                    None => {
+                        total_missing += 1;
+                    }
+                }
+            }
+        } else {
+            for (_, rel_path, ident) in &results {
+                match ident {
+                    Some(id) => {
+                        println!(
+                            "[dry-run] {} -> {} ({} bytes)",
+                            rel_path, id.content_hash, id.size_bytes
+                        );
+                        total_hashed += 1;
+                    }
+                    None => {
+                        total_missing += 1;
+                    }
+                }
+            }
+            println!(
+                "[dry-run] processed one batch of {}. Stopping — a real run would continue \
+                 until no NULL content_hash rows remain.",
+                results.len()
+            );
+            break;
+        }
+
+        let elapsed = start.elapsed().as_secs_f64().max(0.001);
+        let rate = total_hashed as f64 / elapsed;
+        println!(
+            " hashed={} missing={} errors={} ({:.1} files/sec)",
+            total_hashed, total_missing, total_errors, rate
+        );
+    }
+
+    println!();
+    println!(
+        "Done. hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
+        total_hashed,
+        total_missing,
+        total_errors,
+        start.elapsed().as_secs_f64()
+    );
+    Ok(())
+}
diff --git a/src/bin/migrate_exif.rs b/src/bin/migrate_exif.rs
index 3235a63..2f8f868 100644
--- a/src/bin/migrate_exif.rs
+++ b/src/bin/migrate_exif.rs
@@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
     let context = opentelemetry::Context::new();
 
     let relative_path = match path.strip_prefix(&base) {
-        Ok(p) => p.to_str().unwrap().to_string(),
+        Ok(p) => p.to_str().unwrap().replace('\\', "/"),
         Err(_) => {
             eprintln!(
                 "Error: Could not create relative path for {}",
@@ -94,6 +94,7 @@
         Ok(exif_data) => {
             let timestamp = Utc::now().timestamp();
             let insert_exif = InsertImageExif {
+                library_id: image_api::libraries::PRIMARY_LIBRARY_ID,
                 file_path: relative_path.clone(),
                 camera_make: exif_data.camera_make,
                 camera_model: exif_data.camera_model,
@@ -114,6 +115,8 @@
                     .map(|e| e.created_time)
                     .unwrap_or(timestamp),
                 last_modified: timestamp,
+                content_hash: None,
+                size_bytes: None,
             };
 
             // Store or update in database
diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs
index f9373ad..bc37960 100644
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -11,6 +11,7 @@ use image_api::database::{
     SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
 };
 use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
+use image_api::libraries::{self, Library};
 use image_api::tags::{SqliteTagDao, TagDao};
 
 #[derive(Parser, Debug)]
@@ -125,6 +126,12 @@ async fn main() -> anyhow::Result<()> {
     let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
         Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
 
+    let populate_lib = Library {
+        id: libraries::PRIMARY_LIBRARY_ID,
+        name: "main".to_string(),
+        root_path: base_path.clone(),
+    };
+
     let generator = InsightGenerator::new(
         ollama,
         sms_client,
@@ -136,7 +143,7 @@
         search_dao,
         tag_dao,
         knowledge_dao,
-        base_path.clone(),
+        vec![populate_lib],
     );
 
     println!("Knowledge Base Population");
diff --git a/src/content_hash.rs b/src/content_hash.rs
new file mode 100644
index 0000000..7f05f06
--- /dev/null
+++ b/src/content_hash.rs
@@ -0,0 +1,108 @@
+//! Content-based file identity used to dedup derivative outputs
+//! (thumbnails, HLS segments) across libraries.
+//!
+//! Hashes are computed with blake3 streaming so that network-mounted
+//! libraries don't need to load whole files into memory. The result is
+//! a 64-character hex string; we shard derivative directories on the
+//! first two characters to keep any single directory's fanout bounded.
+
+use std::fs::File;
+use std::io::{self, Read};
+use std::path::{Path, PathBuf};
+
+/// Size of the read buffer used when streaming a file through blake3.
+/// 1 MiB trades a bit of RSS for fewer syscalls on slow network mounts.
+const HASH_BUFFER_SIZE: usize = 1024 * 1024;
+
+/// Hash identity of a file, together with its byte length.
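+/// e.g. `compute(&a)? == compute(&b)?` holds for byte-identical files at
+/// different paths (illustrative; relies on the `PartialEq` derive below).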
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct FileIdentity {
+    pub content_hash: String,
+    pub size_bytes: i64,
+}
+
+/// Stream a file through blake3 and return the hex-encoded digest + size.
+pub fn compute(path: &Path) -> io::Result<FileIdentity> {
+    let mut file = File::open(path)?;
+    let size_bytes = file.metadata()?.len() as i64;
+
+    let mut hasher = blake3::Hasher::new();
+    let mut buf = vec![0u8; HASH_BUFFER_SIZE];
+    loop {
+        let n = file.read(&mut buf)?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+
+    Ok(FileIdentity {
+        content_hash: hasher.finalize().to_hex().to_string(),
+        size_bytes,
+    })
+}
+
+/// Hash-keyed thumbnail path: `<thumbs_dir>/<shard>/<hash>.jpg`.
+/// Generation and serving both consult this first; the legacy mirrored
+/// path acts as a fallback for pre-backfill rows.
+pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
+    let shard = shard_prefix(hash);
+    thumbs_dir.join(shard).join(format!("{}.jpg", hash))
+}
+
+/// Hash-keyed HLS output directory: `<video_dir>/<shard>/<hash>/`.
+/// The playlist lives at `playlist.m3u8` inside this directory and its
+/// segments are co-located so HLS relative references Just Work.
+#[allow(dead_code)]
+pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
+    let shard = shard_prefix(hash);
+    video_dir.join(shard).join(hash)
+}
+
+fn shard_prefix(hash: &str) -> &str {
+    let end = hash
+        .char_indices()
+        .nth(2)
+        .map(|(i, _)| i)
+        .unwrap_or(hash.len());
+    &hash[..end]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn identical_content_yields_identical_hash() {
+        let dir = tempfile::tempdir().unwrap();
+        let a = dir.path().join("a.bin");
+        let b = dir.path().join("b.bin");
+        std::fs::write(&a, b"hello world").unwrap();
+        std::fs::write(&b, b"hello world").unwrap();
+        let ha = compute(&a).unwrap();
+        let hb = compute(&b).unwrap();
+        assert_eq!(ha, hb);
+        assert_eq!(ha.size_bytes, 11);
+    }
+
+    #[test]
+    fn different_content_yields_different_hash() {
+        let dir = tempfile::tempdir().unwrap();
+        let a = dir.path().join("a.bin");
+        let b = dir.path().join("b.bin");
+        std::fs::write(&a, b"aaa").unwrap();
+        std::fs::write(&b, b"bbb").unwrap();
+        assert_ne!(compute(&a).unwrap(), compute(&b).unwrap());
+    }
+
+    #[test]
+    fn derivative_paths_shard_by_first_two_hex() {
+        let thumbs = Path::new("/tmp/thumbs");
+        let p = thumbnail_path(thumbs, "abcdef0123");
+        assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));
+
+        let video = Path::new("/tmp/video");
+        let d = hls_dir(video, "1234deadbeef");
+        assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
+    }
+}
diff --git a/src/data/mod.rs b/src/data/mod.rs
index 6935819..fe5e183 100644
--- a/src/data/mod.rs
+++ b/src/data/mod.rs
@@ -102,6 +102,12 @@ pub struct PhotosResponse {
     pub photos: Vec<String>,
     pub dirs: Vec<String>,
 
+    /// Library id for each entry in `photos`, same length and ordering.
+    /// Parallel array rather than an object per row to keep the payload
+    /// small and backwards-compatible with older clients.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub photo_libraries: Vec<i32>,
+
     // Pagination metadata (only present when limit is set)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub total_count: Option<i64>,
@@ -155,6 +161,10 @@ pub struct FilesRequest {
     // Pagination parameters (optional - backward compatible)
     pub limit: Option<i64>,
     pub offset: Option<i64>,
+
+    /// Optional library filter. Accepts a library id (e.g. "1") or name
+    /// (e.g. "main"). When omitted, results span all libraries.
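+    /// e.g. `GET /photos?library=main` or `?library=1` (route illustrative).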
+    pub library: Option<String>,
 }
 
 #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
@@ -187,7 +197,12 @@ pub struct ThumbnailRequest {
     #[allow(dead_code)] // Part of API contract, may be used in future
     pub(crate) format: Option,
     #[serde(default)]
+    #[allow(dead_code)] // Part of API contract, may be used in future
     pub(crate) shape: Option,
+    /// Optional library filter. Accepts a library id (e.g. "1") or name
+    /// (e.g. "main"). When omitted, defaults to the primary library.
+    #[serde(default)]
+    pub(crate) library: Option<String>,
 }
 
 #[derive(Debug, Deserialize, PartialEq)]
@@ -231,6 +246,8 @@ pub struct MetadataResponse {
     pub size: u64,
     pub exif: Option,
     pub filename_date: Option, // Date extracted from filename
+    pub library_id: Option<i32>,
+    pub library_name: Option<String>,
 }
 
@@ -247,6 +264,8 @@ impl From<std::fs::Metadata> for MetadataResponse {
             size: metadata.len(),
             exif: None,
             filename_date: None, // Will be set in endpoint handler
+            library_id: None,
+            library_name: None,
         }
     }
 }
@@ -422,11 +441,8 @@ mod tests {
         );
 
         match err.unwrap_err().into_kind() {
-            ErrorKind::ExpiredSignature => assert!(true),
-            kind => {
-                println!("Unexpected error: {:?}", kind);
-                assert!(false)
-            }
+            ErrorKind::ExpiredSignature => {}
+            kind => panic!("Unexpected error: {:?}", kind),
         }
     }
 
@@ -435,11 +451,8 @@
         let err = Claims::from_str("uni-֍ՓՓՓՓՓՓՓՓՓՓՓՓՓՓՓ");
 
         match err.unwrap_err().into_kind() {
-            ErrorKind::InvalidToken => assert!(true),
-            kind => {
-                println!("Unexpected error: {:?}", kind);
-                assert!(false)
-            }
+            ErrorKind::InvalidToken => {}
+            kind => panic!("Unexpected error: {:?}", kind),
         }
     }
 
diff --git a/src/database/calendar_dao.rs b/src/database/calendar_dao.rs
index 82eea20..b70a9f6 100644
--- a/src/database/calendar_dao.rs
+++ b/src/database/calendar_dao.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;
diff --git a/src/database/daily_summary_dao.rs b/src/database/daily_summary_dao.rs
index 5b1126f..6ea560a 100644
--- a/src/database/daily_summary_dao.rs
+++ b/src/database/daily_summary_dao.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use chrono::NaiveDate;
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs
index 473bb3c..553b579 100644
--- a/src/database/insights_dao.rs
+++ b/src/database/insights_dao.rs
@@ -21,6 +21,17 @@ pub trait InsightDao: Sync + Send {
         file_path: &str,
     ) -> Result<Option<PhotoInsight>, DbError>;
 
+    /// Return the most recent current insight whose rel_path is one of
+    /// `paths`. Used for content-hash sharing: the caller expands a
+    /// single file into all rel_paths with the same content_hash, then
+    /// asks here for any existing insight attached to any of them.
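+    /// e.g. `get_insight_for_paths(cx, &["a/img.jpg", "mirror/img.jpg"])`
+    /// returns whichever of the two carries the current insight (paths
+    /// illustrative).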
+    fn get_insight_for_paths(
+        &mut self,
+        context: &opentelemetry::Context,
+        paths: &[String],
+    ) -> Result<Option<PhotoInsight>, DbError>;
+
+    #[allow(dead_code)]
     fn get_insight_history(
         &mut self,
         context: &opentelemetry::Context,
@@ -69,6 +80,7 @@ impl SqliteInsightDao {
     }
 
     #[cfg(test)]
+    #[allow(dead_code)]
     pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
         SqliteInsightDao { connection: conn }
     }
@@ -86,10 +98,14 @@ impl InsightDao for SqliteInsightDao {
         let mut connection = self.connection.lock().expect("Unable to get InsightDao");
 
         // Mark all existing insights for this file as no longer current
-        diesel::update(photo_insights.filter(file_path.eq(&insight.file_path)))
-            .set(is_current.eq(false))
-            .execute(connection.deref_mut())
-            .map_err(|_| anyhow::anyhow!("Update is_current error"))?;
+        diesel::update(
+            photo_insights
+                .filter(library_id.eq(insight.library_id))
+                .filter(rel_path.eq(&insight.file_path)),
+        )
+        .set(is_current.eq(false))
+        .execute(connection.deref_mut())
+        .map_err(|_| anyhow::anyhow!("Update is_current error"))?;
 
         // Insert the new insight as current
         diesel::insert_into(photo_insights)
@@ -99,7 +115,8 @@
 
         // Retrieve the inserted record (is_current = true)
         photo_insights
-            .filter(file_path.eq(&insight.file_path))
+            .filter(library_id.eq(insight.library_id))
+            .filter(rel_path.eq(&insight.file_path))
             .filter(is_current.eq(true))
             .first::<PhotoInsight>(connection.deref_mut())
             .map_err(|_| anyhow::anyhow!("Query error"))
@@ -118,7 +135,7 @@
         let mut connection = self.connection.lock().expect("Unable to get InsightDao");
 
         photo_insights
-            .filter(file_path.eq(path))
+            .filter(rel_path.eq(path))
             .filter(is_current.eq(true))
             .first::<PhotoInsight>(connection.deref_mut())
             .optional()
@@ -127,6 +144,30 @@
             .map_err(|_| DbError::new(DbErrorKind::QueryError))
     }
 
+    fn get_insight_for_paths(
+        &mut self,
+        context: &opentelemetry::Context,
+        paths: &[String],
+    ) -> Result<Option<PhotoInsight>, DbError> {
+        if paths.is_empty() {
+            return Ok(None);
+        }
+        trace_db_call(context, "query", "get_insight_for_paths", |_span| {
+            use schema::photo_insights::dsl::*;
+
+            let mut connection = self.connection.lock().expect("Unable to get InsightDao");
+
+            photo_insights
+                .filter(rel_path.eq_any(paths))
+                .filter(is_current.eq(true))
+                .order(generated_at.desc())
+                .first::<PhotoInsight>(connection.deref_mut())
+                .optional()
+                .map_err(|_| anyhow::anyhow!("Query error"))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
     fn get_insight_history(
         &mut self,
         context: &opentelemetry::Context,
@@ -138,7 +179,7 @@
         let mut connection = self.connection.lock().expect("Unable to get InsightDao");
 
         photo_insights
-            .filter(file_path.eq(path))
+            .filter(rel_path.eq(path))
             .order(generated_at.desc())
             .load::<PhotoInsight>(connection.deref_mut())
             .map_err(|_| anyhow::anyhow!("Query error"))
@@ -156,7 +197,7 @@
 
         let mut connection = self.connection.lock().expect("Unable to get InsightDao");
 
-        diesel::delete(photo_insights.filter(file_path.eq(path)))
+        diesel::delete(photo_insights.filter(rel_path.eq(path)))
             .execute(connection.deref_mut())
             .map(|_| ())
             .map_err(|_| anyhow::anyhow!("Delete error"))
@@ -195,7 +236,7 @@
 
         diesel::update(
             photo_insights
-                .filter(file_path.eq(path))
+                .filter(rel_path.eq(path))
                 .filter(is_current.eq(true)),
         )
         .set(approved.eq(Some(is_approved)))
diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs
index 05d1865..f0d6c12 100644
--- a/src/database/knowledge_dao.rs
+++ b/src/database/knowledge_dao.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use std::ops::DerefMut;
@@ -230,7 +232,7 @@ impl SqliteKnowledgeDao {
     }
 
     fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
-        if bytes.len() % 4 != 0 {
+        if !bytes.len().is_multiple_of(4) {
             return Err(DbError::new(DbErrorKind::QueryError));
         }
         Ok(bytes
@@ -535,7 +537,6 @@ impl KnowledgeDao for SqliteKnowledgeDao {
 
         conn.transaction::<(i64, i64), diesel::result::Error, _>(|conn| {
             use schema::entity_facts::dsl as ef;
-            use schema::entity_photo_links::dsl as epl;
 
             // 1. Re-point facts where source is subject
             let facts_updated =
@@ -550,8 +551,8 @@
 
             // 3. Copy photo links to target (INSERT OR IGNORE to skip duplicates)
             let links_updated = diesel::sql_query(
-                "INSERT OR IGNORE INTO entity_photo_links (entity_id, file_path, role) \
-                 SELECT ?, file_path, role FROM entity_photo_links WHERE entity_id = ?",
+                "INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) \
+                 SELECT ?, library_id, rel_path, role FROM entity_photo_links WHERE entity_id = ?",
             )
             .bind::<diesel::sql_types::BigInt, _>(target_id)
             .bind::<diesel::sql_types::BigInt, _>(source_id)
@@ -781,11 +782,12 @@
     ) -> Result<(), DbError> {
         trace_db_call(cx, "insert", "upsert_photo_link", |_span| {
             let mut conn = self.connection.lock().expect("KnowledgeDao lock");
-            // INSERT OR IGNORE respects the UNIQUE(entity_id, file_path, role) constraint
+            // INSERT OR IGNORE respects the UNIQUE(entity_id, library_id, rel_path, role) constraint
             diesel::sql_query(
-                "INSERT OR IGNORE INTO entity_photo_links (entity_id, file_path, role) VALUES (?, ?, ?)"
+                "INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) VALUES (?, ?, ?, ?)"
            )
            .bind::<diesel::sql_types::BigInt, _>(link.entity_id)
+            .bind::<diesel::sql_types::Integer, _>(link.library_id)
            .bind::<diesel::sql_types::Text, _>(&link.file_path)
            .bind::<diesel::sql_types::Text, _>(&link.role)
            .execute(conn.deref_mut())
@@ -803,7 +805,7 @@
         trace_db_call(cx, "delete", "delete_photo_links_for_file", |_span| {
             use schema::entity_photo_links::dsl::*;
             let mut conn = self.connection.lock().expect("KnowledgeDao lock");
-            diesel::delete(entity_photo_links.filter(file_path.eq(file_path_val)))
+            diesel::delete(entity_photo_links.filter(rel_path.eq(file_path_val)))
                 .execute(conn.deref_mut())
                 .map(|_| ())
                 .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
@@ -820,7 +822,7 @@
             use schema::entity_photo_links::dsl::*;
             let mut conn = self.connection.lock().expect("KnowledgeDao lock");
             entity_photo_links
-                .filter(file_path.eq(file_path_val))
+                .filter(rel_path.eq(file_path_val))
                 .load::<EntityPhotoLink>(conn.deref_mut())
                 .map_err(|e| anyhow::anyhow!("Query error: {}", e))
         })
diff --git a/src/database/location_dao.rs b/src/database/location_dao.rs
index 73e1c10..95f5d8f 100644
--- a/src/database/location_dao.rs
+++ b/src/database/location_dao.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;
diff --git a/src/database/mod.rs b/src/database/mod.rs
index 78cac22..07406d6 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -155,7 +155,9 @@ pub trait FavoriteDao: Sync + Send {
     fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result;
     fn remove_favorite(&mut self, user_id: i32, favorite_path: String);
     fn get_favorites(&mut self, user_id: i32) -> Result<Vec<String>, DbError>;
+    #[allow(dead_code)]
update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>;
+    #[allow(dead_code)]
     fn get_all_paths(&mut self) -> Result<Vec<String>, DbError>;
 }
 
@@ -184,7 +186,7 @@ impl FavoriteDao for SqliteFavoriteDao {
         let mut connection = self.connection.lock().expect("Unable to get FavoriteDao");
 
         if favorites
-            .filter(userid.eq(user_id).and(path.eq(&favorite_path)))
+            .filter(userid.eq(user_id).and(rel_path.eq(&favorite_path)))
             .first::<Favorite>(connection.deref_mut())
             .is_err()
         {
@@ -204,7 +206,7 @@
         use schema::favorites::dsl::*;
 
         diesel::delete(favorites)
-            .filter(userid.eq(user_id).and(path.eq(favorite_path)))
+            .filter(userid.eq(user_id).and(rel_path.eq(favorite_path)))
             .execute(self.connection.lock().unwrap().deref_mut())
             .unwrap();
     }
@@ -221,8 +223,8 @@
     fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> {
         use schema::favorites::dsl::*;
 
-        diesel::update(favorites.filter(path.eq(old_path)))
-            .set(path.eq(new_path))
+        diesel::update(favorites.filter(rel_path.eq(old_path)))
+            .set(rel_path.eq(new_path))
             .execute(self.connection.lock().unwrap().deref_mut())
             .map_err(|_| DbError::new(DbErrorKind::UpdateError))?;
         Ok(())
@@ -232,13 +234,14 @@
         use schema::favorites::dsl::*;
 
         favorites
-            .select(path)
+            .select(rel_path)
             .distinct()
             .load(self.connection.lock().unwrap().deref_mut())
             .map_err(|_| DbError::new(DbErrorKind::QueryError))
     }
 }
 
+#[allow(dead_code)]
 pub trait ExifDao: Sync + Send {
     fn store_exif(
         &mut self,
@@ -263,6 +266,7 @@
     fn get_all_with_date_taken(
         &mut self,
         context: &opentelemetry::Context,
+        library_id: Option<i32>,
     ) -> Result<Vec<(String, i64)>, DbError>;
 
     /// Batch load EXIF data for multiple file paths (single query)
@@ -306,12 +310,103 @@
     /// Get all photos with GPS coordinates
     /// Returns Vec<(file_path, latitude, longitude, date_taken)>
+    #[allow(clippy::type_complexity)]
     fn get_all_with_gps(
         &mut self,
         context: &opentelemetry::Context,
         base_path: &str,
         recursive: bool,
     ) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError>;
+
+    /// Return rows that still lack a `content_hash`, oldest first. Used by
+    /// the `backfill_hashes` binary to batch through the historical
+    /// backlog. Returns `(library_id, rel_path)` tuples so the caller can
+    /// resolve each file on disk.
+    fn get_rows_missing_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        limit: i64,
+    ) -> Result<Vec<(i32, String)>, DbError>;
+
+    /// Persist the computed blake3 hash + file size for an existing row.
+    fn backfill_content_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        rel_path: &str,
+        hash: &str,
+        size_bytes: i64,
+    ) -> Result<(), DbError>;
+
+    /// Return the first EXIF row with the given content hash (any library).
+    /// Used by thumbnail/HLS generation to detect pre-existing derivatives
+    /// from another library before regenerating.
+    fn find_by_content_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        hash: &str,
+    ) -> Result<Option<ImageExif>, DbError>;
+
+    /// Given a file instance `(library_id, rel_path)`, return every distinct
+    /// rel_path in `image_exif` whose `content_hash` matches this file's.
+    /// Used by tag and insight read-paths so annotations follow content
+    /// rather than path, even when the same file is indexed under
+    /// different library roots. Falls back to `[rel_path]` when the file
+    /// hasn't been hashed yet.
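To make the intended use concrete, here is a sketch of a caller, not part of the patch. `TagDao` and `get_tags_for_path` do exist elsewhere in this crate, but this helper, its parameter names, and the error handling are illustrative assumptions:

// Illustrative only: expand one (library_id, rel_path) into every
// content-equivalent rel_path, then union the tags found under each.
fn tags_following_content(
    exif_dao: &mut dyn ExifDao,
    tag_dao: &mut dyn TagDao, // assumed object-safe; get_tags_for_path is real
    cx: &opentelemetry::Context,
    lib_id: i32,
    rel: &str,
) -> Result<Vec<Tag>, DbError> {
    let mut tags = Vec::new();
    // Falls back to vec![rel] when the row has no content_hash yet.
    for sibling in exif_dao.get_rel_paths_sharing_content(cx, lib_id, rel)? {
        tags.extend(tag_dao.get_tags_for_path(cx, &sibling).unwrap_or_default());
    }
    tags.sort_by_key(|t: &Tag| t.id); // assumes Tag exposes a numeric id
    tags.dedup_by_key(|t| t.id);
    Ok(tags)
}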
+    fn get_rel_paths_sharing_content(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        rel_path: &str,
+    ) -> Result<Vec<String>, DbError>;
+
+    /// All rel_paths known to live in a given library. Used by search to
+    /// scope tag-based (path-keyed) hits to a single library after joining
+    /// through the library-agnostic tag tables.
+    fn get_rel_paths_for_library(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+    ) -> Result<Vec<String>, DbError>;
+
+    /// Look up a content_hash for a rel_path in *any* library. Useful when
+    /// the caller has a library-agnostic rel_path (e.g. from tagged_photo)
+    /// and wants to find content-equivalent siblings without knowing the
+    /// file's original library.
+    fn find_content_hash_anywhere(
+        &mut self,
+        context: &opentelemetry::Context,
+        rel_path: &str,
+    ) -> Result<Option<String>, DbError>;
+
+    /// Given a content_hash, return all rel_paths carrying that hash.
+    fn get_rel_paths_by_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        hash: &str,
+    ) -> Result<Vec<String>, DbError>;
+
+    /// List `(library_id, rel_path)` pairs for the given libraries, optionally
+    /// restricted to rows whose rel_path starts with `path_prefix`. When
+    /// `library_ids` is empty, rows from every library are returned. Used by
+    /// `/photos` recursive listing to skip the filesystem walk — the watcher
+    /// keeps image_exif in parity with disk via the reconciliation pass.
+    fn list_rel_paths_for_libraries(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_ids: &[i32],
+        path_prefix: Option<&str>,
+    ) -> Result<Vec<(i32, String)>, DbError>;
+
+    /// Delete a single image_exif row scoped to `(library_id, rel_path)`.
+    /// Distinct from `delete_exif`, which matches on rel_path alone and
+    /// would clobber same-named files across libraries.
+    fn delete_exif_by_library(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        rel_path: &str,
+    ) -> Result<(), DbError>;
 }
 
 pub struct SqliteExifDao {
@@ -330,6 +425,13 @@ impl SqliteExifDao {
             connection: Arc::new(Mutex::new(connect())),
         }
     }
+
+    #[cfg(test)]
+    pub fn from_connection(conn: SqliteConnection) -> Self {
+        SqliteExifDao {
+            connection: Arc::new(Mutex::new(conn)),
+        }
+    }
 }
 
 impl ExifDao for SqliteExifDao {
@@ -346,12 +448,21 @@
             diesel::insert_into(image_exif)
                 .values(&exif_data)
                 .execute(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Insert error"))?;
+                .map_err(|e| {
+                    log::warn!(
+                        "image_exif insert failed (lib={}, rel_path={:?}): {}",
+                        exif_data.library_id,
+                        exif_data.file_path,
+                        e
+                    );
+                    anyhow::anyhow!("Insert error: {}", e)
+                })?;
 
             image_exif
-                .filter(file_path.eq(&exif_data.file_path))
+                .filter(library_id.eq(exif_data.library_id))
+                .filter(rel_path.eq(&exif_data.file_path))
                 .first::<ImageExif>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Post-insert lookup failed: {}", e))
         })
         .map_err(|_| DbError::new(DbErrorKind::InsertError))
     }
@@ -372,7 +483,7 @@
             let windows_path = path.replace('/', "\\");
 
             match image_exif
-                .filter(file_path.eq(&normalized).or(file_path.eq(&windows_path)))
+                .filter(rel_path.eq(&normalized).or(rel_path.eq(&windows_path)))
                 .first::<ImageExif>(connection.deref_mut())
             {
                 Ok(exif) => Ok(Some(exif)),
@@ -393,29 +504,34 @@
             let mut connection = self.connection.lock().expect("Unable to get ExifDao");
 
-            diesel::update(image_exif.filter(file_path.eq(&exif_data.file_path)))
-                .set((
-                    camera_make.eq(&exif_data.camera_make),
-
camera_model.eq(&exif_data.camera_model), - lens_model.eq(&exif_data.lens_model), - width.eq(&exif_data.width), - height.eq(&exif_data.height), - orientation.eq(&exif_data.orientation), - gps_latitude.eq(&exif_data.gps_latitude), - gps_longitude.eq(&exif_data.gps_longitude), - gps_altitude.eq(&exif_data.gps_altitude), - focal_length.eq(&exif_data.focal_length), - aperture.eq(&exif_data.aperture), - shutter_speed.eq(&exif_data.shutter_speed), - iso.eq(&exif_data.iso), - date_taken.eq(&exif_data.date_taken), - last_modified.eq(&exif_data.last_modified), - )) - .execute(connection.deref_mut()) - .map_err(|_| anyhow::anyhow!("Update error"))?; + diesel::update( + image_exif + .filter(library_id.eq(exif_data.library_id)) + .filter(rel_path.eq(&exif_data.file_path)), + ) + .set(( + camera_make.eq(&exif_data.camera_make), + camera_model.eq(&exif_data.camera_model), + lens_model.eq(&exif_data.lens_model), + width.eq(&exif_data.width), + height.eq(&exif_data.height), + orientation.eq(&exif_data.orientation), + gps_latitude.eq(&exif_data.gps_latitude), + gps_longitude.eq(&exif_data.gps_longitude), + gps_altitude.eq(&exif_data.gps_altitude), + focal_length.eq(&exif_data.focal_length), + aperture.eq(&exif_data.aperture), + shutter_speed.eq(&exif_data.shutter_speed), + iso.eq(&exif_data.iso), + date_taken.eq(&exif_data.date_taken), + last_modified.eq(&exif_data.last_modified), + )) + .execute(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Update error"))?; image_exif - .filter(file_path.eq(&exif_data.file_path)) + .filter(library_id.eq(exif_data.library_id)) + .filter(rel_path.eq(&exif_data.file_path)) .first::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) @@ -426,7 +542,7 @@ impl ExifDao for SqliteExifDao { trace_db_call(context, "delete", "delete_exif", |_span| { use schema::image_exif::dsl::*; - diesel::delete(image_exif.filter(file_path.eq(path))) + diesel::delete(image_exif.filter(rel_path.eq(path))) .execute(self.connection.lock().unwrap().deref_mut()) .map(|_| ()) .map_err(|_| anyhow::anyhow!("Delete error")) @@ -437,15 +553,24 @@ impl ExifDao for SqliteExifDao { fn get_all_with_date_taken( &mut self, context: &opentelemetry::Context, + lib_id: Option, ) -> Result, DbError> { trace_db_call(context, "query", "get_all_with_date_taken", |_span| { use schema::image_exif::dsl::*; let mut connection = self.connection.lock().expect("Unable to get ExifDao"); - image_exif - .select((file_path, date_taken)) + let query = image_exif + .select((rel_path, date_taken)) .filter(date_taken.is_not_null()) + .into_boxed(); + + let query = match lib_id { + Some(filter_id) => query.filter(library_id.eq(filter_id)), + None => query, + }; + + query .load::<(String, Option)>(connection.deref_mut()) .map(|records| { records @@ -473,7 +598,7 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); image_exif - .filter(file_path.eq_any(file_paths)) + .filter(rel_path.eq_any(file_paths)) .load::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) @@ -572,8 +697,8 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); - diesel::update(image_exif.filter(file_path.eq(old_path))) - .set(file_path.eq(new_path)) + diesel::update(image_exif.filter(rel_path.eq(old_path))) + .set(rel_path.eq(new_path)) .execute(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Update error"))?; Ok(()) @@ -591,14 +716,13 @@ impl ExifDao for SqliteExifDao { let mut connection = 
self.connection.lock().expect("Unable to get ExifDao"); image_exif - .select(file_path) + .select(rel_path) .load(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } - fn get_all_with_gps( &mut self, context: &opentelemetry::Context, @@ -627,7 +751,7 @@ impl ExifDao for SqliteExifDao { // Otherwise filter by path prefix if !base_path.is_empty() && base_path != "/" { // Match base path as prefix (with wildcard) - query = query.filter(file_path.like(format!("{}%", base_path))); + query = query.filter(rel_path.like(format!("{}%", base_path))); span.set_attribute(KeyValue::new("path_filter_applied", true)); } else { @@ -666,4 +790,311 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn get_rows_missing_hash( + &mut self, + context: &opentelemetry::Context, + limit: i64, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rows_missing_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.is_null()) + .select((library_id, rel_path)) + .order(id.asc()) + .limit(limit) + .load::<(i32, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn backfill_content_hash( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + hash: &str, + size_val: i64, + ) -> Result<(), DbError> { + trace_db_call(context, "update", "backfill_content_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + diesel::update( + image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)), + ) + .set((content_hash.eq(hash), size_bytes.eq(size_val))) + .execute(connection.deref_mut()) + .map(|_| ()) + .map_err(|_| anyhow::anyhow!("Update error")) + }) + .map_err(|_| DbError::new(DbErrorKind::UpdateError)) + } + + fn find_by_content_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "find_by_content_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.eq(hash)) + .first::(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_sharing_content( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_sharing_content", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + // Look up this file's content_hash. Missing row or NULL hash + // means we can't expand the match set; return the given + // rel_path so callers fall through to direct-match behavior. + let hash: Option = image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)) + .select(content_hash) + .first::>(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error"))? 
+ .flatten(); + + let paths = match hash { + Some(h) => image_exif + .filter(content_hash.eq(h)) + .select(rel_path) + .distinct() + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error"))?, + None => vec![rel_path_val.to_string()], + }; + + Ok(paths) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_for_library( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_for_library", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(library_id.eq(library_id_val)) + .select(rel_path) + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn find_content_hash_anywhere( + &mut self, + context: &opentelemetry::Context, + rel_path_val: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "find_content_hash_anywhere", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(rel_path.eq(rel_path_val)) + .filter(content_hash.is_not_null()) + .select(content_hash) + .first::>(connection.deref_mut()) + .optional() + .map(|opt| opt.flatten()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_by_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_by_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.eq(hash)) + .select(rel_path) + .distinct() + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn list_rel_paths_for_libraries( + &mut self, + context: &opentelemetry::Context, + library_ids: &[i32], + path_prefix: Option<&str>, + ) -> Result, DbError> { + trace_db_call(context, "query", "list_rel_paths_for_libraries", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + let mut query = image_exif.select((library_id, rel_path)).into_boxed(); + + if !library_ids.is_empty() { + query = query.filter(library_id.eq_any(library_ids.to_vec())); + } + + if let Some(prefix) = path_prefix.map(str::trim).filter(|s| !s.is_empty()) { + // Trailing slash normalization so "2024" matches "2024/..." + // without also matching "2024-archive/...". 
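Spelled out as a standalone helper (illustrative; the patch inlines this logic just below), the pattern construction amounts to:

// "2024"  -> "2024/%";  "2024/" -> "2024/%";  "100%_x" -> "100\%\_x/%".
// '%' and '_' are SQL LIKE wildcards, so literal occurrences are escaped.
fn prefix_to_like_pattern(prefix: &str) -> String {
    let prefix = prefix.trim_end_matches('/');
    format!("{}/%", prefix.replace('%', "\\%").replace('_', "\\_"))
}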
+ let prefix = prefix.trim_end_matches('/'); + let pattern = format!("{}/%", prefix.replace('%', "\\%").replace('_', "\\_")); + query = query.filter(rel_path.like(pattern).escape('\\')); + } + + query + .load::<(i32, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn delete_exif_by_library( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + ) -> Result<(), DbError> { + trace_db_call(context, "delete", "delete_exif_by_library", |_span| { + use schema::image_exif::dsl::*; + + diesel::delete( + image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)), + ) + .execute(self.connection.lock().unwrap().deref_mut()) + .map(|_| ()) + .map_err(|_| anyhow::anyhow!("Delete error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } +} + +#[cfg(test)] +mod exif_dao_tests { + use super::*; + use crate::database::models::InsertLibrary; + use crate::database::test::in_memory_db_connection; + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + fn insert_row(dao: &mut SqliteExifDao, lib_id: i32, rel: &str, date: Option) { + dao.store_exif( + &ctx(), + InsertImageExif { + library_id: lib_id, + file_path: rel.to_string(), + camera_make: None, + camera_model: None, + lens_model: None, + width: None, + height: None, + orientation: None, + gps_latitude: None, + gps_longitude: None, + gps_altitude: None, + focal_length: None, + aperture: None, + shutter_speed: None, + iso: None, + date_taken: date, + created_time: 0, + last_modified: 0, + content_hash: None, + size_bytes: None, + }, + ) + .expect("insert exif row"); + } + + fn setup_two_libraries() -> SqliteExifDao { + let mut conn = in_memory_db_connection(); + // Migration seeds library id=1 with a placeholder root; add id=2. + diesel::insert_into(schema::libraries::table) + .values(InsertLibrary { + name: "archive", + root_path: "/tmp/archive", + created_at: 0, + }) + .execute(&mut conn) + .expect("seed second library"); + SqliteExifDao::from_connection(conn) + } + + #[test] + fn get_all_with_date_taken_union_returns_all_libraries() { + let mut dao = setup_two_libraries(); + insert_row(&mut dao, 1, "main/a.jpg", Some(100)); + insert_row(&mut dao, 2, "archive/b.jpg", Some(200)); + // Row without a date must be excluded even in union mode. 
+        insert_row(&mut dao, 2, "archive/c.jpg", None);
+
+        let mut rows = dao.get_all_with_date_taken(&ctx(), None).unwrap();
+        rows.sort_by_key(|(_, ts)| *ts);
+        assert_eq!(
+            rows,
+            vec![
+                ("main/a.jpg".to_string(), 100),
+                ("archive/b.jpg".to_string(), 200),
+            ]
+        );
+    }
+
+    #[test]
+    fn get_all_with_date_taken_scopes_by_library_id() {
+        let mut dao = setup_two_libraries();
+        insert_row(&mut dao, 1, "main/a.jpg", Some(100));
+        insert_row(&mut dao, 2, "archive/b.jpg", Some(200));
+        insert_row(&mut dao, 2, "archive/c.jpg", Some(300));
+
+        let lib2 = dao.get_all_with_date_taken(&ctx(), Some(2)).unwrap();
+        let mut paths: Vec<String> = lib2.into_iter().map(|(p, _)| p).collect();
+        paths.sort();
+        assert_eq!(paths, vec!["archive/b.jpg", "archive/c.jpg"]);
+
+        let lib1 = dao.get_all_with_date_taken(&ctx(), Some(1)).unwrap();
+        assert_eq!(lib1, vec![("main/a.jpg".to_string(), 100)]);
+    }
 }
diff --git a/src/database/models.rs b/src/database/models.rs
index 237e9b4..d95876b 100644
--- a/src/database/models.rs
+++ b/src/database/models.rs
@@ -1,6 +1,6 @@
 use crate::database::schema::{
-    entities, entity_facts, entity_photo_links, favorites, image_exif, photo_insights, users,
-    video_preview_clips,
+    entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, photo_insights,
+    users, video_preview_clips,
 };
 use serde::Serialize;
@@ -23,6 +23,7 @@ pub struct User {
 #[diesel(table_name = favorites)]
 pub struct InsertFavorite<'a> {
     pub userid: &'a i32,
+    #[diesel(column_name = rel_path)]
     pub path: &'a str,
 }
 
@@ -30,12 +31,15 @@
 pub struct Favorite {
     pub id: i32,
     pub userid: i32,
+    #[diesel(column_name = rel_path)]
     pub path: String,
 }
 
 #[derive(Insertable)]
 #[diesel(table_name = image_exif)]
 pub struct InsertImageExif {
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub camera_make: Option<String>,
     pub camera_model: Option<String>,
@@ -53,11 +57,16 @@
     pub date_taken: Option<i64>,
     pub created_time: i64,
     pub last_modified: i64,
+    pub content_hash: Option<String>,
+    pub size_bytes: Option<i64>,
 }
 
+// Field order matches the post-migration column order in `image_exif`.
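That comment is load-bearing: Diesel's derived `Queryable` binds columns to struct fields positionally, while `#[diesel(column_name = rel_path)]` only renames the column for name-based derives such as `Insertable`. A toy illustration (hypothetical table, not from this schema):

diesel::table! {
    things (id) {
        id -> Integer,
        rel_path -> Text,
    }
}

// Queryable is positional: field order must mirror the column order above.
// The column_name attribute is what lets a Rust field keep its old
// `file_path` name while name-based derives write the `rel_path` column.
#[derive(diesel::Queryable)]
struct Thing {
    id: i32,           // bound to column 1 by position
    file_path: String, // bound to column 2 (`rel_path`) by position
}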
#[derive(Serialize, Queryable, Clone, Debug)]
 pub struct ImageExif {
     pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub camera_make: Option<String>,
     pub camera_model: Option<String>,
@@ -75,11 +84,15 @@
     pub date_taken: Option<i64>,
     pub created_time: i64,
     pub last_modified: i64,
+    pub content_hash: Option<String>,
+    pub size_bytes: Option<i64>,
 }
 
 #[derive(Insertable)]
 #[diesel(table_name = photo_insights)]
 pub struct InsertPhotoInsight {
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub title: String,
     pub summary: String,
@@ -92,6 +105,8 @@
 #[derive(Serialize, Queryable, Clone, Debug)]
 pub struct PhotoInsight {
     pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub title: String,
     pub summary: String,
@@ -102,6 +117,24 @@
     pub approved: Option<bool>,
 }
 
+// --- Libraries ---
+
+#[derive(Serialize, Queryable, Clone, Debug)]
+pub struct LibraryRow {
+    pub id: i32,
+    pub name: String,
+    pub root_path: String,
+    pub created_at: i64,
+}
+
+#[derive(Insertable)]
+#[diesel(table_name = libraries)]
+pub struct InsertLibrary<'a> {
+    pub name: &'a str,
+    pub root_path: &'a str,
+    pub created_at: i64,
+}
+
 // --- Knowledge memory models ---
 
 #[derive(Insertable)]
@@ -162,6 +195,8 @@
 pub struct InsertEntityPhotoLink {
     pub entity_id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub role: String,
 }
@@ -170,6 +205,8 @@
 pub struct EntityPhotoLink {
     pub id: i32,
     pub entity_id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub role: String,
 }
@@ -177,6 +214,8 @@
 #[derive(Insertable)]
 #[diesel(table_name = video_preview_clips)]
 pub struct InsertVideoPreviewClip {
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub status: String,
     pub created_at: String,
@@ -186,6 +225,8 @@
 #[derive(Serialize, Queryable, Clone, Debug)]
 pub struct VideoPreviewClip {
     pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
     pub file_path: String,
     pub status: String,
     pub duration_seconds: Option<f64>,
diff --git a/src/database/preview_dao.rs b/src/database/preview_dao.rs
index fe90f4d..c528327 100644
--- a/src/database/preview_dao.rs
+++ b/src/database/preview_dao.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use std::ops::DerefMut;
@@ -84,6 +86,7 @@ impl PreviewDao for SqlitePreviewDao {
             diesel::insert_or_ignore_into(video_preview_clips)
                 .values(InsertVideoPreviewClip {
+                    library_id: 1,
                     file_path: file_path_val.to_string(),
                     status: status_val.to_string(),
                     created_at: now.clone(),
@@ -111,7 +114,7 @@
             let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
             let now = chrono::Utc::now().to_rfc3339();
 
-            diesel::update(video_preview_clips.filter(file_path.eq(file_path_val)))
+            diesel::update(video_preview_clips.filter(rel_path.eq(file_path_val)))
                 .set((
                     status.eq(status_val),
                     duration_seconds.eq(duration),
@@ -137,7 +140,7 @@
             let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
 
             match video_preview_clips
-                .filter(file_path.eq(file_path_val))
+                .filter(rel_path.eq(file_path_val))
                .first::<VideoPreviewClip>(connection.deref_mut())
             {
                 Ok(clip) => Ok(Some(clip)),
@@ -163,7 +166,7 @@
             let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
 
             video_preview_clips
-                .filter(file_path.eq_any(file_paths))
+                .filter(rel_path.eq_any(file_paths))
                 .load::<VideoPreviewClip>(connection.deref_mut())
                 .map_err(|e| anyhow::anyhow!("Query error: {}", e))
         })
diff --git a/src/database/schema.rs b/src/database/schema.rs
index bddced4..3352ca6 100644
--- a/src/database/schema.rs
+++ b/src/database/schema.rs
@@ -64,7 +64,8 @@
     entity_photo_links (id) {
         id -> Integer,
         entity_id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
         role -> Text,
     }
 }
@@ -73,14 +74,15 @@
     favorites (id) {
         id -> Integer,
         userid -> Integer,
-        path -> Text,
+        rel_path -> Text,
     }
 }
 
 diesel::table! {
     image_exif (id) {
         id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
         camera_make -> Nullable<Text>,
         camera_model -> Nullable<Text>,
         lens_model -> Nullable<Text>,
@@ -97,18 +99,17 @@
         date_taken -> Nullable<BigInt>,
         created_time -> BigInt,
         last_modified -> BigInt,
+        content_hash -> Nullable<Text>,
+        size_bytes -> Nullable<BigInt>,
     }
 }
 
 diesel::table! {
-    knowledge_embeddings (id) {
+    libraries (id) {
         id -> Integer,
-        keyword -> Text,
-        description -> Text,
-        category -> Nullable<Text>,
-        embedding -> Binary,
+        name -> Text,
+        root_path -> Text,
         created_at -> BigInt,
-        model_version -> Text,
     }
 }
 
@@ -129,23 +130,11 @@
     }
 }
 
-diesel::table! {
-    message_embeddings (id) {
-        id -> Integer,
-        contact -> Text,
-        body -> Text,
-        timestamp -> BigInt,
-        is_sent -> Bool,
-        embedding -> Binary,
-        created_at -> BigInt,
-        model_version -> Text,
-    }
-}
-
 diesel::table! {
     photo_insights (id) {
         id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
         title -> Text,
         summary -> Text,
         generated_at -> BigInt,
@@ -171,7 +160,7 @@
     tagged_photo (id) {
         id -> Integer,
-        photo_name -> Text,
+        rel_path -> Text,
         tag_id -> Integer,
         created_time -> BigInt,
     }
 }
@@ -196,7 +185,8 @@
     video_preview_clips (id) {
         id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
         status -> Text,
         duration_seconds -> Nullable<Double>,
         file_size_bytes -> Nullable<BigInt>,
@@ -208,7 +198,11 @@ diesel::table! {
 diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
 diesel::joinable!(entity_photo_links -> entities (entity_id));
+diesel::joinable!(entity_photo_links -> libraries (library_id));
+diesel::joinable!(image_exif -> libraries (library_id));
+diesel::joinable!(photo_insights -> libraries (library_id));
 diesel::joinable!(tagged_photo -> tags (tag_id));
+diesel::joinable!(video_preview_clips -> libraries (library_id));
 
 diesel::allow_tables_to_appear_in_same_query!(
     calendar_events,
@@ -218,9 +212,8 @@
     entity_photo_links,
     favorites,
     image_exif,
-    knowledge_embeddings,
+    libraries,
     location_history,
-    message_embeddings,
     photo_insights,
     search_history,
     tagged_photo,
diff --git a/src/database/search_dao.rs b/src/database/search_dao.rs
index 04d0d2f..a74fd92 100644
--- a/src/database/search_dao.rs
+++ b/src/database/search_dao.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;
diff --git a/src/files.rs b/src/files.rs
index f3cd8fa..561414c 100644
--- a/src/files.rs
+++ b/src/files.rs
@@ -1,6 +1,6 @@
 use ::anyhow;
 use actix::{Handler, Message};
-use anyhow::{Context, anyhow};
+use anyhow::Context;
 use std::collections::HashSet;
 use std::fmt::Debug;
 use std::fs::read_dir;
@@ -16,7 +16,6 @@
 use crate::file_types;
 use crate::geo::{gps_bounding_box, haversine_distance};
 use crate::memories::extract_date_from_filename;
 use crate::{AppState, create_thumbnails};
-use actix_web::dev::ResourcePath;
 use actix_web::web::Data;
 use actix_web::{
     HttpRequest, HttpResponse,
@@ -42,52 +41,53 @@
 pub struct FileWithMetadata {
     pub file_name: String,
     pub tag_count: i64,
     pub date_taken: Option<i64>, // Unix timestamp from EXIF or filename extraction
+    pub library_id: i32,
 }
 
 use serde::Deserialize;
 
 /// Apply sorting to files with EXIF data support for date-based sorting
 /// Handles both date sorting (with EXIF/filename fallback) and regular sorting
-/// Returns (sorted_file_paths, total_count)
+/// Returns (sorted_file_paths, sorted_library_ids, total_count)
 fn apply_sorting_with_exif(
     files: Vec<FileWithTagCount>,
+    file_libraries: Vec<i32>,
     sort_type: SortType,
     exif_dao: &mut Box<dyn ExifDao>,
     span_context: &opentelemetry::Context,
-    base_path: &Path,
+    libraries: &[crate::libraries::Library],
     limit: Option<i64>,
     offset: i64,
-) -> (Vec<String>, i64) {
+) -> (Vec<String>, Vec<i32>, i64) {
     let total_count = files.len() as i64;
 
     match sort_type {
         SortType::DateTakenAsc | SortType::DateTakenDesc => {
             info!("Date sorting requested, using in-memory sort with EXIF/filename fallback");
-            // Use in-memory sort so files without EXIF dates are included via
-            // filename extraction and filesystem metadata fallbacks.
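From here on the handler threads two parallel vectors — rel_paths and their per-file library ids — through every filter, sort, and pagination step. The recurring pattern, reduced to a self-contained sketch (names illustrative):

// Pair the vectors, reorder once, then split back apart; zip/unzip
// keeps index i of both outputs referring to the same file.
fn sort_in_lockstep(files: Vec<String>, libs: Vec<i32>) -> (Vec<String>, Vec<i32>) {
    let mut paired: Vec<(String, i32)> = files.into_iter().zip(libs).collect();
    paired.sort_by(|l, r| l.0.cmp(&r.0));
    paired.into_iter().unzip()
}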
- let (sorted, _) = in_memory_date_sort( + let (sorted, sorted_libs, _) = in_memory_date_sort( files, + file_libraries, sort_type, exif_dao, span_context, - base_path, + libraries, limit, offset, ); - (sorted, total_count) + (sorted, sorted_libs, total_count) } _ => { - // Use regular sort for non-date sorting - let sorted = sort(files, sort_type); - let result = if let Some(limit_val) = limit { - sorted - .into_iter() - .skip(offset as usize) - .take(limit_val as usize) - .collect() + let (sorted, sorted_libs) = sort(files, file_libraries, sort_type); + let (result, result_libs) = if let Some(limit_val) = limit { + let skip = offset as usize; + let take = limit_val as usize; + ( + sorted.iter().skip(skip).take(take).cloned().collect(), + sorted_libs.iter().skip(skip).take(take).copied().collect(), + ) } else { - sorted + (sorted, sorted_libs) }; - (result, total_count) + (result, result_libs, total_count) } } } @@ -95,66 +95,88 @@ fn apply_sorting_with_exif( /// Fallback in-memory date sorting with EXIF/filename extraction fn in_memory_date_sort( files: Vec, + file_libraries: Vec, sort_type: SortType, exif_dao: &mut Box, span_context: &opentelemetry::Context, - base_path: &Path, + libraries: &[crate::libraries::Library], limit: Option, offset: i64, -) -> (Vec, i64) { +) -> (Vec, Vec, i64) { let total_count = files.len() as i64; let file_paths: Vec = files.iter().map(|f| f.file_name.clone()).collect(); - // Batch fetch EXIF data - let exif_map: std::collections::HashMap = exif_dao + // Batch fetch EXIF data (keyed by rel_path; in union mode a rel_path may + // correspond to rows in multiple libraries — pick the date from the one + // matching the requesting row's library_id when possible). + let exif_rows = exif_dao .get_exif_batch(span_context, &file_paths) - .unwrap_or_default() + .unwrap_or_default(); + let exif_map: std::collections::HashMap<(String, i32), i64> = exif_rows .into_iter() - .filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt))) + .filter_map(|exif| { + exif.date_taken + .map(|dt| ((exif.file_path, exif.library_id), dt)) + }) + .collect(); + + let lib_roots: std::collections::HashMap = libraries + .iter() + .map(|l| (l.id, l.root_path.as_str())) .collect(); // Convert to FileWithMetadata with date fallback logic let files_with_metadata: Vec = files .into_iter() - .map(|f| { - // Try EXIF date first + .zip(file_libraries.iter().copied()) + .map(|(f, lib_id)| { let date_taken = exif_map - .get(&f.file_name) + .get(&(f.file_name.clone(), lib_id)) .copied() + .or_else(|| extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())) .or_else(|| { - // Fallback to filename extraction - extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp()) - }) - .or_else(|| { - // Fallback to filesystem metadata creation date - let full_path = base_path.join(&f.file_name); - std::fs::metadata(full_path) - .and_then(|md| md.created().or(md.modified())) - .ok() - .map(|system_time| { - >>::into(system_time).timestamp() - }) + lib_roots.get(&lib_id).and_then(|root| { + let full_path = Path::new(root).join(&f.file_name); + std::fs::metadata(full_path) + .and_then(|md| md.created().or(md.modified())) + .ok() + .map(|system_time| { + >>::into(system_time).timestamp() + }) + }) }); FileWithMetadata { file_name: f.file_name, tag_count: f.tag_count, date_taken, + library_id: lib_id, } }) .collect(); - let sorted = sort_with_metadata(files_with_metadata, sort_type); - let result = if let Some(limit_val) = limit { - sorted - .into_iter() - .skip(offset as usize) - 
.take(limit_val as usize) - .collect() + let (sorted, sorted_libs) = sort_with_metadata(files_with_metadata, sort_type); + let (result, result_libs) = if let Some(limit_val) = limit { + let skip = offset as usize; + let take = limit_val as usize; + ( + sorted + .iter() + .skip(skip) + .take(take) + .cloned() + .collect::>(), + sorted_libs + .iter() + .skip(skip) + .take(take) + .copied() + .collect::>(), + ) } else { - sorted + (sorted, sorted_libs) }; - (result, total_count) + (result, result_libs, total_count) } pub async fn list_photos( @@ -235,8 +257,21 @@ pub async fn list_photos( ) .to_string(), ), + KeyValue::new("library", req.library.clone().unwrap_or_default()), ]); + // Resolve the optional library filter. Unknown values return 400. A + // `None` result means "union across all libraries" and downstream + // walks iterate every configured library root. + let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) + { + Ok(lib) => lib, + Err(msg) => { + log::warn!("Rejecting /photos request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; + let span_context = opentelemetry::Context::current_with_span(span); // Check if EXIF filtering is requested @@ -319,6 +354,16 @@ pub async fn list_photos( None }; + // In scoped mode (`library` is Some) we gate tag-based results (which + // key on rel_path only) by "does this rel_path actually exist on disk + // in the selected library's root". In union mode we assign each + // returned file to the first library it resolves in, and drop files + // that exist in no configured library. + let libraries_to_scan: Vec<&crate::libraries::Library> = match library { + Some(lib) => vec![lib], + None => app_state.libraries.iter().collect(), + }; + let search_recursively = req.recursive.unwrap_or(false); if let Some(tag_ids) = &req.tag_ids && search_recursively @@ -384,37 +429,80 @@ pub async fn list_photos( true } }) - .filter(|f| { - // Apply media type filtering if specified + .filter_map(|f| { + // Apply media type filter first (cheap check before disk I/O). if let Some(ref media_type) = req.media_type { let path = PathBuf::from(&f.file_name); - matches_media_type(&path, media_type) + if !matches_media_type(&path, media_type) { + return None; + } + } + + // Resolve the file's library by checking each + // candidate library's root on disk. Falls back to + // content-hash siblings if the rel_path was + // registered under a different path but same content. + for lib in &libraries_to_scan { + if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { + return Some((f, lib.id)); + } + } + let siblings = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao + .find_content_hash_anywhere(&span_context, &f.file_name) + .unwrap_or(None) + { + Some(hash) => dao + .get_rel_paths_by_hash(&span_context, &hash) + .unwrap_or_default(), + None => Vec::new(), + } + }; + for lib in &libraries_to_scan { + if siblings + .iter() + .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) + { + return Some((f, lib.id)); + } + } + // Tags are library-agnostic. If we can't confirm which + // library currently holds the file on disk (e.g. the + // tagged rel_path is stale or the caller is testing + // without real files), keep the tagged row and + // attribute it to the primary library so the client + // still sees the tag hit. 
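Condensed into one place, the resolution ladder just described looks like this (a sketch using a minimal stand-in for the patch's `Library { id, root_path, .. }` shape; the function itself is illustrative, not in the patch):

use std::path::PathBuf;

struct Library { id: i32, root_path: String } // minimal stand-in

// 1) direct disk hit, 2) content-hash sibling on disk, 3) primary fallback
// (union mode only; scoped mode passes None and drops the stale tag hit).
fn resolve_library(
    rel_path: &str,
    candidates: &[Library],
    siblings: &[String], // rel_paths sharing this file's content_hash
    primary_id: Option<i32>,
) -> Option<i32> {
    for lib in candidates {
        if PathBuf::from(&lib.root_path).join(rel_path).exists() {
            return Some(lib.id);
        }
    }
    for lib in candidates {
        if siblings
            .iter()
            .any(|p| PathBuf::from(&lib.root_path).join(p).exists())
        {
            return Some(lib.id);
        }
    }
    primary_id
}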
+ if library.is_none() { + Some((f, app_state.primary_library().id)) } else { - true + None } }) - .collect::>() + .collect::>() }) - .map(|files| { + .map(|paired| { // Handle sorting - use helper function that supports EXIF date sorting and pagination let sort_type = req.sort.unwrap_or(NameAsc); let limit = req.limit; let offset = req.offset.unwrap_or(0); + let (files, file_libs): (Vec, Vec) = paired.into_iter().unzip(); let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); let result = apply_sorting_with_exif( files, + file_libs, sort_type, &mut exif_dao_guard, &span_context, - app_state.base_path.as_ref(), + &app_state.libraries, limit, offset, ); drop(exif_dao_guard); result }) - .inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total)) - .map(|(tagged_files, total_count)| { + .inspect(|(files, _libs, total)| debug!("Found {:?} files (total: {})", files.len(), total)) + .map(|(tagged_files, photo_libraries, total_count)| { info!( "Found {:?} tagged files: {:?}", tagged_files.len(), @@ -448,6 +536,7 @@ pub async fn list_photos( HttpResponse::Ok().json(PhotosResponse { photos: tagged_files, dirs: vec![], + photo_libraries, total_count: pagination_metadata.0, has_more: pagination_metadata.1, next_offset: pagination_metadata.2, @@ -457,307 +546,368 @@ pub async fn list_photos( .unwrap_or_else(|e| e.error_response()); } - // Use recursive or non-recursive file listing based on flag - let files_result = if search_recursively { - // For recursive search without tags, manually list files recursively - is_valid_full_path( - &PathBuf::from(&app_state.base_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for recursive search: {:?}", path); - list_files_recursive(&path).unwrap_or_default() - }) - .context("Invalid path") - } else { - file_system.get_files_for_path(search_path) - }; + // In scoped mode `libraries_to_scan` has one entry (the selected library); + // in union mode we enumerate every configured library and intermix results. + // + // Recursive mode pulls rel_paths from image_exif (kept in parity with disk + // by the watcher's full-scan reconciliation) instead of walking — a ~10k + // file library drops from multi-second to ~10ms for the listing itself. + // Non-recursive mode still walks because we need directory metadata for + // the `dirs` response and listing a single directory is cheap. 
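Because the fast path trusts the watcher's reconciliation to keep `image_exif` and disk in sync, a cheap parity audit is the natural safety net. A hedged sketch of what such a check could look like (not part of the patch):

use std::collections::HashSet;

// Returns (stale, missing): rel_paths present only in the DB vs. only on disk.
fn parity_diff(
    db_rows: &[(i32, String)],
    on_disk: &HashSet<String>,
) -> (Vec<String>, Vec<String>) {
    let in_db: HashSet<&str> = db_rows.iter().map(|(_, p)| p.as_str()).collect();
    let stale = in_db
        .iter()
        .filter(|p| !on_disk.contains(**p))
        .map(|p| p.to_string())
        .collect();
    let missing = on_disk
        .iter()
        .filter(|p| !in_db.contains(p.as_str()))
        .cloned()
        .collect();
    (stale, missing)
}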
+ let mut file_names: Vec = Vec::new(); + let mut file_libraries: Vec = Vec::new(); + let mut dirs_set: std::collections::HashSet = std::collections::HashSet::new(); + let mut any_library_resolved = false; + + if search_recursively { + let start_db_list = std::time::Instant::now(); + let lib_ids: Vec = libraries_to_scan.iter().map(|l| l.id).collect(); + let trimmed = search_path.trim(); + let prefix = if trimmed.is_empty() || trimmed == "/" { + None + } else { + Some(trimmed) + }; + let rows = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + dao.list_rel_paths_for_libraries(&span_context, &lib_ids, prefix) + .unwrap_or_else(|e| { + warn!("list_rel_paths_for_libraries failed: {:?}", e); + Vec::new() + }) + }; + info!( + "DB-backed recursive listing: {} files across {} libraries in {:?}", + rows.len(), + lib_ids.len(), + start_db_list.elapsed() + ); + any_library_resolved = true; + for (lib_id, path) in rows { + file_libraries.push(lib_id); + file_names.push(path); + } + } else { + for lib in &libraries_to_scan { + let files_result = if lib.id == app_state.primary_library().id { + file_system.get_files_for_path(search_path) + } else { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for non-recursive search: {:?}", path); + list_files(&path).unwrap_or_default() + }) + .context("Invalid path") + }; + + let files = match files_result { + Ok(f) => { + any_library_resolved = true; + f + } + Err(e) => { + debug!( + "Skipping library '{}' for path '{}': {:?}", + lib.name, search_path, e + ); + continue; + } + }; - match files_result { - Ok(files) => { info!( - "Found {:?} files in path: {:?} (recursive: {})", + "Found {:?} files in library '{}' path: {:?} (recursive: {})", files.len(), + lib.name, search_path, search_recursively ); - info!("Starting to filter {} files from filesystem", files.len()); - let start_filter = std::time::Instant::now(); + for path in &files { + match path.metadata() { + Ok(md) => { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) + }); + // Normalize separators to '/' so downstream lookups + // (tags, EXIF, insights) that store rel_paths with + // forward slashes still match on Windows. 
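The separator rule matters because rel_paths are join keys across the tag, EXIF, and insight tables. As a standalone invariant it amounts to (illustrative helper; the patch inlines this):

use std::path::Path;

// A rel_path key is root-relative and always uses '/' regardless of host OS.
fn rel_key(root: &Path, full: &Path) -> Option<String> {
    full.strip_prefix(root)
        .ok()
        .and_then(|p| p.to_str())
        .map(|s| s.replace('\\', "/"))
}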
+ let relative_str = relative.to_str().unwrap().replace('\\', "/"); - // Separate files and directories in a single pass to avoid redundant metadata calls - let (file_names, dirs): (Vec, Vec) = - files - .iter() - .fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| { - match path.metadata() { - Ok(md) => { - let relative = - path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| { - panic!( - "Unable to strip base path {} from file path {}", - &app_state.base_path.path(), - path.display() - ) - }); - let relative_str = relative.to_str().unwrap().to_string(); - - if md.is_file() { - files.push(relative_str); - } else if md.is_dir() { - dirs.push(relative_str); - } - } - Err(e) => { - error!("Failed getting file metadata: {:?}", e); - // Include files without metadata if they have extensions - if path.extension().is_some() { - let relative = path - .strip_prefix(&app_state.base_path) - .unwrap_or_else(|_| { - panic!( - "Unable to strip base path {} from file path {}", - &app_state.base_path.path(), - path.display() - ) - }); - files.push(relative.to_str().unwrap().to_string()); - } - } + if md.is_file() { + file_names.push(relative_str); + file_libraries.push(lib.id); + } else if md.is_dir() { + dirs_set.insert(relative_str); } - (files, dirs) - }); + } + Err(e) => { + error!("Failed getting file metadata: {:?}", e); + // Include files without metadata if they have extensions + if path.extension().is_some() { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) + }); + file_names.push(relative.to_str().unwrap().replace('\\', "/")); + file_libraries.push(lib.id); + } + } + } + } + } + } + if !any_library_resolved { + error!("Bad photos request: {}", req.path); + span_context + .span() + .set_status(Status::error("Invalid path")); + return HttpResponse::BadRequest().finish(); + } + + let dirs: Vec = dirs_set.into_iter().collect(); + + info!( + "Starting to filter {} files from filesystem", + file_names.len() + ); + let start_filter = std::time::Instant::now(); + + info!( + "File filtering took {:?}, now fetching tag counts for {} files", + start_filter.elapsed(), + file_names.len() + ); + let start_tags = std::time::Instant::now(); + + // Batch query for tag counts (tags are library-agnostic / keyed by rel_path). 
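The tag filter applied a few lines further down distinguishes Any/All match modes plus an exclusion list; its decision table, as a self-contained sketch (the real code reads `FilterMode` and the id sets from the request):

use std::collections::HashSet;

#[derive(Clone, Copy)]
enum Mode { Any, All } // mirrors the crate's FilterMode

// Excluded tags veto first; then Any needs one overlap, All needs full coverage.
fn keep(file_tags: &[i32], wanted: &HashSet<i32>, excluded: &HashSet<i32>, mode: Mode) -> bool {
    if file_tags.iter().any(|t| excluded.contains(t)) {
        return false;
    }
    match mode {
        Mode::Any => file_tags.iter().any(|t| wanted.contains(t)),
        Mode::All => wanted.iter().all(|id| file_tags.contains(id)),
    }
}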
+ let tag_counts = { + let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); + tag_dao_guard + .get_tag_counts_batch(&span_context, &file_names) + .unwrap_or_default() + }; + info!("Batch tag count query took {:?}", start_tags.elapsed()); + + let start_tag_filter = std::time::Instant::now(); + let file_tags_map: std::collections::HashMap> = + if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { info!( - "File filtering took {:?}, now fetching tag counts for {} files", - start_filter.elapsed(), + "Tag filtering requested, fetching full tag lists for {} files", file_names.len() ); - let start_tags = std::time::Instant::now(); + let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); + file_names + .iter() + .filter_map(|file_name| { + tag_dao_guard + .get_tags_for_path(&span_context, file_name) + .ok() + .map(|tags| (file_name.clone(), tags)) + }) + .collect() + } else { + std::collections::HashMap::new() + }; + if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { + info!("Full tag list fetch took {:?}", start_tag_filter.elapsed()); + } - // Batch query for tag counts to avoid N+1 queries - let tag_counts = { - let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); - tag_dao_guard - .get_tag_counts_batch(&span_context, &file_names) + // Filter + pair with the parallel library_id while preserving ordering + // so the downstream sort can return both arrays in lockstep. + let photos_with_libs: Vec<(FileWithTagCount, i32)> = file_names + .into_iter() + .zip(file_libraries.into_iter()) + .filter_map(|(file_name, lib_id)| { + let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); + + if let Some(tag_ids_csv) = &req.tag_ids { + let tag_ids = tag_ids_csv + .split(',') + .filter_map(|t| t.parse().ok()) + .collect::>(); + + let excluded_tag_ids = req + .exclude_tag_ids + .clone() .unwrap_or_default() - }; - info!("Batch tag count query took {:?}", start_tags.elapsed()); + .split(',') + .filter_map(|t| t.parse().ok()) + .collect::>(); - // Also get full tag lists for files that need tag filtering - let start_tag_filter = std::time::Instant::now(); - let file_tags_map: std::collections::HashMap> = - if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { - info!( - "Tag filtering requested, fetching full tag lists for {} files", - file_names.len() - ); - let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); - file_names - .iter() - .filter_map(|file_name| { - tag_dao_guard - .get_tags_for_path(&span_context, file_name) - .ok() - .map(|tags| (file_name.clone(), tags)) - }) - .collect() - } else { - std::collections::HashMap::new() - }; - if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { - info!("Full tag list fetch took {:?}", start_tag_filter.elapsed()); + let filter_mode = req.tag_filter_mode.unwrap_or(FilterMode::Any); + let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id)); + + let keep = !excluded + && match filter_mode { + FilterMode::Any => file_tags.iter().any(|t| tag_ids.contains(&t.id)), + FilterMode::All => tag_ids + .iter() + .all(|id| file_tags.iter().any(|tag| &tag.id == id)), + }; + if !keep { + return None; + } } - let photos = file_names + if let Some(ref exif_files) = exif_matched_files + && !exif_files.contains(&file_name) + { + return None; + } + + if let Some(ref media_type) = req.media_type { + let path = PathBuf::from(&file_name); + if !matches_media_type(&path, media_type) { + return None; + } + } + + let tag_count = 
*tag_counts.get(&file_name).unwrap_or(&0); + Some(( + FileWithTagCount { + file_name, + tag_count, + }, + lib_id, + )) + }) + .collect(); + + info!( + "After all filters, {} files remain (filtering took {:?})", + photos_with_libs.len(), + start_filter.elapsed() + ); + + // Extract pagination parameters + let limit = req.limit; + let offset = req.offset.unwrap_or(0); + let start_sort = std::time::Instant::now(); + + let (photos, file_libs_sorted_input): (Vec, Vec) = + photos_with_libs.into_iter().unzip(); + + let (response_files, response_libraries, total_count) = if let Some(sort_type) = req.sort { + info!("Sorting {} files by {:?}", photos.len(), sort_type); + let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); + let result = apply_sorting_with_exif( + photos, + file_libs_sorted_input, + sort_type, + &mut exif_dao_guard, + &span_context, + &app_state.libraries, + limit, + offset, + ); + drop(exif_dao_guard); + result + } else { + // No sorting requested - apply pagination if requested + let total = photos.len() as i64; + let (paged_files, paged_libs): (Vec, Vec) = if let Some(limit_val) = limit { + photos .into_iter() - .map(|file_name| { - let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); - (file_name, file_tags) - }) - .filter(|(_, file_tags): &(String, Vec)| { - if let Some(tag_ids) = &req.tag_ids { - let tag_ids = tag_ids - .split(',') - .filter_map(|t| t.parse().ok()) - .collect::>(); + .zip(file_libs_sorted_input) + .skip(offset as usize) + .take(limit_val as usize) + .map(|(f, lib)| (f.file_name, lib)) + .unzip() + } else { + photos + .into_iter() + .zip(file_libs_sorted_input) + .map(|(f, lib)| (f.file_name, lib)) + .unzip() + }; + (paged_files, paged_libs, total) + }; + info!( + "Sorting took {:?}, returned {} files (total: {})", + start_sort.elapsed(), + response_files.len(), + total_count + ); - let excluded_tag_ids = &req - .exclude_tag_ids - .clone() - .unwrap_or_default() - .split(',') - .filter_map(|t| t.parse().ok()) - .collect::>(); - - let filter_mode = &req.tag_filter_mode.unwrap_or(FilterMode::Any); - let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id)); - - return !excluded - && match filter_mode { - FilterMode::Any => { - file_tags.iter().any(|t| tag_ids.contains(&t.id)) - } - FilterMode::All => tag_ids - .iter() - .all(|id| file_tags.iter().any(|tag| &tag.id == id)), - }; - } - - true - }) - .filter(|(file_name, _)| { - // Apply EXIF filtering if present - if let Some(ref exif_files) = exif_matched_files { - exif_files.contains(file_name) - } else { - true - } - }) - .filter(|(file_name, _)| { - // Apply media type filtering if specified - if let Some(ref media_type) = req.media_type { - let path = PathBuf::from(file_name); - matches_media_type(&path, media_type) - } else { - true - } - }) - .map( - |(file_name, _tags): (String, Vec)| FileWithTagCount { - file_name: file_name.clone(), - tag_count: *tag_counts.get(&file_name).unwrap_or(&0), - }, - ) - .collect::>(); - - info!( - "After all filters, {} files remain (filtering took {:?})", - photos.len(), - start_filter.elapsed() - ); - - // Extract pagination parameters - let limit = req.limit; - let offset = req.offset.unwrap_or(0); - let start_sort = std::time::Instant::now(); - - // Handle sorting - use helper function that supports EXIF date sorting and pagination - let (response_files, total_count) = if let Some(sort_type) = req.sort { - info!("Sorting {} files by {:?}", photos.len(), sort_type); - let mut exif_dao_guard = 
exif_dao.lock().expect("Unable to get ExifDao");
-                let result = apply_sorting_with_exif(
-                    photos,
-                    sort_type,
-                    &mut exif_dao_guard,
-                    &span_context,
-                    app_state.base_path.as_ref(),
-                    limit,
-                    offset,
-                );
-                drop(exif_dao_guard);
-                result
+    let returned_count = response_files.len() as i64;
+    let pagination_metadata = if limit.is_some() {
+        (
+            Some(total_count),
+            Some(offset + returned_count < total_count),
+            if offset + returned_count < total_count {
+                Some(offset + returned_count)
             } else {
-                // No sorting requested - apply pagination if requested
-                let total = photos.len() as i64;
-                let files: Vec<String> = if let Some(limit_val) = limit {
-                    photos
-                        .into_iter()
-                        .skip(offset as usize)
-                        .take(limit_val as usize)
-                        .map(|f| f.file_name)
-                        .collect()
-                } else {
-                    photos.into_iter().map(|f| f.file_name).collect()
-                };
-                (files, total)
-            };
-            info!(
-                "Sorting took {:?}, returned {} files (total: {})",
-                start_sort.elapsed(),
-                response_files.len(),
-                total_count
-            );
+                None
+            },
+        )
+    } else {
+        (None, None, None)
+    };
 
-            // Note: dirs were already collected during file filtering to avoid redundant metadata calls
+    span_context.span().set_attribute(KeyValue::new(
+        "file_count",
+        response_files.len().to_string(),
+    ));
+    span_context
+        .span()
+        .set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
+    span_context
+        .span()
+        .set_attribute(KeyValue::new("total_count", total_count.to_string()));
+    span_context.span().set_status(Status::Ok);
 
-            // Calculate pagination metadata
-            let returned_count = response_files.len() as i64;
-            let pagination_metadata = if limit.is_some() {
-                (
-                    Some(total_count),
-                    Some(offset + returned_count < total_count),
-                    if offset + returned_count < total_count {
-                        Some(offset + returned_count)
-                    } else {
-                        None
-                    },
-                )
-            } else {
-                (None, None, None)
-            };
-
-            span_context
-                .span()
-                .set_attribute(KeyValue::new("file_count", files.len().to_string()));
-            span_context
-                .span()
-                .set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
-            span_context
-                .span()
-                .set_attribute(KeyValue::new("total_count", total_count.to_string()));
-            span_context.span().set_status(Status::Ok);
-
-            HttpResponse::Ok().json(PhotosResponse {
-                photos: response_files,
-                dirs,
-                total_count: pagination_metadata.0,
-                has_more: pagination_metadata.1,
-                next_offset: pagination_metadata.2,
-            })
-        }
-        _ => {
-            error!("Bad photos request: {}", req.path);
-            span_context
-                .span()
-                .set_status(Status::error("Invalid path"));
-            HttpResponse::BadRequest().finish()
-        }
-    }
+    HttpResponse::Ok().json(PhotosResponse {
+        photos: response_files,
+        dirs,
+        photo_libraries: response_libraries,
+        total_count: pagination_metadata.0,
+        has_more: pagination_metadata.1,
+        next_offset: pagination_metadata.2,
+    })
 }
 
-fn sort(mut files: Vec<FileWithTagCount>, sort_type: SortType) -> Vec<String> {
+fn sort(
+    files: Vec<FileWithTagCount>,
+    file_libraries: Vec<i32>,
+    sort_type: SortType,
+) -> (Vec<String>, Vec<i32>) {
+    let mut paired: Vec<(FileWithTagCount, i32)> = files.into_iter().zip(file_libraries).collect();
+
     match sort_type {
-        SortType::Shuffle => files.shuffle(&mut thread_rng()),
-        NameAsc => {
-            files.sort_by(|l, r| l.file_name.cmp(&r.file_name));
-        }
-        SortType::NameDesc => {
-            files.sort_by(|l, r| r.file_name.cmp(&l.file_name));
-        }
-        SortType::TagCountAsc => {
-            files.sort_by(|l, r| l.tag_count.cmp(&r.tag_count));
-        }
-        SortType::TagCountDesc => {
-            files.sort_by(|l, r| r.tag_count.cmp(&l.tag_count));
-        }
+        SortType::Shuffle => paired.shuffle(&mut thread_rng()),
+        NameAsc => paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)),
+        SortType::NameDesc => paired.sort_by(|l, r| r.0.file_name.cmp(&l.0.file_name)),
+        SortType::TagCountAsc => paired.sort_by(|l, r| l.0.tag_count.cmp(&r.0.tag_count)),
+        SortType::TagCountDesc => paired.sort_by(|l, r| r.0.tag_count.cmp(&l.0.tag_count)),
         SortType::DateTakenAsc | SortType::DateTakenDesc => {
-            // Date sorting not implemented for FileWithTagCount
-            // We shouldn't be hitting this code
             warn!("Date sorting not implemented for FileWithTagCount");
-            files.sort_by(|l, r| l.file_name.cmp(&r.file_name));
+            paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name));
         }
     }
 
-    files
-        .iter()
-        .map(|f| f.file_name.clone())
-        .collect::<Vec<String>>()
+    paired
+        .into_iter()
+        .map(|(f, lib)| (f.file_name, lib))
+        .unzip()
 }
 
 /// Sort files with metadata support (including date sorting)
-fn sort_with_metadata(mut files: Vec<FileWithMetadata>, sort_type: SortType) -> Vec<String> {
+fn sort_with_metadata(
+    mut files: Vec<FileWithMetadata>,
+    sort_type: SortType,
+) -> (Vec<String>, Vec<i32>) {
     match sort_type {
         SortType::Shuffle => files.shuffle(&mut thread_rng()),
         NameAsc => {
@@ -791,9 +941,9 @@
     }
 
     files
-        .iter()
-        .map(|f| f.file_name.clone())
-        .collect::<Vec<String>>()
+        .into_iter()
+        .map(|f| (f.file_name, f.library_id))
+        .unzip()
 }
 
 pub fn list_files(dir: &Path) -> io::Result<Vec<PathBuf>> {
@@ -815,43 +965,6 @@
     Ok(files)
 }
 
-pub fn list_files_recursive(dir: &Path) -> io::Result<Vec<PathBuf>> {
-    let tracer = global_tracer();
-    let mut span = tracer.start("list_files_recursive");
-    let dir_name_string = dir.to_str().unwrap_or_default().to_string();
-    span.set_attribute(KeyValue::new("dir", dir_name_string));
-    info!("Recursively listing files in: {:?}", dir);
-
-    let mut result = Vec::new();
-
-    fn visit_dirs(dir: &Path, files: &mut Vec<PathBuf>) -> io::Result<()> {
-        if dir.is_dir() {
-            for entry in read_dir(dir)? {
-                let entry = entry?;
-                let path = entry.path();
-
-                if path.is_dir() {
-                    visit_dirs(&path, files)?;
-                } else if is_image_or_video(&path) {
-                    files.push(path);
-                }
-            }
-        }
-        Ok(())
-    }
-
-    visit_dirs(dir, &mut result)?;
-
-    span.set_attribute(KeyValue::new("file_count", result.len().to_string()));
-    span.set_status(Status::Ok);
-    info!(
-        "Found {:?} files recursively in directory: {:?}",
-        result.len(),
-        dir
-    );
-    Ok(result)
-}
-
 pub fn is_image_or_video(path: &Path) -> bool {
     file_types::is_media_file(path)
 }
@@ -896,33 +1009,58 @@ pub fn is_valid_full_path<P: AsRef<Path> + Debug + AsRef<OsStr>>(
     match is_path_above_base_dir(base, &mut path, new_file) {
         Ok(path) => Some(path),
-        Err(e) => {
+        Err(PathValidationError::DoesNotExist(p)) => {
+            debug!("Path does not exist under base {:?}: {:?}", base, p);
+            None
+        }
+        Err(PathValidationError::AboveBase(p)) => {
+            error!("Path above base directory {:?}: {:?}", base, p);
+            None
+        }
+        Err(PathValidationError::Other(e)) => {
             error!("{}", e);
             None
         }
     }
 }
 
+#[derive(Debug)]
+enum PathValidationError {
+    DoesNotExist(PathBuf),
+    AboveBase(PathBuf),
+    Other(anyhow::Error),
+}
+
+impl std::fmt::Display for PathValidationError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PathValidationError::DoesNotExist(p) => write!(f, "Path does not exist: {:?}", p),
+            PathValidationError::AboveBase(p) => write!(f, "Path above base directory: {:?}", p),
+            PathValidationError::Other(e) => write!(f, "{}", e),
+        }
+    }
+}
+
 fn is_path_above_base_dir<P: AsRef<Path> + Debug>(
     base: P,
     full_path: &mut PathBuf,
     new_file: bool,
-) -> anyhow::Result<PathBuf> {
-    full_path
-        .absolutize()
-        .with_context(|| format!("Unable to resolve absolute path: {:?}", full_path))
-        .map_or_else(
-            |e| Err(anyhow!(e)),
-            |p| {
-                if p.starts_with(base) && (new_file || p.exists()) {
-                    Ok(p.into_owned())
-                } else if !p.exists() {
-                    Err(anyhow!("Path does not exist: {:?}", p))
-                } else {
-                    Err(anyhow!("Path above base directory"))
-                }
-            },
-        )
+) -> Result<PathBuf, PathValidationError> {
+    match full_path.absolutize() {
+        Err(e) => Err(PathValidationError::Other(
+            anyhow::Error::new(e)
+                .context(format!("Unable to resolve absolute path: {:?}", full_path)),
+        )),
+        Ok(p) => {
+            if p.starts_with(base) && (new_file || p.exists()) {
+                Ok(p.into_owned())
+            } else if !p.exists() {
+                Err(PathValidationError::DoesNotExist(p.into_owned()))
+            } else {
+                Err(PathValidationError::AboveBase(p.into_owned()))
+            }
+        }
+    }
+}
 
 /// Handler for GPS summary endpoint
@@ -950,13 +1088,20 @@
     let cx = opentelemetry::Context::current_with_span(span);
 
-    // The database stores relative paths, so we use the path as-is
-    // Normalize empty path or "/" to return all GPS photos
+    // The database stores relative paths, so we use the path as-is.
+    // Normalize empty path or "/" to return all GPS photos. Validation
+    // is purely a traversal guard — the path need not exist on disk
+    // under any particular library, because the DAO just does a prefix
+    // match against image_exif.rel_path (which is library-agnostic for
+    // this summary query).
let requested_path = if req.path.is_empty() || req.path == "/" { String::new() } else { - // Validate path using the same check as all other endpoints - if is_valid_full_path(&app_state.base_path, &req.path, false).is_none() { + let req_path = PathBuf::from(&req.path); + let validated = app_state.libraries.iter().any(|lib| { + is_valid_full_path(&PathBuf::from(&lib.root_path), &req_path, true).is_some() + }); + if !validated { warn!("Invalid path for GPS summary: {}", req.path); cx.span().set_status(Status::error("Invalid path")); return Ok(HttpResponse::BadRequest().json(serde_json::json!({ @@ -1143,7 +1288,10 @@ impl Handler<RefreshThumbnailsMessage> for StreamActor { let tracer = global_tracer(); let _ = tracer.start("RefreshThumbnailsMessage"); info!("Refreshing thumbnails after upload"); - create_thumbnails() + // The stub in lib.rs is a no-op; the real generation is driven by + // the file watcher tick in main.rs, which has access to the + // configured libraries. + create_thumbnails(&[]) } } @@ -1151,6 +1299,7 @@ mod tests { use super::*; use crate::database::DbError; + use ::anyhow::anyhow; use std::collections::HashMap; use std::env; use std::fs::File; @@ -1162,6 +1311,7 @@ } impl FakeFileSystem { + #[allow(dead_code)] fn with_error() -> FakeFileSystem { FakeFileSystem { files: HashMap::new(), @@ -1212,6 +1362,7 @@ // Return a dummy ImageExif for tests Ok(crate::database::models::ImageExif { id: 1, + library_id: data.library_id, file_path: data.file_path.to_string(), camera_make: data.camera_make.map(|s| s.to_string()), camera_model: data.camera_model.map(|s| s.to_string()), @@ -1229,6 +1380,8 @@ date_taken: data.date_taken, created_time: data.created_time, last_modified: data.last_modified, + content_hash: data.content_hash.clone(), + size_bytes: data.size_bytes, }) } @@ -1248,6 +1401,7 @@ // Return a dummy ImageExif for tests Ok(crate::database::models::ImageExif { id: 1, + library_id: data.library_id, file_path: data.file_path.to_string(), camera_make: data.camera_make.map(|s| s.to_string()), camera_model: data.camera_model.map(|s| s.to_string()), @@ -1265,6 +1419,8 @@ date_taken: data.date_taken, created_time: data.created_time, last_modified: data.last_modified, + content_hash: data.content_hash.clone(), + size_bytes: data.size_bytes, }) } @@ -1279,6 +1435,7 @@ fn get_all_with_date_taken( &mut self, _context: &opentelemetry::Context, + _library_id: Option<i32>, ) -> Result<Vec<ImageExif>, DbError> { Ok(Vec::new()) } @@ -1335,6 +1492,84 @@ ) -> Result)>, DbError> { todo!() } + + fn get_rows_missing_hash( + &mut self, + _context: &opentelemetry::Context, + _limit: i64, + ) -> Result<Vec<ImageExif>, DbError> { + Ok(Vec::new()) + } + + fn backfill_content_hash( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + _rel_path: &str, + _hash: &str, + _size_bytes: i64, + ) -> Result<(), DbError> { + Ok(()) + } + + fn find_by_content_hash( + &mut self, + _context: &opentelemetry::Context, + _hash: &str, + ) -> Result<Option<ImageExif>, DbError> { + Ok(None) + } + + fn get_rel_paths_sharing_content( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + rel_path: &str, + ) -> Result<Vec<String>, DbError> { + Ok(vec![rel_path.to_string()]) + } + + fn get_rel_paths_for_library( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + ) -> Result<Vec<String>, DbError> { + Ok(vec![]) + } + + fn find_content_hash_anywhere( + &mut self, + _context: &opentelemetry::Context, + _rel_path: &str, + ) -> Result<Option<String>, DbError> { + Ok(None)
} + + fn get_rel_paths_by_hash( + &mut self, + _context: &opentelemetry::Context, + _hash: &str, + ) -> Result<Vec<String>, DbError> { + Ok(vec![]) + } + + fn list_rel_paths_for_libraries( + &mut self, + _context: &opentelemetry::Context, + _library_ids: &[i32], + _path_prefix: Option<&str>, + ) -> Result<Vec<String>, DbError> { + Ok(vec![]) + } + + fn delete_exif_by_library( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + _rel_path: &str, + ) -> Result<(), DbError> { + Ok(()) + } } mod api { diff --git a/src/lib.rs b/src/lib.rs index bd4f7ab..cf0ba10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,12 @@ +#![allow(clippy::too_many_arguments)] + #[macro_use] extern crate diesel; pub mod ai; pub mod auth; pub mod cleanup; +pub mod content_hash; pub mod data; pub mod database; pub mod error; @@ -11,6 +14,7 @@ pub mod exif; pub mod file_types; pub mod files; pub mod geo; +pub mod libraries; pub mod memories; pub mod otel; pub mod parsers; @@ -32,7 +36,7 @@ pub use state::AppState; use std::path::Path; use walkdir::DirEntry; -pub fn create_thumbnails() { +pub fn create_thumbnails(_libs: &[libraries::Library]) { // Stub - implemented in main.rs } diff --git a/src/libraries.rs b/src/libraries.rs new file mode 100644 index 0000000..cc3f2f4 --- /dev/null +++ b/src/libraries.rs @@ -0,0 +1,282 @@ +use actix_web::{HttpResponse, Responder, get, web::Data}; +use chrono::Utc; +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use log::{info, warn}; +use std::path::{Path, PathBuf}; + +use crate::data::Claims; +use crate::database::models::{InsertLibrary, LibraryRow}; +use crate::database::schema::libraries; +use crate::state::AppState; + +/// Id of the primary library row seeded by the multi-library migration. +/// Used as the default `library_id` during the Phase 2 transitional shim, +/// before handlers/callers are library-aware. +pub const PRIMARY_LIBRARY_ID: i32 = 1; + +/// Placeholder value written into `libraries.root_path` by the migration. +/// Replaced on startup with the live `BASE_PATH` env var. +pub const ROOT_PATH_PLACEHOLDER: &str = "BASE_PATH_PLACEHOLDER"; + +/// A media library mount point: its numeric id, logical name, and absolute +/// root on disk. `rel_path` values stored in the DB are relative to this root. +#[derive(Clone, Debug, serde::Serialize)] +pub struct Library { + pub id: i32, + pub name: String, + pub root_path: String, +} + +impl Library { + /// Resolve a library-relative path into an absolute `PathBuf` under the + /// library root. Does not validate traversal — use `is_valid_full_path` + /// for untrusted input. + #[allow(dead_code)] + pub fn resolve(&self, rel_path: &str) -> PathBuf { + Path::new(&self.root_path).join(rel_path) + } + + /// Inverse of `resolve`: given an absolute path under this library's + /// root, return the root-relative portion. Returns `None` if the path + /// is not under the library. + #[allow(dead_code)] + pub fn strip_root(&self, abs_path: &Path) -> Option<String> { + abs_path + .strip_prefix(&self.root_path) + .ok() + .map(|p| p.to_string_lossy().replace('\\', "/")) + } +} + +impl From<LibraryRow> for Library { + fn from(row: LibraryRow) -> Self { + Library { + id: row.id, + name: row.name, + root_path: row.root_path, + } + } +} + +/// Load all library rows from the database into `Library` values.
+pub fn load_all(conn: &mut SqliteConnection) -> Vec<Library> { + libraries::table + .order(libraries::id.asc()) + .load::<LibraryRow>(conn) + .unwrap_or_else(|e| { + warn!("Failed to load libraries table: {:?}", e); + Vec::new() + }) + .into_iter() + .map(Library::from) + .collect() +} + +/// Ensure at least one library exists and that the seeded placeholder row is +/// patched with the live `BASE_PATH`. Safe to call on every startup; it only +/// writes when the placeholder is still present. +pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { + // Check whether the primary row still carries the placeholder from the + // migration. If so, replace it with the live BASE_PATH. + let placeholder_count: i64 = libraries::table + .filter(libraries::root_path.eq(ROOT_PATH_PLACEHOLDER)) + .count() + .get_result(conn) + .unwrap_or(0); + + if placeholder_count > 0 { + diesel::update(libraries::table.filter(libraries::root_path.eq(ROOT_PATH_PLACEHOLDER))) + .set(libraries::root_path.eq(base_path)) + .execute(conn) + .map(|rows| { + info!( + "Patched {} library row(s) with BASE_PATH='{}'", + rows, base_path + ); + }) + .unwrap_or_else(|e| warn!("Failed to patch library root_path: {:?}", e)); + return; + } + + // If no rows exist at all (e.g. table created outside the seeded migration), + // insert a primary library pointing at BASE_PATH. + let total: i64 = libraries::table.count().get_result(conn).unwrap_or(0); + if total == 0 { + let now = Utc::now().timestamp(); + let result = diesel::insert_into(libraries::table) + .values(InsertLibrary { + name: "main", + root_path: base_path, + created_at: now, + }) + .execute(conn); + match result { + Ok(_) => info!( + "Seeded primary library 'main' with BASE_PATH='{}'", + base_path + ), + Err(e) => warn!("Failed to seed primary library: {:?}", e), + } + } +} + +/// Resolve a library request parameter (accepts numeric id as string or name) +/// against the configured libraries. Returns `Ok(None)` when the param is +/// absent, meaning "span all libraries". Returns `Err` when a value is +/// provided but does not match any library. +pub fn resolve_library_param<'a>( + state: &'a AppState, + param: Option<&str>, +) -> Result<Option<&'a Library>, String> { + let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else { + return Ok(None); + }; + + if let Ok(id) = raw.parse::<i32>() { + return state + .library_by_id(id) + .map(Some) + .ok_or_else(|| format!("unknown library id: {}", id)); + } + + state + .library_by_name(raw) + .map(Some) + .ok_or_else(|| format!("unknown library name: {}", raw)) +} + +#[derive(serde::Serialize)] +pub struct LibrariesResponse { + pub libraries: Vec<Library>, +} + +#[get("/libraries")] +pub async fn list_libraries(_claims: Claims, app_state: Data<AppState>) -> impl Responder { + HttpResponse::Ok().json(LibrariesResponse { + libraries: app_state.libraries.clone(), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::database::test::in_memory_db_connection; + + #[test] + fn seed_patches_placeholder() { + let mut conn = in_memory_db_connection(); + // Migration seeds one row with the placeholder.
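// (Assumption, for readers of this test: in_memory_db_connection() is
// expected to apply the embedded migrations, so the placeholder row from
// the multi-library migration already exists before seeding runs.)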
+ seed_or_patch_from_env(&mut conn, "/tmp/media"); + let libs = load_all(&mut conn); + assert_eq!(libs.len(), 1); + assert_eq!(libs[0].id, 1); + assert_eq!(libs[0].name, "main"); + assert_eq!(libs[0].root_path, "/tmp/media"); + } + + #[test] + fn seed_is_idempotent() { + let mut conn = in_memory_db_connection(); + seed_or_patch_from_env(&mut conn, "/tmp/media"); + seed_or_patch_from_env(&mut conn, "/tmp/other"); + // Second call should not overwrite an already-patched row. + let libs = load_all(&mut conn); + assert_eq!(libs.len(), 1); + assert_eq!(libs[0].root_path, "/tmp/media"); + } + + #[test] + fn library_strip_root() { + let lib = Library { + id: 1, + name: "main".into(), + root_path: "/tmp/media".into(), + }; + let rel = lib.strip_root(Path::new("/tmp/media/2024/photo.jpg")); + assert_eq!(rel.as_deref(), Some("2024/photo.jpg")); + let outside = lib.strip_root(Path::new("/etc/passwd")); + assert!(outside.is_none()); + } + + #[test] + fn library_resolve_joins_under_root() { + let lib = Library { + id: 1, + name: "main".into(), + root_path: "/tmp/media".into(), + }; + let abs = lib.resolve("2024/photo.jpg"); + assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); + } + + fn state_with_libraries(libs: Vec) -> AppState { + let mut state = AppState::test_state(); + state.libraries = libs; + state + } + + fn sample_libraries() -> Vec { + vec![ + Library { + id: 1, + name: "main".into(), + root_path: "/tmp/main".into(), + }, + Library { + id: 7, + name: "archive".into(), + root_path: "/tmp/archive".into(), + }, + ] + } + + #[actix_rt::test] + async fn resolve_library_param_absent_is_union() { + let state = state_with_libraries(sample_libraries()); + assert!(matches!(resolve_library_param(&state, None), Ok(None))); + } + + #[actix_rt::test] + async fn resolve_library_param_empty_or_whitespace_is_union() { + let state = state_with_libraries(sample_libraries()); + assert!(matches!(resolve_library_param(&state, Some("")), Ok(None))); + assert!(matches!( + resolve_library_param(&state, Some(" ")), + Ok(None) + )); + } + + #[actix_rt::test] + async fn resolve_library_param_numeric_id_matches() { + let state = state_with_libraries(sample_libraries()); + let lib = resolve_library_param(&state, Some("7")) + .expect("valid id") + .expect("some library"); + assert_eq!(lib.id, 7); + assert_eq!(lib.name, "archive"); + } + + #[actix_rt::test] + async fn resolve_library_param_name_matches() { + let state = state_with_libraries(sample_libraries()); + let lib = resolve_library_param(&state, Some("main")) + .expect("valid name") + .expect("some library"); + assert_eq!(lib.id, 1); + } + + #[actix_rt::test] + async fn resolve_library_param_unknown_id_errs() { + let state = state_with_libraries(sample_libraries()); + let err = resolve_library_param(&state, Some("999")).unwrap_err(); + assert!(err.contains("unknown library id")); + } + + #[actix_rt::test] + async fn resolve_library_param_unknown_name_errs() { + let state = state_with_libraries(sample_libraries()); + let err = resolve_library_param(&state, Some("missing")).unwrap_err(); + assert!(err.contains("unknown library name")); + } +} diff --git a/src/main.rs b/src/main.rs index 8a95d2d..570cf58 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![allow(clippy::too_many_arguments)] + #[macro_use] extern crate diesel; extern crate rayon; @@ -12,7 +14,10 @@ use prometheus::{self, IntGauge}; use std::error::Error; use std::sync::{Arc, Mutex}; use std::time::{Duration, SystemTime}; -use std::{collections::HashMap, io::prelude::*}; +use std::{ + 
collections::{HashMap, HashSet}, + io::prelude::*, +}; use std::{env, fs::File}; use std::{ io::ErrorKind, @@ -55,6 +60,7 @@ use opentelemetry::{KeyValue, global}; mod ai; mod auth; +mod content_hash; mod data; mod database; mod error; @@ -62,6 +68,7 @@ mod exif; mod file_types; mod files; mod geo; +mod libraries; mod state; mod tags; mod utils; @@ -95,28 +102,87 @@ async fn get_image( request: HttpRequest, req: web::Query, app_state: Data, + exif_dao: Data>>, ) -> impl Responder { let tracer = global_tracer(); let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("get_image", &context); - if let Some(path) = is_valid_full_path(&app_state.base_path, &req.path, false) { + // Resolve library from query param; default to primary so clients that + // don't yet send `library=` continue to work. + let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(msg) => { + span.set_status(Status::error(msg.clone())); + return HttpResponse::BadRequest().body(msg); + } + }; + + // Union-mode search returns flat rel_paths with no library attribution, + // so clients may request a file under the wrong library. Try the + // resolved library first; if the file isn't there, fall back to any + // other library holding that rel_path on disk. + let resolved = is_valid_full_path(&library.root_path, &req.path, false) + .filter(|p| p.exists()) + .map(|p| (library, p)) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &req.path, false) + .filter(|p| p.exists()) + .map(|p| (lib, p)) + }) + }); + + if let Some((library, path)) = resolved { let image_size = req.size.unwrap_or(PhotoSize::Full); if image_size == PhotoSize::Thumb { let relative_path = path - .strip_prefix(&app_state.base_path) - .expect("Error stripping base path prefix from thumbnail"); + .strip_prefix(&library.root_path) + .expect("Error stripping library root prefix from thumbnail"); + let relative_path_str = relative_path.to_string_lossy().replace('\\', "/"); let thumbs = &app_state.thumbnail_path; - let mut thumb_path = Path::new(&thumbs).join(relative_path); + let legacy_thumb_path = Path::new(&thumbs).join(relative_path); - // If it's a video and GIF format is requested, try to serve GIF thumbnail + // Gif thumbnails are a separate lookup (video GIF previews). + // Dual-lookup for gif is out of scope; preserve existing flow. if req.format == Some(ThumbnailFormat::Gif) && is_video_file(&path) { - thumb_path = Path::new(&app_state.gif_path).join(relative_path); - thumb_path.set_extension("gif"); + let mut gif_path = Path::new(&app_state.gif_path).join(relative_path); + gif_path.set_extension("gif"); + trace!("Gif thumbnail path: {:?}", gif_path); + if let Ok(file) = NamedFile::open(&gif_path) { + span.set_status(Status::Ok); + return file + .use_etag(true) + .use_last_modified(true) + .prefer_utf8(true) + .into_response(&request); + } } + // Resolve the hash-keyed thumbnail (if the row already has a + // content_hash) and fall back to the legacy mirrored path. 
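// Aside: content_hash::thumbnail_path is used just below but its body is
// not part of this diff. One plausible shape (an assumption, not the
// actual module) keys thumbnails by hash with a two-level fan-out so no
// single directory grows unbounded:
fn thumbnail_path_sketch(thumbs_root: &Path, hash: &str) -> PathBuf {
    // "abcdef42..." -> <thumbs_root>/ab/cd/abcdef42....jpg
    thumbs_root
        .join(&hash[..2])
        .join(&hash[2..4])
        .join(format!("{hash}.jpg"))
}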
+ let hash_thumb_path: Option = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + match dao.get_exif(&context, &relative_path_str) { + Ok(Some(row)) => row + .content_hash + .as_deref() + .map(|h| content_hash::thumbnail_path(Path::new(thumbs), h)), + _ => None, + } + }; + let thumb_path = hash_thumb_path + .as_ref() + .filter(|p| p.exists()) + .cloned() + .unwrap_or_else(|| legacy_thumb_path.clone()); + // Handle circular thumbnail request if req.shape == Some(ThumbnailShape::Circle) { match create_circular_thumbnail(&thumb_path, thumbs).await { @@ -140,8 +206,6 @@ async fn get_image( trace!("Thumbnail path: {:?}", thumb_path); if let Ok(file) = NamedFile::open(&thumb_path) { span.set_status(Status::Ok); - // The NamedFile will automatically set the correct content-type - // Enable ETag and set cache headers for thumbnails (1 day cache) return file .use_etag(true) .use_last_modified(true) @@ -163,9 +227,9 @@ async fn get_image( span.set_status(Status::error("Not found")); HttpResponse::NotFound().finish() } else { - span.set_status(Status::error("Bad photos request")); - error!("Bad photos request: {}", req.path); - HttpResponse::BadRequest().finish() + span.set_status(Status::error("Not found")); + error!("Path does not exist in any library: {}", req.path); + HttpResponse::NotFound().finish() } } @@ -250,15 +314,38 @@ async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let full_path = is_valid_full_path(&app_state.base_path, &path.path, false); + let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); - match full_path + // Fall back to other libraries if the file isn't under the resolved one, + // matching the `/image` handler so union-mode search results resolve. + let resolved = is_valid_full_path(&library.root_path, &path.path, false) + .filter(|p| p.exists()) + .map(|p| (library, p)) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &path.path, false) + .filter(|p| p.exists()) + .map(|p| (lib, p)) + }) + }); + + match resolved .ok_or_else(|| ErrorKind::InvalidData.into()) - .and_then(File::open) - .and_then(|file| file.metadata()) - { - Ok(metadata) => { + .and_then(|(lib, full_path)| { + File::open(&full_path) + .and_then(|file| file.metadata()) + .map(|metadata| (lib, metadata)) + }) { + Ok((resolved_library, metadata)) => { let mut response: MetadataResponse = metadata.into(); + response.library_id = Some(resolved_library.id); + response.library_name = Some(resolved_library.name.clone()); // Extract date from filename if possible response.filename_date = @@ -289,10 +376,16 @@ async fn get_file_metadata( } } +#[derive(serde::Deserialize)] +struct UploadQuery { + library: Option, +} + #[post("/image")] async fn upload_image( _: Claims, request: HttpRequest, + query: web::Query, mut payload: mp::Multipart, app_state: Data, exif_dao: Data>>, @@ -303,6 +396,18 @@ async fn upload_image( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); + // Resolve the optional library selector. Absent → primary library + // (backwards-compatible with clients that don't yet send `library=`). 
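// Aside: content_hash::compute (called later in this handler and by the
// file watcher) is not shown in this diff. A minimal sketch of a plausible
// implementation, assuming the new blake3 dependency; the FileIdentity
// name is hypothetical:
mod content_hash_sketch {
    use std::{fs::File, io, path::Path};

    pub struct FileIdentity {
        pub content_hash: String,
        pub size_bytes: i64,
    }

    pub fn compute(path: &Path) -> io::Result<FileIdentity> {
        let mut file = File::open(path)?;
        let size_bytes = file.metadata()?.len() as i64;
        let mut hasher = blake3::Hasher::new();
        // blake3::Hasher implements io::Write, so the file streams through
        // without being buffered in memory.
        io::copy(&mut file, &mut hasher)?;
        Ok(FileIdentity {
            content_hash: hasher.finalize().to_hex().to_string(),
            size_bytes,
        })
    }
}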
+ let target_library = + match libraries::resolve_library_param(&app_state, query.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(msg) => { + span.set_status(Status::error(msg.clone())); + return HttpResponse::BadRequest().body(msg); + } + }; + let mut file_content: BytesMut = BytesMut::new(); let mut file_name: Option = None; let mut file_path: Option = None; @@ -332,7 +437,7 @@ async fn upload_image( } } - let path = file_path.unwrap_or_else(|| app_state.base_path.clone()); + let path = file_path.unwrap_or_else(|| target_library.root_path.clone()); if !file_content.is_empty() { if file_name.is_none() { span.set_status(Status::error("No filename provided")); @@ -340,7 +445,7 @@ async fn upload_image( } let full_path = PathBuf::from(&path).join(file_name.unwrap()); if let Some(full_path) = is_valid_full_path( - &app_state.base_path, + &target_library.root_path, &full_path.to_str().unwrap().to_string(), true, ) { @@ -381,16 +486,29 @@ async fn upload_image( // Extract and store EXIF data if file supports it if exif::supports_exif(&uploaded_path) { let relative_path = uploaded_path - .strip_prefix(&app_state.base_path) - .expect("Error stripping base path prefix") + .strip_prefix(&target_library.root_path) + .expect("Error stripping library root prefix") .to_str() .unwrap() - .to_string(); + .replace('\\', "/"); match exif::extract_exif_from_path(&uploaded_path) { Ok(exif_data) => { let timestamp = Utc::now().timestamp(); + let (content_hash, size_bytes) = match content_hash::compute(&uploaded_path) + { + Ok(id) => (Some(id.content_hash), Some(id.size_bytes)), + Err(e) => { + warn!( + "Failed to hash uploaded {}: {:?}", + uploaded_path.display(), + e + ); + (None, None) + } + }; let insert_exif = InsertImageExif { + library_id: target_library.id, file_path: relative_path.clone(), camera_make: exif_data.camera_make, camera_model: exif_data.camera_model, @@ -408,6 +526,8 @@ async fn upload_image( date_taken: exif_data.date_taken, created_time: timestamp, last_modified: timestamp, + content_hash, + size_bytes, }; if let Ok(mut dao) = exif_dao.lock() { @@ -460,7 +580,28 @@ async fn generate_video( if let Some(name) = filename.file_name() { let filename = name.to_str().expect("Filename should convert to string"); let playlist = format!("{}/{}.m3u8", app_state.video_path, filename); - if let Some(path) = is_valid_full_path(&app_state.base_path, &body.path, false) { + + let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + + // Try the resolved library first, then fall back to any other library + // that actually contains the file — handles union-mode requests where + // the mobile client passes no library but the file lives in a + // non-primary library. + let resolved = is_valid_full_path(&library.root_path, &body.path, false) + .filter(|p| p.exists()) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists()) + }) + }); + + if let Some(path) = resolved { if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await { span.add_event( "playlist_created".to_string(), @@ -832,9 +973,12 @@ async fn favorites( .collect::>(); span.set_status(Status::Ok); + // Favorites are library-agnostic (shared by rel_path), so we + // intentionally leave photo_libraries empty to signal "no badge". 
HttpResponse::Ok().json(PhotosResponse { photos: favorites, dirs: Vec::new(), + photo_libraries: Vec::new(), total_count: None, has_more: None, next_offset: None, @@ -916,78 +1060,87 @@ async fn delete_favorite( } } -fn create_thumbnails() { +fn create_thumbnails(libs: &[libraries::Library]) { let tracer = global_tracer(); let span = tracer.start("creating thumbnails"); let thumbs = &dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined"); let thumbnail_directory: &Path = Path::new(thumbs); - let images = PathBuf::from(dotenv::var("BASE_PATH").unwrap()); + for lib in libs { + info!( + "Scanning thumbnails for library '{}' at {}", + lib.name, lib.root_path + ); + let images = PathBuf::from(&lib.root_path); - WalkDir::new(&images) - .into_iter() - .collect::>>() - .into_par_iter() - .filter_map(|entry| entry.ok()) - .filter(|entry| entry.file_type().is_file()) - .filter(|entry| { - if is_video(entry) { - let relative_path = &entry.path().strip_prefix(&images).unwrap(); + WalkDir::new(&images) + .into_iter() + .collect::>>() + .into_par_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.file_type().is_file()) + .filter(|entry| { + if is_video(entry) { + let relative_path = &entry.path().strip_prefix(&images).unwrap(); + let thumb_path = Path::new(thumbnail_directory).join(relative_path); + std::fs::create_dir_all( + thumb_path + .parent() + .unwrap_or_else(|| panic!("Thumbnail {:?} has no parent?", thumb_path)), + ) + .expect("Error creating directory"); + + let mut video_span = tracer.start_with_context( + "generate_video_thumbnail", + &opentelemetry::Context::new() + .with_remote_span_context(span.span_context().clone()), + ); + video_span.set_attributes(vec![ + KeyValue::new("type", "video"), + KeyValue::new("file-name", thumb_path.display().to_string()), + KeyValue::new("library", lib.name.clone()), + ]); + + debug!("Generating video thumbnail: {:?}", thumb_path); + generate_video_thumbnail(entry.path(), &thumb_path); + video_span.end(); + false + } else { + is_image(entry) + } + }) + .filter(|entry| { + let path = entry.path(); + let relative_path = &path.strip_prefix(&images).unwrap(); let thumb_path = Path::new(thumbnail_directory).join(relative_path); - std::fs::create_dir_all( - thumb_path - .parent() - .unwrap_or_else(|| panic!("Thumbnail {:?} has no parent?", thumb_path)), - ) - .expect("Error creating directory"); - - let mut video_span = tracer.start_with_context( - "generate_video_thumbnail", - &opentelemetry::Context::new() - .with_remote_span_context(span.span_context().clone()), - ); - video_span.set_attributes(vec![ - KeyValue::new("type", "video"), - KeyValue::new("file-name", thumb_path.display().to_string()), - ]); - - debug!("Generating video thumbnail: {:?}", thumb_path); - generate_video_thumbnail(entry.path(), &thumb_path); - video_span.end(); - false - } else { - is_image(entry) - } - }) - .filter(|entry| { - let path = entry.path(); - let relative_path = &path.strip_prefix(&images).unwrap(); - let thumb_path = Path::new(thumbnail_directory).join(relative_path); - !thumb_path.exists() - }) - .map(|entry| (image::open(entry.path()), entry.path().to_path_buf())) - .filter(|(img, path)| { - if let Err(e) = img { - error!("Unable to open image: {:?}. 
{}", path, e); - } - img.is_ok() - }) - .map(|(img, path)| (img.unwrap(), path)) - .map(|(image, path)| (image.thumbnail(200, u32::MAX), path)) - .map(|(image, path)| { - let relative_path = &path.strip_prefix(&images).unwrap(); - let thumb_path = Path::new(thumbnail_directory).join(relative_path); - std::fs::create_dir_all(thumb_path.parent().unwrap()) - .expect("There was an issue creating directory"); - info!("Saving thumbnail: {:?}", thumb_path); - image.save(thumb_path).expect("Failure saving thumbnail"); - }) - .for_each(drop); + !thumb_path.exists() + }) + .map(|entry| (image::open(entry.path()), entry.path().to_path_buf())) + .filter(|(img, path)| { + if let Err(e) = img { + error!("Unable to open image: {:?}. {}", path, e); + } + img.is_ok() + }) + .map(|(img, path)| (img.unwrap(), path)) + .map(|(image, path)| (image.thumbnail(200, u32::MAX), path)) + .map(|(image, path)| { + let relative_path = &path.strip_prefix(&images).unwrap(); + let thumb_path = Path::new(thumbnail_directory).join(relative_path); + std::fs::create_dir_all(thumb_path.parent().unwrap()) + .expect("There was an issue creating directory"); + info!("Saving thumbnail: {:?}", thumb_path); + image.save(thumb_path).expect("Failure saving thumbnail"); + }) + .for_each(drop); + } debug!("Finished making thumbnails"); - update_media_counts(&images); + for lib in libs { + update_media_counts(Path::new(&lib.root_path)); + } } fn update_media_counts(media_dir: &Path) { @@ -1035,11 +1188,22 @@ fn main() -> std::io::Result<()> { otel::init_tracing(); } - create_thumbnails(); - // generate_video_gifs().await; - + // AppState construction loads (and seeds if needed) the libraries + // table; we use that list to drive the initial thumbnail sweep. let app_data = Data::new(AppState::default()); + // Kick thumbnail generation onto a background thread so the HTTP + // server can accept traffic while large libraries are backfilling. + // Existing thumbs are re-used (exists() check inside the walk), + // so missed files are filled in over successive scans. 
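// (Aside: web::Data wraps the state in an Arc, and the libraries Vec is
// cloned into the spawned thread, so the background sweep shares no lock
// with request handlers and cannot delay server startup.)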
+ { + let libs = app_data.libraries.clone(); + std::thread::spawn(move || { + create_thumbnails(&libs); + }); + } + // generate_video_gifs().await; + let labels = HashMap::new(); let prometheus = PrometheusMetricsBuilder::new("api") .const_labels(labels) @@ -1056,14 +1220,20 @@ fn main() -> std::io::Result<()> { .unwrap(); let app_state = app_data.clone(); - app_state.playlist_manager.do_send(ScanDirectoryMessage { - directory: app_state.base_path.clone(), - }); + for lib in &app_state.libraries { + app_state.playlist_manager.do_send(ScanDirectoryMessage { + directory: lib.root_path.clone(), + }); + } // Start file watcher with playlist manager and preview generator let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone(); let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone(); - watch_files(playlist_mgr_for_watcher, preview_gen_for_watcher); + watch_files( + app_state.libraries.clone(), + playlist_mgr_for_watcher, + preview_gen_for_watcher, + ); // Start orphaned playlist cleanup job cleanup_orphaned_playlists(); @@ -1187,6 +1357,7 @@ fn main() -> std::io::Result<()> { .service(ai::get_available_models_handler) .service(ai::rate_insight_handler) .service(ai::export_training_data_handler) + .service(libraries::list_libraries) .add_feature(add_tag_services::<_, SqliteTagDao>) .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>) .app_data(app_data.clone()) @@ -1371,13 +1542,11 @@ fn cleanup_orphaned_playlists() { } fn watch_files( + libs: Vec, playlist_manager: Addr, preview_generator: Addr, ) { std::thread::spawn(move || { - let base_str = dotenv::var("BASE_PATH").unwrap(); - let base_path = PathBuf::from(&base_str); - // Get polling intervals from environment variables // Quick scan: Check recently modified files (default: 60 seconds) let quick_interval_secs = dotenv::var("WATCH_QUICK_INTERVAL_SECONDS") @@ -1394,7 +1563,12 @@ fn watch_files( info!("Starting optimized file watcher"); info!(" Quick scan interval: {} seconds", quick_interval_secs); info!(" Full scan interval: {} seconds", full_interval_secs); - info!(" Watching directory: {}", base_str); + for lib in &libs { + info!( + " Watching library '{}' (id={}) at {}", + lib.name, lib.id, lib.root_path + ); + } // Create DAOs for tracking processed files let exif_dao = Arc::new(Mutex::new( @@ -1418,41 +1592,48 @@ fn watch_files( let is_full_scan = since_last_full.as_secs() >= full_interval_secs; - if is_full_scan { - info!("Running full scan (scan #{})", scan_count); - process_new_files( - &base_path, - Arc::clone(&exif_dao), - Arc::clone(&preview_dao), - None, - playlist_manager.clone(), - preview_generator.clone(), - ); - last_full_scan = now; - } else { - debug!( - "Running quick scan (checking files modified in last {} seconds)", - quick_interval_secs + 10 - ); - // Check files modified since last quick scan, plus 10 second buffer - let check_since = last_quick_scan - .checked_sub(Duration::from_secs(10)) - .unwrap_or(last_quick_scan); - process_new_files( - &base_path, - Arc::clone(&exif_dao), - Arc::clone(&preview_dao), - Some(check_since), - playlist_manager.clone(), - preview_generator.clone(), - ); + for lib in &libs { + if is_full_scan { + info!( + "Running full scan for library '{}' (scan #{})", + lib.name, scan_count + ); + process_new_files( + lib, + Arc::clone(&exif_dao), + Arc::clone(&preview_dao), + None, + playlist_manager.clone(), + preview_generator.clone(), + ); + } else { + debug!( + "Running quick scan for library '{}' (checking files modified in last 
{} seconds)", + lib.name, + quick_interval_secs + 10 + ); + let check_since = last_quick_scan + .checked_sub(Duration::from_secs(10)) + .unwrap_or(last_quick_scan); + process_new_files( + lib, + Arc::clone(&exif_dao), + Arc::clone(&preview_dao), + Some(check_since), + playlist_manager.clone(), + preview_generator.clone(), + ); + } + + // Update media counts per library (metric aggregates across all) + update_media_counts(Path::new(&lib.root_path)); } + if is_full_scan { + last_full_scan = now; + } last_quick_scan = now; scan_count += 1; - - // Update media counts - update_media_counts(&base_path); } }); } @@ -1481,7 +1662,7 @@ fn playlist_needs_generation(video_path: &Path, playlist_path: &Path) -> bool { } fn process_new_files( - base_path: &Path, + library: &libraries::Library, exif_dao: Arc>>, preview_dao: Arc>>, modified_since: Option, @@ -1491,6 +1672,7 @@ fn process_new_files( let context = opentelemetry::Context::new(); let thumbs = dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined"); let thumbnail_directory = Path::new(&thumbs); + let base_path = Path::new(&library.root_path); // Collect all image and video files, optionally filtered by modification time let files: Vec<(PathBuf, String)> = WalkDir::new(base_path) @@ -1513,11 +1695,13 @@ fn process_new_files( .filter(|entry| is_image(entry) || is_video(entry)) .filter_map(|entry| { let file_path = entry.path().to_path_buf(); + // Canonical rel_path is forward-slash regardless of OS so DB + // comparisons against the batch EXIF lookup line up. let relative_path = file_path .strip_prefix(base_path) .ok()? .to_str()? - .to_string(); + .replace('\\', "/"); Some((file_path, relative_path)) }) .collect(); @@ -1547,79 +1731,110 @@ fn process_new_files( }; let mut new_files_found = false; - let mut files_needing_exif = Vec::new(); + let mut files_needing_row = Vec::new(); - // Check each file for missing thumbnail or EXIF data + // Register every image/video file in image_exif. Rows without EXIF + // still carry library_id, rel_path, content_hash, and size_bytes so + // derivative dedup and DB-indexed sort/filter work for every file, + // not just photos with parseable EXIF. 
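// Aside: once rows carry content_hash, cross-library derivative reuse is
// a single lookup. Hypothetical helper, not in this patch, built from the
// ExifDao methods this PR adds:
fn thumbnail_already_exists_for(
    dao: &mut dyn ExifDao,
    ctx: &opentelemetry::Context,
    thumbs: &Path,
    hash: &str,
) -> bool {
    // Any row in any library sharing the hash means the bytes were seen
    // before, so the hash-keyed thumbnail can be reused as-is.
    matches!(dao.find_by_content_hash(ctx, hash), Ok(Some(_)))
        && content_hash::thumbnail_path(thumbs, hash).exists()
}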
for (file_path, relative_path) in &files { - // Check if thumbnail exists let thumb_path = thumbnail_directory.join(relative_path); let needs_thumbnail = !thumb_path.exists(); + let needs_row = !existing_exif_paths.contains_key(relative_path); - // Check if EXIF data exists (for supported files) - let needs_exif = if exif::supports_exif(file_path) { - !existing_exif_paths.contains_key(relative_path) - } else { - false - }; - - if needs_thumbnail || needs_exif { + if needs_thumbnail || needs_row { new_files_found = true; if needs_thumbnail { info!("New file detected (missing thumbnail): {}", relative_path); } - if needs_exif { - files_needing_exif.push((file_path.clone(), relative_path.clone())); + if needs_row { + files_needing_row.push((file_path.clone(), relative_path.clone())); } } } - // Process EXIF data for files that need it - if !files_needing_exif.is_empty() { + if !files_needing_row.is_empty() { info!( - "Processing EXIF data for {} files", - files_needing_exif.len() + "Registering {} new files in image_exif", + files_needing_row.len() ); - for (file_path, relative_path) in files_needing_exif { - match exif::extract_exif_from_path(&file_path) { - Ok(exif_data) => { - let timestamp = Utc::now().timestamp(); - let insert_exif = InsertImageExif { - file_path: relative_path.clone(), - camera_make: exif_data.camera_make, - camera_model: exif_data.camera_model, - lens_model: exif_data.lens_model, - width: exif_data.width, - height: exif_data.height, - orientation: exif_data.orientation, - gps_latitude: exif_data.gps_latitude.map(|v| v as f32), - gps_longitude: exif_data.gps_longitude.map(|v| v as f32), - gps_altitude: exif_data.gps_altitude.map(|v| v as f32), - focal_length: exif_data.focal_length.map(|v| v as f32), - aperture: exif_data.aperture.map(|v| v as f32), - shutter_speed: exif_data.shutter_speed, - iso: exif_data.iso, - date_taken: exif_data.date_taken, - created_time: timestamp, - last_modified: timestamp, - }; + for (file_path, relative_path) in files_needing_row { + let timestamp = Utc::now().timestamp(); - let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); - if let Err(e) = dao.store_exif(&context, insert_exif) { - error!("Failed to store EXIF data for {}: {:?}", relative_path, e); - } else { - debug!("EXIF data stored for {}", relative_path); + // Hash + size from filesystem metadata — always attempted so + // every file gets a content_hash, even when EXIF is absent. + let (content_hash, size_bytes) = match content_hash::compute(&file_path) { + Ok(id) => (Some(id.content_hash), Some(id.size_bytes)), + Err(e) => { + warn!("Failed to hash {}: {:?}", file_path.display(), e); + (None, None) + } + }; + + // EXIF is best-effort enrichment. When extraction fails (or the + // file type doesn't support EXIF) we still store a row with all + // EXIF fields NULL; the file remains visible to sort-by-date + // and tag queries via its rel_path and filesystem timestamps. 
+ let exif_fields = if exif::supports_exif(&file_path) { + match exif::extract_exif_from_path(&file_path) { + Ok(data) => Some(data), + Err(e) => { + debug!( + "No EXIF or parse error for {}: {:?}", + file_path.display(), + e + ); + None } } - Err(e) => { - debug!( - "No EXIF data or error extracting from {}: {:?}", - file_path.display(), - e - ); - } + } else { + None + }; + + let insert_exif = InsertImageExif { + library_id: library.id, + file_path: relative_path.clone(), + camera_make: exif_fields.as_ref().and_then(|e| e.camera_make.clone()), + camera_model: exif_fields.as_ref().and_then(|e| e.camera_model.clone()), + lens_model: exif_fields.as_ref().and_then(|e| e.lens_model.clone()), + width: exif_fields.as_ref().and_then(|e| e.width), + height: exif_fields.as_ref().and_then(|e| e.height), + orientation: exif_fields.as_ref().and_then(|e| e.orientation), + gps_latitude: exif_fields + .as_ref() + .and_then(|e| e.gps_latitude.map(|v| v as f32)), + gps_longitude: exif_fields + .as_ref() + .and_then(|e| e.gps_longitude.map(|v| v as f32)), + gps_altitude: exif_fields + .as_ref() + .and_then(|e| e.gps_altitude.map(|v| v as f32)), + focal_length: exif_fields + .as_ref() + .and_then(|e| e.focal_length.map(|v| v as f32)), + aperture: exif_fields + .as_ref() + .and_then(|e| e.aperture.map(|v| v as f32)), + shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()), + iso: exif_fields.as_ref().and_then(|e| e.iso), + date_taken: exif_fields.as_ref().and_then(|e| e.date_taken), + created_time: timestamp, + last_modified: timestamp, + content_hash, + size_bytes, + }; + + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + if let Err(e) = dao.store_exif(&context, insert_exif) { + error!( + "Failed to register {} in image_exif: {:?}", + relative_path, e + ); + } else { + debug!("Registered {} in image_exif", relative_path); } } } @@ -1702,7 +1917,49 @@ fn process_new_files( // Generate thumbnails for all files that need them if new_files_found { info!("Processing thumbnails for new files..."); - create_thumbnails(); + create_thumbnails(std::slice::from_ref(library)); + } + + // Reconciliation: on a full scan, prune image_exif rows whose rel_path no + // longer exists on disk for this library. Keeps the DB in parity so + // downstream DB-backed listings (e.g. recursive /photos) don't return + // phantom files. Skipped on quick scans — those only look at recently + // modified files and can't distinguish "missing" from "unchanged". 
+ if modified_since.is_none() { + let disk_paths: HashSet<String> = files.iter().map(|(_, rel)| rel.clone()).collect(); + let db_paths: Vec<String> = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_rel_paths_for_library(&context, library.id) + .unwrap_or_else(|e| { + error!( + "Reconciliation: failed to load image_exif rel_paths for lib {}: {:?}", + library.id, e + ); + Vec::new() + }) + }; + + let stale: Vec<String> = db_paths + .into_iter() + .filter(|p| !disk_paths.contains(p)) + .collect(); + + if !stale.is_empty() { + info!( + "Reconciliation: pruning {} stale image_exif rows for library '{}'", + stale.len(), + library.name + ); + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + for rel in &stale { + if let Err(e) = dao.delete_exif_by_library(&context, library.id, rel) { + warn!( + "Reconciliation: failed to delete {} (lib {}): {:?}", + rel, library.id, e + ); + } + } + } } } diff --git a/src/memories.rs b/src/memories.rs index c3754d3..875a72c 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -16,6 +16,7 @@ use walkdir::WalkDir; use crate::data::Claims; use crate::database::ExifDao; use crate::files::is_image_or_video; +use crate::libraries::Library; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; @@ -107,6 +108,9 @@ pub struct MemoriesRequest { pub span: Option, /// Client timezone offset in minutes from UTC (e.g., -480 for PST, 60 for CET) pub timezone_offset_minutes: Option<i32>, + /// Optional library filter. Accepts a library id (e.g. "1") or name + /// (e.g. "main"). When omitted, results span all libraries. + pub library: Option<String>, } #[derive(Debug, Serialize, Clone)] pub struct MemoryItem { pub path: String, pub created: Option<i64>, pub modified: Option<i64>, + /// Id of the library this memory belongs to. Allows clients to show a + /// per-item source badge in union mode. + pub library_id: i32, } #[derive(Debug, Serialize)] @@ -363,6 +370,7 @@ fn collect_exif_memories( exif_dao: &Data>>, context: &opentelemetry::Context, base_path: &str, + library_id: i32, now: NaiveDate, span_mode: MemoriesSpan, years_back: u32, @@ -371,7 +379,7 @@ ) -> Vec<(MemoryItem, NaiveDate)> { // Query database for all files with date_taken let exif_records = match exif_dao.lock() { - Ok(mut dao) => match dao.get_all_with_date_taken(context) { + Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) { Ok(records) => records, Err(e) => { warn!("Failed to query EXIF database: {:?}", e); @@ -417,6 +425,7 @@ path: file_path.clone(), created, modified, + library_id, }, file_date, )) @@ -427,6 +436,7 @@ /// Collect memories from file system scan (for files not in EXIF DB) fn collect_filesystem_memories( base_path: &str, + library_id: i32, path_excluder: &PathExcluder, skip_paths: &HashSet<PathBuf>, now: NaiveDate, @@ -478,6 +488,7 @@ path: path_relative, created, modified, + library_id, }, file_date, )) @@ -526,43 +537,60 @@ pub async fn list_memories( debug!("Now: {:?}", now); - let base = Path::new(&app_state.base_path); + // Resolve the optional library filter. Unknown values are a 400; None + // means "all libraries" — currently equivalent to the primary library + // while only one is configured.
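// Accepted parameter shapes, per the resolver and its tests:
//   ?library=1        -> Ok(Some(..))  numeric id
//   ?library=archive  -> Ok(Some(..))  name
//   absent / blank    -> Ok(None)      union across all libraries
//   ?library=nope     -> Err("unknown library name: nope"), a 400 below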
+ let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { + Ok(lib) => lib, + Err(msg) => { + warn!("Rejecting /memories request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; + // When `library` is `Some`, scope to that one library; otherwise union + // across every configured library and let the results interleave. + let libraries_to_scan: Vec<&Library> = match library { + Some(lib) => vec![lib], + None => app_state.libraries.iter().collect(), + }; - // Build the path excluder from base and env-configured exclusions - let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); + let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new(); - // Phase 1: Query EXIF database - let exif_memories = collect_exif_memories( - &exif_dao, - &span_context, - &app_state.base_path, - now, - span_mode, - years_back, - &client_timezone, - &path_excluder, - ); + for lib in &libraries_to_scan { + let base = Path::new(&lib.root_path); + let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); - // Build HashSet for deduplication - let exif_paths: HashSet<PathBuf> = exif_memories - .iter() - .map(|(item, _)| PathBuf::from(&app_state.base_path).join(&item.path)) - .collect(); + let exif_memories = collect_exif_memories( + &exif_dao, + &span_context, + &lib.root_path, + lib.id, + now, + span_mode, + years_back, + &client_timezone, + &path_excluder, + ); - // Phase 2: File system scan (skip EXIF files) - let fs_memories = collect_filesystem_memories( - &app_state.base_path, - &path_excluder, - &exif_paths, - now, - span_mode, - years_back, - &client_timezone, - ); + let exif_paths: HashSet<PathBuf> = exif_memories + .iter() + .map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path)) + .collect(); - // Phase 3: Merge and sort - let mut memories_with_dates = exif_memories; - memories_with_dates.extend(fs_memories); + let fs_memories = collect_filesystem_memories( + &lib.root_path, + lib.id, + &path_excluder, + &exif_paths, + now, + span_mode, + years_back, + &client_timezone, + ); + + memories_with_dates.extend(exif_memories); + memories_with_dates.extend(fs_memories); + } match span_mode { // Sort by absolute time for a more 'overview' @@ -795,7 +823,7 @@ // Verify timestamp is within expected range (should be around 1422489671) let timestamp = date_time.timestamp(); - assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015 + assert!((1422480000..=1422576000).contains(&timestamp)); // Jan 28-29, 2015 } #[test] @@ -813,7 +841,7 @@ // Verify timestamp is within expected range (should be around 1422489664) let timestamp = date_time.timestamp(); - assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015 + assert!((1422480000..=1422576000).contains(&timestamp)); // Jan 28-29, 2015 } #[test] @@ -1092,12 +1120,13 @@ .and_utc() .timestamp(); - let mut memories_with_dates = vec![ + let mut memories_with_dates = [ ( MemoryItem { path: "photo1.jpg".to_string(), created: Some(jan_15_2024_9am), modified: Some(jan_15_2024_9am), + library_id: 1, }, NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(), ), ( MemoryItem { path: "photo2.jpg".to_string(), created: Some(jan_15_2020_10am), modified: Some(jan_15_2020_10am), + library_id: 1, }, NaiveDate::from_ymd_opt(2020, 1, 15).unwrap(), ), ( MemoryItem { path: "photo3.jpg".to_string(), created: Some(jan_16_2021_8am), modified: Some(jan_16_2021_8am), + library_id: 1, },
NaiveDate::from_ymd_opt(2021, 1, 16).unwrap(), ), diff --git a/src/state.rs b/src/state.rs index f85a2e6..78b98ad 100644 --- a/src/state.rs +++ b/src/state.rs @@ -3,8 +3,10 @@ use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao, + connect, }; use crate::database::{PreviewDao, SqlitePreviewDao}; +use crate::libraries::{self, Library}; use crate::tags::{SqliteTagDao, TagDao}; use crate::video::actors::{ PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager, }; @@ -17,6 +19,11 @@ pub struct AppState { pub stream_manager: Arc<Addr<StreamActor>>, pub playlist_manager: Arc<Addr<VideoPlaylistManager>>, pub preview_clip_generator: Arc<Addr<PreviewClipGenerator>>, + /// All configured media libraries. Ordered by `id` ascending; the first + /// entry is the primary library. + pub libraries: Vec<Library>, + /// Legacy shim equal to `libraries[0].root_path`. Phase 2 transitional — + /// new code should go through `primary_library()`. pub base_path: String, pub thumbnail_path: String, pub video_path: String, @@ -28,10 +35,26 @@ pub struct AppState { pub insight_generator: InsightGenerator, } +impl AppState { + pub fn primary_library(&self) -> &Library { + self.libraries + .first() + .expect("AppState constructed without any libraries") + } + + pub fn library_by_id(&self, id: i32) -> Option<&Library> { + self.libraries.iter().find(|l| l.id == id) + } + + pub fn library_by_name(&self, name: &str) -> Option<&Library> { + self.libraries.iter().find(|l| l.name == name) + } +} + impl AppState { pub fn new( stream_manager: Arc<Addr<StreamActor>>, - base_path: String, + libraries_vec: Vec<Library>, thumbnail_path: String, video_path: String, gif_path: String, @@ -42,17 +65,26 @@ impl AppState { insight_generator: InsightGenerator, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, ) -> Self { + assert!( + !libraries_vec.is_empty(), + "AppState::new requires at least one library" + ); + let base_path = libraries_vec[0].root_path.clone(); let playlist_generator = PlaylistGenerator::new(); let video_playlist_manager = VideoPlaylistManager::new(video_path.clone(), playlist_generator.start()); - let preview_clip_generator = - PreviewClipGenerator::new(preview_clips_path.clone(), base_path.clone(), preview_dao); + let preview_clip_generator = PreviewClipGenerator::new( + preview_clips_path.clone(), + libraries_vec.clone(), + preview_dao, + ); Self { stream_manager, playlist_manager: Arc::new(video_playlist_manager.start()), preview_clip_generator: Arc::new(preview_clip_generator.start()), + libraries: libraries_vec, base_path, thumbnail_path, video_path, @@ -122,8 +154,16 @@ impl Default for AppState { let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> = Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); - // Load base path + // Load base path and ensure the primary library row reflects it.
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); + let mut seed_conn = connect(); + libraries::seed_or_patch_from_env(&mut seed_conn, &base_path); + let libraries_vec = libraries::load_all(&mut seed_conn); + assert!( + !libraries_vec.is_empty(), + "libraries table is empty after seed_or_patch_from_env" + ); + drop(seed_conn); // Initialize InsightGenerator with all data sources let insight_generator = InsightGenerator::new( @@ -137,7 +177,7 @@ impl Default for AppState { search_dao.clone(), tag_dao.clone(), knowledge_dao, - base_path.clone(), + libraries_vec.clone(), ); // Ensure preview clips directory exists @@ -148,7 +188,7 @@ impl Default for AppState { Self::new( Arc::new(StreamActor {}.start()), - base_path, + libraries_vec, env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"), env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"), env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"), @@ -208,6 +248,11 @@ impl AppState { // Initialize test InsightGenerator with all data sources let base_path_str = base_path.to_string_lossy().to_string(); + let test_lib = Library { + id: crate::libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path_str.clone(), + }; let insight_generator = InsightGenerator::new( ollama.clone(), sms_client.clone(), @@ -219,7 +264,7 @@ impl AppState { search_dao.clone(), tag_dao.clone(), knowledge_dao, - base_path_str.clone(), + vec![test_lib], ); // Initialize test preview DAO @@ -227,9 +272,14 @@ impl AppState { Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); // Create the AppState with the temporary paths + let test_libraries = vec![Library { + id: crate::libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path_str.clone(), + }]; AppState::new( Arc::new(StreamActor {}.start()), - base_path_str, + test_libraries, thumbnail_path.to_string_lossy().to_string(), video_path.to_string_lossy().to_string(), gif_path.to_string_lossy().to_string(), diff --git a/src/tags.rs b/src/tags.rs index 5da6d6e..b94cb3b 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -1,5 +1,8 @@ use crate::data::GetTagsRequest; +use crate::database::ExifDao; +use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer, trace_db_call}; +use crate::state::AppState; use crate::utils::normalize_path; use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema}; use actix_web::dev::{ServiceFactory, ServiceRequest}; @@ -71,15 +74,32 @@ async fn get_tags( _: Claims, http_request: HttpRequest, request: web::Query, + app_state: web::Data, tag_dao: web::Data>, + exif_dao: web::Data>>, ) -> impl Responder { let context = extract_context_from_request(&http_request); let span = global_tracer().start_with_context("get_tags", &context); let span_context = opentelemetry::Context::current_with_span(span); let normalized_path = normalize_path(&request.path); + + // Expand the query set to every rel_path that shares content with + // this file, so tags added under one library show up under the + // others when they hold the same file. Falls back to direct rel_path + // match when the file hasn't been hashed yet. 
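// Aside: the DAO side of get_rel_paths_sharing_content is not in this
// hunk. A plausible shape (an assumption) is a self-join on content_hash,
// falling back to just the input rel_path when the hash is NULL:
//
//   SELECT other.rel_path
//   FROM image_exif AS me
//   JOIN image_exif AS other ON other.content_hash = me.content_hash
//   WHERE me.library_id = ? AND me.rel_path = ?
//     AND me.content_hash IS NOT NULL;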
+ let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + let sibling_paths = { + let mut exif = exif_dao.lock().expect("Unable to get ExifDao"); + exif.get_rel_paths_sharing_content(&span_context, library.id, &normalized_path) + .unwrap_or_else(|_| vec![normalized_path.clone()]) + }; + let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao"); tag_dao - .get_tags_for_path(&span_context, &normalized_path) + .get_tags_for_paths(&span_context, &sibling_paths) .map(|tags| { span_context.span().set_status(Status::Ok); HttpResponse::Ok().json(tags) @@ -254,6 +274,7 @@ pub struct InsertTag { #[diesel(table_name = tagged_photo)] pub struct InsertTaggedPhoto { pub tag_id: i32, + #[diesel(column_name = rel_path)] pub photo_name: String, pub created_time: i64, } @@ -263,6 +284,7 @@ pub struct TaggedPhoto { #[allow(dead_code)] // Part of API contract pub id: i32, #[allow(dead_code)] // Part of API contract + #[diesel(column_name = rel_path)] pub photo_name: String, #[allow(dead_code)] // Part of API contract pub tag_id: i32, @@ -287,6 +309,14 @@ pub trait TagDao: Send + Sync { context: &opentelemetry::Context, path: &str, ) -> anyhow::Result>; + /// Union of tags for every rel_path in `paths`. Used by content-hash + /// sharing: the caller resolves all rel_paths with the same content + /// via `ExifDao::get_rel_paths_sharing_content`, then passes them here. + fn get_tags_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>; fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result; fn remove_tag( &mut self, @@ -312,12 +342,14 @@ pub trait TagDao: Send + Sync { exclude_tag_ids: Vec, context: &opentelemetry::Context, ) -> anyhow::Result>; + #[allow(dead_code)] fn update_photo_name( &mut self, old_name: &str, new_name: &str, context: &opentelemetry::Context, ) -> anyhow::Result<()>; + #[allow(dead_code)] fn get_all_photo_names( &mut self, context: &opentelemetry::Context, @@ -334,6 +366,7 @@ pub struct SqliteTagDao { } impl SqliteTagDao { + #[allow(dead_code)] pub(crate) fn new(connection: Arc>) -> Self { SqliteTagDao { connection } } @@ -368,7 +401,7 @@ impl TagDao for SqliteTagDao { .inner_join(tagged_photo::table) .group_by(tags::id) .select((count_star(), id, name, created_time)) - .filter(tagged_photo::photo_name.like(path)) + .filter(tagged_photo::rel_path.like(path)) .get_results(conn.deref_mut()) .map::, _>(|tags_with_count: Vec<(i64, i32, String, i64)>| { tags_with_count @@ -404,13 +437,39 @@ impl TagDao for SqliteTagDao { debug!("Getting Tags for path: {:?}", path); tags::table .left_join(tagged_photo::table) - .filter(tagged_photo::photo_name.eq(&path)) + .filter(tagged_photo::rel_path.eq(&path)) .select((tags::id, tags::name, tags::created_time)) .get_results::(conn.deref_mut()) .with_context(|| "Unable to get tags from Sqlite") }) } + fn get_tags_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result> { + if paths.is_empty() { + return Ok(Vec::new()); + } + let mut conn = self + .connection + .lock() + .expect("Unable to lock SqliteTagDao connection"); + trace_db_call(context, "query", "get_tags_for_paths", |span| { + span.set_attribute(KeyValue::new("path_count", paths.len() as i64)); + // DISTINCT across tag ids so two rel_paths carrying the same + // tag don't produce a duplicate entry in the response. 
+ tags::table + .inner_join(tagged_photo::table) + .filter(tagged_photo::rel_path.eq_any(paths)) + .select((tags::id, tags::name, tags::created_time)) + .distinct() + .get_results::(conn.deref_mut()) + .with_context(|| "Unable to get tags from Sqlite") + }) + } + fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result { let mut conn = self .connection @@ -474,7 +533,7 @@ impl TagDao for SqliteTagDao { diesel::delete( tagged_photo::table .filter(tagged_photo::tag_id.eq(tag.id)) - .filter(tagged_photo::photo_name.eq(path)), + .filter(tagged_photo::rel_path.eq(path)), ) .execute(conn.deref_mut()) .with_context(|| format!("Unable to delete tag: '{}'", &tag.name)) @@ -558,23 +617,23 @@ impl TagDao for SqliteTagDao { let query = sql_query(format!( r#" WITH filtered_photos AS ( - SELECT photo_name + SELECT rel_path FROM tagged_photo tp WHERE tp.tag_id IN ({}) - AND tp.photo_name NOT IN ( - SELECT photo_name + AND tp.rel_path NOT IN ( + SELECT rel_path FROM tagged_photo WHERE tag_id IN ({}) ) - GROUP BY photo_name + GROUP BY rel_path HAVING COUNT(DISTINCT tag_id) >= {} ) SELECT - fp.photo_name as file_name, + fp.rel_path as file_name, COUNT(DISTINCT tp2.tag_id) as tag_count FROM filtered_photos fp - JOIN tagged_photo tp2 ON fp.photo_name = tp2.photo_name - GROUP BY fp.photo_name"#, + JOIN tagged_photo tp2 ON fp.rel_path = tp2.rel_path + GROUP BY fp.rel_path"#, tag_placeholders, exclude_placeholders, tag_ids.len() @@ -618,21 +677,21 @@ impl TagDao for SqliteTagDao { let query = sql_query(format!( r#" WITH filtered_photos AS ( - SELECT DISTINCT photo_name + SELECT DISTINCT rel_path FROM tagged_photo tp WHERE tp.tag_id IN ({}) - AND tp.photo_name NOT IN ( - SELECT photo_name + AND tp.rel_path NOT IN ( + SELECT rel_path FROM tagged_photo WHERE tag_id IN ({}) ) ) SELECT - fp.photo_name as file_name, + fp.rel_path as file_name, COUNT(DISTINCT tp2.tag_id) as tag_count FROM filtered_photos fp - JOIN tagged_photo tp2 ON fp.photo_name = tp2.photo_name - GROUP BY fp.photo_name"#, + JOIN tagged_photo tp2 ON fp.rel_path = tp2.rel_path + GROUP BY fp.rel_path"#, tag_placeholders, exclude_placeholders )) .into_boxed(); @@ -663,8 +722,8 @@ impl TagDao for SqliteTagDao { .connection .lock() .expect("Unable to lock SqliteTagDao connection"); - diesel::update(tagged_photo.filter(photo_name.eq(old_name))) - .set(photo_name.eq(new_name)) + diesel::update(tagged_photo.filter(rel_path.eq(old_name))) + .set(rel_path.eq(new_name)) .execute(conn.deref_mut())?; Ok(()) } @@ -680,7 +739,7 @@ impl TagDao for SqliteTagDao { .lock() .expect("Unable to lock SqliteTagDao connection"); tagged_photo - .select(photo_name) + .select(rel_path) .distinct() .load(conn.deref_mut()) .with_context(|| "Unable to get photo names") @@ -714,10 +773,10 @@ impl TagDao for SqliteTagDao { let query_str = format!( r#" - SELECT photo_name, COUNT(DISTINCT tag_id) as tag_count + SELECT rel_path AS photo_name, COUNT(DISTINCT tag_id) as tag_count FROM tagged_photo - WHERE photo_name IN ({}) - GROUP BY photo_name + WHERE rel_path IN ({}) + GROUP BY rel_path "#, placeholders ); @@ -815,6 +874,25 @@ mod tests { .clone()) } + fn get_tags_for_paths( + &mut self, + _context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result> { + let tagged = self.tagged_photos.borrow(); + let mut out: Vec = Vec::new(); + for p in paths { + if let Some(tags) = tagged.get(p) { + for t in tags { + if !out.iter().any(|existing| existing.id == t.id) { + out.push(t.clone()); + } + } + } + } + Ok(out) + } + fn create_tag( &mut self, 
_context: &opentelemetry::Context, diff --git a/src/testhelpers.rs b/src/testhelpers.rs index d07699a..1536dbb 100644 --- a/src/testhelpers.rs +++ b/src/testhelpers.rs @@ -14,6 +14,12 @@ pub struct TestUserDao { pub user_map: RefCell>, } +impl Default for TestUserDao { + fn default() -> Self { + Self::new() + } +} + impl TestUserDao { pub fn new() -> Self { Self { @@ -71,6 +77,12 @@ pub struct TestPreviewDao { next_id: StdMutex, } +impl Default for TestPreviewDao { + fn default() -> Self { + Self::new() + } +} + impl TestPreviewDao { pub fn new() -> Self { Self { @@ -98,6 +110,7 @@ impl PreviewDao for TestPreviewDao { file_path_val.to_string(), VideoPreviewClip { id: *id, + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path_val.to_string(), status: status_val.to_string(), duration_seconds: None, diff --git a/src/video/actors.rs b/src/video/actors.rs index e90bbe1..284c8e3 100644 --- a/src/video/actors.rs +++ b/src/video/actors.rs @@ -1,5 +1,6 @@ use crate::database::PreviewDao; use crate::is_video; +use crate::libraries::Library; use crate::otel::global_tracer; use crate::video::ffmpeg::generate_preview_clip; use actix::prelude::*; @@ -500,23 +501,38 @@ pub struct GeneratePreviewClipMessage { pub struct PreviewClipGenerator { semaphore: Arc<Semaphore>, preview_clips_dir: String, - base_path: String, + libraries: Vec<Library>, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, } impl PreviewClipGenerator { pub fn new( preview_clips_dir: String, - base_path: String, + libraries: Vec<Library>, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, ) -> Self { PreviewClipGenerator { semaphore: Arc::new(Semaphore::new(2)), preview_clips_dir, - base_path, + libraries, preview_dao, } } + + /// Strip whichever library root actually contains `video_path`. + /// Falls back to trimming leading separators when no root matches, so we + /// never accidentally emit the absolute input path as the output path + /// (which ffmpeg rejects as "cannot edit existing files in place"). + fn relativize(&self, video_path: &str) -> String { + for lib in &self.libraries { + if let Some(stripped) = video_path.strip_prefix(&lib.root_path) { + return stripped.trim_start_matches(['/', '\\']).replace('\\', "/"); + } + } + video_path + .trim_start_matches(['/', '\\']) + .replace('\\', "/") + } } impl Actor for PreviewClipGenerator { @@ -533,9 +549,10 @@ impl Handler<GeneratePreviewClipMessage> for PreviewClipGenerator { ) -> Self::Result { let semaphore = self.semaphore.clone(); let preview_clips_dir = self.preview_clips_dir.clone(); - let base_path = self.base_path.clone(); let preview_dao = self.preview_dao.clone(); let video_path = msg.video_path; + // Resolve against whichever library actually owns this video. + let relative_path = self.relativize(&video_path); Box::pin(async move { let permit = semaphore .acquire() .await .expect("Unable to acquire preview semaphore"); - // Compute relative path (from BASE_PATH) for DB operations, consistent with EXIF convention - let relative_path = video_path - .strip_prefix(&base_path) - .unwrap_or(&video_path) - .trim_start_matches(['/', '\\']) - .to_string(); - // Update status to processing { let otel_ctx = opentelemetry::Context::current(); diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index b40b175..5ed9308 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -40,7 +40,10 @@ pub struct Ffmpeg; pub enum GifType { Overview, - OverviewVideo { duration: u32 }, + #[allow(dead_code)] + OverviewVideo { + duration: u32, + }, } impl Ffmpeg {