From ffcddbb84362e2c608231e3e14d5f6ce73ea1cad Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 15:28:30 -0400 Subject: [PATCH 01/19] feat: multi-library foundation (schema + libraries module) Adds a `libraries` registry table and threads library_id through per-instance metadata tables (image_exif, photo_insights, entity_photo_links, video_preview_clips). File-path columns renamed to rel_path to make the relative-to-root semantics explicit. Adds content_hash + size_bytes on image_exif to support future hash-keyed thumbnail/HLS dedup. Tags and favorites stay library-agnostic so they share across libraries by rel_path. Behavior is unchanged: a single primary library (id=1) is seeded from BASE_PATH on first boot; all handlers and DAOs route through it as a transitional shim until the API gains a library query param. Co-Authored-By: Claude Opus 4.7 --- .../2026-04-17-000000_multi_library/down.sql | 155 +++++++++++++ .../2026-04-17-000000_multi_library/up.sql | 216 ++++++++++++++++++ src/ai/insight_generator.rs | 3 + src/bin/migrate_exif.rs | 3 + src/database/insights_dao.rs | 23 +- src/database/knowledge_dao.rs | 13 +- src/database/mod.rs | 76 +++--- src/database/models.rs | 45 +++- src/database/preview_dao.rs | 7 +- src/database/schema.rs | 47 ++-- src/files.rs | 6 + src/lib.rs | 1 + src/libraries.rs | 159 +++++++++++++ src/main.rs | 7 + src/state.rs | 50 +++- src/tags.rs | 46 ++-- src/testhelpers.rs | 1 + 17 files changed, 750 insertions(+), 108 deletions(-) create mode 100644 migrations/2026-04-17-000000_multi_library/down.sql create mode 100644 migrations/2026-04-17-000000_multi_library/up.sql create mode 100644 src/libraries.rs diff --git a/migrations/2026-04-17-000000_multi_library/down.sql b/migrations/2026-04-17-000000_multi_library/down.sql new file mode 100644 index 0000000..9dcb5c7 --- /dev/null +++ b/migrations/2026-04-17-000000_multi_library/down.sql @@ -0,0 +1,155 @@ +-- Revert multi-library support. 
+-- Drops library_id/content_hash/size_bytes, renames rel_path back to the +-- original column names, and drops the libraries table. Rows originally +-- from non-primary libraries (id > 1) would be orphaned, so the rollback +-- keeps only rows from library_id=1. + +PRAGMA foreign_keys=OFF; + +-- tagged_photo: rel_path → photo_name. +DROP INDEX IF EXISTS idx_tagged_photo_relpath_tag; +DROP INDEX IF EXISTS idx_tagged_photo_rel_path; +ALTER TABLE tagged_photo RENAME COLUMN rel_path TO photo_name; +CREATE INDEX IF NOT EXISTS idx_tagged_photo_photo_name ON tagged_photo(photo_name); +CREATE INDEX IF NOT EXISTS idx_tagged_photo_count ON tagged_photo(photo_name, tag_id); + +-- favorites: rel_path → path. +DROP INDEX IF EXISTS idx_favorites_unique; +DROP INDEX IF EXISTS idx_favorites_rel_path; +ALTER TABLE favorites RENAME COLUMN rel_path TO path; +CREATE INDEX IF NOT EXISTS idx_favorites_path ON favorites(path); +CREATE UNIQUE INDEX IF NOT EXISTS idx_favorites_unique ON favorites(userid, path); + +-- video_preview_clips: drop library_id, rel_path → file_path. +CREATE TABLE video_preview_clips_old ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + file_path TEXT NOT NULL UNIQUE, + status TEXT NOT NULL DEFAULT 'pending', + duration_seconds REAL, + file_size_bytes INTEGER, + error_message TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +INSERT INTO video_preview_clips_old ( + id, file_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +) +SELECT + id, rel_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +FROM video_preview_clips +WHERE library_id = 1; + +DROP TABLE video_preview_clips; +ALTER TABLE video_preview_clips_old RENAME TO video_preview_clips; + +CREATE INDEX idx_preview_clips_file_path ON video_preview_clips(file_path); +CREATE INDEX idx_preview_clips_status ON video_preview_clips(status); + +-- entity_photo_links: drop library_id, rel_path → file_path. 
+CREATE TABLE entity_photo_links_old ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + entity_id INTEGER NOT NULL, + file_path TEXT NOT NULL, + role TEXT NOT NULL, + CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE, + UNIQUE(entity_id, file_path, role) +); + +INSERT INTO entity_photo_links_old (id, entity_id, file_path, role) +SELECT id, entity_id, rel_path, role +FROM entity_photo_links +WHERE library_id = 1; + +DROP TABLE entity_photo_links; +ALTER TABLE entity_photo_links_old RENAME TO entity_photo_links; + +CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id); +CREATE INDEX idx_entity_photo_links_photo ON entity_photo_links(file_path); + +-- photo_insights: drop library_id, rel_path → file_path. +CREATE TABLE photo_insights_old ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + file_path TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + generated_at BIGINT NOT NULL, + model_version TEXT NOT NULL, + is_current BOOLEAN NOT NULL DEFAULT 0, + training_messages TEXT, + approved BOOLEAN +); + +INSERT INTO photo_insights_old ( + id, file_path, title, summary, generated_at, model_version, is_current, + training_messages, approved +) +SELECT + id, rel_path, title, summary, generated_at, model_version, is_current, + training_messages, approved +FROM photo_insights +WHERE library_id = 1; + +DROP TABLE photo_insights; +ALTER TABLE photo_insights_old RENAME TO photo_insights; + +CREATE INDEX idx_photo_insights_file_path ON photo_insights(file_path); +CREATE INDEX idx_photo_insights_current ON photo_insights(file_path, is_current); + +-- image_exif: drop library_id/content_hash/size_bytes, rel_path → file_path. 
+CREATE TABLE image_exif_old ( + id INTEGER PRIMARY KEY NOT NULL, + file_path TEXT NOT NULL UNIQUE, + camera_make TEXT, + camera_model TEXT, + lens_model TEXT, + width INTEGER, + height INTEGER, + orientation INTEGER, + gps_latitude REAL, + gps_longitude REAL, + gps_altitude REAL, + focal_length REAL, + aperture REAL, + shutter_speed TEXT, + iso INTEGER, + date_taken BIGINT, + created_time BIGINT NOT NULL, + last_modified BIGINT NOT NULL +); + +INSERT INTO image_exif_old ( + id, file_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +) +SELECT + id, rel_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +FROM image_exif +WHERE library_id = 1; + +DROP TABLE image_exif; +ALTER TABLE image_exif_old RENAME TO image_exif; + +CREATE INDEX idx_image_exif_file_path ON image_exif(file_path); +CREATE INDEX idx_image_exif_camera ON image_exif(camera_make, camera_model); +CREATE INDEX idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude); +CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken); +CREATE INDEX idx_image_exif_date_path ON image_exif(date_taken DESC, file_path); + +-- Finally, drop the libraries registry. +DROP TABLE libraries; + +PRAGMA foreign_keys=ON; + +ANALYZE; diff --git a/migrations/2026-04-17-000000_multi_library/up.sql b/migrations/2026-04-17-000000_multi_library/up.sql new file mode 100644 index 0000000..7b32c31 --- /dev/null +++ b/migrations/2026-04-17-000000_multi_library/up.sql @@ -0,0 +1,216 @@ +-- Multi-library support. +-- Adds `libraries` registry table and a `library_id` column on per-instance +-- metadata tables. 
Renames `file_path` / `photo_name` to `rel_path` for +-- semantic clarity (values already stored relative to BASE_PATH). +-- Adds `content_hash` + `size_bytes` to `image_exif` to support +-- content-based dedup of thumbnails and HLS output across libraries. +-- +-- SQLite cannot alter column constraints in place, so per-instance tables +-- are recreated following the idiom established in +-- 2026-04-02-000000_photo_insights_history/up.sql. Existing row `id`s are +-- preserved so foreign keys (entity_facts.source_insight_id, etc.) remain +-- valid after migration. + +PRAGMA foreign_keys=OFF; + +-- --------------------------------------------------------------------------- +-- 1. Libraries registry. +-- Seeded with a placeholder for the primary library; AppState patches +-- `root_path` from the BASE_PATH env var on first boot. Subsequent +-- prod-to-dev DB syncs update this row via a single SQL UPDATE. +-- --------------------------------------------------------------------------- +CREATE TABLE libraries ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL UNIQUE, + root_path TEXT NOT NULL, + created_at BIGINT NOT NULL +); + +INSERT INTO libraries (id, name, root_path, created_at) +VALUES (1, 'main', 'BASE_PATH_PLACEHOLDER', strftime('%s','now')); + +-- --------------------------------------------------------------------------- +-- 2. image_exif: + library_id, file_path → rel_path, + content_hash/size_bytes. 
+-- --------------------------------------------------------------------------- +CREATE TABLE image_exif_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + + -- Camera information + camera_make TEXT, + camera_model TEXT, + lens_model TEXT, + + -- Image properties + width INTEGER, + height INTEGER, + orientation INTEGER, + + -- GPS + gps_latitude REAL, + gps_longitude REAL, + gps_altitude REAL, + + -- Capture settings + focal_length REAL, + aperture REAL, + shutter_speed TEXT, + iso INTEGER, + date_taken BIGINT, + + -- Housekeeping + created_time BIGINT NOT NULL, + last_modified BIGINT NOT NULL, + + -- Content identity (backfilled by the `backfill_hashes` binary and by the watcher for new files) + content_hash TEXT, + size_bytes BIGINT, + + UNIQUE(library_id, rel_path) +); + +INSERT INTO image_exif_new ( + id, library_id, rel_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +) +SELECT + id, 1, file_path, + camera_make, camera_model, lens_model, + width, height, orientation, + gps_latitude, gps_longitude, gps_altitude, + focal_length, aperture, shutter_speed, iso, date_taken, + created_time, last_modified +FROM image_exif; + +DROP TABLE image_exif; +ALTER TABLE image_exif_new RENAME TO image_exif; + +CREATE INDEX idx_image_exif_rel_path ON image_exif(rel_path); +CREATE INDEX idx_image_exif_camera ON image_exif(camera_make, camera_model); +CREATE INDEX idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude); +CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken); +CREATE INDEX idx_image_exif_date_path ON image_exif(date_taken DESC, rel_path); +CREATE INDEX idx_image_exif_lib_date ON image_exif(library_id, date_taken); +CREATE INDEX idx_image_exif_content_hash ON image_exif(content_hash); + +-- 
--------------------------------------------------------------------------- +-- 3. photo_insights: + library_id, file_path → rel_path. +-- Preserve `id` so entity_facts.source_insight_id FKs remain valid. +-- --------------------------------------------------------------------------- +CREATE TABLE photo_insights_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + generated_at BIGINT NOT NULL, + model_version TEXT NOT NULL, + is_current BOOLEAN NOT NULL DEFAULT 0, + training_messages TEXT, + approved BOOLEAN +); + +INSERT INTO photo_insights_new ( + id, library_id, rel_path, title, summary, generated_at, model_version, + is_current, training_messages, approved +) +SELECT + id, 1, file_path, title, summary, generated_at, model_version, + is_current, training_messages, approved +FROM photo_insights; + +DROP TABLE photo_insights; +ALTER TABLE photo_insights_new RENAME TO photo_insights; + +CREATE INDEX idx_photo_insights_rel_path ON photo_insights(rel_path); +CREATE INDEX idx_photo_insights_current ON photo_insights(library_id, rel_path, is_current); + +-- --------------------------------------------------------------------------- +-- 4. entity_photo_links: + library_id, file_path → rel_path. +-- Preserves entity FK; UNIQUE now includes library_id to allow the same +-- rel_path to link entities in multiple libraries independently. 
+-- --------------------------------------------------------------------------- +CREATE TABLE entity_photo_links_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + entity_id INTEGER NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + role TEXT NOT NULL, + CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE, + UNIQUE(entity_id, library_id, rel_path, role) +); + +INSERT INTO entity_photo_links_new (id, entity_id, library_id, rel_path, role) +SELECT id, entity_id, 1, file_path, role FROM entity_photo_links; + +DROP TABLE entity_photo_links; +ALTER TABLE entity_photo_links_new RENAME TO entity_photo_links; + +CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id); +CREATE INDEX idx_entity_photo_links_photo ON entity_photo_links(library_id, rel_path); + +-- --------------------------------------------------------------------------- +-- 5. video_preview_clips: + library_id, file_path → rel_path. 
+-- --------------------------------------------------------------------------- +CREATE TABLE video_preview_clips_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + duration_seconds REAL, + file_size_bytes INTEGER, + error_message TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(library_id, rel_path) +); + +INSERT INTO video_preview_clips_new ( + id, library_id, rel_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +) +SELECT + id, 1, file_path, status, duration_seconds, file_size_bytes, + error_message, created_at, updated_at +FROM video_preview_clips; + +DROP TABLE video_preview_clips; +ALTER TABLE video_preview_clips_new RENAME TO video_preview_clips; + +CREATE INDEX idx_preview_clips_rel_path ON video_preview_clips(rel_path); +CREATE INDEX idx_preview_clips_status ON video_preview_clips(status); + +-- --------------------------------------------------------------------------- +-- 6. favorites: path → rel_path. Library-agnostic (cross-library sharing). +-- --------------------------------------------------------------------------- +ALTER TABLE favorites RENAME COLUMN path TO rel_path; + +DROP INDEX IF EXISTS idx_favorites_path; +DROP INDEX IF EXISTS idx_favorites_unique; +CREATE INDEX idx_favorites_rel_path ON favorites(rel_path); +CREATE UNIQUE INDEX idx_favorites_unique ON favorites(userid, rel_path); + +-- --------------------------------------------------------------------------- +-- 7. tagged_photo: photo_name → rel_path. Library-agnostic. +-- Dedup first so the (rel_path, tag_id) unique index can be created safely. 
+-- --------------------------------------------------------------------------- +ALTER TABLE tagged_photo RENAME COLUMN photo_name TO rel_path; + +DELETE FROM tagged_photo +WHERE id NOT IN ( + SELECT MIN(id) FROM tagged_photo GROUP BY rel_path, tag_id +); + +DROP INDEX IF EXISTS idx_tagged_photo_photo_name; +DROP INDEX IF EXISTS idx_tagged_photo_count; +CREATE INDEX idx_tagged_photo_rel_path ON tagged_photo(rel_path); +CREATE UNIQUE INDEX idx_tagged_photo_relpath_tag ON tagged_photo(rel_path, tag_id); + +PRAGMA foreign_keys=ON; + +ANALYZE; diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 2ef503d..6f0b319 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1187,6 +1187,7 @@ impl InsightGenerator { // 11. Store in database let insight = InsertPhotoInsight { + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary, @@ -2031,6 +2032,7 @@ Return ONLY the summary, nothing else."#, // Upsert a photo link so this entity is associated with this photo let link = InsertEntityPhotoLink { entity_id: subject_entity_id, + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), role: photo_role, }; @@ -2742,6 +2744,7 @@ Return ONLY the summary, nothing else."#, // 15. 
Store insight (returns the persisted row including its new id) let insight = InsertPhotoInsight { + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary: final_content, diff --git a/src/bin/migrate_exif.rs b/src/bin/migrate_exif.rs index 3235a63..3266a89 100644 --- a/src/bin/migrate_exif.rs +++ b/src/bin/migrate_exif.rs @@ -94,6 +94,7 @@ fn main() -> anyhow::Result<()> { Ok(exif_data) => { let timestamp = Utc::now().timestamp(); let insert_exif = InsertImageExif { + library_id: image_api::libraries::PRIMARY_LIBRARY_ID, file_path: relative_path.clone(), camera_make: exif_data.camera_make, camera_model: exif_data.camera_model, @@ -114,6 +115,8 @@ fn main() -> anyhow::Result<()> { .map(|e| e.created_time) .unwrap_or(timestamp), last_modified: timestamp, + content_hash: None, + size_bytes: None, }; // Store or update in database diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs index 473bb3c..6b15717 100644 --- a/src/database/insights_dao.rs +++ b/src/database/insights_dao.rs @@ -86,10 +86,14 @@ impl InsightDao for SqliteInsightDao { let mut connection = self.connection.lock().expect("Unable to get InsightDao"); // Mark all existing insights for this file as no longer current - diesel::update(photo_insights.filter(file_path.eq(&insight.file_path))) - .set(is_current.eq(false)) - .execute(connection.deref_mut()) - .map_err(|_| anyhow::anyhow!("Update is_current error"))?; + diesel::update( + photo_insights + .filter(library_id.eq(insight.library_id)) + .filter(rel_path.eq(&insight.file_path)), + ) + .set(is_current.eq(false)) + .execute(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Update is_current error"))?; // Insert the new insight as current diesel::insert_into(photo_insights) @@ -99,7 +103,8 @@ impl InsightDao for SqliteInsightDao { // Retrieve the inserted record (is_current = true) photo_insights - .filter(file_path.eq(&insight.file_path)) + .filter(library_id.eq(insight.library_id)) 
+ .filter(rel_path.eq(&insight.file_path)) .filter(is_current.eq(true)) .first::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) @@ -118,7 +123,7 @@ impl InsightDao for SqliteInsightDao { let mut connection = self.connection.lock().expect("Unable to get InsightDao"); photo_insights - .filter(file_path.eq(path)) + .filter(rel_path.eq(path)) .filter(is_current.eq(true)) .first::(connection.deref_mut()) .optional() @@ -138,7 +143,7 @@ impl InsightDao for SqliteInsightDao { let mut connection = self.connection.lock().expect("Unable to get InsightDao"); photo_insights - .filter(file_path.eq(path)) + .filter(rel_path.eq(path)) .order(generated_at.desc()) .load::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) @@ -156,7 +161,7 @@ impl InsightDao for SqliteInsightDao { let mut connection = self.connection.lock().expect("Unable to get InsightDao"); - diesel::delete(photo_insights.filter(file_path.eq(path))) + diesel::delete(photo_insights.filter(rel_path.eq(path))) .execute(connection.deref_mut()) .map(|_| ()) .map_err(|_| anyhow::anyhow!("Delete error")) @@ -195,7 +200,7 @@ impl InsightDao for SqliteInsightDao { diesel::update( photo_insights - .filter(file_path.eq(path)) + .filter(rel_path.eq(path)) .filter(is_current.eq(true)), ) .set(approved.eq(Some(is_approved))) diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs index 05d1865..a9f75fe 100644 --- a/src/database/knowledge_dao.rs +++ b/src/database/knowledge_dao.rs @@ -550,8 +550,8 @@ impl KnowledgeDao for SqliteKnowledgeDao { // 3. 
Copy photo links to target (INSERT OR IGNORE to skip duplicates) let links_updated = diesel::sql_query( - "INSERT OR IGNORE INTO entity_photo_links (entity_id, file_path, role) \ - SELECT ?, file_path, role FROM entity_photo_links WHERE entity_id = ?", + "INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) \ + SELECT ?, library_id, rel_path, role FROM entity_photo_links WHERE entity_id = ?", ) .bind::(target_id) .bind::(source_id) @@ -781,11 +781,12 @@ impl KnowledgeDao for SqliteKnowledgeDao { ) -> Result<(), DbError> { trace_db_call(cx, "insert", "upsert_photo_link", |_span| { let mut conn = self.connection.lock().expect("KnowledgeDao lock"); - // INSERT OR IGNORE respects the UNIQUE(entity_id, file_path, role) constraint + // INSERT OR IGNORE respects the UNIQUE(entity_id, library_id, rel_path, role) constraint diesel::sql_query( - "INSERT OR IGNORE INTO entity_photo_links (entity_id, file_path, role) VALUES (?, ?, ?)" + "INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) VALUES (?, ?, ?, ?)" ) .bind::(link.entity_id) + .bind::(link.library_id) .bind::(&link.file_path) .bind::(&link.role) .execute(conn.deref_mut()) @@ -803,7 +804,7 @@ impl KnowledgeDao for SqliteKnowledgeDao { trace_db_call(cx, "delete", "delete_photo_links_for_file", |_span| { use schema::entity_photo_links::dsl::*; let mut conn = self.connection.lock().expect("KnowledgeDao lock"); - diesel::delete(entity_photo_links.filter(file_path.eq(file_path_val))) + diesel::delete(entity_photo_links.filter(rel_path.eq(file_path_val))) .execute(conn.deref_mut()) .map(|_| ()) .map_err(|e| anyhow::anyhow!("Delete error: {}", e)) @@ -820,7 +821,7 @@ impl KnowledgeDao for SqliteKnowledgeDao { use schema::entity_photo_links::dsl::*; let mut conn = self.connection.lock().expect("KnowledgeDao lock"); entity_photo_links - .filter(file_path.eq(file_path_val)) + .filter(rel_path.eq(file_path_val)) .load::(conn.deref_mut()) .map_err(|e| anyhow::anyhow!("Query 
error: {}", e)) }) diff --git a/src/database/mod.rs b/src/database/mod.rs index 78cac22..e1c1c01 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -184,7 +184,7 @@ impl FavoriteDao for SqliteFavoriteDao { let mut connection = self.connection.lock().expect("Unable to get FavoriteDao"); if favorites - .filter(userid.eq(user_id).and(path.eq(&favorite_path))) + .filter(userid.eq(user_id).and(rel_path.eq(&favorite_path))) .first::(connection.deref_mut()) .is_err() { @@ -204,7 +204,7 @@ impl FavoriteDao for SqliteFavoriteDao { use schema::favorites::dsl::*; diesel::delete(favorites) - .filter(userid.eq(user_id).and(path.eq(favorite_path))) + .filter(userid.eq(user_id).and(rel_path.eq(favorite_path))) .execute(self.connection.lock().unwrap().deref_mut()) .unwrap(); } @@ -221,8 +221,8 @@ impl FavoriteDao for SqliteFavoriteDao { fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> { use schema::favorites::dsl::*; - diesel::update(favorites.filter(path.eq(old_path))) - .set(path.eq(new_path)) + diesel::update(favorites.filter(rel_path.eq(old_path))) + .set(rel_path.eq(new_path)) .execute(self.connection.lock().unwrap().deref_mut()) .map_err(|_| DbError::new(DbErrorKind::UpdateError))?; Ok(()) @@ -232,7 +232,7 @@ impl FavoriteDao for SqliteFavoriteDao { use schema::favorites::dsl::*; favorites - .select(path) + .select(rel_path) .distinct() .load(self.connection.lock().unwrap().deref_mut()) .map_err(|_| DbError::new(DbErrorKind::QueryError)) @@ -349,7 +349,8 @@ impl ExifDao for SqliteExifDao { .map_err(|_| anyhow::anyhow!("Insert error"))?; image_exif - .filter(file_path.eq(&exif_data.file_path)) + .filter(library_id.eq(exif_data.library_id)) + .filter(rel_path.eq(&exif_data.file_path)) .first::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) @@ -372,7 +373,7 @@ impl ExifDao for SqliteExifDao { let windows_path = path.replace('/', "\\"); match image_exif - 
.filter(file_path.eq(&normalized).or(file_path.eq(&windows_path))) + .filter(rel_path.eq(&normalized).or(rel_path.eq(&windows_path))) .first::(connection.deref_mut()) { Ok(exif) => Ok(Some(exif)), @@ -393,29 +394,34 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); - diesel::update(image_exif.filter(file_path.eq(&exif_data.file_path))) - .set(( - camera_make.eq(&exif_data.camera_make), - camera_model.eq(&exif_data.camera_model), - lens_model.eq(&exif_data.lens_model), - width.eq(&exif_data.width), - height.eq(&exif_data.height), - orientation.eq(&exif_data.orientation), - gps_latitude.eq(&exif_data.gps_latitude), - gps_longitude.eq(&exif_data.gps_longitude), - gps_altitude.eq(&exif_data.gps_altitude), - focal_length.eq(&exif_data.focal_length), - aperture.eq(&exif_data.aperture), - shutter_speed.eq(&exif_data.shutter_speed), - iso.eq(&exif_data.iso), - date_taken.eq(&exif_data.date_taken), - last_modified.eq(&exif_data.last_modified), - )) - .execute(connection.deref_mut()) - .map_err(|_| anyhow::anyhow!("Update error"))?; + diesel::update( + image_exif + .filter(library_id.eq(exif_data.library_id)) + .filter(rel_path.eq(&exif_data.file_path)), + ) + .set(( + camera_make.eq(&exif_data.camera_make), + camera_model.eq(&exif_data.camera_model), + lens_model.eq(&exif_data.lens_model), + width.eq(&exif_data.width), + height.eq(&exif_data.height), + orientation.eq(&exif_data.orientation), + gps_latitude.eq(&exif_data.gps_latitude), + gps_longitude.eq(&exif_data.gps_longitude), + gps_altitude.eq(&exif_data.gps_altitude), + focal_length.eq(&exif_data.focal_length), + aperture.eq(&exif_data.aperture), + shutter_speed.eq(&exif_data.shutter_speed), + iso.eq(&exif_data.iso), + date_taken.eq(&exif_data.date_taken), + last_modified.eq(&exif_data.last_modified), + )) + .execute(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Update error"))?; image_exif - .filter(file_path.eq(&exif_data.file_path)) + 
.filter(library_id.eq(exif_data.library_id)) + .filter(rel_path.eq(&exif_data.file_path)) .first::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) @@ -426,7 +432,7 @@ impl ExifDao for SqliteExifDao { trace_db_call(context, "delete", "delete_exif", |_span| { use schema::image_exif::dsl::*; - diesel::delete(image_exif.filter(file_path.eq(path))) + diesel::delete(image_exif.filter(rel_path.eq(path))) .execute(self.connection.lock().unwrap().deref_mut()) .map(|_| ()) .map_err(|_| anyhow::anyhow!("Delete error")) @@ -444,7 +450,7 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); image_exif - .select((file_path, date_taken)) + .select((rel_path, date_taken)) .filter(date_taken.is_not_null()) .load::<(String, Option)>(connection.deref_mut()) .map(|records| { @@ -473,7 +479,7 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); image_exif - .filter(file_path.eq_any(file_paths)) + .filter(rel_path.eq_any(file_paths)) .load::(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) @@ -572,8 +578,8 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); - diesel::update(image_exif.filter(file_path.eq(old_path))) - .set(file_path.eq(new_path)) + diesel::update(image_exif.filter(rel_path.eq(old_path))) + .set(rel_path.eq(new_path)) .execute(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Update error"))?; Ok(()) @@ -591,7 +597,7 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); image_exif - .select(file_path) + .select(rel_path) .load(connection.deref_mut()) .map_err(|_| anyhow::anyhow!("Query error")) }) @@ -627,7 +633,7 @@ impl ExifDao for SqliteExifDao { // Otherwise filter by path prefix if !base_path.is_empty() && base_path != "/" { // Match base path as prefix (with wildcard) - query = 
query.filter(file_path.like(format!("{}%", base_path))); + query = query.filter(rel_path.like(format!("{}%", base_path))); span.set_attribute(KeyValue::new("path_filter_applied", true)); } else { diff --git a/src/database/models.rs b/src/database/models.rs index 237e9b4..d95876b 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -1,6 +1,6 @@ use crate::database::schema::{ - entities, entity_facts, entity_photo_links, favorites, image_exif, photo_insights, users, - video_preview_clips, + entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, photo_insights, + users, video_preview_clips, }; use serde::Serialize; @@ -23,6 +23,7 @@ pub struct User { #[diesel(table_name = favorites)] pub struct InsertFavorite<'a> { pub userid: &'a i32, + #[diesel(column_name = rel_path)] pub path: &'a str, } @@ -30,12 +31,15 @@ pub struct InsertFavorite<'a> { pub struct Favorite { pub id: i32, pub userid: i32, + #[diesel(column_name = rel_path)] pub path: String, } #[derive(Insertable)] #[diesel(table_name = image_exif)] pub struct InsertImageExif { + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub camera_make: Option, pub camera_model: Option, @@ -53,11 +57,16 @@ pub struct InsertImageExif { pub date_taken: Option, pub created_time: i64, pub last_modified: i64, + pub content_hash: Option, + pub size_bytes: Option, } +// Field order matches the post-migration column order in `image_exif`. 
#[derive(Serialize, Queryable, Clone, Debug)] pub struct ImageExif { pub id: i32, + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub camera_make: Option, pub camera_model: Option, @@ -75,11 +84,15 @@ pub struct ImageExif { pub date_taken: Option, pub created_time: i64, pub last_modified: i64, + pub content_hash: Option, + pub size_bytes: Option, } #[derive(Insertable)] #[diesel(table_name = photo_insights)] pub struct InsertPhotoInsight { + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub title: String, pub summary: String, @@ -92,6 +105,8 @@ pub struct InsertPhotoInsight { #[derive(Serialize, Queryable, Clone, Debug)] pub struct PhotoInsight { pub id: i32, + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub title: String, pub summary: String, @@ -102,6 +117,24 @@ pub struct PhotoInsight { pub approved: Option, } +// --- Libraries --- + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct LibraryRow { + pub id: i32, + pub name: String, + pub root_path: String, + pub created_at: i64, +} + +#[derive(Insertable)] +#[diesel(table_name = libraries)] +pub struct InsertLibrary<'a> { + pub name: &'a str, + pub root_path: &'a str, + pub created_at: i64, +} + // --- Knowledge memory models --- #[derive(Insertable)] @@ -162,6 +195,8 @@ pub struct EntityFact { #[diesel(table_name = entity_photo_links)] pub struct InsertEntityPhotoLink { pub entity_id: i32, + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub role: String, } @@ -170,6 +205,8 @@ pub struct InsertEntityPhotoLink { pub struct EntityPhotoLink { pub id: i32, pub entity_id: i32, + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub role: String, } @@ -177,6 +214,8 @@ pub struct EntityPhotoLink { #[derive(Insertable)] #[diesel(table_name = video_preview_clips)] pub struct InsertVideoPreviewClip { + pub library_id: i32, + #[diesel(column_name = 
rel_path)] pub file_path: String, pub status: String, pub created_at: String, @@ -186,6 +225,8 @@ pub struct InsertVideoPreviewClip { #[derive(Serialize, Queryable, Clone, Debug)] pub struct VideoPreviewClip { pub id: i32, + pub library_id: i32, + #[diesel(column_name = rel_path)] pub file_path: String, pub status: String, pub duration_seconds: Option, diff --git a/src/database/preview_dao.rs b/src/database/preview_dao.rs index fe90f4d..6098c60 100644 --- a/src/database/preview_dao.rs +++ b/src/database/preview_dao.rs @@ -84,6 +84,7 @@ impl PreviewDao for SqlitePreviewDao { diesel::insert_or_ignore_into(video_preview_clips) .values(InsertVideoPreviewClip { + library_id: 1, file_path: file_path_val.to_string(), status: status_val.to_string(), created_at: now.clone(), @@ -111,7 +112,7 @@ impl PreviewDao for SqlitePreviewDao { let mut connection = self.connection.lock().expect("Unable to get PreviewDao"); let now = chrono::Utc::now().to_rfc3339(); - diesel::update(video_preview_clips.filter(file_path.eq(file_path_val))) + diesel::update(video_preview_clips.filter(rel_path.eq(file_path_val))) .set(( status.eq(status_val), duration_seconds.eq(duration), @@ -137,7 +138,7 @@ impl PreviewDao for SqlitePreviewDao { let mut connection = self.connection.lock().expect("Unable to get PreviewDao"); match video_preview_clips - .filter(file_path.eq(file_path_val)) + .filter(rel_path.eq(file_path_val)) .first::(connection.deref_mut()) { Ok(clip) => Ok(Some(clip)), @@ -163,7 +164,7 @@ impl PreviewDao for SqlitePreviewDao { let mut connection = self.connection.lock().expect("Unable to get PreviewDao"); video_preview_clips - .filter(file_path.eq_any(file_paths)) + .filter(rel_path.eq_any(file_paths)) .load::(connection.deref_mut()) .map_err(|e| anyhow::anyhow!("Query error: {}", e)) }) diff --git a/src/database/schema.rs b/src/database/schema.rs index bddced4..3352ca6 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -64,7 +64,8 @@ diesel::table! 
{ entity_photo_links (id) { id -> Integer, entity_id -> Integer, - file_path -> Text, + library_id -> Integer, + rel_path -> Text, role -> Text, } } @@ -73,14 +74,15 @@ diesel::table! { favorites (id) { id -> Integer, userid -> Integer, - path -> Text, + rel_path -> Text, } } diesel::table! { image_exif (id) { id -> Integer, - file_path -> Text, + library_id -> Integer, + rel_path -> Text, camera_make -> Nullable, camera_model -> Nullable, lens_model -> Nullable, @@ -97,18 +99,17 @@ diesel::table! { date_taken -> Nullable, created_time -> BigInt, last_modified -> BigInt, + content_hash -> Nullable, + size_bytes -> Nullable, } } diesel::table! { - knowledge_embeddings (id) { + libraries (id) { id -> Integer, - keyword -> Text, - description -> Text, - category -> Nullable, - embedding -> Binary, + name -> Text, + root_path -> Text, created_at -> BigInt, - model_version -> Text, } } @@ -129,23 +130,11 @@ diesel::table! { } } -diesel::table! { - message_embeddings (id) { - id -> Integer, - contact -> Text, - body -> Text, - timestamp -> BigInt, - is_sent -> Bool, - embedding -> Binary, - created_at -> BigInt, - model_version -> Text, - } -} - diesel::table! { photo_insights (id) { id -> Integer, - file_path -> Text, + library_id -> Integer, + rel_path -> Text, title -> Text, summary -> Text, generated_at -> BigInt, @@ -171,7 +160,7 @@ diesel::table! { diesel::table! { tagged_photo (id) { id -> Integer, - photo_name -> Text, + rel_path -> Text, tag_id -> Integer, created_time -> BigInt, } @@ -196,7 +185,8 @@ diesel::table! { diesel::table! { video_preview_clips (id) { id -> Integer, - file_path -> Text, + library_id -> Integer, + rel_path -> Text, status -> Text, duration_seconds -> Nullable, file_size_bytes -> Nullable, @@ -208,7 +198,11 @@ diesel::table! 
{ diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); +diesel::joinable!(entity_photo_links -> libraries (library_id)); +diesel::joinable!(image_exif -> libraries (library_id)); +diesel::joinable!(photo_insights -> libraries (library_id)); diesel::joinable!(tagged_photo -> tags (tag_id)); +diesel::joinable!(video_preview_clips -> libraries (library_id)); diesel::allow_tables_to_appear_in_same_query!( calendar_events, @@ -218,9 +212,8 @@ diesel::allow_tables_to_appear_in_same_query!( entity_photo_links, favorites, image_exif, - knowledge_embeddings, + libraries, location_history, - message_embeddings, photo_insights, search_history, tagged_photo, diff --git a/src/files.rs b/src/files.rs index f3cd8fa..29a4d12 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1212,6 +1212,7 @@ mod tests { // Return a dummy ImageExif for tests Ok(crate::database::models::ImageExif { id: 1, + library_id: data.library_id, file_path: data.file_path.to_string(), camera_make: data.camera_make.map(|s| s.to_string()), camera_model: data.camera_model.map(|s| s.to_string()), @@ -1229,6 +1230,8 @@ mod tests { date_taken: data.date_taken, created_time: data.created_time, last_modified: data.last_modified, + content_hash: data.content_hash.clone(), + size_bytes: data.size_bytes, }) } @@ -1248,6 +1251,7 @@ mod tests { // Return a dummy ImageExif for tests Ok(crate::database::models::ImageExif { id: 1, + library_id: data.library_id, file_path: data.file_path.to_string(), camera_make: data.camera_make.map(|s| s.to_string()), camera_model: data.camera_model.map(|s| s.to_string()), @@ -1265,6 +1269,8 @@ mod tests { date_taken: data.date_taken, created_time: data.created_time, last_modified: data.last_modified, + content_hash: data.content_hash.clone(), + size_bytes: data.size_bytes, }) } diff --git a/src/lib.rs b/src/lib.rs index bd4f7ab..9d785fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ pub mod exif; pub 
mod file_types; pub mod files; pub mod geo; +pub mod libraries; pub mod memories; pub mod otel; pub mod parsers; diff --git a/src/libraries.rs b/src/libraries.rs new file mode 100644 index 0000000..a706507 --- /dev/null +++ b/src/libraries.rs @@ -0,0 +1,159 @@ +use chrono::Utc; +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use log::{info, warn}; +use std::path::{Path, PathBuf}; + +use crate::database::models::{InsertLibrary, LibraryRow}; +use crate::database::schema::libraries; + +/// Id of the primary library row seeded by the multi-library migration. +/// Used as the default `library_id` during the Phase 2 transitional shim, +/// before handlers/callers are library-aware. +pub const PRIMARY_LIBRARY_ID: i32 = 1; + +/// Placeholder value written into `libraries.root_path` by the migration. +/// Replaced on startup with the live `BASE_PATH` env var. +pub const ROOT_PATH_PLACEHOLDER: &str = "BASE_PATH_PLACEHOLDER"; + +/// A media library mount point: its numeric id, logical name, and absolute +/// root on disk. `rel_path` values stored in the DB are relative to this root. +#[derive(Clone, Debug, serde::Serialize)] +pub struct Library { + pub id: i32, + pub name: String, + pub root_path: String, +} + +impl Library { + /// Resolve a library-relative path into an absolute `PathBuf` under the + /// library root. Does not validate traversal — use `is_valid_full_path` + /// for untrusted input. + pub fn resolve(&self, rel_path: &str) -> PathBuf { + Path::new(&self.root_path).join(rel_path) + } + + /// Inverse of `resolve`: given an absolute path under this library's + /// root, return the root-relative portion. Returns `None` if the path + /// is not under the library. 
+ pub fn strip_root(&self, abs_path: &Path) -> Option { + abs_path + .strip_prefix(&self.root_path) + .ok() + .map(|p| p.to_string_lossy().replace('\\', "/")) + } +} + +impl From for Library { + fn from(row: LibraryRow) -> Self { + Library { + id: row.id, + name: row.name, + root_path: row.root_path, + } + } +} + +/// Load all library rows from the database into `Library` values. +pub fn load_all(conn: &mut SqliteConnection) -> Vec { + libraries::table + .order(libraries::id.asc()) + .load::(conn) + .unwrap_or_else(|e| { + warn!("Failed to load libraries table: {:?}", e); + Vec::new() + }) + .into_iter() + .map(Library::from) + .collect() +} + +/// Ensure at least one library exists and that the seeded placeholder row is +/// patched with the live `BASE_PATH`. Safe to call on every startup; it only +/// writes when the placeholder is still present. +pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { + // Check whether the primary row still carries the placeholder from the + // migration. If so, replace it with the live BASE_PATH. + let placeholder_count: i64 = libraries::table + .filter(libraries::root_path.eq(ROOT_PATH_PLACEHOLDER)) + .count() + .get_result(conn) + .unwrap_or(0); + + if placeholder_count > 0 { + diesel::update(libraries::table.filter(libraries::root_path.eq(ROOT_PATH_PLACEHOLDER))) + .set(libraries::root_path.eq(base_path)) + .execute(conn) + .map(|rows| { + info!( + "Patched {} library row(s) with BASE_PATH='{}'", + rows, base_path + ); + }) + .unwrap_or_else(|e| warn!("Failed to patch library root_path: {:?}", e)); + return; + } + + // If no rows exist at all (e.g. table created outside the seeded migration), + // insert a primary library pointing at BASE_PATH. 
+ let total: i64 = libraries::table + .count() + .get_result(conn) + .unwrap_or(0); + if total == 0 { + let now = Utc::now().timestamp(); + let result = diesel::insert_into(libraries::table) + .values(InsertLibrary { + name: "main", + root_path: base_path, + created_at: now, + }) + .execute(conn); + match result { + Ok(_) => info!("Seeded primary library 'main' with BASE_PATH='{}'", base_path), + Err(e) => warn!("Failed to seed primary library: {:?}", e), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::database::test::in_memory_db_connection; + + #[test] + fn seed_patches_placeholder() { + let mut conn = in_memory_db_connection(); + // Migration seeds one row with the placeholder. + seed_or_patch_from_env(&mut conn, "/tmp/media"); + let libs = load_all(&mut conn); + assert_eq!(libs.len(), 1); + assert_eq!(libs[0].id, 1); + assert_eq!(libs[0].name, "main"); + assert_eq!(libs[0].root_path, "/tmp/media"); + } + + #[test] + fn seed_is_idempotent() { + let mut conn = in_memory_db_connection(); + seed_or_patch_from_env(&mut conn, "/tmp/media"); + seed_or_patch_from_env(&mut conn, "/tmp/other"); + // Second call should not overwrite an already-patched row. 
+ let libs = load_all(&mut conn); + assert_eq!(libs.len(), 1); + assert_eq!(libs[0].root_path, "/tmp/media"); + } + + #[test] + fn library_strip_root() { + let lib = Library { + id: 1, + name: "main".into(), + root_path: "/tmp/media".into(), + }; + let rel = lib.strip_root(Path::new("/tmp/media/2024/photo.jpg")); + assert_eq!(rel.as_deref(), Some("2024/photo.jpg")); + let outside = lib.strip_root(Path::new("/etc/passwd")); + assert!(outside.is_none()); + } +} diff --git a/src/main.rs b/src/main.rs index 8a95d2d..c440cc1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,6 +62,7 @@ mod exif; mod file_types; mod files; mod geo; +mod libraries; mod state; mod tags; mod utils; @@ -391,6 +392,7 @@ async fn upload_image( Ok(exif_data) => { let timestamp = Utc::now().timestamp(); let insert_exif = InsertImageExif { + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: relative_path.clone(), camera_make: exif_data.camera_make, camera_model: exif_data.camera_model, @@ -408,6 +410,8 @@ async fn upload_image( date_taken: exif_data.date_taken, created_time: timestamp, last_modified: timestamp, + content_hash: None, + size_bytes: None, }; if let Ok(mut dao) = exif_dao.lock() { @@ -1587,6 +1591,7 @@ fn process_new_files( Ok(exif_data) => { let timestamp = Utc::now().timestamp(); let insert_exif = InsertImageExif { + library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: relative_path.clone(), camera_make: exif_data.camera_make, camera_model: exif_data.camera_model, @@ -1604,6 +1609,8 @@ fn process_new_files( date_taken: exif_data.date_taken, created_time: timestamp, last_modified: timestamp, + content_hash: None, + size_bytes: None, }; let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); diff --git a/src/state.rs b/src/state.rs index f85a2e6..8f0dc69 100644 --- a/src/state.rs +++ b/src/state.rs @@ -3,8 +3,10 @@ use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao, SearchHistoryDao, 
SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao, + connect, }; use crate::database::{PreviewDao, SqlitePreviewDao}; +use crate::libraries::{self, Library}; use crate::tags::{SqliteTagDao, TagDao}; use crate::video::actors::{ PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager, @@ -17,6 +19,11 @@ pub struct AppState { pub stream_manager: Arc>, pub playlist_manager: Arc>, pub preview_clip_generator: Arc>, + /// All configured media libraries. Ordered by `id` ascending; the first + /// entry is the primary library. + pub libraries: Vec, + /// Legacy shim equal to `libraries[0].root_path`. Phase 2 transitional — + /// new code should go through `primary_library()`. pub base_path: String, pub thumbnail_path: String, pub video_path: String, @@ -28,10 +35,26 @@ pub struct AppState { pub insight_generator: InsightGenerator, } +impl AppState { + pub fn primary_library(&self) -> &Library { + self.libraries + .first() + .expect("AppState constructed without any libraries") + } + + pub fn library_by_id(&self, id: i32) -> Option<&Library> { + self.libraries.iter().find(|l| l.id == id) + } + + pub fn library_by_name(&self, name: &str) -> Option<&Library> { + self.libraries.iter().find(|l| l.name == name) + } +} + impl AppState { pub fn new( stream_manager: Arc>, - base_path: String, + libraries_vec: Vec, thumbnail_path: String, video_path: String, gif_path: String, @@ -42,6 +65,11 @@ impl AppState { insight_generator: InsightGenerator, preview_dao: Arc>>, ) -> Self { + assert!( + !libraries_vec.is_empty(), + "AppState::new requires at least one library" + ); + let base_path = libraries_vec[0].root_path.clone(); let playlist_generator = PlaylistGenerator::new(); let video_playlist_manager = VideoPlaylistManager::new(video_path.clone(), playlist_generator.start()); @@ -53,6 +81,7 @@ impl AppState { stream_manager, playlist_manager: 
Arc::new(video_playlist_manager.start()), preview_clip_generator: Arc::new(preview_clip_generator.start()), + libraries: libraries_vec, base_path, thumbnail_path, video_path, @@ -122,8 +151,16 @@ impl Default for AppState { let knowledge_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); - // Load base path + // Load base path and ensure the primary library row reflects it. let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); + let mut seed_conn = connect(); + libraries::seed_or_patch_from_env(&mut seed_conn, &base_path); + let libraries_vec = libraries::load_all(&mut seed_conn); + assert!( + !libraries_vec.is_empty(), + "libraries table is empty after seed_or_patch_from_env" + ); + drop(seed_conn); // Initialize InsightGenerator with all data sources let insight_generator = InsightGenerator::new( @@ -148,7 +185,7 @@ impl Default for AppState { Self::new( Arc::new(StreamActor {}.start()), - base_path, + libraries_vec, env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"), env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"), env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"), @@ -227,9 +264,14 @@ impl AppState { Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); // Create the AppState with the temporary paths + let test_libraries = vec![Library { + id: crate::libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path_str.clone(), + }]; AppState::new( Arc::new(StreamActor {}.start()), - base_path_str, + test_libraries, thumbnail_path.to_string_lossy().to_string(), video_path.to_string_lossy().to_string(), gif_path.to_string_lossy().to_string(), diff --git a/src/tags.rs b/src/tags.rs index 5da6d6e..2834b62 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -254,6 +254,7 @@ pub struct InsertTag { #[diesel(table_name = tagged_photo)] pub struct InsertTaggedPhoto { pub tag_id: i32, + #[diesel(column_name = rel_path)] pub photo_name: String, pub 
created_time: i64, } @@ -263,6 +264,7 @@ pub struct TaggedPhoto { #[allow(dead_code)] // Part of API contract pub id: i32, #[allow(dead_code)] // Part of API contract + #[diesel(column_name = rel_path)] pub photo_name: String, #[allow(dead_code)] // Part of API contract pub tag_id: i32, @@ -368,7 +370,7 @@ impl TagDao for SqliteTagDao { .inner_join(tagged_photo::table) .group_by(tags::id) .select((count_star(), id, name, created_time)) - .filter(tagged_photo::photo_name.like(path)) + .filter(tagged_photo::rel_path.like(path)) .get_results(conn.deref_mut()) .map::, _>(|tags_with_count: Vec<(i64, i32, String, i64)>| { tags_with_count @@ -404,7 +406,7 @@ impl TagDao for SqliteTagDao { debug!("Getting Tags for path: {:?}", path); tags::table .left_join(tagged_photo::table) - .filter(tagged_photo::photo_name.eq(&path)) + .filter(tagged_photo::rel_path.eq(&path)) .select((tags::id, tags::name, tags::created_time)) .get_results::(conn.deref_mut()) .with_context(|| "Unable to get tags from Sqlite") @@ -474,7 +476,7 @@ impl TagDao for SqliteTagDao { diesel::delete( tagged_photo::table .filter(tagged_photo::tag_id.eq(tag.id)) - .filter(tagged_photo::photo_name.eq(path)), + .filter(tagged_photo::rel_path.eq(path)), ) .execute(conn.deref_mut()) .with_context(|| format!("Unable to delete tag: '{}'", &tag.name)) @@ -558,23 +560,23 @@ impl TagDao for SqliteTagDao { let query = sql_query(format!( r#" WITH filtered_photos AS ( - SELECT photo_name + SELECT rel_path FROM tagged_photo tp WHERE tp.tag_id IN ({}) - AND tp.photo_name NOT IN ( - SELECT photo_name + AND tp.rel_path NOT IN ( + SELECT rel_path FROM tagged_photo WHERE tag_id IN ({}) ) - GROUP BY photo_name + GROUP BY rel_path HAVING COUNT(DISTINCT tag_id) >= {} ) SELECT - fp.photo_name as file_name, + fp.rel_path as file_name, COUNT(DISTINCT tp2.tag_id) as tag_count FROM filtered_photos fp - JOIN tagged_photo tp2 ON fp.photo_name = tp2.photo_name - GROUP BY fp.photo_name"#, + JOIN tagged_photo tp2 ON fp.rel_path = 
tp2.rel_path + GROUP BY fp.rel_path"#, tag_placeholders, exclude_placeholders, tag_ids.len() @@ -618,21 +620,21 @@ impl TagDao for SqliteTagDao { let query = sql_query(format!( r#" WITH filtered_photos AS ( - SELECT DISTINCT photo_name + SELECT DISTINCT rel_path FROM tagged_photo tp WHERE tp.tag_id IN ({}) - AND tp.photo_name NOT IN ( - SELECT photo_name + AND tp.rel_path NOT IN ( + SELECT rel_path FROM tagged_photo WHERE tag_id IN ({}) ) ) SELECT - fp.photo_name as file_name, + fp.rel_path as file_name, COUNT(DISTINCT tp2.tag_id) as tag_count FROM filtered_photos fp - JOIN tagged_photo tp2 ON fp.photo_name = tp2.photo_name - GROUP BY fp.photo_name"#, + JOIN tagged_photo tp2 ON fp.rel_path = tp2.rel_path + GROUP BY fp.rel_path"#, tag_placeholders, exclude_placeholders )) .into_boxed(); @@ -663,8 +665,8 @@ impl TagDao for SqliteTagDao { .connection .lock() .expect("Unable to lock SqliteTagDao connection"); - diesel::update(tagged_photo.filter(photo_name.eq(old_name))) - .set(photo_name.eq(new_name)) + diesel::update(tagged_photo.filter(rel_path.eq(old_name))) + .set(rel_path.eq(new_name)) .execute(conn.deref_mut())?; Ok(()) } @@ -680,7 +682,7 @@ impl TagDao for SqliteTagDao { .lock() .expect("Unable to lock SqliteTagDao connection"); tagged_photo - .select(photo_name) + .select(rel_path) .distinct() .load(conn.deref_mut()) .with_context(|| "Unable to get photo names") @@ -714,10 +716,10 @@ impl TagDao for SqliteTagDao { let query_str = format!( r#" - SELECT photo_name, COUNT(DISTINCT tag_id) as tag_count + SELECT rel_path AS photo_name, COUNT(DISTINCT tag_id) as tag_count FROM tagged_photo - WHERE photo_name IN ({}) - GROUP BY photo_name + WHERE rel_path IN ({}) + GROUP BY rel_path "#, placeholders ); diff --git a/src/testhelpers.rs b/src/testhelpers.rs index d07699a..f4150e4 100644 --- a/src/testhelpers.rs +++ b/src/testhelpers.rs @@ -98,6 +98,7 @@ impl PreviewDao for TestPreviewDao { file_path_val.to_string(), VideoPreviewClip { id: *id, + library_id: 
crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path_val.to_string(), status: status_val.to_string(), duration_seconds: None, -- 2.49.1 From 48e5de6eab23ab2b77f107176012529beefa8474 Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 15:35:14 -0400 Subject: [PATCH 02/19] feat: add GET /libraries and library query param plumbing New `/libraries` endpoint returns configured libraries so clients can discover them. `FilesRequest` and `MemoriesRequest` gain an optional `library` param (accepts name or numeric id). Unknown values are rejected with 400; absent values fall back to the primary library. `/memories` now scopes its filesystem walk + EXIF query to the resolved library. `MemoryItem` carries `library_id` so union-mode clients can render a per-item source badge. Behavior is unchanged in single-library mode: omitting `library` still returns results from the primary library, which is the only one configured until a second row is added to the libraries table. Co-Authored-By: Claude Opus 4.7 --- src/data/mod.rs | 4 ++++ src/files.rs | 16 ++++++++++++++++ src/libraries.rs | 40 ++++++++++++++++++++++++++++++++++++++++ src/main.rs | 1 + src/memories.rs | 35 +++++++++++++++++++++++++++++++---- 5 files changed, 92 insertions(+), 4 deletions(-) diff --git a/src/data/mod.rs b/src/data/mod.rs index 6935819..2aedbae 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -155,6 +155,10 @@ pub struct FilesRequest { // Pagination parameters (optional - backward compatible) pub limit: Option, pub offset: Option, + + /// Optional library filter. Accepts a library id (e.g. "1") or name + /// (e.g. "main"). When omitted, results come from the primary library. 
+ pub library: Option, } #[derive(Copy, Clone, Deserialize, PartialEq, Debug)] diff --git a/src/files.rs b/src/files.rs index 29a4d12..224c801 100644 --- a/src/files.rs +++ b/src/files.rs @@ -235,8 +235,24 @@ pub async fn list_photos( ) .to_string(), ), + KeyValue::new("library", req.library.clone().unwrap_or_default()), ]); + // Resolve the optional library filter. Unknown values return 400. + // For Phase 3 the filesystem walk still operates against a single + // library's root; Phase 4 introduces multi-root union scanning. + let library = match crate::libraries::resolve_library_param( + &app_state, + req.library.as_deref(), + ) { + Ok(lib) => lib, + Err(msg) => { + log::warn!("Rejecting /photos request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; + let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); + let span_context = opentelemetry::Context::current_with_span(span); // Check if EXIF filtering is requested diff --git a/src/libraries.rs b/src/libraries.rs index a706507..22a4214 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -1,11 +1,14 @@ +use actix_web::{HttpResponse, Responder, get, web::Data}; use chrono::Utc; use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use log::{info, warn}; use std::path::{Path, PathBuf}; +use crate::data::Claims; use crate::database::models::{InsertLibrary, LibraryRow}; use crate::database::schema::libraries; +use crate::state::AppState; /// Id of the primary library row seeded by the multi-library migration. /// Used as the default `library_id` during the Phase 2 transitional shim, @@ -116,6 +119,43 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { } } +/// Resolve a library request parameter (accepts numeric id as string or name) +/// against the configured libraries. Returns `Ok(None)` when the param is +/// absent, meaning "span all libraries". Returns `Err` when a value is +/// provided but does not match any library. 
+pub fn resolve_library_param<'a>( + state: &'a AppState, + param: Option<&str>, +) -> Result, String> { + let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else { + return Ok(None); + }; + + if let Ok(id) = raw.parse::() { + return state + .library_by_id(id) + .map(Some) + .ok_or_else(|| format!("unknown library id: {}", id)); + } + + state + .library_by_name(raw) + .map(Some) + .ok_or_else(|| format!("unknown library name: {}", raw)) +} + +#[derive(serde::Serialize)] +pub struct LibrariesResponse { + pub libraries: Vec, +} + +#[get("/libraries")] +pub async fn list_libraries(_claims: Claims, app_state: Data) -> impl Responder { + HttpResponse::Ok().json(LibrariesResponse { + libraries: app_state.libraries.clone(), + }) +} + +#[cfg(test)] mod tests { use super::*; diff --git a/src/main.rs b/src/main.rs index c440cc1..044d1a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1191,6 +1191,7 @@ fn main() -> std::io::Result<()> { .service(ai::get_available_models_handler) .service(ai::rate_insight_handler) .service(ai::export_training_data_handler) + .service(libraries::list_libraries) .add_feature(add_tag_services::<_, SqliteTagDao>) .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>) .app_data(app_data.clone()) diff --git a/src/memories.rs b/src/memories.rs index c3754d3..89a7028 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -107,6 +107,9 @@ pub struct MemoriesRequest { pub span: Option, /// Client timezone offset in minutes from UTC (e.g., -480 for PST, 60 for CET) pub timezone_offset_minutes: Option, + /// Optional library filter. Accepts a library id (e.g. "1") or name + /// (e.g. "main"). When omitted, results come from the primary library. + pub library: Option, } #[derive(Debug, Serialize, Clone)] pub struct MemoryItem { pub path: String, pub created: Option, pub modified: Option, + /// Id of the library this memory belongs to. Allows clients to show a + /// per-item source badge in union mode. 
+ pub library_id: i32, } #[derive(Debug, Serialize)] @@ -417,6 +423,7 @@ fn collect_exif_memories( path: file_path.clone(), created, modified, + library_id: crate::libraries::PRIMARY_LIBRARY_ID, }, file_date, )) @@ -478,6 +485,7 @@ fn collect_filesystem_memories( path: path_relative, created, modified, + library_id: crate::libraries::PRIMARY_LIBRARY_ID, }, file_date, )) @@ -526,7 +534,23 @@ pub async fn list_memories( debug!("Now: {:?}", now); - let base = Path::new(&app_state.base_path); + // Resolve the optional library filter. Unknown values are a 400; None + // means "all libraries" — currently equivalent to the primary library + // while only one is configured. + let library = match crate::libraries::resolve_library_param( + &app_state, + q.library.as_deref(), + ) { + Ok(lib) => lib, + Err(msg) => { + warn!("Rejecting /memories request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; + // For Phase 3 the walker still operates against a single library's root. + // Multi-library union support for the filesystem walk comes in Phase 4. 
+ let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); + let base = Path::new(&scoped_library.root_path); // Build the path excluder from base and env-configured exclusions let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); @@ -535,7 +559,7 @@ pub async fn list_memories( let exif_memories = collect_exif_memories( &exif_dao, &span_context, - &app_state.base_path, + &scoped_library.root_path, now, span_mode, years_back, @@ -546,12 +570,12 @@ pub async fn list_memories( // Build HashSet for deduplication let exif_paths: HashSet = exif_memories .iter() - .map(|(item, _)| PathBuf::from(&app_state.base_path).join(&item.path)) + .map(|(item, _)| PathBuf::from(&scoped_library.root_path).join(&item.path)) .collect(); // Phase 2: File system scan (skip EXIF files) let fs_memories = collect_filesystem_memories( - &app_state.base_path, + &scoped_library.root_path, &path_excluder, &exif_paths, now, @@ -1098,6 +1122,7 @@ mod tests { path: "photo1.jpg".to_string(), created: Some(jan_15_2024_9am), modified: Some(jan_15_2024_9am), + library_id: crate::libraries::PRIMARY_LIBRARY_ID, }, NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(), ), @@ -1106,6 +1131,7 @@ mod tests { path: "photo2.jpg".to_string(), created: Some(jan_15_2020_10am), modified: Some(jan_15_2020_10am), + library_id: crate::libraries::PRIMARY_LIBRARY_ID, }, NaiveDate::from_ymd_opt(2020, 1, 15).unwrap(), ), @@ -1114,6 +1140,7 @@ mod tests { path: "photo3.jpg".to_string(), created: Some(jan_16_2021_8am), modified: Some(jan_16_2021_8am), + library_id: crate::libraries::PRIMARY_LIBRARY_ID, }, NaiveDate::from_ymd_opt(2021, 1, 16).unwrap(), ), -- 2.49.1 From ce5b337582c3157f7833f74875727c04837ecdcf Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 15:41:38 -0400 Subject: [PATCH 03/19] feat: make file watcher, thumbnails, and upload library-aware `watch_files` and `create_thumbnails` now iterate every configured library, tagging rows with the correct `library_id`. 
`process_new_files` takes a `&Library` so InsertImageExif no longer hardcodes the primary library. Upload accepts an optional `library` query param to pick a target library; omitted still defaults to primary for backwards compatibility. Hash-keyed thumbnail/HLS storage with dual-lookup fallback is deferred to Phase 5, where it's bundled with the content hash backfill that actually makes the hash-keyed paths meaningful. Until hashes are populated, the legacy mirrored layout is a no-op to change. Co-Authored-By: Claude Opus 4.7 --- src/files.rs | 5 +- src/lib.rs | 2 +- src/main.rs | 270 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 164 insertions(+), 113 deletions(-) diff --git a/src/files.rs b/src/files.rs index 224c801..3c25597 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1159,7 +1159,10 @@ impl Handler for StreamActor { let tracer = global_tracer(); let _ = tracer.start("RefreshThumbnailsMessage"); info!("Refreshing thumbnails after upload"); - create_thumbnails() + // The stub in lib.rs is a no-op; the real generation is driven by + // the file watcher tick in main.rs, which has access to the + // configured libraries. 
+ create_thumbnails(&[]) } } diff --git a/src/lib.rs b/src/lib.rs index 9d785fe..12e0bc0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,7 @@ pub use state::AppState; use std::path::Path; use walkdir::DirEntry; -pub fn create_thumbnails() { +pub fn create_thumbnails(_libs: &[libraries::Library]) { // Stub - implemented in main.rs } diff --git a/src/main.rs b/src/main.rs index 044d1a3..cec0474 100644 --- a/src/main.rs +++ b/src/main.rs @@ -290,10 +290,16 @@ async fn get_file_metadata( } } +#[derive(serde::Deserialize)] +struct UploadQuery { + library: Option, +} + #[post("/image")] async fn upload_image( _: Claims, request: HttpRequest, + query: web::Query, mut payload: mp::Multipart, app_state: Data, exif_dao: Data>>, @@ -304,6 +310,20 @@ async fn upload_image( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); + // Resolve the optional library selector. Absent → primary library + // (backwards-compatible with clients that don't yet send `library=`). 
+ let target_library = match libraries::resolve_library_param( + &app_state, + query.library.as_deref(), + ) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(msg) => { + span.set_status(Status::error(msg.clone())); + return HttpResponse::BadRequest().body(msg); + } + }; + let mut file_content: BytesMut = BytesMut::new(); let mut file_name: Option = None; let mut file_path: Option = None; @@ -333,7 +353,7 @@ async fn upload_image( } } - let path = file_path.unwrap_or_else(|| app_state.base_path.clone()); + let path = file_path.unwrap_or_else(|| target_library.root_path.clone()); if !file_content.is_empty() { if file_name.is_none() { span.set_status(Status::error("No filename provided")); @@ -341,7 +361,7 @@ async fn upload_image( } let full_path = PathBuf::from(&path).join(file_name.unwrap()); if let Some(full_path) = is_valid_full_path( - &app_state.base_path, + &target_library.root_path, &full_path.to_str().unwrap().to_string(), true, ) { @@ -382,8 +402,8 @@ async fn upload_image( // Extract and store EXIF data if file supports it if exif::supports_exif(&uploaded_path) { let relative_path = uploaded_path - .strip_prefix(&app_state.base_path) - .expect("Error stripping base path prefix") + .strip_prefix(&target_library.root_path) + .expect("Error stripping library root prefix") .to_str() .unwrap() .to_string(); @@ -392,7 +412,7 @@ async fn upload_image( Ok(exif_data) => { let timestamp = Utc::now().timestamp(); let insert_exif = InsertImageExif { - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id: target_library.id, file_path: relative_path.clone(), camera_make: exif_data.camera_make, camera_model: exif_data.camera_model, @@ -920,78 +940,87 @@ async fn delete_favorite( } } -fn create_thumbnails() { +fn create_thumbnails(libs: &[libraries::Library]) { let tracer = global_tracer(); let span = tracer.start("creating thumbnails"); let thumbs = &dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined"); let thumbnail_directory: 
&Path = Path::new(thumbs); - let images = PathBuf::from(dotenv::var("BASE_PATH").unwrap()); + for lib in libs { + info!( + "Scanning thumbnails for library '{}' at {}", + lib.name, lib.root_path + ); + let images = PathBuf::from(&lib.root_path); - WalkDir::new(&images) - .into_iter() - .collect::>>() - .into_par_iter() - .filter_map(|entry| entry.ok()) - .filter(|entry| entry.file_type().is_file()) - .filter(|entry| { - if is_video(entry) { - let relative_path = &entry.path().strip_prefix(&images).unwrap(); + WalkDir::new(&images) + .into_iter() + .collect::>>() + .into_par_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.file_type().is_file()) + .filter(|entry| { + if is_video(entry) { + let relative_path = &entry.path().strip_prefix(&images).unwrap(); + let thumb_path = Path::new(thumbnail_directory).join(relative_path); + std::fs::create_dir_all( + thumb_path + .parent() + .unwrap_or_else(|| panic!("Thumbnail {:?} has no parent?", thumb_path)), + ) + .expect("Error creating directory"); + + let mut video_span = tracer.start_with_context( + "generate_video_thumbnail", + &opentelemetry::Context::new() + .with_remote_span_context(span.span_context().clone()), + ); + video_span.set_attributes(vec![ + KeyValue::new("type", "video"), + KeyValue::new("file-name", thumb_path.display().to_string()), + KeyValue::new("library", lib.name.clone()), + ]); + + debug!("Generating video thumbnail: {:?}", thumb_path); + generate_video_thumbnail(entry.path(), &thumb_path); + video_span.end(); + false + } else { + is_image(entry) + } + }) + .filter(|entry| { + let path = entry.path(); + let relative_path = &path.strip_prefix(&images).unwrap(); let thumb_path = Path::new(thumbnail_directory).join(relative_path); - std::fs::create_dir_all( - thumb_path - .parent() - .unwrap_or_else(|| panic!("Thumbnail {:?} has no parent?", thumb_path)), - ) - .expect("Error creating directory"); - - let mut video_span = tracer.start_with_context( - "generate_video_thumbnail", - 
&opentelemetry::Context::new() - .with_remote_span_context(span.span_context().clone()), - ); - video_span.set_attributes(vec![ - KeyValue::new("type", "video"), - KeyValue::new("file-name", thumb_path.display().to_string()), - ]); - - debug!("Generating video thumbnail: {:?}", thumb_path); - generate_video_thumbnail(entry.path(), &thumb_path); - video_span.end(); - false - } else { - is_image(entry) - } - }) - .filter(|entry| { - let path = entry.path(); - let relative_path = &path.strip_prefix(&images).unwrap(); - let thumb_path = Path::new(thumbnail_directory).join(relative_path); - !thumb_path.exists() - }) - .map(|entry| (image::open(entry.path()), entry.path().to_path_buf())) - .filter(|(img, path)| { - if let Err(e) = img { - error!("Unable to open image: {:?}. {}", path, e); - } - img.is_ok() - }) - .map(|(img, path)| (img.unwrap(), path)) - .map(|(image, path)| (image.thumbnail(200, u32::MAX), path)) - .map(|(image, path)| { - let relative_path = &path.strip_prefix(&images).unwrap(); - let thumb_path = Path::new(thumbnail_directory).join(relative_path); - std::fs::create_dir_all(thumb_path.parent().unwrap()) - .expect("There was an issue creating directory"); - info!("Saving thumbnail: {:?}", thumb_path); - image.save(thumb_path).expect("Failure saving thumbnail"); - }) - .for_each(drop); + !thumb_path.exists() + }) + .map(|entry| (image::open(entry.path()), entry.path().to_path_buf())) + .filter(|(img, path)| { + if let Err(e) = img { + error!("Unable to open image: {:?}. 
{}", path, e); + } + img.is_ok() + }) + .map(|(img, path)| (img.unwrap(), path)) + .map(|(image, path)| (image.thumbnail(200, u32::MAX), path)) + .map(|(image, path)| { + let relative_path = &path.strip_prefix(&images).unwrap(); + let thumb_path = Path::new(thumbnail_directory).join(relative_path); + std::fs::create_dir_all(thumb_path.parent().unwrap()) + .expect("There was an issue creating directory"); + info!("Saving thumbnail: {:?}", thumb_path); + image.save(thumb_path).expect("Failure saving thumbnail"); + }) + .for_each(drop); + } debug!("Finished making thumbnails"); - update_media_counts(&images); + for lib in libs { + update_media_counts(Path::new(&lib.root_path)); + } } fn update_media_counts(media_dir: &Path) { @@ -1039,11 +1068,13 @@ fn main() -> std::io::Result<()> { otel::init_tracing(); } - create_thumbnails(); - // generate_video_gifs().await; - + // AppState construction loads (and seeds if needed) the libraries + // table; we use that list to drive the initial thumbnail sweep. 
let app_data = Data::new(AppState::default()); + create_thumbnails(&app_data.libraries); + // generate_video_gifs().await; + let labels = HashMap::new(); let prometheus = PrometheusMetricsBuilder::new("api") .const_labels(labels) @@ -1060,14 +1091,20 @@ fn main() -> std::io::Result<()> { .unwrap(); let app_state = app_data.clone(); - app_state.playlist_manager.do_send(ScanDirectoryMessage { - directory: app_state.base_path.clone(), - }); + for lib in &app_state.libraries { + app_state.playlist_manager.do_send(ScanDirectoryMessage { + directory: lib.root_path.clone(), + }); + } // Start file watcher with playlist manager and preview generator let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone(); let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone(); - watch_files(playlist_mgr_for_watcher, preview_gen_for_watcher); + watch_files( + app_state.libraries.clone(), + playlist_mgr_for_watcher, + preview_gen_for_watcher, + ); // Start orphaned playlist cleanup job cleanup_orphaned_playlists(); @@ -1376,13 +1413,11 @@ fn cleanup_orphaned_playlists() { } fn watch_files( + libs: Vec, playlist_manager: Addr, preview_generator: Addr, ) { std::thread::spawn(move || { - let base_str = dotenv::var("BASE_PATH").unwrap(); - let base_path = PathBuf::from(&base_str); - // Get polling intervals from environment variables // Quick scan: Check recently modified files (default: 60 seconds) let quick_interval_secs = dotenv::var("WATCH_QUICK_INTERVAL_SECONDS") @@ -1399,7 +1434,12 @@ fn watch_files( info!("Starting optimized file watcher"); info!(" Quick scan interval: {} seconds", quick_interval_secs); info!(" Full scan interval: {} seconds", full_interval_secs); - info!(" Watching directory: {}", base_str); + for lib in &libs { + info!( + " Watching library '{}' (id={}) at {}", + lib.name, lib.id, lib.root_path + ); + } // Create DAOs for tracking processed files let exif_dao = Arc::new(Mutex::new( @@ -1423,41 +1463,48 @@ fn watch_files( let 
is_full_scan = since_last_full.as_secs() >= full_interval_secs; - if is_full_scan { - info!("Running full scan (scan #{})", scan_count); - process_new_files( - &base_path, - Arc::clone(&exif_dao), - Arc::clone(&preview_dao), - None, - playlist_manager.clone(), - preview_generator.clone(), - ); - last_full_scan = now; - } else { - debug!( - "Running quick scan (checking files modified in last {} seconds)", - quick_interval_secs + 10 - ); - // Check files modified since last quick scan, plus 10 second buffer - let check_since = last_quick_scan - .checked_sub(Duration::from_secs(10)) - .unwrap_or(last_quick_scan); - process_new_files( - &base_path, - Arc::clone(&exif_dao), - Arc::clone(&preview_dao), - Some(check_since), - playlist_manager.clone(), - preview_generator.clone(), - ); + for lib in &libs { + if is_full_scan { + info!( + "Running full scan for library '{}' (scan #{})", + lib.name, scan_count + ); + process_new_files( + lib, + Arc::clone(&exif_dao), + Arc::clone(&preview_dao), + None, + playlist_manager.clone(), + preview_generator.clone(), + ); + } else { + debug!( + "Running quick scan for library '{}' (checking files modified in last {} seconds)", + lib.name, + quick_interval_secs + 10 + ); + let check_since = last_quick_scan + .checked_sub(Duration::from_secs(10)) + .unwrap_or(last_quick_scan); + process_new_files( + lib, + Arc::clone(&exif_dao), + Arc::clone(&preview_dao), + Some(check_since), + playlist_manager.clone(), + preview_generator.clone(), + ); + } + + // Update media counts per library (metric aggregates across all) + update_media_counts(Path::new(&lib.root_path)); } + if is_full_scan { + last_full_scan = now; + } last_quick_scan = now; scan_count += 1; - - // Update media counts - update_media_counts(&base_path); } }); } @@ -1486,7 +1533,7 @@ fn playlist_needs_generation(video_path: &Path, playlist_path: &Path) -> bool { } fn process_new_files( - base_path: &Path, + library: &libraries::Library, exif_dao: Arc>>, preview_dao: Arc>>, 
modified_since: Option, @@ -1496,6 +1543,7 @@ fn process_new_files( let context = opentelemetry::Context::new(); let thumbs = dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined"); let thumbnail_directory = Path::new(&thumbs); + let base_path = Path::new(&library.root_path); // Collect all image and video files, optionally filtered by modification time let files: Vec<(PathBuf, String)> = WalkDir::new(base_path) @@ -1592,7 +1640,7 @@ fn process_new_files( Ok(exif_data) => { let timestamp = Utc::now().timestamp(); let insert_exif = InsertImageExif { - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id: library.id, file_path: relative_path.clone(), camera_make: exif_data.camera_make, camera_model: exif_data.camera_model, @@ -1710,7 +1758,7 @@ fn process_new_files( // Generate thumbnails for all files that need them if new_files_found { info!("Processing thumbnails for new files..."); - create_thumbnails(); + create_thumbnails(std::slice::from_ref(library)); } } -- 2.49.1 From 0aaea91cc2cd745f5263ca7322b3784c9f110e68 Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 16:25:39 -0400 Subject: [PATCH 04/19] feat: add content_hash backfill + register every media file Adds blake3 content hashing as the basis for derivative dedup (thumbnails, HLS) across libraries. Computed inline by the watcher on ingest and by a new `backfill_hashes` binary for historical rows. Key changes: - `content_hash` and `size_bytes` are now populated on new image_exif rows; a new ExifDao surface (`get_rows_missing_hash`, `backfill_content_hash`, `find_by_content_hash`) supports backfill and future hash-keyed lookups. - The watcher now registers every image/video in image_exif, not just files with parseable EXIF. EXIF becomes optional enrichment; videos and other non-EXIF files still get a hashed row. This also makes DB-indexed sort/filter cover the full library. - `/image` thumbnail serve dual-looks up hash-keyed path first, then falls back to the legacy mirrored layout. 
- Upload flow accepts `?library=` query param + hashes uploaded files. - Store_exif logs the underlying Diesel error on insert failure so constraint violations surface instead of hiding behind a generic InsertError. - New migration normalizes rel_path separators to forward slash across all tables, deduplicating any rows that collide after normalization. Fixes spurious UNIQUE violations from mixed backslash/forward-slash paths on Windows ingest. Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 38 +++- Cargo.toml | 1 + .../down.sql | 4 + .../up.sql | 85 ++++++++ src/bin/backfill_hashes.rs | 184 ++++++++++++++++ src/bin/migrate_exif.rs | 2 +- src/content_hash.rs | 103 +++++++++ src/database/mod.rs | 107 +++++++++- src/files.rs | 27 +++ src/lib.rs | 1 + src/main.rs | 198 ++++++++++++------ 11 files changed, 681 insertions(+), 69 deletions(-) create mode 100644 migrations/2026-04-17-000100_normalize_path_separators/down.sql create mode 100644 migrations/2026-04-17-000100_normalize_path_separators/up.sql create mode 100644 src/bin/backfill_hashes.rs create mode 100644 src/content_hash.rs diff --git a/Cargo.lock b/Cargo.lock index 3e1e750..e301577 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -474,6 +474,12 @@ dependencies = [ "syn", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -572,6 +578,20 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" +[[package]] +name = "blake3" +version = "1.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures 0.3.0", +] + [[package]] 
name = "block-buffer" version = "0.10.4" @@ -766,6 +786,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "convert_case" version = "0.4.0" @@ -808,6 +834,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -1810,6 +1845,7 @@ dependencies = [ "anyhow", "base64", "bcrypt", + "blake3", "chrono", "clap", "diesel", @@ -3365,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] diff --git a/Cargo.toml b/Cargo.toml index 88b9f09..4dd9da2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,3 +55,4 @@ zerocopy = "0.8" ical = "0.11" scraper = "0.20" base64 = "0.22" +blake3 = "1.5" diff --git a/migrations/2026-04-17-000100_normalize_path_separators/down.sql b/migrations/2026-04-17-000100_normalize_path_separators/down.sql new file mode 100644 index 0000000..4f3169c --- /dev/null +++ b/migrations/2026-04-17-000100_normalize_path_separators/down.sql @@ -0,0 +1,4 @@ +-- No-op: there's no sensible way to recover which rows originally used +-- backslashes, and there's no reason to want backslashes back. The +-- deleted duplicates are also gone. 
+SELECT 1; diff --git a/migrations/2026-04-17-000100_normalize_path_separators/up.sql b/migrations/2026-04-17-000100_normalize_path_separators/up.sql new file mode 100644 index 0000000..fc3bcdf --- /dev/null +++ b/migrations/2026-04-17-000100_normalize_path_separators/up.sql @@ -0,0 +1,85 @@ +-- Normalize `rel_path` columns to forward slashes. Windows ingest +-- historically produced a mix of `\` and `/`, which broke lookups and +-- caused spurious UNIQUE-constraint violations on re-registration. +-- +-- SQLite enforces UNIQUE per-row during UPDATE, so we have to drop +-- losing duplicates BEFORE normalizing. For each table that has a +-- UNIQUE on rel_path, we delete rows whose normalized form already +-- exists in canonical (forward-slash) form — keeping the existing +-- forward-slash row as the survivor. Then a flat UPDATE finishes the +-- job for remaining backslash rows. + +-- image_exif: UNIQUE(library_id, rel_path) +DELETE FROM image_exif + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM image_exif AS other + WHERE other.library_id = image_exif.library_id + AND other.rel_path = REPLACE(image_exif.rel_path, '\', '/') + AND other.id != image_exif.id + ); +UPDATE image_exif + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- favorites: UNIQUE(userid, rel_path) +DELETE FROM favorites + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM favorites AS other + WHERE other.userid = favorites.userid + AND other.rel_path = REPLACE(favorites.rel_path, '\', '/') + AND other.id != favorites.id + ); +UPDATE favorites + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- tagged_photo: UNIQUE(rel_path, tag_id) +DELETE FROM tagged_photo + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM tagged_photo AS other + WHERE other.tag_id = tagged_photo.tag_id + AND other.rel_path = REPLACE(tagged_photo.rel_path, '\', '/') + AND other.id != tagged_photo.id + ); +UPDATE tagged_photo + SET rel_path = 
REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- entity_photo_links: UNIQUE(entity_id, library_id, rel_path, role) +DELETE FROM entity_photo_links + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM entity_photo_links AS other + WHERE other.entity_id = entity_photo_links.entity_id + AND other.library_id = entity_photo_links.library_id + AND other.role = entity_photo_links.role + AND other.rel_path = REPLACE(entity_photo_links.rel_path, '\', '/') + AND other.id != entity_photo_links.id + ); +UPDATE entity_photo_links + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- video_preview_clips: UNIQUE(library_id, rel_path) +DELETE FROM video_preview_clips + WHERE rel_path LIKE '%\%' + AND EXISTS ( + SELECT 1 FROM video_preview_clips AS other + WHERE other.library_id = video_preview_clips.library_id + AND other.rel_path = REPLACE(video_preview_clips.rel_path, '\', '/') + AND other.id != video_preview_clips.id + ); +UPDATE video_preview_clips + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +-- photo_insights has no UNIQUE on rel_path (history table), so a plain +-- normalize is safe. +UPDATE photo_insights + SET rel_path = REPLACE(rel_path, '\', '/') + WHERE rel_path LIKE '%\%'; + +ANALYZE; diff --git a/src/bin/backfill_hashes.rs b/src/bin/backfill_hashes.rs new file mode 100644 index 0000000..807c386 --- /dev/null +++ b/src/bin/backfill_hashes.rs @@ -0,0 +1,184 @@ +//! Backfill `image_exif.content_hash` + `size_bytes` for rows that were +//! ingested before hash computation was wired into the watcher. +//! +//! The watcher computes hashes for new files as they're ingested, so this +//! binary is a one-shot tool for the historical backlog. Safe to re-run; +//! only rows with NULL content_hash are processed. 
+ +use std::path::Path; +use std::sync::{Arc, Mutex}; +use std::time::Instant; + +use clap::Parser; +use rayon::prelude::*; + +use image_api::content_hash; +use image_api::database::{ExifDao, SqliteExifDao, connect}; +use image_api::libraries::{self, Library}; + +#[derive(Parser, Debug)] +#[command(name = "backfill_hashes")] +#[command(about = "Compute content_hash for image_exif rows missing one")] +struct Args { + /// Max rows to hash per batch. The process loops until no rows remain. + #[arg(long, default_value_t = 500)] + batch_size: i64, + + /// Rayon parallelism override. 0 uses the default thread pool size. + #[arg(long, default_value_t = 0)] + parallelism: usize, + + /// Dry-run: log what would be hashed without writing to the DB. + #[arg(long)] + dry_run: bool, +} + +fn main() -> anyhow::Result<()> { + env_logger::init(); + dotenv::dotenv().ok(); + + let args = Args::parse(); + if args.parallelism > 0 { + rayon::ThreadPoolBuilder::new() + .num_threads(args.parallelism) + .build_global() + .expect("Unable to configure rayon thread pool"); + } + + // Resolve libraries (patch placeholder if still unset) so we can map + // library_id back to a root_path on disk. 
+ let base_path = dotenv::var("BASE_PATH").ok(); + let mut seed_conn = connect(); + if let Some(base) = base_path.as_deref() { + libraries::seed_or_patch_from_env(&mut seed_conn, base); + } + let libs = libraries::load_all(&mut seed_conn); + drop(seed_conn); + if libs.is_empty() { + anyhow::bail!("No libraries configured; cannot backfill hashes"); + } + let libs_by_id: std::collections::HashMap = + libs.into_iter().map(|lib| (lib.id, lib)).collect(); + println!( + "Configured libraries: {}", + libs_by_id + .values() + .map(|l| format!("{} -> {}", l.name, l.root_path)) + .collect::>() + .join(", ") + ); + + let dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); + let ctx = opentelemetry::Context::new(); + + let mut total_hashed = 0u64; + let mut total_missing = 0u64; + let mut total_errors = 0u64; + let start = Instant::now(); + + loop { + let rows = { + let mut guard = dao.lock().expect("Unable to lock ExifDao"); + guard + .get_rows_missing_hash(&ctx, args.batch_size) + .map_err(|e| anyhow::anyhow!("DB error: {:?}", e))? + }; + if rows.is_empty() { + break; + } + println!("Processing batch of {} rows", rows.len()); + + // Compute hashes in parallel (I/O-bound; rayon helps on local disks, + // throttled by network on SMB mounts — use --parallelism to tune). 
+ let results: Vec<(i32, String, Option)> = rows + .into_par_iter() + .map(|(library_id, rel_path)| { + let abs = libs_by_id + .get(&library_id) + .map(|lib| Path::new(&lib.root_path).join(&rel_path)); + match abs { + Some(abs_path) if abs_path.exists() => { + match content_hash::compute(&abs_path) { + Ok(id) => (library_id, rel_path, Some(id)), + Err(e) => { + eprintln!("hash error for {}: {:?}", abs_path.display(), e); + (library_id, rel_path, None) + } + } + } + Some(_) => (library_id, rel_path, None), // file missing on disk + None => { + eprintln!("Row refers to unknown library_id {}", library_id); + (library_id, rel_path, None) + } + } + }) + .collect(); + + // Persist sequentially — SQLite writes serialize anyway. + if !args.dry_run { + let mut guard = dao.lock().expect("Unable to lock ExifDao"); + for (library_id, rel_path, ident) in &results { + match ident { + Some(id) => { + match guard.backfill_content_hash( + &ctx, + *library_id, + rel_path, + &id.content_hash, + id.size_bytes, + ) { + Ok(_) => total_hashed += 1, + Err(e) => { + eprintln!("persist error for {}: {:?}", rel_path, e); + total_errors += 1; + } + } + } + None => { + total_missing += 1; + } + } + } + } else { + for (_, rel_path, ident) in &results { + match ident { + Some(id) => { + println!( + "[dry-run] {} -> {} ({} bytes)", + rel_path, id.content_hash, id.size_bytes + ); + total_hashed += 1; + } + None => { + total_missing += 1; + } + } + } + println!( + "[dry-run] processed one batch of {}. Stopping — a real run would continue \ + until no NULL content_hash rows remain.", + results.len() + ); + break; + } + + let elapsed = start.elapsed().as_secs_f64().max(0.001); + let rate = total_hashed as f64 / elapsed; + println!( + " hashed={} missing={} errors={} ({:.1} files/sec)", + total_hashed, total_missing, total_errors, rate + ); + } + + println!(); + println!( + "Done. 
hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s", + total_hashed, + total_missing, + total_errors, + start.elapsed().as_secs_f64() + ); + Ok(()) +} diff --git a/src/bin/migrate_exif.rs b/src/bin/migrate_exif.rs index 3266a89..2f8f868 100644 --- a/src/bin/migrate_exif.rs +++ b/src/bin/migrate_exif.rs @@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> { let context = opentelemetry::Context::new(); let relative_path = match path.strip_prefix(&base) { - Ok(p) => p.to_str().unwrap().to_string(), + Ok(p) => p.to_str().unwrap().replace('\\', "/"), Err(_) => { eprintln!( "Error: Could not create relative path for {}", diff --git a/src/content_hash.rs b/src/content_hash.rs new file mode 100644 index 0000000..63be295 --- /dev/null +++ b/src/content_hash.rs @@ -0,0 +1,103 @@ +//! Content-based file identity used to dedup derivative outputs +//! (thumbnails, HLS segments) across libraries. +//! +//! Hashes are computed with blake3 streaming so that network-mounted +//! libraries don't need to load whole files into memory. The result is +//! a 64-character hex string; we shard derivative directories on the +//! first two characters to keep any single directory's fanout bounded. + +use std::fs::File; +use std::io::{self, Read}; +use std::path::{Path, PathBuf}; + +/// Size of the read buffer used when streaming a file through blake3. +/// 1 MiB trades a bit of RSS for fewer syscalls on slow network mounts. +const HASH_BUFFER_SIZE: usize = 1024 * 1024; + +/// Hash identity of a file, together with its byte length. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FileIdentity { + pub content_hash: String, + pub size_bytes: i64, +} + +/// Stream a file through blake3 and return the hex-encoded digest + size. 
+pub fn compute(path: &Path) -> io::Result { + let mut file = File::open(path)?; + let size_bytes = file.metadata()?.len() as i64; + + let mut hasher = blake3::Hasher::new(); + let mut buf = vec![0u8; HASH_BUFFER_SIZE]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + + Ok(FileIdentity { + content_hash: hasher.finalize().to_hex().to_string(), + size_bytes, + }) +} + +/// Hash-keyed thumbnail path: `//.jpg`. +/// Generation and serving both consult this first; the legacy mirrored +/// path acts as a fallback for pre-backfill rows. +pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf { + let shard = shard_prefix(hash); + thumbs_dir.join(shard).join(format!("{}.jpg", hash)) +} + +/// Hash-keyed HLS output directory: `///`. +/// The playlist lives at `playlist.m3u8` inside this directory and its +/// segments are co-located so HLS relative references Just Work. +pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf { + let shard = shard_prefix(hash); + video_dir.join(shard).join(hash) +} + +fn shard_prefix(hash: &str) -> &str { + let end = hash.char_indices().nth(2).map(|(i, _)| i).unwrap_or(hash.len()); + &hash[..end] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn identical_content_yields_identical_hash() { + let dir = tempfile::tempdir().unwrap(); + let a = dir.path().join("a.bin"); + let b = dir.path().join("b.bin"); + std::fs::write(&a, b"hello world").unwrap(); + std::fs::write(&b, b"hello world").unwrap(); + let ha = compute(&a).unwrap(); + let hb = compute(&b).unwrap(); + assert_eq!(ha, hb); + assert_eq!(ha.size_bytes, 11); + } + + #[test] + fn different_content_yields_different_hash() { + let dir = tempfile::tempdir().unwrap(); + let a = dir.path().join("a.bin"); + let b = dir.path().join("b.bin"); + std::fs::write(&a, b"aaa").unwrap(); + std::fs::write(&b, b"bbb").unwrap(); + assert_ne!(compute(&a).unwrap(), compute(&b).unwrap()); + } + + #[test] + fn 
derivative_paths_shard_by_first_two_hex() { + let thumbs = Path::new("/tmp/thumbs"); + let p = thumbnail_path(thumbs, "abcdef0123"); + assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg")); + + let video = Path::new("/tmp/video"); + let d = hls_dir(video, "1234deadbeef"); + assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef")); + } +} diff --git a/src/database/mod.rs b/src/database/mod.rs index e1c1c01..f5fe56a 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -312,6 +312,35 @@ pub trait ExifDao: Sync + Send { base_path: &str, recursive: bool, ) -> Result)>, DbError>; + + /// Return rows that still lack a `content_hash`, oldest first. Used by + /// the `backfill_hashes` binary to batch through the historical + /// backlog. Returns `(library_id, rel_path)` tuples so the caller can + /// resolve each file on disk. + fn get_rows_missing_hash( + &mut self, + context: &opentelemetry::Context, + limit: i64, + ) -> Result, DbError>; + + /// Persist the computed blake3 hash + file size for an existing row. + fn backfill_content_hash( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + hash: &str, + size_bytes: i64, + ) -> Result<(), DbError>; + + /// Return the first EXIF row with the given content hash (any library). + /// Used by thumbnail/HLS generation to detect pre-existing derivatives + /// from another library before regenerating. 
+ fn find_by_content_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError>; } pub struct SqliteExifDao { @@ -346,13 +375,21 @@ impl ExifDao for SqliteExifDao { diesel::insert_into(image_exif) .values(&exif_data) .execute(connection.deref_mut()) - .map_err(|_| anyhow::anyhow!("Insert error"))?; + .map_err(|e| { + log::warn!( + "image_exif insert failed (lib={}, rel_path={:?}): {}", + exif_data.library_id, + exif_data.file_path, + e + ); + anyhow::anyhow!("Insert error: {}", e) + })?; image_exif .filter(library_id.eq(exif_data.library_id)) .filter(rel_path.eq(&exif_data.file_path)) .first::(connection.deref_mut()) - .map_err(|_| anyhow::anyhow!("Query error")) + .map_err(|e| anyhow::anyhow!("Post-insert lookup failed: {}", e)) }) .map_err(|_| DbError::new(DbErrorKind::InsertError)) } @@ -672,4 +709,70 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn get_rows_missing_hash( + &mut self, + context: &opentelemetry::Context, + limit: i64, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rows_missing_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.is_null()) + .select((library_id, rel_path)) + .order(id.asc()) + .limit(limit) + .load::<(i32, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn backfill_content_hash( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + hash: &str, + size_val: i64, + ) -> Result<(), DbError> { + trace_db_call(context, "update", "backfill_content_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + diesel::update( + image_exif + .filter(library_id.eq(library_id_val)) + 
.filter(rel_path.eq(rel_path_val)), + ) + .set((content_hash.eq(hash), size_bytes.eq(size_val))) + .execute(connection.deref_mut()) + .map(|_| ()) + .map_err(|_| anyhow::anyhow!("Update error")) + }) + .map_err(|_| DbError::new(DbErrorKind::UpdateError)) + } + + fn find_by_content_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "find_by_content_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.eq(hash)) + .first::(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } diff --git a/src/files.rs b/src/files.rs index 3c25597..acb8fc5 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1360,6 +1360,33 @@ mod tests { ) -> Result)>, DbError> { todo!() } + + fn get_rows_missing_hash( + &mut self, + _context: &opentelemetry::Context, + _limit: i64, + ) -> Result, DbError> { + Ok(Vec::new()) + } + + fn backfill_content_hash( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + _rel_path: &str, + _hash: &str, + _size_bytes: i64, + ) -> Result<(), DbError> { + Ok(()) + } + + fn find_by_content_hash( + &mut self, + _context: &opentelemetry::Context, + _hash: &str, + ) -> Result, DbError> { + Ok(None) + } } mod api { diff --git a/src/lib.rs b/src/lib.rs index 12e0bc0..d74fc2b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ extern crate diesel; pub mod ai; pub mod auth; pub mod cleanup; +pub mod content_hash; pub mod data; pub mod database; pub mod error; diff --git a/src/main.rs b/src/main.rs index cec0474..7305bae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,6 +61,7 @@ mod error; mod exif; mod file_types; mod files; +mod content_hash; mod geo; mod libraries; mod state; @@ -96,6 +97,7 @@ async fn get_image( request: HttpRequest, req: web::Query, app_state: 
Data, + exif_dao: Data>>, ) -> impl Responder { let tracer = global_tracer(); let context = extract_context_from_request(&request); @@ -108,16 +110,45 @@ async fn get_image( let relative_path = path .strip_prefix(&app_state.base_path) .expect("Error stripping base path prefix from thumbnail"); + let relative_path_str = relative_path.to_string_lossy().replace('\\', "/"); let thumbs = &app_state.thumbnail_path; - let mut thumb_path = Path::new(&thumbs).join(relative_path); + let legacy_thumb_path = Path::new(&thumbs).join(relative_path); - // If it's a video and GIF format is requested, try to serve GIF thumbnail + // Gif thumbnails are a separate lookup (video GIF previews). + // Dual-lookup for gif is out of scope; preserve existing flow. if req.format == Some(ThumbnailFormat::Gif) && is_video_file(&path) { - thumb_path = Path::new(&app_state.gif_path).join(relative_path); - thumb_path.set_extension("gif"); + let mut gif_path = Path::new(&app_state.gif_path).join(relative_path); + gif_path.set_extension("gif"); + trace!("Gif thumbnail path: {:?}", gif_path); + if let Ok(file) = NamedFile::open(&gif_path) { + span.set_status(Status::Ok); + return file + .use_etag(true) + .use_last_modified(true) + .prefer_utf8(true) + .into_response(&request); + } } + // Resolve the hash-keyed thumbnail (if the row already has a + // content_hash) and fall back to the legacy mirrored path. 
+ let hash_thumb_path: Option = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + match dao.get_exif(&context, &relative_path_str) { + Ok(Some(row)) => row + .content_hash + .as_deref() + .map(|h| content_hash::thumbnail_path(Path::new(thumbs), h)), + _ => None, + } + }; + let thumb_path = hash_thumb_path + .as_ref() + .filter(|p| p.exists()) + .cloned() + .unwrap_or_else(|| legacy_thumb_path.clone()); + // Handle circular thumbnail request if req.shape == Some(ThumbnailShape::Circle) { match create_circular_thumbnail(&thumb_path, thumbs).await { @@ -141,8 +172,6 @@ async fn get_image( trace!("Thumbnail path: {:?}", thumb_path); if let Ok(file) = NamedFile::open(&thumb_path) { span.set_status(Status::Ok); - // The NamedFile will automatically set the correct content-type - // Enable ETag and set cache headers for thumbnails (1 day cache) return file .use_etag(true) .use_last_modified(true) @@ -406,11 +435,23 @@ async fn upload_image( .expect("Error stripping library root prefix") .to_str() .unwrap() - .to_string(); + .replace('\\', "/"); match exif::extract_exif_from_path(&uploaded_path) { Ok(exif_data) => { let timestamp = Utc::now().timestamp(); + let (content_hash, size_bytes) = + match content_hash::compute(&uploaded_path) { + Ok(id) => (Some(id.content_hash), Some(id.size_bytes)), + Err(e) => { + warn!( + "Failed to hash uploaded {}: {:?}", + uploaded_path.display(), + e + ); + (None, None) + } + }; let insert_exif = InsertImageExif { library_id: target_library.id, file_path: relative_path.clone(), @@ -430,8 +471,8 @@ async fn upload_image( date_taken: exif_data.date_taken, created_time: timestamp, last_modified: timestamp, - content_hash: None, - size_bytes: None, + content_hash, + size_bytes, }; if let Ok(mut dao) = exif_dao.lock() { @@ -1566,11 +1607,13 @@ fn process_new_files( .filter(|entry| is_image(entry) || is_video(entry)) .filter_map(|entry| { let file_path = entry.path().to_path_buf(); + // Canonical rel_path is forward-slash 
regardless of OS so DB + // comparisons against the batch EXIF lookup line up. let relative_path = file_path .strip_prefix(base_path) .ok()? .to_str()? - .to_string(); + .replace('\\', "/"); Some((file_path, relative_path)) }) .collect(); @@ -1600,82 +1643,107 @@ fn process_new_files( }; let mut new_files_found = false; - let mut files_needing_exif = Vec::new(); + let mut files_needing_row = Vec::new(); - // Check each file for missing thumbnail or EXIF data + // Register every image/video file in image_exif. Rows without EXIF + // still carry library_id, rel_path, content_hash, and size_bytes so + // derivative dedup and DB-indexed sort/filter work for every file, + // not just photos with parseable EXIF. for (file_path, relative_path) in &files { - // Check if thumbnail exists let thumb_path = thumbnail_directory.join(relative_path); let needs_thumbnail = !thumb_path.exists(); + let needs_row = !existing_exif_paths.contains_key(relative_path); - // Check if EXIF data exists (for supported files) - let needs_exif = if exif::supports_exif(file_path) { - !existing_exif_paths.contains_key(relative_path) - } else { - false - }; - - if needs_thumbnail || needs_exif { + if needs_thumbnail || needs_row { new_files_found = true; if needs_thumbnail { info!("New file detected (missing thumbnail): {}", relative_path); } - if needs_exif { - files_needing_exif.push((file_path.clone(), relative_path.clone())); + if needs_row { + files_needing_row.push((file_path.clone(), relative_path.clone())); } } } - // Process EXIF data for files that need it - if !files_needing_exif.is_empty() { + if !files_needing_row.is_empty() { info!( - "Processing EXIF data for {} files", - files_needing_exif.len() + "Registering {} new files in image_exif", + files_needing_row.len() ); - for (file_path, relative_path) in files_needing_exif { - match exif::extract_exif_from_path(&file_path) { - Ok(exif_data) => { - let timestamp = Utc::now().timestamp(); - let insert_exif = InsertImageExif { - 
library_id: library.id, - file_path: relative_path.clone(), - camera_make: exif_data.camera_make, - camera_model: exif_data.camera_model, - lens_model: exif_data.lens_model, - width: exif_data.width, - height: exif_data.height, - orientation: exif_data.orientation, - gps_latitude: exif_data.gps_latitude.map(|v| v as f32), - gps_longitude: exif_data.gps_longitude.map(|v| v as f32), - gps_altitude: exif_data.gps_altitude.map(|v| v as f32), - focal_length: exif_data.focal_length.map(|v| v as f32), - aperture: exif_data.aperture.map(|v| v as f32), - shutter_speed: exif_data.shutter_speed, - iso: exif_data.iso, - date_taken: exif_data.date_taken, - created_time: timestamp, - last_modified: timestamp, - content_hash: None, - size_bytes: None, - }; + for (file_path, relative_path) in files_needing_row { + let timestamp = Utc::now().timestamp(); - let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); - if let Err(e) = dao.store_exif(&context, insert_exif) { - error!("Failed to store EXIF data for {}: {:?}", relative_path, e); - } else { - debug!("EXIF data stored for {}", relative_path); + // Hash + size from filesystem metadata — always attempted so + // every file gets a content_hash, even when EXIF is absent. + let (content_hash, size_bytes) = match content_hash::compute(&file_path) { + Ok(id) => (Some(id.content_hash), Some(id.size_bytes)), + Err(e) => { + warn!("Failed to hash {}: {:?}", file_path.display(), e); + (None, None) + } + }; + + // EXIF is best-effort enrichment. When extraction fails (or the + // file type doesn't support EXIF) we still store a row with all + // EXIF fields NULL; the file remains visible to sort-by-date + // and tag queries via its rel_path and filesystem timestamps. 
+ let exif_fields = if exif::supports_exif(&file_path) { + match exif::extract_exif_from_path(&file_path) { + Ok(data) => Some(data), + Err(e) => { + debug!( + "No EXIF or parse error for {}: {:?}", + file_path.display(), + e + ); + None } } - Err(e) => { - debug!( - "No EXIF data or error extracting from {}: {:?}", - file_path.display(), - e - ); - } + } else { + None + }; + + let insert_exif = InsertImageExif { + library_id: library.id, + file_path: relative_path.clone(), + camera_make: exif_fields.as_ref().and_then(|e| e.camera_make.clone()), + camera_model: exif_fields.as_ref().and_then(|e| e.camera_model.clone()), + lens_model: exif_fields.as_ref().and_then(|e| e.lens_model.clone()), + width: exif_fields.as_ref().and_then(|e| e.width), + height: exif_fields.as_ref().and_then(|e| e.height), + orientation: exif_fields.as_ref().and_then(|e| e.orientation), + gps_latitude: exif_fields + .as_ref() + .and_then(|e| e.gps_latitude.map(|v| v as f32)), + gps_longitude: exif_fields + .as_ref() + .and_then(|e| e.gps_longitude.map(|v| v as f32)), + gps_altitude: exif_fields + .as_ref() + .and_then(|e| e.gps_altitude.map(|v| v as f32)), + focal_length: exif_fields + .as_ref() + .and_then(|e| e.focal_length.map(|v| v as f32)), + aperture: exif_fields + .as_ref() + .and_then(|e| e.aperture.map(|v| v as f32)), + shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()), + iso: exif_fields.as_ref().and_then(|e| e.iso), + date_taken: exif_fields.as_ref().and_then(|e| e.date_taken), + created_time: timestamp, + last_modified: timestamp, + content_hash, + size_bytes, + }; + + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + if let Err(e) = dao.store_exif(&context, insert_exif) { + error!("Failed to register {} in image_exif: {:?}", relative_path, e); + } else { + debug!("Registered {} in image_exif", relative_path); } } } -- 2.49.1 From c01a0479b7b44f7c220c61b5d6afa9c2cc59f02b Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 17:16:11 
-0400 Subject: [PATCH 05/19] fix: honor library param in /image, /photos, /memories The Phase 3 plumbing accepted `library=` but didn't actually route requests through the scoped library once it was resolved. Three concrete bugs surfaced when testing against a second mounted library: - `/image` always resolved paths against AppState.base_path (primary), so thumbnails for non-primary libraries 400'd when their rel_paths didn't exist under primary. Now resolves against the scoped library and defaults to primary when the param is omitted. - `/memories` walked the scoped library correctly but its helper functions hardcoded `library_id: PRIMARY_LIBRARY_ID` on every MemoryItem, causing clients to route thumbnails back to primary regardless of which library the memory actually came from. - `/photos` non-recursive listing delegated to a `RealFileSystem` constructed from AppState.base_path at startup, so walks always hit primary even when `library=2` was passed. The non-primary path now uses list_files against the scoped library's root; primary still goes through FileSystemAccess to preserve the existing test mock plumbing. Also adds `library` to ThumbnailRequest so the /image query param is actually parsed. Co-Authored-By: Claude Opus 4.7 --- src/data/mod.rs | 4 ++++ src/files.rs | 40 ++++++++++++++++++++++++++++------------ src/main.rs | 20 +++++++++++++++++--- src/memories.rs | 14 +++++++++----- 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/data/mod.rs b/src/data/mod.rs index 2aedbae..ff9ac25 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -192,6 +192,10 @@ pub struct ThumbnailRequest { pub(crate) format: Option, #[serde(default)] pub(crate) shape: Option, + /// Optional library filter. Accepts a library id (e.g. "1") or name + /// (e.g. "main"). When omitted, defaults to the primary library. 
+ #[serde(default)] + pub(crate) library: Option, } #[derive(Debug, Deserialize, PartialEq)] diff --git a/src/files.rs b/src/files.rs index acb8fc5..552fb58 100644 --- a/src/files.rs +++ b/src/files.rs @@ -422,7 +422,7 @@ pub async fn list_photos( sort_type, &mut exif_dao_guard, &span_context, - app_state.base_path.as_ref(), + scoped_library.root_path.as_ref(), limit, offset, ); @@ -473,11 +473,14 @@ pub async fn list_photos( .unwrap_or_else(|e| e.error_response()); } - // Use recursive or non-recursive file listing based on flag + // Use recursive or non-recursive file listing based on flag. Both + // paths must walk the *scoped* library's root; the generic + // FileSystemAccess trait (file_system.get_files_for_path) is pinned + // to AppState's base_path at construction time and doesn't know + // which library the request targets. let files_result = if search_recursively { - // For recursive search without tags, manually list files recursively is_valid_full_path( - &PathBuf::from(&app_state.base_path), + &PathBuf::from(&scoped_library.root_path), &PathBuf::from(search_path), false, ) @@ -486,8 +489,21 @@ pub async fn list_photos( list_files_recursive(&path).unwrap_or_default() }) .context("Invalid path") - } else { + } else if scoped_library.id == app_state.primary_library().id { + // Primary library: preserve the original FileSystemAccess path so + // the test-mock path (MockFileSystem) continues to work. 
file_system.get_files_for_path(search_path) + } else { + is_valid_full_path( + &PathBuf::from(&scoped_library.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for non-recursive search: {:?}", path); + list_files(&path).unwrap_or_default() + }) + .context("Invalid path") }; match files_result { @@ -510,10 +526,10 @@ pub async fn list_photos( match path.metadata() { Ok(md) => { let relative = - path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| { + path.strip_prefix(&scoped_library.root_path).unwrap_or_else(|_| { panic!( - "Unable to strip base path {} from file path {}", - &app_state.base_path.path(), + "Unable to strip library root {} from file path {}", + &scoped_library.root_path, path.display() ) }); @@ -530,11 +546,11 @@ pub async fn list_photos( // Include files without metadata if they have extensions if path.extension().is_some() { let relative = path - .strip_prefix(&app_state.base_path) + .strip_prefix(&scoped_library.root_path) .unwrap_or_else(|_| { panic!( - "Unable to strip base path {} from file path {}", - &app_state.base_path.path(), + "Unable to strip library root {} from file path {}", + &scoped_library.root_path, path.display() ) }); @@ -668,7 +684,7 @@ pub async fn list_photos( sort_type, &mut exif_dao_guard, &span_context, - app_state.base_path.as_ref(), + scoped_library.root_path.as_ref(), limit, offset, ); diff --git a/src/main.rs b/src/main.rs index 7305bae..0f3f6c8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,12 +104,26 @@ async fn get_image( let mut span = tracer.start_with_context("get_image", &context); - if let Some(path) = is_valid_full_path(&app_state.base_path, &req.path, false) { + // Resolve library from query param; default to primary so clients that + // don't yet send `library=` continue to work. 
+ let library = match libraries::resolve_library_param( + &app_state, + req.library.as_deref(), + ) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(msg) => { + span.set_status(Status::error(msg.clone())); + return HttpResponse::BadRequest().body(msg); + } + }; + + if let Some(path) = is_valid_full_path(&library.root_path, &req.path, false) { let image_size = req.size.unwrap_or(PhotoSize::Full); if image_size == PhotoSize::Thumb { let relative_path = path - .strip_prefix(&app_state.base_path) - .expect("Error stripping base path prefix from thumbnail"); + .strip_prefix(&library.root_path) + .expect("Error stripping library root prefix from thumbnail"); let relative_path_str = relative_path.to_string_lossy().replace('\\', "/"); let thumbs = &app_state.thumbnail_path; diff --git a/src/memories.rs b/src/memories.rs index 89a7028..23086f6 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -369,6 +369,7 @@ fn collect_exif_memories( exif_dao: &Data>>, context: &opentelemetry::Context, base_path: &str, + library_id: i32, now: NaiveDate, span_mode: MemoriesSpan, years_back: u32, @@ -423,7 +424,7 @@ fn collect_exif_memories( path: file_path.clone(), created, modified, - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id, }, file_date, )) @@ -434,6 +435,7 @@ fn collect_exif_memories( /// Collect memories from file system scan (for files not in EXIF DB) fn collect_filesystem_memories( base_path: &str, + library_id: i32, path_excluder: &PathExcluder, skip_paths: &HashSet, now: NaiveDate, @@ -485,7 +487,7 @@ fn collect_filesystem_memories( path: path_relative, created, modified, - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id, }, file_date, )) @@ -560,6 +562,7 @@ pub async fn list_memories( &exif_dao, &span_context, &scoped_library.root_path, + scoped_library.id, now, span_mode, years_back, @@ -576,6 +579,7 @@ pub async fn list_memories( // Phase 2: File system scan (skip EXIF files) let fs_memories = 
collect_filesystem_memories( &scoped_library.root_path, + scoped_library.id, &path_excluder, &exif_paths, now, @@ -1122,7 +1126,7 @@ mod tests { path: "photo1.jpg".to_string(), created: Some(jan_15_2024_9am), modified: Some(jan_15_2024_9am), - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id: 1, }, NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(), ), @@ -1131,7 +1135,7 @@ mod tests { path: "photo2.jpg".to_string(), created: Some(jan_15_2020_10am), modified: Some(jan_15_2020_10am), - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id: 1, }, NaiveDate::from_ymd_opt(2020, 1, 15).unwrap(), ), @@ -1140,7 +1144,7 @@ mod tests { path: "photo3.jpg".to_string(), created: Some(jan_16_2021_8am), modified: Some(jan_16_2021_8am), - library_id: crate::libraries::PRIMARY_LIBRARY_ID, + library_id: 1, }, NaiveDate::from_ymd_opt(2021, 1, 16).unwrap(), ), -- 2.49.1 From 2d942a9926eda947931bce053847d8ea5aa9b05f Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 18:06:02 -0400 Subject: [PATCH 06/19] feat: content-hash-aware tag/insight sharing + library scoping Tags and insights now follow content across libraries via content_hash lookups on the read path, so the same file indexed at different rel_paths in multiple libraries shares its annotations. Recursive tag search scopes hits to the selected library by checking each tagged rel_path against the library's disk (with a content-hash sibling fallback so tags attached under one library's rel_path still match a content-equivalent file in another). The /image and /image/metadata handlers fall back across libraries when the file isn't under the resolved one, so union-mode search results (which carry no library attribution in the response) still serve correctly. 
Co-Authored-By: Claude Opus 4.7 --- src/ai/handlers.rs | 25 ++++++- src/database/insights_dao.rs | 34 +++++++++ src/database/mod.rs | 138 +++++++++++++++++++++++++++++++++++ src/files.rs | 68 +++++++++++++++++ src/main.rs | 44 +++++++++-- src/tags.rs | 75 ++++++++++++++++++- 6 files changed, 376 insertions(+), 8 deletions(-) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cf7fd5b..a49c229 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -5,8 +5,10 @@ use serde::{Deserialize, Serialize}; use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient}; use crate::data::Claims; -use crate::database::InsightDao; +use crate::database::{ExifDao, InsightDao}; +use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer}; +use crate::state::AppState; use crate::utils::normalize_path; #[derive(Debug, Deserialize)] @@ -31,6 +33,10 @@ pub struct GeneratePhotoInsightRequest { #[derive(Debug, Deserialize)] pub struct GetPhotoInsightQuery { pub path: String, + /// Library context for this lookup. Used to pick the right content + /// hash when the same rel_path exists under multiple roots. + #[serde(default)] + pub library: Option, } #[derive(Debug, Deserialize)] @@ -146,15 +152,30 @@ pub async fn generate_insight_handler( pub async fn get_insight_handler( _claims: Claims, query: web::Query, + app_state: web::Data, insight_dao: web::Data>>, + exif_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::debug!("Fetching insight for {}", normalized_path); let otel_context = opentelemetry::Context::new(); + + // Expand to rel_paths sharing content so an insight generated under + // library 1 still shows when the same photo is viewed from library 2. 
+ let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + let sibling_paths = { + let mut exif = exif_dao.lock().expect("Unable to lock ExifDao"); + exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path) + .unwrap_or_else(|_| vec![normalized_path.clone()]) + }; + let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); - match dao.get_insight(&otel_context, &normalized_path) { + match dao.get_insight_for_paths(&otel_context, &sibling_paths) { Ok(Some(insight)) => { let response = PhotoInsightResponse { id: insight.id, diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs index 6b15717..d54904f 100644 --- a/src/database/insights_dao.rs +++ b/src/database/insights_dao.rs @@ -21,6 +21,16 @@ pub trait InsightDao: Sync + Send { file_path: &str, ) -> Result, DbError>; + /// Return the most recent current insight whose rel_path is one of + /// `paths`. Used for content-hash sharing: the caller expands a + /// single file into all rel_paths with the same content_hash, then + /// asks here for any existing insight attached to any of them. 
+ fn get_insight_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> Result, DbError>; + fn get_insight_history( &mut self, context: &opentelemetry::Context, @@ -132,6 +142,30 @@ impl InsightDao for SqliteInsightDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn get_insight_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> Result, DbError> { + if paths.is_empty() { + return Ok(None); + } + trace_db_call(context, "query", "get_insight_for_paths", |_span| { + use schema::photo_insights::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get InsightDao"); + + photo_insights + .filter(rel_path.eq_any(paths)) + .filter(is_current.eq(true)) + .order(generated_at.desc()) + .first::(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn get_insight_history( &mut self, context: &opentelemetry::Context, diff --git a/src/database/mod.rs b/src/database/mod.rs index f5fe56a..2e3dca1 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -341,6 +341,45 @@ pub trait ExifDao: Sync + Send { context: &opentelemetry::Context, hash: &str, ) -> Result, DbError>; + + /// Given a file instance `(library_id, rel_path)`, return every distinct + /// rel_path in `image_exif` whose `content_hash` matches this file's. + /// Used by tag and insight read-paths so annotations follow content + /// rather than path, even when the same file is indexed under + /// different library roots. Falls back to `[rel_path]` when the file + /// hasn't been hashed yet. + fn get_rel_paths_sharing_content( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> Result, DbError>; + + /// All rel_paths known to live in a given library. 
Used by search to + /// scope tag-based (path-keyed) hits to a single library after joining + /// through the library-agnostic tag tables. + fn get_rel_paths_for_library( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + ) -> Result, DbError>; + + /// Look up a content_hash for a rel_path in *any* library. Useful when + /// the caller has a library-agnostic rel_path (e.g. from tagged_photo) + /// and wants to find content-equivalent siblings without knowing the + /// file's original library. + fn find_content_hash_anywhere( + &mut self, + context: &opentelemetry::Context, + rel_path: &str, + ) -> Result, DbError>; + + /// Given a content_hash, return all rel_paths carrying that hash. + fn get_rel_paths_by_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError>; } pub struct SqliteExifDao { @@ -775,4 +814,103 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn get_rel_paths_sharing_content( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_sharing_content", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + // Look up this file's content_hash. Missing row or NULL hash + // means we can't expand the match set; return the given + // rel_path so callers fall through to direct-match behavior. + let hash: Option = image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)) + .select(content_hash) + .first::>(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error"))? 
+ .flatten(); + + let paths = match hash { + Some(h) => image_exif + .filter(content_hash.eq(h)) + .select(rel_path) + .distinct() + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error"))?, + None => vec![rel_path_val.to_string()], + }; + + Ok(paths) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_for_library( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_for_library", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(library_id.eq(library_id_val)) + .select(rel_path) + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn find_content_hash_anywhere( + &mut self, + context: &opentelemetry::Context, + rel_path_val: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "find_content_hash_anywhere", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(rel_path.eq(rel_path_val)) + .filter(content_hash.is_not_null()) + .select(content_hash) + .first::>(connection.deref_mut()) + .optional() + .map(|opt| opt.flatten()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_by_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_by_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.eq(hash)) + .select(rel_path) + .distinct() + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| 
DbError::new(DbErrorKind::QueryError)) + } } diff --git a/src/files.rs b/src/files.rs index 552fb58..11ad898 100644 --- a/src/files.rs +++ b/src/files.rs @@ -335,6 +335,13 @@ pub async fn list_photos( None }; + // When a specific library is selected, we'll gate tag-based results + // (which key on rel_path only, library-agnostic) by "does this + // rel_path actually exist on disk in the selected library's root". + // We check per-file below rather than pre-enumerating image_exif, + // since image_exif may lag a just-added library. + let library_for_scope: Option<&crate::libraries::Library> = library; + let search_recursively = req.recursive.unwrap_or(false); if let Some(tag_ids) = &req.tag_ids && search_recursively @@ -400,6 +407,34 @@ pub async fn list_photos( true } }) + .filter(|f| { + // Scope to the selected library by checking the file + // actually exists under its root. Falls back to the + // content-hash sibling set (looked up globally, since + // the tagged rel_path may have been registered under + // a different library than the one selected). 
+ let Some(lib) = library_for_scope else { + return true; + }; + if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { + return true; + } + let siblings = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao + .find_content_hash_anywhere(&span_context, &f.file_name) + .unwrap_or(None) + { + Some(hash) => dao + .get_rel_paths_by_hash(&span_context, &hash) + .unwrap_or_default(), + None => Vec::new(), + } + }; + siblings + .iter() + .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) + }) .filter(|f| { // Apply media type filtering if specified if let Some(ref media_type) = req.media_type { @@ -1403,6 +1438,39 @@ mod tests { ) -> Result, DbError> { Ok(None) } + + fn get_rel_paths_sharing_content( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + rel_path: &str, + ) -> Result, DbError> { + Ok(vec![rel_path.to_string()]) + } + + fn get_rel_paths_for_library( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + ) -> Result, DbError> { + Ok(vec![]) + } + + fn find_content_hash_anywhere( + &mut self, + _context: &opentelemetry::Context, + _rel_path: &str, + ) -> Result, DbError> { + Ok(None) + } + + fn get_rel_paths_by_hash( + &mut self, + _context: &opentelemetry::Context, + _hash: &str, + ) -> Result, DbError> { + Ok(vec![]) + } } mod api { diff --git a/src/main.rs b/src/main.rs index 0f3f6c8..e28a43a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -118,7 +118,25 @@ async fn get_image( } }; - if let Some(path) = is_valid_full_path(&library.root_path, &req.path, false) { + // Union-mode search returns flat rel_paths with no library attribution, + // so clients may request a file under the wrong library. Try the + // resolved library first; if the file isn't there, fall back to any + // other library holding that rel_path on disk. 
+ let resolved = is_valid_full_path(&library.root_path, &req.path, false) + .filter(|p| p.exists()) + .map(|p| (library, p)) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &req.path, false) + .filter(|p| p.exists()) + .map(|p| (lib, p)) + }) + }); + + if let Some((library, path)) = resolved { let image_size = req.size.unwrap_or(PhotoSize::Full); if image_size == PhotoSize::Thumb { let relative_path = path @@ -207,9 +225,9 @@ async fn get_image( span.set_status(Status::error("Not found")); HttpResponse::NotFound().finish() } else { - span.set_status(Status::error("Bad photos request")); - error!("Bad photos request: {}", req.path); - HttpResponse::BadRequest().finish() + span.set_status(Status::error("Not found")); + error!("Path does not exist in any library: {}", req.path); + HttpResponse::NotFound().finish() } } @@ -294,7 +312,23 @@ async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let full_path = is_valid_full_path(&app_state.base_path, &path.path, false); + let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + + // Fall back to other libraries if the file isn't under the resolved one, + // matching the `/image` handler so union-mode search results resolve. 
+ let full_path = is_valid_full_path(&library.root_path, &path.path, false) + .filter(|p| p.exists()) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &path.path, false).filter(|p| p.exists()) + }) + }); match full_path .ok_or_else(|| ErrorKind::InvalidData.into()) diff --git a/src/tags.rs b/src/tags.rs index 2834b62..95e303f 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -1,5 +1,8 @@ use crate::data::GetTagsRequest; +use crate::database::ExifDao; +use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer, trace_db_call}; +use crate::state::AppState; use crate::utils::normalize_path; use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema}; use actix_web::dev::{ServiceFactory, ServiceRequest}; @@ -71,15 +74,32 @@ async fn get_tags( _: Claims, http_request: HttpRequest, request: web::Query, + app_state: web::Data, tag_dao: web::Data>, + exif_dao: web::Data>>, ) -> impl Responder { let context = extract_context_from_request(&http_request); let span = global_tracer().start_with_context("get_tags", &context); let span_context = opentelemetry::Context::current_with_span(span); let normalized_path = normalize_path(&request.path); + + // Expand the query set to every rel_path that shares content with + // this file, so tags added under one library show up under the + // others when they hold the same file. Falls back to direct rel_path + // match when the file hasn't been hashed yet. 
+ let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + let sibling_paths = { + let mut exif = exif_dao.lock().expect("Unable to get ExifDao"); + exif.get_rel_paths_sharing_content(&span_context, library.id, &normalized_path) + .unwrap_or_else(|_| vec![normalized_path.clone()]) + }; + let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao"); tag_dao - .get_tags_for_path(&span_context, &normalized_path) + .get_tags_for_paths(&span_context, &sibling_paths) .map(|tags| { span_context.span().set_status(Status::Ok); HttpResponse::Ok().json(tags) @@ -289,6 +309,14 @@ pub trait TagDao: Send + Sync { context: &opentelemetry::Context, path: &str, ) -> anyhow::Result>; + /// Union of tags for every rel_path in `paths`. Used by content-hash + /// sharing: the caller resolves all rel_paths with the same content + /// via `ExifDao::get_rel_paths_sharing_content`, then passes them here. + fn get_tags_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>; fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result; fn remove_tag( &mut self, @@ -413,6 +441,32 @@ impl TagDao for SqliteTagDao { }) } + fn get_tags_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result> { + if paths.is_empty() { + return Ok(Vec::new()); + } + let mut conn = self + .connection + .lock() + .expect("Unable to lock SqliteTagDao connection"); + trace_db_call(context, "query", "get_tags_for_paths", |span| { + span.set_attribute(KeyValue::new("path_count", paths.len() as i64)); + // DISTINCT across tag ids so two rel_paths carrying the same + // tag don't produce a duplicate entry in the response. 
+ tags::table + .inner_join(tagged_photo::table) + .filter(tagged_photo::rel_path.eq_any(paths)) + .select((tags::id, tags::name, tags::created_time)) + .distinct() + .get_results::(conn.deref_mut()) + .with_context(|| "Unable to get tags from Sqlite") + }) + } + fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result { let mut conn = self .connection @@ -817,6 +871,25 @@ mod tests { .clone()) } + fn get_tags_for_paths( + &mut self, + _context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result> { + let tagged = self.tagged_photos.borrow(); + let mut out: Vec = Vec::new(); + for p in paths { + if let Some(tags) = tagged.get(p) { + for t in tags { + if !out.iter().any(|existing| existing.id == t.id) { + out.push(t.clone()); + } + } + } + } + Ok(out) + } + fn create_tag( &mut self, _context: &opentelemetry::Context, -- 2.49.1 From e6ee38edec7521b01505538f1a4590bd9050730f Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 09:45:43 -0400 Subject: [PATCH 07/19] fix: resolve media across libraries for video, metadata, and insights The /video/generate and /image/metadata handlers assumed files live under the resolved library only, which broke when a mobile client passed no library (union mode) but the file lived in a non-primary library. Both now fall back to scanning every configured library for an existing file. InsightGenerator held a single base_path, so vision-model loads and filename-date fallbacks failed for non-primary libraries. It now takes Vec and probes each root in resolve_full_path. /image/metadata responses now carry library_id/library_name so the mobile viewer can surface which library a file belongs to. Thumbnail generation at startup is now spawned on a background thread so the HTTP server can accept traffic while large libraries backfill. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/insight_generator.rs | 35 +++++++++++++++++------ src/bin/populate_knowledge.rs | 9 +++++- src/data/mod.rs | 4 +++ src/main.rs | 54 +++++++++++++++++++++++++++++------ src/state.rs | 9 ++++-- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 6f0b319..07881c6 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -16,6 +16,7 @@ use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao, SearchHistoryDao, }; +use crate::libraries::Library; use crate::memories::extract_date_from_filename; use crate::otel::global_tracer; use crate::tags::TagDao; @@ -52,7 +53,7 @@ pub struct InsightGenerator { // Knowledge memory knowledge_dao: Arc>>, - base_path: String, + libraries: Vec, } impl InsightGenerator { @@ -67,7 +68,7 @@ impl InsightGenerator { search_dao: Arc>>, tag_dao: Arc>>, knowledge_dao: Arc>>, - base_path: String, + libraries: Vec, ) -> Self { Self { ollama, @@ -80,10 +81,25 @@ impl InsightGenerator { search_dao, tag_dao, knowledge_dao, - base_path, + libraries, } } + /// Resolve `rel_path` against the configured libraries, returning the + /// first root under which the file exists. Insights may be generated + /// for any library — the generator itself doesn't know which — so we + /// probe each root rather than trust a single `base_path`. 
+ fn resolve_full_path(&self, rel_path: &str) -> Option { + use std::path::Path; + for lib in &self.libraries { + let candidate = Path::new(&lib.root_path).join(rel_path); + if candidate.exists() { + return Some(candidate); + } + } + None + } + /// Extract contact name from file path /// e.g., "Sarah/img.jpeg" -> Some("Sarah") /// e.g., "img.jpeg" -> None @@ -108,9 +124,13 @@ impl InsightGenerator { /// Resizes to max 1024px on longest edge to reduce context usage fn load_image_as_base64(&self, file_path: &str) -> Result { use image::imageops::FilterType; - use std::path::Path; - let full_path = Path::new(&self.base_path).join(file_path); + let full_path = self.resolve_full_path(file_path).ok_or_else(|| { + anyhow::anyhow!( + "File '{}' not found under any configured library", + file_path + ) + })?; log::debug!("Loading image for vision model: {:?}", full_path); @@ -725,8 +745,7 @@ impl InsightGenerator { extract_date_from_filename(&file_path) .map(|dt| dt.timestamp()) .or_else(|| { - // Combine base_path with file_path to get full path - let full_path = std::path::Path::new(&self.base_path).join(&file_path); + let full_path = self.resolve_full_path(&file_path)?; File::open(&full_path) .and_then(|f| f.metadata()) .and_then(|m| m.created().or(m.modified())) @@ -2455,7 +2474,7 @@ Return ONLY the summary, nothing else."#, extract_date_from_filename(&file_path) .map(|dt| dt.timestamp()) .or_else(|| { - let full_path = std::path::Path::new(&self.base_path).join(&file_path); + let full_path = self.resolve_full_path(&file_path)?; File::open(&full_path) .and_then(|f| f.metadata()) .and_then(|m| m.created().or(m.modified())) diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index f9373ad..bc37960 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -11,6 +11,7 @@ use image_api::database::{ SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao, }; use 
image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS}; +use image_api::libraries::{self, Library}; use image_api::tags::{SqliteTagDao, TagDao}; #[derive(Parser, Debug)] @@ -125,6 +126,12 @@ async fn main() -> anyhow::Result<()> { let knowledge_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); + let populate_lib = Library { + id: libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path.clone(), + }; + let generator = InsightGenerator::new( ollama, sms_client, @@ -136,7 +143,7 @@ async fn main() -> anyhow::Result<()> { search_dao, tag_dao, knowledge_dao, - base_path.clone(), + vec![populate_lib], ); println!("Knowledge Base Population"); diff --git a/src/data/mod.rs b/src/data/mod.rs index ff9ac25..b6ba795 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -239,6 +239,8 @@ pub struct MetadataResponse { pub size: u64, pub exif: Option, pub filename_date: Option, // Date extracted from filename + pub library_id: Option, + pub library_name: Option, } impl From for MetadataResponse { @@ -255,6 +257,8 @@ impl From for MetadataResponse { size: metadata.len(), exif: None, filename_date: None, // Will be set in endpoint handler + library_id: None, + library_name: None, } } } diff --git a/src/main.rs b/src/main.rs index e28a43a..af20c4e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -319,24 +319,32 @@ async fn get_file_metadata( // Fall back to other libraries if the file isn't under the resolved one, // matching the `/image` handler so union-mode search results resolve. 
- let full_path = is_valid_full_path(&library.root_path, &path.path, false) + let resolved = is_valid_full_path(&library.root_path, &path.path, false) .filter(|p| p.exists()) + .map(|p| (library, p)) .or_else(|| { app_state.libraries.iter().find_map(|lib| { if lib.id == library.id { return None; } - is_valid_full_path(&lib.root_path, &path.path, false).filter(|p| p.exists()) + is_valid_full_path(&lib.root_path, &path.path, false) + .filter(|p| p.exists()) + .map(|p| (lib, p)) }) }); - match full_path + match resolved .ok_or_else(|| ErrorKind::InvalidData.into()) - .and_then(File::open) - .and_then(|file| file.metadata()) + .and_then(|(lib, full_path)| { + File::open(&full_path) + .and_then(|file| file.metadata()) + .map(|metadata| (lib, metadata)) + }) { - Ok(metadata) => { + Ok((resolved_library, metadata)) => { let mut response: MetadataResponse = metadata.into(); + response.library_id = Some(resolved_library.id); + response.library_name = Some(resolved_library.name.clone()); // Extract date from filename if possible response.filename_date = @@ -573,7 +581,28 @@ async fn generate_video( if let Some(name) = filename.file_name() { let filename = name.to_str().expect("Filename should convert to string"); let playlist = format!("{}/{}.m3u8", app_state.video_path, filename); - if let Some(path) = is_valid_full_path(&app_state.base_path, &body.path, false) { + + let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + + // Try the resolved library first, then fall back to any other library + // that actually contains the file — handles union-mode requests where + // the mobile client passes no library but the file lives in a + // non-primary library. 
+ let resolved = is_valid_full_path(&library.root_path, &body.path, false) + .filter(|p| p.exists()) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists()) + }) + }); + + if let Some(path) = resolved { if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await { span.add_event( "playlist_created".to_string(), @@ -1161,7 +1190,16 @@ fn main() -> std::io::Result<()> { // table; we use that list to drive the initial thumbnail sweep. let app_data = Data::new(AppState::default()); - create_thumbnails(&app_data.libraries); + // Kick thumbnail generation onto a background thread so the HTTP + // server can accept traffic while large libraries are backfilling. + // Existing thumbs are re-used (exists() check inside the walk), + // so missed files are filled in over successive scans. + { + let libs = app_data.libraries.clone(); + std::thread::spawn(move || { + create_thumbnails(&libs); + }); + } // generate_video_gifs().await; let labels = HashMap::new(); diff --git a/src/state.rs b/src/state.rs index 8f0dc69..d901a66 100644 --- a/src/state.rs +++ b/src/state.rs @@ -174,7 +174,7 @@ impl Default for AppState { search_dao.clone(), tag_dao.clone(), knowledge_dao, - base_path.clone(), + libraries_vec.clone(), ); // Ensure preview clips directory exists @@ -245,6 +245,11 @@ impl AppState { // Initialize test InsightGenerator with all data sources let base_path_str = base_path.to_string_lossy().to_string(); + let test_lib = Library { + id: crate::libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path_str.clone(), + }; let insight_generator = InsightGenerator::new( ollama.clone(), sms_client.clone(), @@ -256,7 +261,7 @@ impl AppState { search_dao.clone(), tag_dao.clone(), knowledge_dao, - base_path_str.clone(), + vec![test_lib], ); // Initialize test preview DAO -- 2.49.1 From 
7becbc0737930fd2762bb98a21fad0abc4fb61eb Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 09:53:51 -0400 Subject: [PATCH 08/19] fix: normalize rel_path separators in non-recursive /photos listing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, strip_prefix preserves backslashes, so the non-recursive branch was looking up tags for 'Melissa\img1.jpg' while tagged_photo stores 'Melissa/img1.jpg' — every file was filtered out. Normalize to '/' to match the watcher and populate_knowledge. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/files.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/files.rs b/src/files.rs index 11ad898..50d5c93 100644 --- a/src/files.rs +++ b/src/files.rs @@ -568,7 +568,14 @@ pub async fn list_photos( path.display() ) }); - let relative_str = relative.to_str().unwrap().to_string(); + // Normalize separators to '/' so downstream + // lookups (tags, EXIF, insights) that store + // rel_paths with forward slashes still match + // on Windows. + let relative_str = relative + .to_str() + .unwrap() + .replace('\\', "/"); if md.is_file() { files.push(relative_str); @@ -589,7 +596,7 @@ pub async fn list_photos( path.display() ) }); - files.push(relative.to_str().unwrap().to_string()); + files.push(relative.to_str().unwrap().replace('\\', "/")); } } } -- 2.49.1 From 54a1df60b8d9ea6fe25758b8848fb9ebf4cd46e6 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 10:03:32 -0400 Subject: [PATCH 09/19] fix: resolve preview clip rel_path against all libraries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PreviewClipGenerator stripped a single base_path, so videos in a non-primary library ended up with the absolute path as 'relative'. 
On Windows, PathBuf::from(preview_clips_dir).join(absolute) replaces the whole path with the absolute path, and .with_extension("mp4") on a .mp4 input yields the input path — ffmpeg then errors out with 'cannot edit existing files in place'. The generator now holds Vec<Library> and strips whichever root actually contains the video, with separator normalization to match the rest of the code. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/state.rs | 7 +++++-- src/video/actors.rs | 34 +++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/state.rs b/src/state.rs index d901a66..78b98ad 100644 --- a/src/state.rs +++ b/src/state.rs @@ -74,8 +74,11 @@ impl AppState { let video_playlist_manager = VideoPlaylistManager::new(video_path.clone(), playlist_generator.start()); - let preview_clip_generator = - PreviewClipGenerator::new(preview_clips_path.clone(), base_path.clone(), preview_dao); + let preview_clip_generator = PreviewClipGenerator::new( + preview_clips_path.clone(), + libraries_vec.clone(), + preview_dao, + ); Self { stream_manager, diff --git a/src/video/actors.rs b/src/video/actors.rs index e90bbe1..8af2482 100644 --- a/src/video/actors.rs +++ b/src/video/actors.rs @@ -1,5 +1,6 @@ use crate::database::PreviewDao; use crate::is_video; +use crate::libraries::Library; use crate::otel::global_tracer; use crate::video::ffmpeg::generate_preview_clip; use actix::prelude::*; @@ -500,23 +501,40 @@ pub struct GeneratePreviewClipMessage { pub struct PreviewClipGenerator { semaphore: Arc, preview_clips_dir: String, - base_path: String, + libraries: Vec, preview_dao: Arc>>, } impl PreviewClipGenerator { pub fn new( preview_clips_dir: String, - base_path: String, + libraries: Vec, preview_dao: Arc>>, ) -> Self { PreviewClipGenerator { semaphore: Arc::new(Semaphore::new(2)), preview_clips_dir, - base_path, + libraries, preview_dao, } } + + /// Strip whichever library root actually contains `video_path`. 
+ /// Falls back to the first library if none match, so we never + /// accidentally emit the absolute input path as the output path + /// (which ffmpeg rejects as "cannot edit existing files in place"). + fn relativize(&self, video_path: &str) -> String { + for lib in &self.libraries { + if let Some(stripped) = video_path.strip_prefix(&lib.root_path) { + return stripped + .trim_start_matches(['/', '\\']) + .replace('\\', "/"); + } + } + video_path + .trim_start_matches(['/', '\\']) + .replace('\\', "/") + } } impl Actor for PreviewClipGenerator { @@ -533,9 +551,10 @@ impl Handler for PreviewClipGenerator { ) -> Self::Result { let semaphore = self.semaphore.clone(); let preview_clips_dir = self.preview_clips_dir.clone(); - let base_path = self.base_path.clone(); let preview_dao = self.preview_dao.clone(); let video_path = msg.video_path; + // Resolve against whichever library actually owns this video. + let relative_path = self.relativize(&video_path); Box::pin(async move { let permit = semaphore @@ -543,13 +562,6 @@ impl Handler for PreviewClipGenerator { .await .expect("Unable to acquire preview semaphore"); - // Compute relative path (from BASE_PATH) for DB operations, consistent with EXIF convention - let relative_path = video_path - .strip_prefix(&base_path) - .unwrap_or(&video_path) - .trim_start_matches(['/', '\\']) - .to_string(); - // Update status to processing { let otel_ctx = opentelemetry::Context::current(); -- 2.49.1 From a0f3bfab5f2bb47ac823748eb80e2766799295e6 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 16:38:28 -0400 Subject: [PATCH 10/19] fix: validate gps-summary path against every library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /photos/gps-summary handler validated the incoming path against the primary library's root with new_file=false, which requires the path to exist on disk. 
For a viewer opened on a file from a non-primary library, tapping the GPS link produced an activePath outside the primary root, so the primary-only check failed and the server 400'd — the map came up empty. Validation here is purely a traversal guard (the DAO does a prefix LIKE against rel_path), so we now accept the path as long as any configured library can resolve it without escaping its root. Also applies cargo fmt drift on files touched this session. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/insight_generator.rs | 20 ++++++------- src/files.rs | 31 ++++++++++--------- src/main.rs | 59 ++++++++++++++++++------------------- src/video/actors.rs | 4 +-- 4 files changed, 55 insertions(+), 59 deletions(-) diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 07881c6..0e2ab94 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1902,14 +1902,10 @@ Return ONLY the summary, nothing else."#, // those already). 
let similar_entities: Vec = { - use crate::database::{EntityFilter, KnowledgeDao}; use crate::database::knowledge_dao::normalize_entity_type; + use crate::database::{EntityFilter, KnowledgeDao}; let normalised_type = normalize_entity_type(&entity_type); - let first_token = name - .split_whitespace() - .next() - .unwrap_or(&name) - .to_string(); + let first_token = name.split_whitespace().next().unwrap_or(&name).to_string(); let filter = EntityFilter { entity_type: None, // search all types, filter client-side to avoid case issues status: Some("active".to_string()), @@ -1917,7 +1913,10 @@ Return ONLY the summary, nothing else."#, limit: 10, offset: 0, }; - let mut kdao = self.knowledge_dao.lock().expect("Unable to lock KnowledgeDao"); + let mut kdao = self + .knowledge_dao + .lock() + .expect("Unable to lock KnowledgeDao"); kdao.list_entities(cx, filter) .unwrap_or_default() .0 @@ -2725,10 +2724,9 @@ Return ONLY the summary, nothing else."#, messages.push(ChatMessage::user( "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.", )); - let (final_response, prompt_tokens, eval_tokens) = - ollama_client - .chat_with_tools(messages.clone(), vec![]) - .await?; + let (final_response, prompt_tokens, eval_tokens) = ollama_client + .chat_with_tools(messages.clone(), vec![]) + .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; final_content = final_response.content.clone(); diff --git a/src/files.rs b/src/files.rs index 50d5c93..d83325e 100644 --- a/src/files.rs +++ b/src/files.rs @@ -241,10 +241,8 @@ pub async fn list_photos( // Resolve the optional library filter. Unknown values return 400. // For Phase 3 the filesystem walk still operates against a single // library's root; Phase 4 introduces multi-root union scanning. 
- let library = match crate::libraries::resolve_library_param( - &app_state, - req.library.as_deref(), - ) { + let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) + { Ok(lib) => lib, Err(msg) => { log::warn!("Rejecting /photos request: {}", msg); @@ -560,8 +558,9 @@ pub async fn list_photos( .fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| { match path.metadata() { Ok(md) => { - let relative = - path.strip_prefix(&scoped_library.root_path).unwrap_or_else(|_| { + let relative = path + .strip_prefix(&scoped_library.root_path) + .unwrap_or_else(|_| { panic!( "Unable to strip library root {} from file path {}", &scoped_library.root_path, @@ -572,10 +571,7 @@ pub async fn list_photos( // lookups (tags, EXIF, insights) that store // rel_paths with forward slashes still match // on Windows. - let relative_str = relative - .to_str() - .unwrap() - .replace('\\', "/"); + let relative_str = relative.to_str().unwrap().replace('\\', "/"); if md.is_file() { files.push(relative_str); @@ -1024,13 +1020,20 @@ pub async fn get_gps_summary( let cx = opentelemetry::Context::current_with_span(span); - // The database stores relative paths, so we use the path as-is - // Normalize empty path or "/" to return all GPS photos + // The database stores relative paths, so we use the path as-is. + // Normalize empty path or "/" to return all GPS photos. Validation + // is purely a traversal guard — the path need not exist on disk + // under any particular library, because the DAO just does a prefix + // match against image_exif.rel_path (which is library-agnostic for + // this summary query). 
let requested_path = if req.path.is_empty() || req.path == "/" { String::new() } else { - // Validate path using the same check as all other endpoints - if is_valid_full_path(&app_state.base_path, &req.path, false).is_none() { + let req_path = PathBuf::from(&req.path); + let validated = app_state.libraries.iter().any(|lib| { + is_valid_full_path(&PathBuf::from(&lib.root_path), &req_path, true).is_some() + }); + if !validated { warn!("Invalid path for GPS summary: {}", req.path); cx.span().set_status(Status::error("Invalid path")); return Ok(HttpResponse::BadRequest().json(serde_json::json!({ diff --git a/src/main.rs b/src/main.rs index af20c4e..53e96cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -55,13 +55,13 @@ use opentelemetry::{KeyValue, global}; mod ai; mod auth; +mod content_hash; mod data; mod database; mod error; mod exif; mod file_types; mod files; -mod content_hash; mod geo; mod libraries; mod state; @@ -106,10 +106,7 @@ async fn get_image( // Resolve library from query param; default to primary so clients that // don't yet send `library=` continue to work. - let library = match libraries::resolve_library_param( - &app_state, - req.library.as_deref(), - ) { + let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -339,8 +336,7 @@ async fn get_file_metadata( File::open(&full_path) .and_then(|file| file.metadata()) .map(|metadata| (lib, metadata)) - }) - { + }) { Ok((resolved_library, metadata)) => { let mut response: MetadataResponse = metadata.into(); response.library_id = Some(resolved_library.id); @@ -397,17 +393,15 @@ async fn upload_image( // Resolve the optional library selector. Absent → primary library // (backwards-compatible with clients that don't yet send `library=`). 
- let target_library = match libraries::resolve_library_param( - &app_state, - query.library.as_deref(), - ) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(msg) => { - span.set_status(Status::error(msg.clone())); - return HttpResponse::BadRequest().body(msg); - } - }; + let target_library = + match libraries::resolve_library_param(&app_state, query.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(msg) => { + span.set_status(Status::error(msg.clone())); + return HttpResponse::BadRequest().body(msg); + } + }; let mut file_content: BytesMut = BytesMut::new(); let mut file_name: Option = None; @@ -496,18 +490,18 @@ async fn upload_image( match exif::extract_exif_from_path(&uploaded_path) { Ok(exif_data) => { let timestamp = Utc::now().timestamp(); - let (content_hash, size_bytes) = - match content_hash::compute(&uploaded_path) { - Ok(id) => (Some(id.content_hash), Some(id.size_bytes)), - Err(e) => { - warn!( - "Failed to hash uploaded {}: {:?}", - uploaded_path.display(), - e - ); - (None, None) - } - }; + let (content_hash, size_bytes) = match content_hash::compute(&uploaded_path) + { + Ok(id) => (Some(id.content_hash), Some(id.size_bytes)), + Err(e) => { + warn!( + "Failed to hash uploaded {}: {:?}", + uploaded_path.display(), + e + ); + (None, None) + } + }; let insert_exif = InsertImageExif { library_id: target_library.id, file_path: relative_path.clone(), @@ -1827,7 +1821,10 @@ fn process_new_files( let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); if let Err(e) = dao.store_exif(&context, insert_exif) { - error!("Failed to register {} in image_exif: {:?}", relative_path, e); + error!( + "Failed to register {} in image_exif: {:?}", + relative_path, e + ); } else { debug!("Registered {} in image_exif", relative_path); } diff --git a/src/video/actors.rs b/src/video/actors.rs index 8af2482..284c8e3 100644 --- a/src/video/actors.rs +++ b/src/video/actors.rs @@ -526,9 +526,7 @@ impl 
PreviewClipGenerator { fn relativize(&self, video_path: &str) -> String { for lib in &self.libraries { if let Some(stripped) = video_path.strip_prefix(&lib.root_path) { - return stripped - .trim_start_matches(['/', '\\']) - .replace('\\', "/"); + return stripped.trim_start_matches(['/', '\\']).replace('\\', "/"); } } video_path -- 2.49.1 From c2ee3996be7c18ccfcf61371fa906edfba40969e Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 16:50:15 -0400 Subject: [PATCH 11/19] chore: apply cargo fmt + clippy cleanup across crate Silence forward-looking dead_code on unused DAO modules, annotate individual placeholder items, rewrite tautological assert!(true/false) in token tests as panic! arms, and pick up fmt drift. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/handlers.rs | 5 ++- src/ai/insight_generator.rs | 54 +++++++++++++++---------------- src/ai/ollama.rs | 4 ++- src/bin/backfill_hashes.rs | 17 ++++------ src/content_hash.rs | 7 +++- src/data/mod.rs | 15 +++------ src/database/calendar_dao.rs | 2 ++ src/database/daily_summary_dao.rs | 2 ++ src/database/insights_dao.rs | 2 ++ src/database/knowledge_dao.rs | 5 +-- src/database/location_dao.rs | 2 ++ src/database/mod.rs | 5 ++- src/database/preview_dao.rs | 2 ++ src/database/search_dao.rs | 2 ++ src/files.rs | 2 +- src/lib.rs | 2 ++ src/libraries.rs | 12 ++++--- src/main.rs | 2 ++ src/memories.rs | 11 +++---- src/tags.rs | 3 ++ src/testhelpers.rs | 12 +++++++ src/video/ffmpeg.rs | 5 ++- 22 files changed, 106 insertions(+), 67 deletions(-) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index a49c229..abf2369 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -503,7 +503,10 @@ pub async fn export_training_data_handler( HttpResponse::Ok() .content_type("application/jsonl") - .insert_header(("Content-Disposition", "attachment; filename=\"training_data.jsonl\"")) + .insert_header(( + "Content-Disposition", + "attachment; filename=\"training_data.jsonl\"", + )) .body(jsonl) } Err(e) => { 
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 0e2ab94..ecc387d 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1827,32 +1827,32 @@ Return ONLY the summary, nothing else."#, // For each linked entity, fetch its facts for entity_id in entity_ids { - if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id) { - if let Some(e) = entity { - let role = links - .iter() - .find(|l| l.entity_id == entity_id) - .map(|l| l.role.as_str()) - .unwrap_or("subject"); - output_lines.push(format!( - "Entity: {} ({}, role: {})", - e.name, e.entity_type, role - )); - if let Ok(facts) = kdao.get_facts_for_entity(cx, entity_id) { - for f in facts.iter().filter(|f| f.status == "active") { - let obj = if let Some(ref v) = f.object_value { - v.clone() - } else if let Some(oid) = f.object_entity_id { - kdao.get_entity_by_id(cx, oid) - .ok() - .flatten() - .map(|e| format!("{} (entity ID: {})", e.name, e.id)) - .unwrap_or_else(|| format!("entity:{}", oid)) - } else { - "(unknown)".to_string() - }; - output_lines.push(format!(" - {} {}", f.predicate, obj)); - } + if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id) + && let Some(e) = entity + { + let role = links + .iter() + .find(|l| l.entity_id == entity_id) + .map(|l| l.role.as_str()) + .unwrap_or("subject"); + output_lines.push(format!( + "Entity: {} ({}, role: {})", + e.name, e.entity_type, role + )); + if let Ok(facts) = kdao.get_facts_for_entity(cx, entity_id) { + for f in facts.iter().filter(|f| f.status == "active") { + let obj = if let Some(ref v) = f.object_value { + v.clone() + } else if let Some(oid) = f.object_entity_id { + kdao.get_entity_by_id(cx, oid) + .ok() + .flatten() + .map(|e| format!("{} (entity ID: {})", e.name, e.id)) + .unwrap_or_else(|| format!("entity:{}", oid)) + } else { + "(unknown)".to_string() + }; + output_lines.push(format!(" - {} {}", f.predicate, obj)); } } } @@ -1902,8 +1902,8 @@ Return ONLY the summary, nothing else."#, // those already). 
Results are appended to the tool response so the // model can choose to use an existing entity's ID instead. let similar_entities: Vec = { + use crate::database::EntityFilter; use crate::database::knowledge_dao::normalize_entity_type; - use crate::database::{EntityFilter, KnowledgeDao}; let normalised_type = normalize_entity_type(&entity_type); let first_token = name.split_whitespace().next().unwrap_or(&name).to_string(); let filter = EntityFilter { diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs index 1f42b6c..184bc61 100644 --- a/src/ai/ollama.rs +++ b/src/ai/ollama.rs @@ -120,6 +120,7 @@ impl OllamaClient { /// Replace the HTTP client with one using a custom request timeout. /// Useful for slow models where the default 120s may be insufficient. + #[allow(dead_code)] pub fn with_request_timeout(mut self, secs: u64) -> Self { self.client = Client::builder() .connect_timeout(Duration::from_secs(5)) @@ -174,6 +175,7 @@ impl OllamaClient { } /// Clear the model list cache for a specific URL or all URLs + #[allow(dead_code)] pub fn clear_model_cache(url: Option<&str>) { let mut cache = MODEL_LIST_CACHE.lock().unwrap(); if let Some(url) = url { @@ -186,6 +188,7 @@ impl OllamaClient { } /// Clear the model capabilities cache for a specific URL or all URLs + #[allow(dead_code)] pub fn clear_capabilities_cache(url: Option<&str>) { let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap(); if let Some(url) = url { @@ -992,7 +995,6 @@ struct OllamaEmbedResponse { #[cfg(test)] mod tests { - use super::*; #[test] fn generate_photo_description_prompt_is_concise() { diff --git a/src/bin/backfill_hashes.rs b/src/bin/backfill_hashes.rs index 807c386..ad9b20f 100644 --- a/src/bin/backfill_hashes.rs +++ b/src/bin/backfill_hashes.rs @@ -68,8 +68,7 @@ fn main() -> anyhow::Result<()> { .join(", ") ); - let dao: Arc>> = - Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); + let dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); let ctx = 
opentelemetry::Context::new(); let mut total_hashed = 0u64; @@ -98,15 +97,13 @@ fn main() -> anyhow::Result<()> { .get(&library_id) .map(|lib| Path::new(&lib.root_path).join(&rel_path)); match abs { - Some(abs_path) if abs_path.exists() => { - match content_hash::compute(&abs_path) { - Ok(id) => (library_id, rel_path, Some(id)), - Err(e) => { - eprintln!("hash error for {}: {:?}", abs_path.display(), e); - (library_id, rel_path, None) - } + Some(abs_path) if abs_path.exists() => match content_hash::compute(&abs_path) { + Ok(id) => (library_id, rel_path, Some(id)), + Err(e) => { + eprintln!("hash error for {}: {:?}", abs_path.display(), e); + (library_id, rel_path, None) } - } + }, Some(_) => (library_id, rel_path, None), // file missing on disk None => { eprintln!("Row refers to unknown library_id {}", library_id); diff --git a/src/content_hash.rs b/src/content_hash.rs index 63be295..7f05f06 100644 --- a/src/content_hash.rs +++ b/src/content_hash.rs @@ -53,13 +53,18 @@ pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf { /// Hash-keyed HLS output directory: `///`. /// The playlist lives at `playlist.m3u8` inside this directory and its /// segments are co-located so HLS relative references Just Work. 
+#[allow(dead_code)] pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf { let shard = shard_prefix(hash); video_dir.join(shard).join(hash) } fn shard_prefix(hash: &str) -> &str { - let end = hash.char_indices().nth(2).map(|(i, _)| i).unwrap_or(hash.len()); + let end = hash + .char_indices() + .nth(2) + .map(|(i, _)| i) + .unwrap_or(hash.len()); &hash[..end] } diff --git a/src/data/mod.rs b/src/data/mod.rs index b6ba795..e953f4d 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -191,6 +191,7 @@ pub struct ThumbnailRequest { #[allow(dead_code)] // Part of API contract, may be used in future pub(crate) format: Option, #[serde(default)] + #[allow(dead_code)] // Part of API contract, may be used in future pub(crate) shape: Option, /// Optional library filter. Accepts a library id (e.g. "1") or name /// (e.g. "main"). When omitted, defaults to the primary library. @@ -434,11 +435,8 @@ mod tests { ); match err.unwrap_err().into_kind() { - ErrorKind::ExpiredSignature => assert!(true), - kind => { - println!("Unexpected error: {:?}", kind); - assert!(false) - } + ErrorKind::ExpiredSignature => {} + kind => panic!("Unexpected error: {:?}", kind), } } @@ -447,11 +445,8 @@ mod tests { let err = Claims::from_str("uni-֍ՓՓՓՓՓՓՓՓՓՓՓՓՓՓՓ"); match err.unwrap_err().into_kind() { - ErrorKind::InvalidToken => assert!(true), - kind => { - println!("Unexpected error: {:?}", kind); - assert!(false) - } + ErrorKind::InvalidToken => {} + kind => panic!("Unexpected error: {:?}", kind), } } diff --git a/src/database/calendar_dao.rs b/src/database/calendar_dao.rs index 82eea20..b70a9f6 100644 --- a/src/database/calendar_dao.rs +++ b/src/database/calendar_dao.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use serde::Serialize; diff --git a/src/database/daily_summary_dao.rs b/src/database/daily_summary_dao.rs index 5b1126f..6ea560a 100644 --- a/src/database/daily_summary_dao.rs +++ b/src/database/daily_summary_dao.rs @@ -1,3 +1,5 
@@ +#![allow(dead_code)] + use chrono::NaiveDate; use diesel::prelude::*; use diesel::sqlite::SqliteConnection; diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs index d54904f..553b579 100644 --- a/src/database/insights_dao.rs +++ b/src/database/insights_dao.rs @@ -31,6 +31,7 @@ pub trait InsightDao: Sync + Send { paths: &[String], ) -> Result, DbError>; + #[allow(dead_code)] fn get_insight_history( &mut self, context: &opentelemetry::Context, @@ -79,6 +80,7 @@ impl SqliteInsightDao { } #[cfg(test)] + #[allow(dead_code)] pub fn from_connection(conn: Arc>) -> Self { SqliteInsightDao { connection: conn } } diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs index a9f75fe..f0d6c12 100644 --- a/src/database/knowledge_dao.rs +++ b/src/database/knowledge_dao.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use std::ops::DerefMut; @@ -230,7 +232,7 @@ impl SqliteKnowledgeDao { } fn deserialize_embedding(bytes: &[u8]) -> Result, DbError> { - if bytes.len() % 4 != 0 { + if !bytes.len().is_multiple_of(4) { return Err(DbError::new(DbErrorKind::QueryError)); } Ok(bytes @@ -535,7 +537,6 @@ impl KnowledgeDao for SqliteKnowledgeDao { conn.transaction::<(i64, i64), diesel::result::Error, _>(|conn| { use schema::entity_facts::dsl as ef; - use schema::entity_photo_links::dsl as epl; // 1. 
Re-point facts where source is subject let facts_updated = diff --git a/src/database/location_dao.rs b/src/database/location_dao.rs index 73e1c10..95f5d8f 100644 --- a/src/database/location_dao.rs +++ b/src/database/location_dao.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use serde::Serialize; diff --git a/src/database/mod.rs b/src/database/mod.rs index 2e3dca1..f29a212 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -155,7 +155,9 @@ pub trait FavoriteDao: Sync + Send { fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result; fn remove_favorite(&mut self, user_id: i32, favorite_path: String); fn get_favorites(&mut self, user_id: i32) -> Result, DbError>; + #[allow(dead_code)] fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>; + #[allow(dead_code)] fn get_all_paths(&mut self) -> Result, DbError>; } @@ -239,6 +241,7 @@ impl FavoriteDao for SqliteFavoriteDao { } } +#[allow(dead_code)] pub trait ExifDao: Sync + Send { fn store_exif( &mut self, @@ -306,6 +309,7 @@ pub trait ExifDao: Sync + Send { /// Get all photos with GPS coordinates /// Returns Vec<(file_path, latitude, longitude, date_taken)> + #[allow(clippy::type_complexity)] fn get_all_with_gps( &mut self, context: &opentelemetry::Context, @@ -680,7 +684,6 @@ impl ExifDao for SqliteExifDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } - fn get_all_with_gps( &mut self, context: &opentelemetry::Context, diff --git a/src/database/preview_dao.rs b/src/database/preview_dao.rs index 6098c60..c528327 100644 --- a/src/database/preview_dao.rs +++ b/src/database/preview_dao.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use std::ops::DerefMut; diff --git a/src/database/search_dao.rs b/src/database/search_dao.rs index 04d0d2f..a74fd92 100644 --- a/src/database/search_dao.rs +++ b/src/database/search_dao.rs @@ -1,3 +1,5 @@ 
+#![allow(dead_code)] + use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use serde::Serialize; diff --git a/src/files.rs b/src/files.rs index d83325e..20ea001 100644 --- a/src/files.rs +++ b/src/files.rs @@ -16,7 +16,6 @@ use crate::file_types; use crate::geo::{gps_bounding_box, haversine_distance}; use crate::memories::extract_date_from_filename; use crate::{AppState, create_thumbnails}; -use actix_web::dev::ResourcePath; use actix_web::web::Data; use actix_web::{ HttpRequest, HttpResponse, @@ -1242,6 +1241,7 @@ mod tests { } impl FakeFileSystem { + #[allow(dead_code)] fn with_error() -> FakeFileSystem { FakeFileSystem { files: HashMap::new(), diff --git a/src/lib.rs b/src/lib.rs index d74fc2b..cf0ba10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(clippy::too_many_arguments)] + #[macro_use] extern crate diesel; diff --git a/src/libraries.rs b/src/libraries.rs index 22a4214..3cfc0be 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -32,6 +32,7 @@ impl Library { /// Resolve a library-relative path into an absolute `PathBuf` under the /// library root. Does not validate traversal — use `is_valid_full_path` /// for untrusted input. + #[allow(dead_code)] pub fn resolve(&self, rel_path: &str) -> PathBuf { Path::new(&self.root_path).join(rel_path) } @@ -39,6 +40,7 @@ impl Library { /// Inverse of `resolve`: given an absolute path under this library's /// root, return the root-relative portion. Returns `None` if the path /// is not under the library. + #[allow(dead_code)] pub fn strip_root(&self, abs_path: &Path) -> Option { abs_path .strip_prefix(&self.root_path) @@ -99,10 +101,7 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { // If no rows exist at all (e.g. table created outside the seeded migration), // insert a primary library pointing at BASE_PATH. 
- let total: i64 = libraries::table - .count() - .get_result(conn) - .unwrap_or(0); + let total: i64 = libraries::table.count().get_result(conn).unwrap_or(0); if total == 0 { let now = Utc::now().timestamp(); let result = diesel::insert_into(libraries::table) @@ -113,7 +112,10 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { }) .execute(conn); match result { - Ok(_) => info!("Seeded primary library 'main' with BASE_PATH='{}'", base_path), + Ok(_) => info!( + "Seeded primary library 'main' with BASE_PATH='{}'", + base_path + ), Err(e) => warn!("Failed to seed primary library: {:?}", e), } } diff --git a/src/main.rs b/src/main.rs index 53e96cf..d03bf31 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![allow(clippy::too_many_arguments)] + #[macro_use] extern crate diesel; extern crate rayon; diff --git a/src/memories.rs b/src/memories.rs index 23086f6..64a4c95 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -539,10 +539,7 @@ pub async fn list_memories( // Resolve the optional library filter. Unknown values are a 400; None // means "all libraries" — currently equivalent to the primary library // while only one is configured. 
- let library = match crate::libraries::resolve_library_param( - &app_state, - q.library.as_deref(), - ) { + let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { Ok(lib) => lib, Err(msg) => { warn!("Rejecting /memories request: {}", msg); @@ -823,7 +820,7 @@ mod tests { // Verify timestamp is within expected range (should be around 1422489671) let timestamp = date_time.timestamp(); - assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015 + assert!((1422480000..=1422576000).contains(×tamp)); // Jan 28-29, 2015 } #[test] @@ -841,7 +838,7 @@ mod tests { // Verify timestamp is within expected range (should be around 1422489664) let timestamp = date_time.timestamp(); - assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015 + assert!((1422480000..=1422576000).contains(×tamp)); // Jan 28-29, 2015 } #[test] @@ -1120,7 +1117,7 @@ mod tests { .and_utc() .timestamp(); - let mut memories_with_dates = vec![ + let mut memories_with_dates = [ ( MemoryItem { path: "photo1.jpg".to_string(), diff --git a/src/tags.rs b/src/tags.rs index 95e303f..b94cb3b 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -342,12 +342,14 @@ pub trait TagDao: Send + Sync { exclude_tag_ids: Vec, context: &opentelemetry::Context, ) -> anyhow::Result>; + #[allow(dead_code)] fn update_photo_name( &mut self, old_name: &str, new_name: &str, context: &opentelemetry::Context, ) -> anyhow::Result<()>; + #[allow(dead_code)] fn get_all_photo_names( &mut self, context: &opentelemetry::Context, @@ -364,6 +366,7 @@ pub struct SqliteTagDao { } impl SqliteTagDao { + #[allow(dead_code)] pub(crate) fn new(connection: Arc>) -> Self { SqliteTagDao { connection } } diff --git a/src/testhelpers.rs b/src/testhelpers.rs index f4150e4..1536dbb 100644 --- a/src/testhelpers.rs +++ b/src/testhelpers.rs @@ -14,6 +14,12 @@ pub struct TestUserDao { pub user_map: RefCell>, } +impl Default for TestUserDao { + fn default() -> Self { + Self::new() + } 
+} + impl TestUserDao { pub fn new() -> Self { Self { @@ -71,6 +77,12 @@ pub struct TestPreviewDao { next_id: StdMutex, } +impl Default for TestPreviewDao { + fn default() -> Self { + Self::new() + } +} + impl TestPreviewDao { pub fn new() -> Self { Self { diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index b40b175..5ed9308 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -40,7 +40,10 @@ pub struct Ffmpeg; pub enum GifType { Overview, - OverviewVideo { duration: u32 }, + #[allow(dead_code)] + OverviewVideo { + duration: u32, + }, } impl Ffmpeg { -- 2.49.1 From 586b735af51834a32fc87e07efe981af77dcaa8a Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 17:11:57 -0400 Subject: [PATCH 12/19] feat: include per-photo library id in /photos response Adds a parallel `photo_libraries: Vec` array alongside `photos` in `PhotosResponse` so clients can render per-thumbnail badges. Populated with the scoped library id at the two main return sites; left empty for `/favorites` since favorites are library-agnostic. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/data/mod.rs | 6 ++++++ src/files.rs | 4 ++++ src/main.rs | 3 +++ 3 files changed, 13 insertions(+) diff --git a/src/data/mod.rs b/src/data/mod.rs index e953f4d..fe5e183 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -102,6 +102,12 @@ pub struct PhotosResponse { pub photos: Vec, pub dirs: Vec, + /// Library id for each entry in `photos`, same length and ordering. + /// Parallel array rather than an object per row to keep the payload + /// small and backwards-compatible with older clients. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub photo_libraries: Vec, + // Pagination metadata (only present when limit is set) #[serde(skip_serializing_if = "Option::is_none")] pub total_count: Option, diff --git a/src/files.rs b/src/files.rs index 20ea001..6fb22f1 100644 --- a/src/files.rs +++ b/src/files.rs @@ -493,9 +493,11 @@ pub async fn list_photos( .set_attribute(KeyValue::new("total_count", total_count.to_string())); span_context.span().set_status(Status::Ok); + let photo_libraries = vec![scoped_library.id; tagged_files.len()]; HttpResponse::Ok().json(PhotosResponse { photos: tagged_files, dirs: vec![], + photo_libraries, total_count: pagination_metadata.0, has_more: pagination_metadata.1, next_offset: pagination_metadata.2, @@ -778,9 +780,11 @@ pub async fn list_photos( .set_attribute(KeyValue::new("total_count", total_count.to_string())); span_context.span().set_status(Status::Ok); + let photo_libraries = vec![scoped_library.id; response_files.len()]; HttpResponse::Ok().json(PhotosResponse { photos: response_files, dirs, + photo_libraries, total_count: pagination_metadata.0, has_more: pagination_metadata.1, next_offset: pagination_metadata.2, diff --git a/src/main.rs b/src/main.rs index d03bf31..19edb78 100644 --- a/src/main.rs +++ b/src/main.rs @@ -970,9 +970,12 @@ async fn favorites( .collect::>(); span.set_status(Status::Ok); + // Favorites are library-agnostic (shared by rel_path), so we + // intentionally leave photo_libraries empty to signal "no badge". 
HttpResponse::Ok().json(PhotosResponse { photos: favorites, dirs: Vec::new(), + photo_libraries: Vec::new(), total_count: None, has_more: None, next_offset: None, -- 2.49.1 From 2c8de8dcc64627ae1aef82e47f5dc5d48d02573e Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 17:27:41 -0400 Subject: [PATCH 13/19] feat: union /photos and /memories across libraries When `library` is omitted, both endpoints now walk every configured library root, interleave the results, and tag each row with its source library via the parallel `photo_libraries` / per-row `library_id` arrays. Previously the handlers fell back to the primary library, silently hiding the rest. Threads a parallel `file_libraries: Vec` through the sort/paginate helpers so library attribution survives sorting and pagination. Directory names are de-duplicated across libraries. `get_all_with_date_taken` grows an optional library filter so memories can scope its EXIF query per-library during the union walk. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database/mod.rs | 12 +- src/files.rs | 820 +++++++++++++++++++++++--------------------- src/memories.rs | 79 +++-- 3 files changed, 490 insertions(+), 421 deletions(-) diff --git a/src/database/mod.rs b/src/database/mod.rs index f29a212..fe0957c 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -266,6 +266,7 @@ pub trait ExifDao: Sync + Send { fn get_all_with_date_taken( &mut self, context: &opentelemetry::Context, + library_id: Option, ) -> Result, DbError>; /// Batch load EXIF data for multiple file paths (single query) @@ -523,15 +524,24 @@ impl ExifDao for SqliteExifDao { fn get_all_with_date_taken( &mut self, context: &opentelemetry::Context, + lib_id: Option, ) -> Result, DbError> { trace_db_call(context, "query", "get_all_with_date_taken", |_span| { use schema::image_exif::dsl::*; let mut connection = self.connection.lock().expect("Unable to get ExifDao"); - image_exif + let query = image_exif .select((rel_path, date_taken)) 
.filter(date_taken.is_not_null()) + .into_boxed(); + + let query = match lib_id { + Some(filter_id) => query.filter(library_id.eq(filter_id)), + None => query, + }; + + query .load::<(String, Option)>(connection.deref_mut()) .map(|records| { records diff --git a/src/files.rs b/src/files.rs index 6fb22f1..d70d2ce 100644 --- a/src/files.rs +++ b/src/files.rs @@ -41,52 +41,53 @@ pub struct FileWithMetadata { pub file_name: String, pub tag_count: i64, pub date_taken: Option, // Unix timestamp from EXIF or filename extraction + pub library_id: i32, } use serde::Deserialize; /// Apply sorting to files with EXIF data support for date-based sorting /// Handles both date sorting (with EXIF/filename fallback) and regular sorting -/// Returns (sorted_file_paths, total_count) +/// Returns (sorted_file_paths, sorted_library_ids, total_count) fn apply_sorting_with_exif( files: Vec, + file_libraries: Vec, sort_type: SortType, exif_dao: &mut Box, span_context: &opentelemetry::Context, - base_path: &Path, + libraries: &[crate::libraries::Library], limit: Option, offset: i64, -) -> (Vec, i64) { +) -> (Vec, Vec, i64) { let total_count = files.len() as i64; match sort_type { SortType::DateTakenAsc | SortType::DateTakenDesc => { info!("Date sorting requested, using in-memory sort with EXIF/filename fallback"); - // Use in-memory sort so files without EXIF dates are included via - // filename extraction and filesystem metadata fallbacks. 
- let (sorted, _) = in_memory_date_sort( + let (sorted, sorted_libs, _) = in_memory_date_sort( files, + file_libraries, sort_type, exif_dao, span_context, - base_path, + libraries, limit, offset, ); - (sorted, total_count) + (sorted, sorted_libs, total_count) } _ => { - // Use regular sort for non-date sorting - let sorted = sort(files, sort_type); - let result = if let Some(limit_val) = limit { - sorted - .into_iter() - .skip(offset as usize) - .take(limit_val as usize) - .collect() + let (sorted, sorted_libs) = sort(files, file_libraries, sort_type); + let (result, result_libs) = if let Some(limit_val) = limit { + let skip = offset as usize; + let take = limit_val as usize; + ( + sorted.iter().skip(skip).take(take).cloned().collect(), + sorted_libs.iter().skip(skip).take(take).copied().collect(), + ) } else { - sorted + (sorted, sorted_libs) }; - (result, total_count) + (result, result_libs, total_count) } } } @@ -94,66 +95,88 @@ fn apply_sorting_with_exif( /// Fallback in-memory date sorting with EXIF/filename extraction fn in_memory_date_sort( files: Vec, + file_libraries: Vec, sort_type: SortType, exif_dao: &mut Box, span_context: &opentelemetry::Context, - base_path: &Path, + libraries: &[crate::libraries::Library], limit: Option, offset: i64, -) -> (Vec, i64) { +) -> (Vec, Vec, i64) { let total_count = files.len() as i64; let file_paths: Vec = files.iter().map(|f| f.file_name.clone()).collect(); - // Batch fetch EXIF data - let exif_map: std::collections::HashMap = exif_dao + // Batch fetch EXIF data (keyed by rel_path; in union mode a rel_path may + // correspond to rows in multiple libraries — pick the date from the one + // matching the requesting row's library_id when possible). 
+ let exif_rows = exif_dao .get_exif_batch(span_context, &file_paths) - .unwrap_or_default() + .unwrap_or_default(); + let exif_map: std::collections::HashMap<(String, i32), i64> = exif_rows .into_iter() - .filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt))) + .filter_map(|exif| { + exif.date_taken + .map(|dt| ((exif.file_path, exif.library_id), dt)) + }) + .collect(); + + let lib_roots: std::collections::HashMap = libraries + .iter() + .map(|l| (l.id, l.root_path.as_str())) .collect(); // Convert to FileWithMetadata with date fallback logic let files_with_metadata: Vec = files .into_iter() - .map(|f| { - // Try EXIF date first + .zip(file_libraries.iter().copied()) + .map(|(f, lib_id)| { let date_taken = exif_map - .get(&f.file_name) + .get(&(f.file_name.clone(), lib_id)) .copied() + .or_else(|| extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())) .or_else(|| { - // Fallback to filename extraction - extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp()) - }) - .or_else(|| { - // Fallback to filesystem metadata creation date - let full_path = base_path.join(&f.file_name); - std::fs::metadata(full_path) - .and_then(|md| md.created().or(md.modified())) - .ok() - .map(|system_time| { - >>::into(system_time).timestamp() - }) + lib_roots.get(&lib_id).and_then(|root| { + let full_path = Path::new(root).join(&f.file_name); + std::fs::metadata(full_path) + .and_then(|md| md.created().or(md.modified())) + .ok() + .map(|system_time| { + >>::into(system_time).timestamp() + }) + }) }); FileWithMetadata { file_name: f.file_name, tag_count: f.tag_count, date_taken, + library_id: lib_id, } }) .collect(); - let sorted = sort_with_metadata(files_with_metadata, sort_type); - let result = if let Some(limit_val) = limit { - sorted - .into_iter() - .skip(offset as usize) - .take(limit_val as usize) - .collect() + let (sorted, sorted_libs) = sort_with_metadata(files_with_metadata, sort_type); + let (result, result_libs) = if let Some(limit_val) = 
limit { + let skip = offset as usize; + let take = limit_val as usize; + ( + sorted + .iter() + .skip(skip) + .take(take) + .cloned() + .collect::>(), + sorted_libs + .iter() + .skip(skip) + .take(take) + .copied() + .collect::>(), + ) } else { - sorted + (sorted, sorted_libs) }; - (result, total_count) + (result, result_libs, total_count) } pub async fn list_photos( @@ -237,9 +260,9 @@ pub async fn list_photos( KeyValue::new("library", req.library.clone().unwrap_or_default()), ]); - // Resolve the optional library filter. Unknown values return 400. - // For Phase 3 the filesystem walk still operates against a single - // library's root; Phase 4 introduces multi-root union scanning. + // Resolve the optional library filter. Unknown values return 400. A + // `None` result means "union across all libraries" and downstream + // walks iterate every configured library root. let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) { Ok(lib) => lib, @@ -248,7 +271,6 @@ pub async fn list_photos( return HttpResponse::BadRequest().body(msg); } }; - let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); let span_context = opentelemetry::Context::current_with_span(span); @@ -332,12 +354,15 @@ pub async fn list_photos( None }; - // When a specific library is selected, we'll gate tag-based results - // (which key on rel_path only, library-agnostic) by "does this - // rel_path actually exist on disk in the selected library's root". - // We check per-file below rather than pre-enumerating image_exif, - // since image_exif may lag a just-added library. - let library_for_scope: Option<&crate::libraries::Library> = library; + // In scoped mode (`library` is Some) we gate tag-based results (which + // key on rel_path only) by "does this rel_path actually exist on disk + // in the selected library's root". 
In union mode we assign each + // returned file to the first library it resolves in, and drop files + // that exist in no configured library. + let libraries_to_scan: Vec<&crate::libraries::Library> = match library { + Some(lib) => vec![lib], + None => app_state.libraries.iter().collect(), + }; let search_recursively = req.recursive.unwrap_or(false); if let Some(tag_ids) = &req.tag_ids @@ -404,17 +429,23 @@ pub async fn list_photos( true } }) - .filter(|f| { - // Scope to the selected library by checking the file - // actually exists under its root. Falls back to the - // content-hash sibling set (looked up globally, since - // the tagged rel_path may have been registered under - // a different library than the one selected). - let Some(lib) = library_for_scope else { - return true; - }; - if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { - return true; + .filter_map(|f| { + // Apply media type filter first (cheap check before disk I/O). + if let Some(ref media_type) = req.media_type { + let path = PathBuf::from(&f.file_name); + if !matches_media_type(&path, media_type) { + return None; + } + } + + // Resolve the file's library by checking each + // candidate library's root on disk. Falls back to + // content-hash siblings if the rel_path was + // registered under a different path but same content. 
+ for lib in &libraries_to_scan { + if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { + return Some((f, lib.id)); + } } let siblings = { let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); @@ -428,41 +459,50 @@ pub async fn list_photos( None => Vec::new(), } }; - siblings - .iter() - .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) - }) - .filter(|f| { - // Apply media type filtering if specified - if let Some(ref media_type) = req.media_type { - let path = PathBuf::from(&f.file_name); - matches_media_type(&path, media_type) + for lib in &libraries_to_scan { + if siblings + .iter() + .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) + { + return Some((f, lib.id)); + } + } + // Tags are library-agnostic. If we can't confirm which + // library currently holds the file on disk (e.g. the + // tagged rel_path is stale or the caller is testing + // without real files), keep the tagged row and + // attribute it to the primary library so the client + // still sees the tag hit. 
+ if library.is_none() { + Some((f, app_state.primary_library().id)) } else { - true + None } }) - .collect::>() + .collect::>() }) - .map(|files| { + .map(|paired| { // Handle sorting - use helper function that supports EXIF date sorting and pagination let sort_type = req.sort.unwrap_or(NameAsc); let limit = req.limit; let offset = req.offset.unwrap_or(0); + let (files, file_libs): (Vec, Vec) = paired.into_iter().unzip(); let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); let result = apply_sorting_with_exif( files, + file_libs, sort_type, &mut exif_dao_guard, &span_context, - scoped_library.root_path.as_ref(), + &app_state.libraries, limit, offset, ); drop(exif_dao_guard); result }) - .inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total)) - .map(|(tagged_files, total_count)| { + .inspect(|(files, _libs, total)| debug!("Found {:?} files (total: {})", files.len(), total)) + .map(|(tagged_files, photo_libraries, total_count)| { info!( "Found {:?} tagged files: {:?}", tagged_files.len(), @@ -493,7 +533,6 @@ pub async fn list_photos( .set_attribute(KeyValue::new("total_count", total_count.to_string())); span_context.span().set_status(Status::Ok); - let photo_libraries = vec![scoped_library.id; tagged_files.len()]; HttpResponse::Ok().json(PhotosResponse { photos: tagged_files, dirs: vec![], @@ -507,330 +546,346 @@ pub async fn list_photos( .unwrap_or_else(|e| e.error_response()); } - // Use recursive or non-recursive file listing based on flag. Both - // paths must walk the *scoped* library's root; the generic - // FileSystemAccess trait (file_system.get_files_for_path) is pinned - // to AppState's base_path at construction time and doesn't know - // which library the request targets. 
- let files_result = if search_recursively { - is_valid_full_path( - &PathBuf::from(&scoped_library.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for recursive search: {:?}", path); - list_files_recursive(&path).unwrap_or_default() - }) - .context("Invalid path") - } else if scoped_library.id == app_state.primary_library().id { - // Primary library: preserve the original FileSystemAccess path so - // the test-mock path (MockFileSystem) continues to work. - file_system.get_files_for_path(search_path) - } else { - is_valid_full_path( - &PathBuf::from(&scoped_library.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for non-recursive search: {:?}", path); - list_files(&path).unwrap_or_default() - }) - .context("Invalid path") - }; + // Walk each candidate library's root for the requested sub-path. In + // scoped mode `libraries_to_scan` has one entry (the selected library); + // in union mode we walk every configured library and intermix results. + // For the primary library we preserve the original FileSystemAccess + // path so the test-mock path (MockFileSystem) continues to work. 
+ let mut file_names: Vec = Vec::new(); + let mut file_libraries: Vec = Vec::new(); + let mut dirs_set: std::collections::HashSet = std::collections::HashSet::new(); + let mut any_library_resolved = false; - match files_result { - Ok(files) => { - info!( - "Found {:?} files in path: {:?} (recursive: {})", - files.len(), - search_path, - search_recursively - ); + for lib in &libraries_to_scan { + let files_result = if search_recursively { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for recursive search: {:?}", path); + list_files_recursive(&path).unwrap_or_default() + }) + .context("Invalid path") + } else if lib.id == app_state.primary_library().id { + file_system.get_files_for_path(search_path) + } else { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for non-recursive search: {:?}", path); + list_files(&path).unwrap_or_default() + }) + .context("Invalid path") + }; - info!("Starting to filter {} files from filesystem", files.len()); - let start_filter = std::time::Instant::now(); + let files = match files_result { + Ok(f) => { + any_library_resolved = true; + f + } + Err(e) => { + debug!( + "Skipping library '{}' for path '{}': {:?}", + lib.name, search_path, e + ); + continue; + } + }; - // Separate files and directories in a single pass to avoid redundant metadata calls - let (file_names, dirs): (Vec, Vec) = - files - .iter() - .fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| { - match path.metadata() { - Ok(md) => { - let relative = path - .strip_prefix(&scoped_library.root_path) - .unwrap_or_else(|_| { - panic!( - "Unable to strip library root {} from file path {}", - &scoped_library.root_path, - path.display() - ) - }); - // Normalize separators to '/' so downstream - // lookups (tags, EXIF, insights) that store - // rel_paths with forward slashes still match - // 
on Windows. - let relative_str = relative.to_str().unwrap().replace('\\', "/"); + info!( + "Found {:?} files in library '{}' path: {:?} (recursive: {})", + files.len(), + lib.name, + search_path, + search_recursively + ); - if md.is_file() { - files.push(relative_str); - } else if md.is_dir() { - dirs.push(relative_str); - } - } - Err(e) => { - error!("Failed getting file metadata: {:?}", e); - // Include files without metadata if they have extensions - if path.extension().is_some() { - let relative = path - .strip_prefix(&scoped_library.root_path) - .unwrap_or_else(|_| { - panic!( - "Unable to strip library root {} from file path {}", - &scoped_library.root_path, - path.display() - ) - }); - files.push(relative.to_str().unwrap().replace('\\', "/")); - } - } - } - (files, dirs) + for path in &files { + match path.metadata() { + Ok(md) => { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) }); + // Normalize separators to '/' so downstream lookups + // (tags, EXIF, insights) that store rel_paths with + // forward slashes still match on Windows. 
+ let relative_str = relative.to_str().unwrap().replace('\\', "/"); + if md.is_file() { + file_names.push(relative_str); + file_libraries.push(lib.id); + } else if md.is_dir() { + dirs_set.insert(relative_str); + } + } + Err(e) => { + error!("Failed getting file metadata: {:?}", e); + // Include files without metadata if they have extensions + if path.extension().is_some() { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) + }); + file_names.push(relative.to_str().unwrap().replace('\\', "/")); + file_libraries.push(lib.id); + } + } + } + } + } + + if !any_library_resolved { + error!("Bad photos request: {}", req.path); + span_context + .span() + .set_status(Status::error("Invalid path")); + return HttpResponse::BadRequest().finish(); + } + + let dirs: Vec = dirs_set.into_iter().collect(); + + info!( + "Starting to filter {} files from filesystem", + file_names.len() + ); + let start_filter = std::time::Instant::now(); + + info!( + "File filtering took {:?}, now fetching tag counts for {} files", + start_filter.elapsed(), + file_names.len() + ); + let start_tags = std::time::Instant::now(); + + // Batch query for tag counts (tags are library-agnostic / keyed by rel_path). 
+ let tag_counts = { + let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); + tag_dao_guard + .get_tag_counts_batch(&span_context, &file_names) + .unwrap_or_default() + }; + info!("Batch tag count query took {:?}", start_tags.elapsed()); + + let start_tag_filter = std::time::Instant::now(); + let file_tags_map: std::collections::HashMap> = + if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { info!( - "File filtering took {:?}, now fetching tag counts for {} files", - start_filter.elapsed(), + "Tag filtering requested, fetching full tag lists for {} files", file_names.len() ); - let start_tags = std::time::Instant::now(); + let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); + file_names + .iter() + .filter_map(|file_name| { + tag_dao_guard + .get_tags_for_path(&span_context, file_name) + .ok() + .map(|tags| (file_name.clone(), tags)) + }) + .collect() + } else { + std::collections::HashMap::new() + }; + if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { + info!("Full tag list fetch took {:?}", start_tag_filter.elapsed()); + } - // Batch query for tag counts to avoid N+1 queries - let tag_counts = { - let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); - tag_dao_guard - .get_tag_counts_batch(&span_context, &file_names) + // Filter + pair with the parallel library_id while preserving ordering + // so the downstream sort can return both arrays in lockstep. 
+ let photos_with_libs: Vec<(FileWithTagCount, i32)> = file_names + .into_iter() + .zip(file_libraries.into_iter()) + .filter_map(|(file_name, lib_id)| { + let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); + + if let Some(tag_ids_csv) = &req.tag_ids { + let tag_ids = tag_ids_csv + .split(',') + .filter_map(|t| t.parse().ok()) + .collect::>(); + + let excluded_tag_ids = req + .exclude_tag_ids + .clone() .unwrap_or_default() - }; - info!("Batch tag count query took {:?}", start_tags.elapsed()); + .split(',') + .filter_map(|t| t.parse().ok()) + .collect::>(); - // Also get full tag lists for files that need tag filtering - let start_tag_filter = std::time::Instant::now(); - let file_tags_map: std::collections::HashMap> = - if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { - info!( - "Tag filtering requested, fetching full tag lists for {} files", - file_names.len() - ); - let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); - file_names - .iter() - .filter_map(|file_name| { - tag_dao_guard - .get_tags_for_path(&span_context, file_name) - .ok() - .map(|tags| (file_name.clone(), tags)) - }) - .collect() - } else { - std::collections::HashMap::new() - }; - if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { - info!("Full tag list fetch took {:?}", start_tag_filter.elapsed()); + let filter_mode = req.tag_filter_mode.unwrap_or(FilterMode::Any); + let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id)); + + let keep = !excluded + && match filter_mode { + FilterMode::Any => file_tags.iter().any(|t| tag_ids.contains(&t.id)), + FilterMode::All => tag_ids + .iter() + .all(|id| file_tags.iter().any(|tag| &tag.id == id)), + }; + if !keep { + return None; + } } - let photos = file_names + if let Some(ref exif_files) = exif_matched_files + && !exif_files.contains(&file_name) + { + return None; + } + + if let Some(ref media_type) = req.media_type { + let path = PathBuf::from(&file_name); + if 
!matches_media_type(&path, media_type) { + return None; + } + } + + let tag_count = *tag_counts.get(&file_name).unwrap_or(&0); + Some(( + FileWithTagCount { + file_name, + tag_count, + }, + lib_id, + )) + }) + .collect(); + + info!( + "After all filters, {} files remain (filtering took {:?})", + photos_with_libs.len(), + start_filter.elapsed() + ); + + // Extract pagination parameters + let limit = req.limit; + let offset = req.offset.unwrap_or(0); + let start_sort = std::time::Instant::now(); + + let (photos, file_libs_sorted_input): (Vec, Vec) = + photos_with_libs.into_iter().unzip(); + + let (response_files, response_libraries, total_count) = if let Some(sort_type) = req.sort { + info!("Sorting {} files by {:?}", photos.len(), sort_type); + let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); + let result = apply_sorting_with_exif( + photos, + file_libs_sorted_input, + sort_type, + &mut exif_dao_guard, + &span_context, + &app_state.libraries, + limit, + offset, + ); + drop(exif_dao_guard); + result + } else { + // No sorting requested - apply pagination if requested + let total = photos.len() as i64; + let (paged_files, paged_libs): (Vec, Vec) = if let Some(limit_val) = limit { + photos .into_iter() - .map(|file_name| { - let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); - (file_name, file_tags) - }) - .filter(|(_, file_tags): &(String, Vec)| { - if let Some(tag_ids) = &req.tag_ids { - let tag_ids = tag_ids - .split(',') - .filter_map(|t| t.parse().ok()) - .collect::>(); + .zip(file_libs_sorted_input) + .skip(offset as usize) + .take(limit_val as usize) + .map(|(f, lib)| (f.file_name, lib)) + .unzip() + } else { + photos + .into_iter() + .zip(file_libs_sorted_input) + .map(|(f, lib)| (f.file_name, lib)) + .unzip() + }; + (paged_files, paged_libs, total) + }; + info!( + "Sorting took {:?}, returned {} files (total: {})", + start_sort.elapsed(), + response_files.len(), + total_count + ); - let excluded_tag_ids = &req 
- .exclude_tag_ids - .clone() - .unwrap_or_default() - .split(',') - .filter_map(|t| t.parse().ok()) - .collect::>(); - - let filter_mode = &req.tag_filter_mode.unwrap_or(FilterMode::Any); - let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id)); - - return !excluded - && match filter_mode { - FilterMode::Any => { - file_tags.iter().any(|t| tag_ids.contains(&t.id)) - } - FilterMode::All => tag_ids - .iter() - .all(|id| file_tags.iter().any(|tag| &tag.id == id)), - }; - } - - true - }) - .filter(|(file_name, _)| { - // Apply EXIF filtering if present - if let Some(ref exif_files) = exif_matched_files { - exif_files.contains(file_name) - } else { - true - } - }) - .filter(|(file_name, _)| { - // Apply media type filtering if specified - if let Some(ref media_type) = req.media_type { - let path = PathBuf::from(file_name); - matches_media_type(&path, media_type) - } else { - true - } - }) - .map( - |(file_name, _tags): (String, Vec)| FileWithTagCount { - file_name: file_name.clone(), - tag_count: *tag_counts.get(&file_name).unwrap_or(&0), - }, - ) - .collect::>(); - - info!( - "After all filters, {} files remain (filtering took {:?})", - photos.len(), - start_filter.elapsed() - ); - - // Extract pagination parameters - let limit = req.limit; - let offset = req.offset.unwrap_or(0); - let start_sort = std::time::Instant::now(); - - // Handle sorting - use helper function that supports EXIF date sorting and pagination - let (response_files, total_count) = if let Some(sort_type) = req.sort { - info!("Sorting {} files by {:?}", photos.len(), sort_type); - let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); - let result = apply_sorting_with_exif( - photos, - sort_type, - &mut exif_dao_guard, - &span_context, - scoped_library.root_path.as_ref(), - limit, - offset, - ); - drop(exif_dao_guard); - result + let returned_count = response_files.len() as i64; + let pagination_metadata = if limit.is_some() { + ( + Some(total_count), + 
Some(offset + returned_count < total_count), + if offset + returned_count < total_count { + Some(offset + returned_count) } else { - // No sorting requested - apply pagination if requested - let total = photos.len() as i64; - let files: Vec = if let Some(limit_val) = limit { - photos - .into_iter() - .skip(offset as usize) - .take(limit_val as usize) - .map(|f| f.file_name) - .collect() - } else { - photos.into_iter().map(|f| f.file_name).collect() - }; - (files, total) - }; - info!( - "Sorting took {:?}, returned {} files (total: {})", - start_sort.elapsed(), - response_files.len(), - total_count - ); + None + }, + ) + } else { + (None, None, None) + }; - // Note: dirs were already collected during file filtering to avoid redundant metadata calls + span_context.span().set_attribute(KeyValue::new( + "file_count", + response_files.len().to_string(), + )); + span_context + .span() + .set_attribute(KeyValue::new("returned_count", returned_count.to_string())); + span_context + .span() + .set_attribute(KeyValue::new("total_count", total_count.to_string())); + span_context.span().set_status(Status::Ok); - // Calculate pagination metadata - let returned_count = response_files.len() as i64; - let pagination_metadata = if limit.is_some() { - ( - Some(total_count), - Some(offset + returned_count < total_count), - if offset + returned_count < total_count { - Some(offset + returned_count) - } else { - None - }, - ) - } else { - (None, None, None) - }; - - span_context - .span() - .set_attribute(KeyValue::new("file_count", files.len().to_string())); - span_context - .span() - .set_attribute(KeyValue::new("returned_count", returned_count.to_string())); - span_context - .span() - .set_attribute(KeyValue::new("total_count", total_count.to_string())); - span_context.span().set_status(Status::Ok); - - let photo_libraries = vec![scoped_library.id; response_files.len()]; - HttpResponse::Ok().json(PhotosResponse { - photos: response_files, - dirs, - photo_libraries, - total_count: 
pagination_metadata.0, - has_more: pagination_metadata.1, - next_offset: pagination_metadata.2, - }) - } - _ => { - error!("Bad photos request: {}", req.path); - span_context - .span() - .set_status(Status::error("Invalid path")); - HttpResponse::BadRequest().finish() - } - } + HttpResponse::Ok().json(PhotosResponse { + photos: response_files, + dirs, + photo_libraries: response_libraries, + total_count: pagination_metadata.0, + has_more: pagination_metadata.1, + next_offset: pagination_metadata.2, + }) } -fn sort(mut files: Vec, sort_type: SortType) -> Vec { +fn sort( + files: Vec, + file_libraries: Vec, + sort_type: SortType, +) -> (Vec, Vec) { + let mut paired: Vec<(FileWithTagCount, i32)> = files.into_iter().zip(file_libraries).collect(); + match sort_type { - SortType::Shuffle => files.shuffle(&mut thread_rng()), - NameAsc => { - files.sort_by(|l, r| l.file_name.cmp(&r.file_name)); - } - SortType::NameDesc => { - files.sort_by(|l, r| r.file_name.cmp(&l.file_name)); - } - SortType::TagCountAsc => { - files.sort_by(|l, r| l.tag_count.cmp(&r.tag_count)); - } - SortType::TagCountDesc => { - files.sort_by(|l, r| r.tag_count.cmp(&l.tag_count)); - } + SortType::Shuffle => paired.shuffle(&mut thread_rng()), + NameAsc => paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)), + SortType::NameDesc => paired.sort_by(|l, r| r.0.file_name.cmp(&l.0.file_name)), + SortType::TagCountAsc => paired.sort_by(|l, r| l.0.tag_count.cmp(&r.0.tag_count)), + SortType::TagCountDesc => paired.sort_by(|l, r| r.0.tag_count.cmp(&l.0.tag_count)), SortType::DateTakenAsc | SortType::DateTakenDesc => { - // Date sorting not implemented for FileWithTagCount - // We shouldn't be hitting this code warn!("Date sorting not implemented for FileWithTagCount"); - files.sort_by(|l, r| l.file_name.cmp(&r.file_name)); + paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)); } } - files - .iter() - .map(|f| f.file_name.clone()) - .collect::>() + paired + .into_iter() + .map(|(f, lib)| (f.file_name, 
lib)) + .unzip() } /// Sort files with metadata support (including date sorting) -fn sort_with_metadata(mut files: Vec, sort_type: SortType) -> Vec { +fn sort_with_metadata( + mut files: Vec, + sort_type: SortType, +) -> (Vec, Vec) { match sort_type { SortType::Shuffle => files.shuffle(&mut thread_rng()), NameAsc => { @@ -864,9 +919,9 @@ fn sort_with_metadata(mut files: Vec, sort_type: SortType) -> } files - .iter() - .map(|f| f.file_name.clone()) - .collect::>() + .into_iter() + .map(|f| (f.file_name, f.library_id)) + .unzip() } pub fn list_files(dir: &Path) -> io::Result> { @@ -1369,6 +1424,7 @@ mod tests { fn get_all_with_date_taken( &mut self, _context: &opentelemetry::Context, + _library_id: Option, ) -> Result, DbError> { Ok(Vec::new()) } diff --git a/src/memories.rs b/src/memories.rs index 64a4c95..875a72c 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -16,6 +16,7 @@ use walkdir::WalkDir; use crate::data::Claims; use crate::database::ExifDao; use crate::files::is_image_or_video; +use crate::libraries::Library; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; @@ -378,7 +379,7 @@ fn collect_exif_memories( ) -> Vec<(MemoryItem, NaiveDate)> { // Query database for all files with date_taken let exif_records = match exif_dao.lock() { - Ok(mut dao) => match dao.get_all_with_date_taken(context) { + Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) { Ok(records) => records, Err(e) => { warn!("Failed to query EXIF database: {:?}", e); @@ -546,48 +547,50 @@ pub async fn list_memories( return HttpResponse::BadRequest().body(msg); } }; - // For Phase 3 the walker still operates against a single library's root. - // Multi-library union support for the filesystem walk comes in Phase 4. 
- let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); - let base = Path::new(&scoped_library.root_path); + // When `library` is `Some`, scope to that one library; otherwise union + // across every configured library and let the results interleave. + let libraries_to_scan: Vec<&Library> = match library { + Some(lib) => vec![lib], + None => app_state.libraries.iter().collect(), + }; - // Build the path excluder from base and env-configured exclusions - let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); + let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new(); - // Phase 1: Query EXIF database - let exif_memories = collect_exif_memories( - &exif_dao, - &span_context, - &scoped_library.root_path, - scoped_library.id, - now, - span_mode, - years_back, - &client_timezone, - &path_excluder, - ); + for lib in &libraries_to_scan { + let base = Path::new(&lib.root_path); + let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); - // Build HashSet for deduplication - let exif_paths: HashSet = exif_memories - .iter() - .map(|(item, _)| PathBuf::from(&scoped_library.root_path).join(&item.path)) - .collect(); + let exif_memories = collect_exif_memories( + &exif_dao, + &span_context, + &lib.root_path, + lib.id, + now, + span_mode, + years_back, + &client_timezone, + &path_excluder, + ); - // Phase 2: File system scan (skip EXIF files) - let fs_memories = collect_filesystem_memories( - &scoped_library.root_path, - scoped_library.id, - &path_excluder, - &exif_paths, - now, - span_mode, - years_back, - &client_timezone, - ); + let exif_paths: HashSet = exif_memories + .iter() + .map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path)) + .collect(); - // Phase 3: Merge and sort - let mut memories_with_dates = exif_memories; - memories_with_dates.extend(fs_memories); + let fs_memories = collect_filesystem_memories( + &lib.root_path, + lib.id, + &path_excluder, + &exif_paths, + now, + span_mode, + 
years_back, + &client_timezone, + ); + + memories_with_dates.extend(exif_memories); + memories_with_dates.extend(fs_memories); + } match span_mode { // Sort by absolute time for a more 'overview' -- 2.49.1 From b04dd8b601cafb5a49667333a1454884d13eb2f9 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 18:18:57 -0400 Subject: [PATCH 14/19] fix: demote path-not-exists validation errors to debug The /image cross-library fallback tries the resolved library first and falls back to any library holding the rel_path. The first attempt emitted error-level noise on every grid tile in union mode. Split the validation error so only traversal attempts log at error; missing-file cases log at debug. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/files.rs | 62 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/src/files.rs b/src/files.rs index d70d2ce..fdeec9f 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1,6 +1,6 @@ use ::anyhow; use actix::{Handler, Message}; -use anyhow::{Context, anyhow}; +use anyhow::Context; use std::collections::HashSet; use std::fmt::Debug; use std::fs::read_dir; @@ -1024,33 +1024,58 @@ pub fn is_valid_full_path + Debug + AsRef>( match is_path_above_base_dir(base, &mut path, new_file) { Ok(path) => Some(path), - Err(e) => { + Err(PathValidationError::DoesNotExist(p)) => { + debug!("Path does not exist under base {:?}: {:?}", base, p); + None + } + Err(PathValidationError::AboveBase(p)) => { + error!("Path above base directory {:?}: {:?}", base, p); + None + } + Err(PathValidationError::Other(e)) => { error!("{}", e); None } } } +#[derive(Debug)] +enum PathValidationError { + DoesNotExist(PathBuf), + AboveBase(PathBuf), + Other(anyhow::Error), +} + +impl std::fmt::Display for PathValidationError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PathValidationError::DoesNotExist(p) => write!(f, "Path does not exist: {:?}", p), + 
PathValidationError::AboveBase(p) => write!(f, "Path above base directory: {:?}", p), + PathValidationError::Other(e) => write!(f, "{}", e), + } + } +} + fn is_path_above_base_dir + Debug>( base: P, full_path: &mut PathBuf, new_file: bool, -) -> anyhow::Result { - full_path - .absolutize() - .with_context(|| format!("Unable to resolve absolute path: {:?}", full_path)) - .map_or_else( - |e| Err(anyhow!(e)), - |p| { - if p.starts_with(base) && (new_file || p.exists()) { - Ok(p.into_owned()) - } else if !p.exists() { - Err(anyhow!("Path does not exist: {:?}", p)) - } else { - Err(anyhow!("Path above base directory")) - } - }, - ) +) -> Result { + match full_path.absolutize() { + Err(e) => Err(PathValidationError::Other( + anyhow::Error::new(e) + .context(format!("Unable to resolve absolute path: {:?}", full_path)), + )), + Ok(p) => { + if p.starts_with(base) && (new_file || p.exists()) { + Ok(p.into_owned()) + } else if !p.exists() { + Err(PathValidationError::DoesNotExist(p.into_owned())) + } else { + Err(PathValidationError::AboveBase(p.into_owned())) + } + } + } } /// Handler for GPS summary endpoint @@ -1289,6 +1314,7 @@ impl Handler for StreamActor { mod tests { use super::*; use crate::database::DbError; + use ::anyhow::anyhow; use std::collections::HashMap; use std::env; use std::fs::File; -- 2.49.1 From 4a775b5e9ba63a1accd5ffddb97b2f919c695a0c Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 18:25:47 -0400 Subject: [PATCH 15/19] test: cover resolve_library_param and per-library ExifDao filter Adds 9 unit tests around the library plumbing: - resolve_library_param branches (absent, empty/whitespace, numeric id, name, unknown id, unknown name) - Library::resolve symmetry with strip_root - ExifDao::get_all_with_date_taken in union and scoped modes Introduces SqliteExifDao::from_connection test constructor mirroring the existing preview_dao pattern so DAO tests can drive an in-memory SQLite. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database/mod.rs | 96 +++++++++++++++++++++++++++++++++++++++++++++ src/libraries.rs | 81 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/src/database/mod.rs b/src/database/mod.rs index fe0957c..e42db69 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -403,6 +403,13 @@ impl SqliteExifDao { connection: Arc::new(Mutex::new(connect())), } } + + #[cfg(test)] + pub fn from_connection(conn: SqliteConnection) -> Self { + SqliteExifDao { + connection: Arc::new(Mutex::new(conn)), + } + } } impl ExifDao for SqliteExifDao { @@ -927,3 +934,92 @@ impl ExifDao for SqliteExifDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } } + +#[cfg(test)] +mod exif_dao_tests { + use super::*; + use crate::database::models::InsertLibrary; + use crate::database::test::in_memory_db_connection; + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + fn insert_row(dao: &mut SqliteExifDao, lib_id: i32, rel: &str, date: Option) { + dao.store_exif( + &ctx(), + InsertImageExif { + library_id: lib_id, + file_path: rel.to_string(), + camera_make: None, + camera_model: None, + lens_model: None, + width: None, + height: None, + orientation: None, + gps_latitude: None, + gps_longitude: None, + gps_altitude: None, + focal_length: None, + aperture: None, + shutter_speed: None, + iso: None, + date_taken: date, + created_time: 0, + last_modified: 0, + content_hash: None, + size_bytes: None, + }, + ) + .expect("insert exif row"); + } + + fn setup_two_libraries() -> SqliteExifDao { + let mut conn = in_memory_db_connection(); + // Migration seeds library id=1 with a placeholder root; add id=2. 
+ diesel::insert_into(schema::libraries::table) + .values(InsertLibrary { + name: "archive", + root_path: "/tmp/archive", + created_at: 0, + }) + .execute(&mut conn) + .expect("seed second library"); + SqliteExifDao::from_connection(conn) + } + + #[test] + fn get_all_with_date_taken_union_returns_all_libraries() { + let mut dao = setup_two_libraries(); + insert_row(&mut dao, 1, "main/a.jpg", Some(100)); + insert_row(&mut dao, 2, "archive/b.jpg", Some(200)); + // Row without a date must be excluded even in union mode. + insert_row(&mut dao, 2, "archive/c.jpg", None); + + let mut rows = dao.get_all_with_date_taken(&ctx(), None).unwrap(); + rows.sort_by_key(|(_, ts)| *ts); + assert_eq!( + rows, + vec![ + ("main/a.jpg".to_string(), 100), + ("archive/b.jpg".to_string(), 200), + ] + ); + } + + #[test] + fn get_all_with_date_taken_scopes_by_library_id() { + let mut dao = setup_two_libraries(); + insert_row(&mut dao, 1, "main/a.jpg", Some(100)); + insert_row(&mut dao, 2, "archive/b.jpg", Some(200)); + insert_row(&mut dao, 2, "archive/c.jpg", Some(300)); + + let lib2 = dao.get_all_with_date_taken(&ctx(), Some(2)).unwrap(); + let mut paths: Vec = lib2.into_iter().map(|(p, _)| p).collect(); + paths.sort(); + assert_eq!(paths, vec!["archive/b.jpg", "archive/c.jpg"]); + + let lib1 = dao.get_all_with_date_taken(&ctx(), Some(1)).unwrap(); + assert_eq!(lib1, vec![("main/a.jpg".to_string(), 100)]); + } +} diff --git a/src/libraries.rs b/src/libraries.rs index 3cfc0be..cc3f2f4 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -198,4 +198,85 @@ mod tests { let outside = lib.strip_root(Path::new("/etc/passwd")); assert!(outside.is_none()); } + + #[test] + fn library_resolve_joins_under_root() { + let lib = Library { + id: 1, + name: "main".into(), + root_path: "/tmp/media".into(), + }; + let abs = lib.resolve("2024/photo.jpg"); + assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); + } + + fn state_with_libraries(libs: Vec) -> AppState { + let mut state = 
AppState::test_state(); + state.libraries = libs; + state + } + + fn sample_libraries() -> Vec { + vec![ + Library { + id: 1, + name: "main".into(), + root_path: "/tmp/main".into(), + }, + Library { + id: 7, + name: "archive".into(), + root_path: "/tmp/archive".into(), + }, + ] + } + + #[actix_rt::test] + async fn resolve_library_param_absent_is_union() { + let state = state_with_libraries(sample_libraries()); + assert!(matches!(resolve_library_param(&state, None), Ok(None))); + } + + #[actix_rt::test] + async fn resolve_library_param_empty_or_whitespace_is_union() { + let state = state_with_libraries(sample_libraries()); + assert!(matches!(resolve_library_param(&state, Some("")), Ok(None))); + assert!(matches!( + resolve_library_param(&state, Some(" ")), + Ok(None) + )); + } + + #[actix_rt::test] + async fn resolve_library_param_numeric_id_matches() { + let state = state_with_libraries(sample_libraries()); + let lib = resolve_library_param(&state, Some("7")) + .expect("valid id") + .expect("some library"); + assert_eq!(lib.id, 7); + assert_eq!(lib.name, "archive"); + } + + #[actix_rt::test] + async fn resolve_library_param_name_matches() { + let state = state_with_libraries(sample_libraries()); + let lib = resolve_library_param(&state, Some("main")) + .expect("valid name") + .expect("some library"); + assert_eq!(lib.id, 1); + } + + #[actix_rt::test] + async fn resolve_library_param_unknown_id_errs() { + let state = state_with_libraries(sample_libraries()); + let err = resolve_library_param(&state, Some("999")).unwrap_err(); + assert!(err.contains("unknown library id")); + } + + #[actix_rt::test] + async fn resolve_library_param_unknown_name_errs() { + let state = state_with_libraries(sample_libraries()); + let err = resolve_library_param(&state, Some("missing")).unwrap_err(); + assert!(err.contains("unknown library name")); + } } -- 2.49.1 From 3027a3ffda69c9aa37d3a4e654932bd68216afb4 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 21:38:51 -0400 
Subject: [PATCH 16/19] perf: DB-backed recursive /photos + watcher reconciliation Recursive listings now query image_exif instead of walking disk, taking union-mode /photos from ~17s to sub-second on a 10k-file library. The watcher's full scan prunes stale image_exif rows so the DB stays in parity with the filesystem when files are deleted externally. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database/mod.rs | 75 +++++++++++++++ src/files.rs | 225 ++++++++++++++++++++++---------------------- src/main.rs | 47 ++++++++- 3 files changed, 235 insertions(+), 112 deletions(-) diff --git a/src/database/mod.rs b/src/database/mod.rs index e42db69..07406d6 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -385,6 +385,28 @@ pub trait ExifDao: Sync + Send { context: &opentelemetry::Context, hash: &str, ) -> Result, DbError>; + + /// List `(library_id, rel_path)` pairs for the given libraries, optionally + /// restricted to rows whose rel_path starts with `path_prefix`. When + /// `library_ids` is empty, rows from every library are returned. Used by + /// `/photos` recursive listing to skip the filesystem walk — the watcher + /// keeps image_exif in parity with disk via the reconciliation pass. + fn list_rel_paths_for_libraries( + &mut self, + context: &opentelemetry::Context, + library_ids: &[i32], + path_prefix: Option<&str>, + ) -> Result, DbError>; + + /// Delete a single image_exif row scoped to `(library_id, rel_path)`. + /// Distinct from `delete_exif`, which matches on rel_path alone and + /// would clobber same-named files across libraries. 
+ fn delete_exif_by_library( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> Result<(), DbError>; } pub struct SqliteExifDao { @@ -933,6 +955,59 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn list_rel_paths_for_libraries( + &mut self, + context: &opentelemetry::Context, + library_ids: &[i32], + path_prefix: Option<&str>, + ) -> Result, DbError> { + trace_db_call(context, "query", "list_rel_paths_for_libraries", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + let mut query = image_exif.select((library_id, rel_path)).into_boxed(); + + if !library_ids.is_empty() { + query = query.filter(library_id.eq_any(library_ids.to_vec())); + } + + if let Some(prefix) = path_prefix.map(str::trim).filter(|s| !s.is_empty()) { + // Trailing slash normalization so "2024" matches "2024/..." + // without also matching "2024-archive/...". 
+ let prefix = prefix.trim_end_matches('/'); + let pattern = format!("{}/%", prefix.replace('%', "\\%").replace('_', "\\_")); + query = query.filter(rel_path.like(pattern).escape('\\')); + } + + query + .load::<(i32, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn delete_exif_by_library( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + ) -> Result<(), DbError> { + trace_db_call(context, "delete", "delete_exif_by_library", |_span| { + use schema::image_exif::dsl::*; + + diesel::delete( + image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)), + ) + .execute(self.connection.lock().unwrap().deref_mut()) + .map(|_| ()) + .map_err(|_| anyhow::anyhow!("Delete error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } #[cfg(test)] diff --git a/src/files.rs b/src/files.rs index fdeec9f..561414c 100644 --- a/src/files.rs +++ b/src/files.rs @@ -546,91 +546,89 @@ pub async fn list_photos( .unwrap_or_else(|e| e.error_response()); } - // Walk each candidate library's root for the requested sub-path. In - // scoped mode `libraries_to_scan` has one entry (the selected library); - // in union mode we walk every configured library and intermix results. - // For the primary library we preserve the original FileSystemAccess - // path so the test-mock path (MockFileSystem) continues to work. + // In scoped mode `libraries_to_scan` has one entry (the selected library); + // in union mode we enumerate every configured library and intermix results. + // + // Recursive mode pulls rel_paths from image_exif (kept in parity with disk + // by the watcher's full-scan reconciliation) instead of walking — a ~10k + // file library drops from multi-second to ~10ms for the listing itself. 
+ // Non-recursive mode still walks because we need directory metadata for + // the `dirs` response and listing a single directory is cheap. let mut file_names: Vec = Vec::new(); let mut file_libraries: Vec = Vec::new(); let mut dirs_set: std::collections::HashSet = std::collections::HashSet::new(); let mut any_library_resolved = false; - for lib in &libraries_to_scan { - let files_result = if search_recursively { - is_valid_full_path( - &PathBuf::from(&lib.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for recursive search: {:?}", path); - list_files_recursive(&path).unwrap_or_default() - }) - .context("Invalid path") - } else if lib.id == app_state.primary_library().id { - file_system.get_files_for_path(search_path) + if search_recursively { + let start_db_list = std::time::Instant::now(); + let lib_ids: Vec = libraries_to_scan.iter().map(|l| l.id).collect(); + let trimmed = search_path.trim(); + let prefix = if trimmed.is_empty() || trimmed == "/" { + None } else { - is_valid_full_path( - &PathBuf::from(&lib.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for non-recursive search: {:?}", path); - list_files(&path).unwrap_or_default() - }) - .context("Invalid path") + Some(trimmed) }; - - let files = match files_result { - Ok(f) => { - any_library_resolved = true; - f - } - Err(e) => { - debug!( - "Skipping library '{}' for path '{}': {:?}", - lib.name, search_path, e - ); - continue; - } + let rows = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + dao.list_rel_paths_for_libraries(&span_context, &lib_ids, prefix) + .unwrap_or_else(|e| { + warn!("list_rel_paths_for_libraries failed: {:?}", e); + Vec::new() + }) }; - info!( - "Found {:?} files in library '{}' path: {:?} (recursive: {})", - files.len(), - lib.name, - search_path, - search_recursively + "DB-backed recursive listing: {} files across {} libraries in {:?}", + rows.len(), + lib_ids.len(), + 
start_db_list.elapsed() ); + any_library_resolved = true; + for (lib_id, path) in rows { + file_libraries.push(lib_id); + file_names.push(path); + } + } else { + for lib in &libraries_to_scan { + let files_result = if lib.id == app_state.primary_library().id { + file_system.get_files_for_path(search_path) + } else { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for non-recursive search: {:?}", path); + list_files(&path).unwrap_or_default() + }) + .context("Invalid path") + }; - for path in &files { - match path.metadata() { - Ok(md) => { - let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { - panic!( - "Unable to strip library root {} from file path {}", - &lib.root_path, - path.display() - ) - }); - // Normalize separators to '/' so downstream lookups - // (tags, EXIF, insights) that store rel_paths with - // forward slashes still match on Windows. - let relative_str = relative.to_str().unwrap().replace('\\', "/"); - - if md.is_file() { - file_names.push(relative_str); - file_libraries.push(lib.id); - } else if md.is_dir() { - dirs_set.insert(relative_str); - } + let files = match files_result { + Ok(f) => { + any_library_resolved = true; + f } Err(e) => { - error!("Failed getting file metadata: {:?}", e); - // Include files without metadata if they have extensions - if path.extension().is_some() { + debug!( + "Skipping library '{}' for path '{}': {:?}", + lib.name, search_path, e + ); + continue; + } + }; + + info!( + "Found {:?} files in library '{}' path: {:?} (recursive: {})", + files.len(), + lib.name, + search_path, + search_recursively + ); + + for path in &files { + match path.metadata() { + Ok(md) => { let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { panic!( "Unable to strip library root {} from file path {}", @@ -638,8 +636,32 @@ pub async fn list_photos( path.display() ) }); - 
file_names.push(relative.to_str().unwrap().replace('\\', "/")); - file_libraries.push(lib.id); + // Normalize separators to '/' so downstream lookups + // (tags, EXIF, insights) that store rel_paths with + // forward slashes still match on Windows. + let relative_str = relative.to_str().unwrap().replace('\\', "/"); + + if md.is_file() { + file_names.push(relative_str); + file_libraries.push(lib.id); + } else if md.is_dir() { + dirs_set.insert(relative_str); + } + } + Err(e) => { + error!("Failed getting file metadata: {:?}", e); + // Include files without metadata if they have extensions + if path.extension().is_some() { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) + }); + file_names.push(relative.to_str().unwrap().replace('\\', "/")); + file_libraries.push(lib.id); + } } } } @@ -943,43 +965,6 @@ pub fn list_files(dir: &Path) -> io::Result> { Ok(files) } -pub fn list_files_recursive(dir: &Path) -> io::Result> { - let tracer = global_tracer(); - let mut span = tracer.start("list_files_recursive"); - let dir_name_string = dir.to_str().unwrap_or_default().to_string(); - span.set_attribute(KeyValue::new("dir", dir_name_string)); - info!("Recursively listing files in: {:?}", dir); - - let mut result = Vec::new(); - - fn visit_dirs(dir: &Path, files: &mut Vec) -> io::Result<()> { - if dir.is_dir() { - for entry in read_dir(dir)? 
{ - let entry = entry?; - let path = entry.path(); - - if path.is_dir() { - visit_dirs(&path, files)?; - } else if is_image_or_video(&path) { - files.push(path); - } - } - } - Ok(()) - } - - visit_dirs(dir, &mut result)?; - - span.set_attribute(KeyValue::new("file_count", result.len().to_string())); - span.set_status(Status::Ok); - info!( - "Found {:?} files recursively in directory: {:?}", - result.len(), - dir - ); - Ok(result) -} - pub fn is_image_or_video(path: &Path) -> bool { file_types::is_media_file(path) } @@ -1567,6 +1552,24 @@ mod tests { ) -> Result, DbError> { Ok(vec![]) } + + fn list_rel_paths_for_libraries( + &mut self, + _context: &opentelemetry::Context, + _library_ids: &[i32], + _path_prefix: Option<&str>, + ) -> Result, DbError> { + Ok(vec![]) + } + + fn delete_exif_by_library( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + _rel_path: &str, + ) -> Result<(), DbError> { + Ok(()) + } } mod api { diff --git a/src/main.rs b/src/main.rs index 19edb78..570cf58 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,10 @@ use prometheus::{self, IntGauge}; use std::error::Error; use std::sync::{Arc, Mutex}; use std::time::{Duration, SystemTime}; -use std::{collections::HashMap, io::prelude::*}; +use std::{ + collections::{HashMap, HashSet}, + io::prelude::*, +}; use std::{env, fs::File}; use std::{ io::ErrorKind, @@ -1916,6 +1919,48 @@ fn process_new_files( info!("Processing thumbnails for new files..."); create_thumbnails(std::slice::from_ref(library)); } + + // Reconciliation: on a full scan, prune image_exif rows whose rel_path no + // longer exists on disk for this library. Keeps the DB in parity so + // downstream DB-backed listings (e.g. recursive /photos) don't return + // phantom files. Skipped on quick scans — those only look at recently + // modified files and can't distinguish "missing" from "unchanged". 
+ if modified_since.is_none() { + let disk_paths: HashSet = files.iter().map(|(_, rel)| rel.clone()).collect(); + let db_paths: Vec = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_rel_paths_for_library(&context, library.id) + .unwrap_or_else(|e| { + error!( + "Reconciliation: failed to load image_exif rel_paths for lib {}: {:?}", + library.id, e + ); + Vec::new() + }) + }; + + let stale: Vec = db_paths + .into_iter() + .filter(|p| !disk_paths.contains(p)) + .collect(); + + if !stale.is_empty() { + info!( + "Reconciliation: pruning {} stale image_exif rows for library '{}'", + stale.len(), + library.name + ); + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + for rel in &stale { + if let Err(e) = dao.delete_exif_by_library(&context, library.id, rel) { + warn!( + "Reconciliation: failed to delete {} (lib {}): {:?}", + rel, library.id, e + ); + } + } + } + } } #[cfg(test)] -- 2.49.1 From a35b45fd36f35316182be12c90f80b14882200a4 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sun, 19 Apr 2026 20:17:12 -0400 Subject: [PATCH 17/19] feat: expand insight tool result caps and render timestamps in local time Doubled default row caps for search_rag/get_sms_messages/get_calendar_events/recall_entities and exposed an optional `limit` parameter on each so the agent can tune per call. Render all LLM-facing timestamps as server-local time with explicit offset so smaller models stop misreading UTC as wall-clock time. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/insight_generator.rs | 86 ++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index ecc387d..6b555d6 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -440,7 +440,11 @@ impl InsightGenerator { .iter() .map(|e| { let date = DateTime::from_timestamp(e.start_time, 0) - .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()) + .map(|dt| { + dt.with_timezone(&Local) + .format("%Y-%m-%d %H:%M %:z") + .to_string() + }) .unwrap_or_else(|| "unknown".to_string()); let attendees = e @@ -1354,7 +1358,11 @@ Return ONLY the summary, nothing else."#, .map(|m| { let sender = if m.is_sent { "Me" } else { &m.contact }; let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0) - .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()) + .map(|dt| { + dt.with_timezone(&Local) + .format("%Y-%m-%d %H:%M %:z") + .to_string() + }) .unwrap_or_else(|| "unknown time".to_string()); format!("[{}] {}: {}", timestamp, sender, m.body) }) @@ -1449,16 +1457,22 @@ Return ONLY the summary, nothing else."#, .get("contact") .and_then(|v| v.as_str()) .map(|s| s.to_string()); + let limit = args + .get("limit") + .and_then(|v| v.as_i64()) + .unwrap_or(10) + .clamp(1, 25) as usize; log::info!( - "tool_search_rag: query='{}', date={}, contact={:?}", + "tool_search_rag: query='{}', date={}, contact={:?}, limit={}", query, date, - contact + contact, + limit ); match self - .find_relevant_messages_rag(date, None, contact.as_deref(), None, 5, Some(&query)) + .find_relevant_messages_rag(date, None, contact.as_deref(), None, limit, Some(&query)) .await { Ok(results) if !results.is_empty() => results.join("\n\n"), @@ -1485,6 +1499,11 @@ Return ONLY the summary, nothing else."#, .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(4); + let limit = args + .get("limit") + .and_then(|v| v.as_i64()) + .unwrap_or(60) + .clamp(1, 
150) as usize; let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, @@ -1493,10 +1512,11 @@ Return ONLY the summary, nothing else."#, let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( - "tool_get_sms_messages: date={}, contact={:?}, days_radius={}", + "tool_get_sms_messages: date={}, contact={:?}, days_radius={}, limit={}", date, contact, - days_radius + days_radius, + limit ); match self @@ -1507,11 +1527,15 @@ Return ONLY the summary, nothing else."#, Ok(messages) if !messages.is_empty() => { let formatted: Vec = messages .iter() - .take(30) + .take(limit) .map(|m| { let sender = if m.is_sent { "Me" } else { &m.contact }; let ts = DateTime::from_timestamp(m.timestamp, 0) - .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()) + .map(|dt| { + dt.with_timezone(&Local) + .format("%Y-%m-%d %H:%M %:z") + .to_string() + }) .unwrap_or_else(|| "unknown".to_string()); format!("[{}] {}: {}", ts, sender, m.body) }) @@ -1544,6 +1568,11 @@ Return ONLY the summary, nothing else."#, .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(7); + let limit = args + .get("limit") + .and_then(|v| v.as_i64()) + .unwrap_or(20) + .clamp(1, 50) as usize; let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, @@ -1552,9 +1581,10 @@ Return ONLY the summary, nothing else."#, let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( - "tool_get_calendar_events: date={}, days_radius={}", + "tool_get_calendar_events: date={}, days_radius={}, limit={}", date, - days_radius + days_radius, + limit ); let events = { @@ -1562,7 +1592,7 @@ Return ONLY the summary, nothing else."#, .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); - dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, 10) + dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, limit) .ok() }; @@ -1572,7 +1602,11 @@ Return ONLY the summary, nothing else."#, .iter() .map(|e| { let 
dt = DateTime::from_timestamp(e.start_time, 0) - .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()) + .map(|dt| { + dt.with_timezone(&Local) + .format("%Y-%m-%d %H:%M %:z") + .to_string() + }) .unwrap_or_else(|| "unknown".to_string()); let loc = e .location @@ -1644,7 +1678,11 @@ Return ONLY the summary, nothing else."#, .take(20) .map(|loc| { let dt = DateTime::from_timestamp(loc.timestamp, 0) - .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()) + .map(|dt| { + dt.with_timezone(&Local) + .format("%Y-%m-%d %H:%M %:z") + .to_string() + }) .unwrap_or_else(|| "unknown".to_string()); let activity = loc .activity @@ -1753,7 +1791,11 @@ Return ONLY the summary, nothing else."#, .get("entity_type") .and_then(|v| v.as_str()) .map(|s| s.to_string()); - let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10); + let limit = args + .get("limit") + .and_then(|v| v.as_i64()) + .unwrap_or(20) + .clamp(1, 50); log::info!( "tool_recall_entities: name={:?}, type={:?}, limit={}", @@ -2106,6 +2148,10 @@ Return ONLY the summary, nothing else."#, "contact": { "type": "string", "description": "Optional contact name to filter results" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results to return (default: 10, max: 25)" } } }), @@ -2128,6 +2174,10 @@ Return ONLY the summary, nothing else."#, "days_radius": { "type": "integer", "description": "Number of days before and after the date to search (default: 4)" + }, + "limit": { + "type": "integer", + "description": "Maximum number of messages to return (default: 60, max: 150)" } } }), @@ -2146,6 +2196,10 @@ Return ONLY the summary, nothing else."#, "days_radius": { "type": "integer", "description": "Number of days before and after the date to search (default: 7)" + }, + "limit": { + "type": "integer", + "description": "Maximum number of events to return (default: 20, max: 50)" } } }), @@ -2221,7 +2275,7 @@ Return ONLY the summary, nothing else."#, }, "limit": { "type": "integer", - "description": 
"Maximum number of results to return (default: 10)" + "description": "Maximum number of results to return (default: 20, max: 50)" } } }), -- 2.49.1 From 39c212b0e6b9ea4f2d925cc9c67e15c1ea509540 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sun, 19 Apr 2026 20:24:18 -0400 Subject: [PATCH 18/19] Bump to 1.0.0 for multi-library support --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e301577..4f04521 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1832,7 +1832,7 @@ dependencies = [ [[package]] name = "image-api" -version = "0.5.2" +version = "1.0.0" dependencies = [ "actix", "actix-cors", diff --git a/Cargo.toml b/Cargo.toml index 4dd9da2..1e606b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "0.5.2" +version = "1.0.0" authors = ["Cameron Cordes "] edition = "2024" -- 2.49.1 From bffe604527442945c141290b727b5764a2f385fd Mon Sep 17 00:00:00 2001 From: Cameron Date: Mon, 20 Apr 2026 21:53:15 -0400 Subject: [PATCH 19/19] Remove potentially confusing TZ from insight generator --- src/ai/insight_generator.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 6b555d6..18e50c7 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -442,7 +442,7 @@ impl InsightGenerator { let date = DateTime::from_timestamp(e.start_time, 0) .map(|dt| { dt.with_timezone(&Local) - .format("%Y-%m-%d %H:%M %:z") + .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); @@ -1360,7 +1360,7 @@ Return ONLY the summary, nothing else."#, let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) - .format("%Y-%m-%d %H:%M %:z") + .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown time".to_string()); @@ -1533,7 +1533,7 @@ Return ONLY the summary, nothing else."#, let ts = 
DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) - .format("%Y-%m-%d %H:%M %:z") + .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); @@ -1604,7 +1604,7 @@ Return ONLY the summary, nothing else."#, let dt = DateTime::from_timestamp(e.start_time, 0) .map(|dt| { dt.with_timezone(&Local) - .format("%Y-%m-%d %H:%M %:z") + .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); @@ -1680,7 +1680,7 @@ Return ONLY the summary, nothing else."#, let dt = DateTime::from_timestamp(loc.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) - .format("%Y-%m-%d %H:%M %:z") + .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); -- 2.49.1