From 61f98066f6f35f3b5830923674e7f71dbc1e34d2 Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 17 Apr 2026 18:06:02 -0400 Subject: [PATCH] feat: content-hash-aware tag/insight sharing + library scoping Tags and insights now follow content across libraries via content_hash lookups on the read path, so the same file indexed at different rel_paths in multiple libraries shares its annotations. Recursive tag search scopes hits to the selected library by checking each tagged rel_path against the library's disk (with a content-hash sibling fallback so tags attached under one library's rel_path still match a content-equivalent file in another). The /image and /image/metadata handlers fall back across libraries when the file isn't under the resolved one, so union-mode search results (which carry no library attribution in the response) still serve correctly. Co-Authored-By: Claude Opus 4.7 --- src/ai/handlers.rs | 25 ++++++- src/database/insights_dao.rs | 34 +++++++++ src/database/mod.rs | 138 +++++++++++++++++++++++++++++++++++ src/files.rs | 68 +++++++++++++++++ src/main.rs | 44 +++++++++-- src/tags.rs | 75 ++++++++++++++++++- 6 files changed, 376 insertions(+), 8 deletions(-) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cf7fd5b..a49c229 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -5,8 +5,10 @@ use serde::{Deserialize, Serialize}; use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient}; use crate::data::Claims; -use crate::database::InsightDao; +use crate::database::{ExifDao, InsightDao}; +use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer}; +use crate::state::AppState; use crate::utils::normalize_path; #[derive(Debug, Deserialize)] @@ -31,6 +33,10 @@ pub struct GeneratePhotoInsightRequest { #[derive(Debug, Deserialize)] pub struct GetPhotoInsightQuery { pub path: String, + /// Library context for this lookup. Used to pick the right content + /// hash when the same rel_path exists under multiple roots. + #[serde(default)] + pub library: Option, } #[derive(Debug, Deserialize)] @@ -146,15 +152,30 @@ pub async fn generate_insight_handler( pub async fn get_insight_handler( _claims: Claims, query: web::Query, + app_state: web::Data, insight_dao: web::Data>>, + exif_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::debug!("Fetching insight for {}", normalized_path); let otel_context = opentelemetry::Context::new(); + + // Expand to rel_paths sharing content so an insight generated under + // library 1 still shows when the same photo is viewed from library 2. + let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + let sibling_paths = { + let mut exif = exif_dao.lock().expect("Unable to lock ExifDao"); + exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path) + .unwrap_or_else(|_| vec![normalized_path.clone()]) + }; + let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); - match dao.get_insight(&otel_context, &normalized_path) { + match dao.get_insight_for_paths(&otel_context, &sibling_paths) { Ok(Some(insight)) => { let response = PhotoInsightResponse { id: insight.id, diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs index 6b15717..d54904f 100644 --- a/src/database/insights_dao.rs +++ b/src/database/insights_dao.rs @@ -21,6 +21,16 @@ pub trait InsightDao: Sync + Send { file_path: &str, ) -> Result, DbError>; + /// Return the most recent current insight whose rel_path is one of + /// `paths`. Used for content-hash sharing: the caller expands a + /// single file into all rel_paths with the same content_hash, then + /// asks here for any existing insight attached to any of them. + fn get_insight_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> Result, DbError>; + fn get_insight_history( &mut self, context: &opentelemetry::Context, @@ -132,6 +142,30 @@ impl InsightDao for SqliteInsightDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn get_insight_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> Result, DbError> { + if paths.is_empty() { + return Ok(None); + } + trace_db_call(context, "query", "get_insight_for_paths", |_span| { + use schema::photo_insights::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get InsightDao"); + + photo_insights + .filter(rel_path.eq_any(paths)) + .filter(is_current.eq(true)) + .order(generated_at.desc()) + .first::(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn get_insight_history( &mut self, context: &opentelemetry::Context, diff --git a/src/database/mod.rs b/src/database/mod.rs index f5fe56a..2e3dca1 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -341,6 +341,45 @@ pub trait ExifDao: Sync + Send { context: &opentelemetry::Context, hash: &str, ) -> Result, DbError>; + + /// Given a file instance `(library_id, rel_path)`, return every distinct + /// rel_path in `image_exif` whose `content_hash` matches this file's. + /// Used by tag and insight read-paths so annotations follow content + /// rather than path, even when the same file is indexed under + /// different library roots. Falls back to `[rel_path]` when the file + /// hasn't been hashed yet. + fn get_rel_paths_sharing_content( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> Result, DbError>; + + /// All rel_paths known to live in a given library. Used by search to + /// scope tag-based (path-keyed) hits to a single library after joining + /// through the library-agnostic tag tables. + fn get_rel_paths_for_library( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + ) -> Result, DbError>; + + /// Look up a content_hash for a rel_path in *any* library. Useful when + /// the caller has a library-agnostic rel_path (e.g. from tagged_photo) + /// and wants to find content-equivalent siblings without knowing the + /// file's original library. + fn find_content_hash_anywhere( + &mut self, + context: &opentelemetry::Context, + rel_path: &str, + ) -> Result, DbError>; + + /// Given a content_hash, return all rel_paths carrying that hash. + fn get_rel_paths_by_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError>; } pub struct SqliteExifDao { @@ -775,4 +814,103 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn get_rel_paths_sharing_content( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_sharing_content", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + // Look up this file's content_hash. Missing row or NULL hash + // means we can't expand the match set; return the given + // rel_path so callers fall through to direct-match behavior. + let hash: Option = image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)) + .select(content_hash) + .first::>(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error"))? + .flatten(); + + let paths = match hash { + Some(h) => image_exif + .filter(content_hash.eq(h)) + .select(rel_path) + .distinct() + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error"))?, + None => vec![rel_path_val.to_string()], + }; + + Ok(paths) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_for_library( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_for_library", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(library_id.eq(library_id_val)) + .select(rel_path) + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn find_content_hash_anywhere( + &mut self, + context: &opentelemetry::Context, + rel_path_val: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "find_content_hash_anywhere", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(rel_path.eq(rel_path_val)) + .filter(content_hash.is_not_null()) + .select(content_hash) + .first::>(connection.deref_mut()) + .optional() + .map(|opt| opt.flatten()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn get_rel_paths_by_hash( + &mut self, + context: &opentelemetry::Context, + hash: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_rel_paths_by_hash", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + image_exif + .filter(content_hash.eq(hash)) + .select(rel_path) + .distinct() + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } diff --git a/src/files.rs b/src/files.rs index 552fb58..11ad898 100644 --- a/src/files.rs +++ b/src/files.rs @@ -335,6 +335,13 @@ pub async fn list_photos( None }; + // When a specific library is selected, we'll gate tag-based results + // (which key on rel_path only, library-agnostic) by "does this + // rel_path actually exist on disk in the selected library's root". + // We check per-file below rather than pre-enumerating image_exif, + // since image_exif may lag a just-added library. + let library_for_scope: Option<&crate::libraries::Library> = library; + let search_recursively = req.recursive.unwrap_or(false); if let Some(tag_ids) = &req.tag_ids && search_recursively @@ -400,6 +407,34 @@ pub async fn list_photos( true } }) + .filter(|f| { + // Scope to the selected library by checking the file + // actually exists under its root. Falls back to the + // content-hash sibling set (looked up globally, since + // the tagged rel_path may have been registered under + // a different library than the one selected). + let Some(lib) = library_for_scope else { + return true; + }; + if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { + return true; + } + let siblings = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao + .find_content_hash_anywhere(&span_context, &f.file_name) + .unwrap_or(None) + { + Some(hash) => dao + .get_rel_paths_by_hash(&span_context, &hash) + .unwrap_or_default(), + None => Vec::new(), + } + }; + siblings + .iter() + .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) + }) .filter(|f| { // Apply media type filtering if specified if let Some(ref media_type) = req.media_type { @@ -1403,6 +1438,39 @@ mod tests { ) -> Result, DbError> { Ok(None) } + + fn get_rel_paths_sharing_content( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + rel_path: &str, + ) -> Result, DbError> { + Ok(vec![rel_path.to_string()]) + } + + fn get_rel_paths_for_library( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + ) -> Result, DbError> { + Ok(vec![]) + } + + fn find_content_hash_anywhere( + &mut self, + _context: &opentelemetry::Context, + _rel_path: &str, + ) -> Result, DbError> { + Ok(None) + } + + fn get_rel_paths_by_hash( + &mut self, + _context: &opentelemetry::Context, + _hash: &str, + ) -> Result, DbError> { + Ok(vec![]) + } } mod api { diff --git a/src/main.rs b/src/main.rs index 0f3f6c8..e28a43a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -118,7 +118,25 @@ async fn get_image( } }; - if let Some(path) = is_valid_full_path(&library.root_path, &req.path, false) { + // Union-mode search returns flat rel_paths with no library attribution, + // so clients may request a file under the wrong library. Try the + // resolved library first; if the file isn't there, fall back to any + // other library holding that rel_path on disk. + let resolved = is_valid_full_path(&library.root_path, &req.path, false) + .filter(|p| p.exists()) + .map(|p| (library, p)) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &req.path, false) + .filter(|p| p.exists()) + .map(|p| (lib, p)) + }) + }); + + if let Some((library, path)) = resolved { let image_size = req.size.unwrap_or(PhotoSize::Full); if image_size == PhotoSize::Thumb { let relative_path = path @@ -207,9 +225,9 @@ async fn get_image( span.set_status(Status::error("Not found")); HttpResponse::NotFound().finish() } else { - span.set_status(Status::error("Bad photos request")); - error!("Bad photos request: {}", req.path); - HttpResponse::BadRequest().finish() + span.set_status(Status::error("Not found")); + error!("Path does not exist in any library: {}", req.path); + HttpResponse::NotFound().finish() } } @@ -294,7 +312,23 @@ async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let full_path = is_valid_full_path(&app_state.base_path, &path.path, false); + let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + + // Fall back to other libraries if the file isn't under the resolved one, + // matching the `/image` handler so union-mode search results resolve. + let full_path = is_valid_full_path(&library.root_path, &path.path, false) + .filter(|p| p.exists()) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == library.id { + return None; + } + is_valid_full_path(&lib.root_path, &path.path, false).filter(|p| p.exists()) + }) + }); match full_path .ok_or_else(|| ErrorKind::InvalidData.into()) diff --git a/src/tags.rs b/src/tags.rs index 2834b62..95e303f 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -1,5 +1,8 @@ use crate::data::GetTagsRequest; +use crate::database::ExifDao; +use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer, trace_db_call}; +use crate::state::AppState; use crate::utils::normalize_path; use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema}; use actix_web::dev::{ServiceFactory, ServiceRequest}; @@ -71,15 +74,32 @@ async fn get_tags( _: Claims, http_request: HttpRequest, request: web::Query, + app_state: web::Data, tag_dao: web::Data>, + exif_dao: web::Data>>, ) -> impl Responder { let context = extract_context_from_request(&http_request); let span = global_tracer().start_with_context("get_tags", &context); let span_context = opentelemetry::Context::current_with_span(span); let normalized_path = normalize_path(&request.path); + + // Expand the query set to every rel_path that shares content with + // this file, so tags added under one library show up under the + // others when they hold the same file. Falls back to direct rel_path + // match when the file hasn't been hashed yet. + let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); + let sibling_paths = { + let mut exif = exif_dao.lock().expect("Unable to get ExifDao"); + exif.get_rel_paths_sharing_content(&span_context, library.id, &normalized_path) + .unwrap_or_else(|_| vec![normalized_path.clone()]) + }; + let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao"); tag_dao - .get_tags_for_path(&span_context, &normalized_path) + .get_tags_for_paths(&span_context, &sibling_paths) .map(|tags| { span_context.span().set_status(Status::Ok); HttpResponse::Ok().json(tags) @@ -289,6 +309,14 @@ pub trait TagDao: Send + Sync { context: &opentelemetry::Context, path: &str, ) -> anyhow::Result>; + /// Union of tags for every rel_path in `paths`. Used by content-hash + /// sharing: the caller resolves all rel_paths with the same content + /// via `ExifDao::get_rel_paths_sharing_content`, then passes them here. + fn get_tags_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>; fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result; fn remove_tag( &mut self, @@ -413,6 +441,32 @@ impl TagDao for SqliteTagDao { }) } + fn get_tags_for_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result> { + if paths.is_empty() { + return Ok(Vec::new()); + } + let mut conn = self + .connection + .lock() + .expect("Unable to lock SqliteTagDao connection"); + trace_db_call(context, "query", "get_tags_for_paths", |span| { + span.set_attribute(KeyValue::new("path_count", paths.len() as i64)); + // DISTINCT across tag ids so two rel_paths carrying the same + // tag don't produce a duplicate entry in the response. + tags::table + .inner_join(tagged_photo::table) + .filter(tagged_photo::rel_path.eq_any(paths)) + .select((tags::id, tags::name, tags::created_time)) + .distinct() + .get_results::(conn.deref_mut()) + .with_context(|| "Unable to get tags from Sqlite") + }) + } + fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result { let mut conn = self .connection @@ -817,6 +871,25 @@ mod tests { .clone()) } + fn get_tags_for_paths( + &mut self, + _context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result> { + let tagged = self.tagged_photos.borrow(); + let mut out: Vec = Vec::new(); + for p in paths { + if let Some(tags) = tagged.get(p) { + for t in tags { + if !out.iter().any(|existing| existing.id == t.id) { + out.push(t.clone()); + } + } + } + } + Ok(out) + } + fn create_tag( &mut self, _context: &opentelemetry::Context,