Face Recognition / People Integration #61

Merged
cameron merged 23 commits from feature/face-recog-phase3-file-watch into master 2026-04-30 17:22:09 +00:00
Showing only changes of commit 3112260dc8 - Show all commits

View File

@@ -32,6 +32,7 @@ where
)
.service(web::resource("image/tags/all").route(web::get().to(get_all_tags::<TagD>)))
.service(web::resource("image/tags/batch").route(web::post().to(update_tags::<TagD>)))
.service(web::resource("image/tags/lookup").route(web::post().to(lookup_tags_batch::<TagD>)))
}
async fn add_tag<D: TagDao>(
@@ -238,6 +239,51 @@ async fn update_tags<D: TagDao>(
.into_http_internal_err()
}
/// Request body for ``POST /image/tags/lookup``: the set of photo
/// paths to resolve tags for in one round trip.
#[derive(Deserialize, Debug)]
pub struct LookupTagsBatchRequest {
    // Paths as sent by the client; the handler normalizes each one
    // (see `lookup_tags_batch`) before querying the dao.
    pub paths: Vec<String>,
}
/// Bulk per-path tag lookup. Apollo's photo-match flow used to fan out
/// one ``GET /image/tags?path=`` per record (~4k for a wide window) —
/// each call locked the dao briefly and the round-trip cost dwarfed
/// the actual SQL. This collapses the whole fan-out into one POST and
/// one (chunked) JOIN. Body: ``{paths: [...]}``; response:
/// ``{path: [{id, name, ...}]}`` with **only paths that have at least
/// one tag** in the map (the caller treats absence as empty list).
///
/// Trade-off: this matches by ``rel_path`` directly and does NOT do
/// the cross-library content-hash sibling expansion that the per-path
/// ``GET /image/tags`` does. For Apollo's grid view the simpler match
/// is fine — it's the common case for single-library deploys; the
/// carousel still uses the per-path endpoint and resolves siblings on
/// demand. If multi-library content-hash sharing becomes load-bearing
/// for the grid, extend this to JOIN ``image_exif`` on content_hash.
async fn lookup_tags_batch<D: TagDao>(
_: Claims,
http_request: HttpRequest,
body: web::Json<LookupTagsBatchRequest>,
tag_dao: web::Data<Mutex<D>>,
) -> impl Responder {
let context = extract_context_from_request(&http_request);
let span = global_tracer().start_with_context("lookup_tags_batch", &context);
let span_context = opentelemetry::Context::current_with_span(span);
if body.paths.is_empty() {
return HttpResponse::Ok().json(std::collections::HashMap::<String, Vec<Tag>>::new());
}
let normalized: Vec<String> = body.paths.iter().map(|p| normalize_path(p)).collect();
let mut dao = tag_dao.lock().expect("Unable to get TagDao");
match dao.get_tags_grouped_by_paths(&span_context, &normalized) {
Ok(grouped) => {
span_context.span().set_status(Status::Ok);
HttpResponse::Ok().json(grouped)
}
Err(e) => HttpResponse::InternalServerError().body(format!("{}", e)),
}
}
#[derive(Serialize, Queryable, Clone, Debug, PartialEq)]
pub struct Tag {
pub id: i32,
@@ -317,6 +363,14 @@ pub trait TagDao: Send + Sync {
context: &opentelemetry::Context,
paths: &[String],
) -> anyhow::Result<Vec<Tag>>;
/// Per-path grouped lookup: ``rel_path → [tags]``. Used by the
/// ``/image/tags/lookup`` batch endpoint. Returns only paths that
/// have at least one tag; the caller treats absence as empty.
///
/// `paths` are expected to be already-normalized rel_paths (the
/// endpoint normalizes before calling); matching is exact, with no
/// content-hash sibling expansion (see `lookup_tags_batch`).
fn get_tags_grouped_by_paths(
    &mut self,
    context: &opentelemetry::Context,
    paths: &[String],
) -> anyhow::Result<std::collections::HashMap<String, Vec<Tag>>>;
fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result<Tag>;
fn remove_tag(
&mut self,
@@ -470,6 +524,51 @@ impl TagDao for SqliteTagDao {
})
}
/// Sqlite implementation of the batch grouped lookup: one inner join
/// per chunk of paths, rows folded into a ``rel_path → Vec<Tag>`` map.
/// Paths with no tags simply never appear in the result (inner join).
fn get_tags_grouped_by_paths(
    &mut self,
    context: &opentelemetry::Context,
    paths: &[String],
) -> anyhow::Result<std::collections::HashMap<String, Vec<Tag>>> {
    use std::collections::HashMap;
    // Upper bound is one entry per requested path; pre-allocating here
    // avoids repeated rehash/grow for large grids (the map may end up
    // smaller since untagged paths are dropped by the join).
    let mut out: HashMap<String, Vec<Tag>> = HashMap::with_capacity(paths.len());
    if paths.is_empty() {
        return Ok(out);
    }
    let mut conn = self
        .connection
        .lock()
        .expect("Unable to lock SqliteTagDao connection");
    trace_db_call(context, "query", "get_tags_grouped_by_paths", |span| {
        span.set_attribute(KeyValue::new("path_count", paths.len() as i64));
        // SQLite's default SQLITE_LIMIT_VARIABLE_NUMBER is 32766 in
        // modern builds (999 in old ones). Chunk at 500 to stay
        // safely under both — five queries for a 4k-photo grid is
        // still ~800x cheaper than 4k single-row HTTP calls.
        const CHUNK: usize = 500;
        for chunk in paths.chunks(CHUNK) {
            // One (rel_path, id, name, created_time) row per tag
            // attached to any path in the chunk.
            let rows: Vec<(String, i32, String, i64)> = tagged_photo::table
                .inner_join(tags::table)
                .filter(tagged_photo::rel_path.eq_any(chunk))
                .select((
                    tagged_photo::rel_path,
                    tags::id,
                    tags::name,
                    tags::created_time,
                ))
                .get_results(conn.deref_mut())
                .with_context(|| "Unable to get tags grouped from Sqlite")?;
            for (rel_path, id, name, created_time) in rows {
                out.entry(rel_path).or_default().push(Tag {
                    id,
                    name,
                    created_time,
                });
            }
        }
        Ok(out)
    })
}
fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result<Tag> {
let mut conn = self
.connection
@@ -893,6 +992,23 @@ mod tests {
Ok(out)
}
/// Test double: mirrors the real dao's contract — only paths with at
/// least one tag show up as keys in the returned map.
fn get_tags_grouped_by_paths(
    &mut self,
    _context: &opentelemetry::Context,
    paths: &[String],
) -> anyhow::Result<std::collections::HashMap<String, Vec<Tag>>> {
    let tagged = self.tagged_photos.borrow();
    let grouped = paths
        .iter()
        .filter_map(|path| {
            tagged
                .get(path)
                .filter(|tags| !tags.is_empty())
                .map(|tags| (path.clone(), tags.clone()))
        })
        .collect();
    Ok(grouped)
}
fn create_tag(
&mut self,
_context: &opentelemetry::Context,