From f77e44b34df7aff3b031d786aba6c608c81e8862 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Wed, 29 Apr 2026 18:09:44 +0000 Subject: [PATCH] faces: fix PathExcluder false-positive + cover face_client/crop in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PathExcluder was iterating every component of the absolute path, including the system prefix. Two of the existing memories tests had been failing on master because tempdir() lives under /tmp on Linux and a pattern like "tmp" then matched the system /tmp component rather than anything the user actually asked to exclude. Phase 3's file-watch hook will use the same code to skip @eaDir / .thumbnails under each library's BASE_PATH, so the bug would hide every photo on a host whose BASE_PATH passes through a directory named the same as a user pattern. Fix: store base in PathExcluder and strip it before scanning components. A path that lives outside base falls through to the no-match branch (defensive — nothing legit hits that today). Also extracted the face_client error classification into a pure classify_error_response(status, body) so the marker-row contract with Apollo (422 → Permanent / 'failed', 5xx → Transient / defer) is unit-testable without spinning up an HTTP server. New tests: memories::tests::test_path_excluder_* — 2 previously failing tests now pass. ai::face_client::tests::classify_* — 4 cases: 422 decode_failed → Permanent, 503 cuda_oom → Transient (handles both string and {code:..} detail shapes), 5xx → Transient + other 4xx → Permanent, unparseable HTML body still classifies on status. faces::tests::crop_* — 3 cases: invalid bbox rejected, valid bbox round-trips through JPEG decode, corner crop with 10% padding clamps inside source. cargo test --lib: 165 passed / 0 failed (was 156 / 2 failed). cargo fmt and clippy on new code clean. The remaining sort_by clippy warnings in pre-existing files (memories.rs, files.rs, exif.rs) are unrelated and present on master. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/face_client.rs | 160 ++++++++++++++++++++++++++++-------------- src/faces.rs | 61 ++++++++++++++++ src/memories.rs | 50 +++++++------ 3 files changed, 200 insertions(+), 71 deletions(-) diff --git a/src/ai/face_client.rs b/src/ai/face_client.rs index 3bd2472..8a52812 100644 --- a/src/ai/face_client.rs +++ b/src/ai/face_client.rs @@ -257,56 +257,114 @@ impl FaceClient { } let body_text = resp.text().await.unwrap_or_default(); - // Apollo encodes its error class in the JSON body's `detail`. Try - // to parse it; fall back to status-only classification. - let detail_code = serde_json::from_str::(&body_text) - .ok() - .and_then(|v| { - // detail can be a string ("decode_failed") or an object - // ({"code": "cuda_oom", ...}) depending on the endpoint - // and Apollo's response shape — handle both. - v.get("detail") - .and_then(|d| d.as_str().map(str::to_string)) - .or_else(|| { - v.get("detail") - .and_then(|d| d.get("code")) - .and_then(|c| c.as_str()) - .map(str::to_string) - }) - }) - .unwrap_or_default(); - - if status == reqwest::StatusCode::UNPROCESSABLE_ENTITY { - return Err(FaceDetectError::Permanent(anyhow::anyhow!( - "face detect 422 {}: {}", - detail_code, - body_text - ))); - } - if status == reqwest::StatusCode::SERVICE_UNAVAILABLE { - return Err(FaceDetectError::Transient(anyhow::anyhow!( - "face detect 503 {}: {}", - detail_code, - body_text - ))); - } - // Any other 4xx: be conservative and treat as Permanent so we - // don't loop forever on a stable rejection. Any other 5xx: - // Transient — likely intermittent. - if status.is_client_error() { - Err(FaceDetectError::Permanent(anyhow::anyhow!( - "face detect {} {}: {}", - status.as_u16(), - detail_code, - body_text - ))) - } else { - Err(FaceDetectError::Transient(anyhow::anyhow!( - "face detect {} {}: {}", - status.as_u16(), - detail_code, - body_text - ))) - } + Err(classify_error_response(status.as_u16(), &body_text)) + } +} + +/// Map an Apollo HTTP error response to a FaceDetectError. Pulled out as a +/// pure function so the marker-row contract (422 → Permanent, 503 → +/// Transient) is unit-testable without spinning up an HTTP server. +fn classify_error_response(status: u16, body_text: &str) -> FaceDetectError { + // Apollo encodes its error class in the JSON body's `detail`. Try to + // parse it; fall back to status-only classification. + let detail_code = serde_json::from_str::(body_text) + .ok() + .and_then(|v| { + // detail can be a string ("decode_failed") or an object + // ({"code": "cuda_oom", ...}) depending on the endpoint and + // Apollo's response shape — handle both. + v.get("detail") + .and_then(|d| d.as_str().map(str::to_string)) + .or_else(|| { + v.get("detail") + .and_then(|d| d.get("code")) + .and_then(|c| c.as_str()) + .map(str::to_string) + }) + }) + .unwrap_or_default(); + + if status == 422 { + return FaceDetectError::Permanent(anyhow::anyhow!( + "face detect 422 {}: {}", + detail_code, + body_text + )); + } + if status == 503 { + return FaceDetectError::Transient(anyhow::anyhow!( + "face detect 503 {}: {}", + detail_code, + body_text + )); + } + // Any other 4xx: be conservative and treat as Permanent so we don't + // loop forever on a stable rejection. Any other 5xx: Transient — + // likely intermittent. + if (400..500).contains(&status) { + FaceDetectError::Permanent(anyhow::anyhow!( + "face detect {} {}: {}", + status, + detail_code, + body_text + )) + } else { + FaceDetectError::Transient(anyhow::anyhow!( + "face detect {} {}: {}", + status, + detail_code, + body_text + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn is_permanent(e: &FaceDetectError) -> bool { + matches!(e, FaceDetectError::Permanent(_)) + } + fn is_transient(e: &FaceDetectError) -> bool { + matches!(e, FaceDetectError::Transient(_)) + } + + #[test] + fn classify_422_decode_failed_is_permanent() { + // Permanent → ImageApi marks status='failed' and stops retrying. + let e = classify_error_response(422, r#"{"detail":"decode_failed: bad bytes"}"#); + assert!(is_permanent(&e), "422 decode_failed must be Permanent"); + assert!(format!("{e}").contains("decode_failed")); + } + + #[test] + fn classify_503_cuda_oom_is_transient() { + // Transient → ImageApi must NOT write a marker so the next scan + // retries. The detail.code is nested in an object rather than a + // bare string; the parser handles both. + let e = classify_error_response( + 503, + r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#, + ); + assert!(is_transient(&e), "503 cuda_oom must be Transient"); + assert!(format!("{e}").contains("cuda_oom")); + } + + #[test] + fn classify_500_is_transient_other_4xx_is_permanent() { + // Conservative split: 5xx defers (intermittent), other 4xx + // is treated as a stable rejection so we don't loop forever. + assert!(is_transient(&classify_error_response(500, ""))); + assert!(is_transient(&classify_error_response(502, "{}"))); + assert!(is_permanent(&classify_error_response(400, "{}"))); + assert!(is_permanent(&classify_error_response(404, "{}"))); + } + + #[test] + fn classify_handles_unparseable_body() { + // Apollo can return non-JSON on misroute / proxy errors; the + // classifier must still produce a useful variant. + let e = classify_error_response(503, "nginx"); + assert!(is_transient(&e)); } } diff --git a/src/faces.rs b/src/faces.rs index aa79e14..ffd106e 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -1860,4 +1860,65 @@ mod tests { assert_eq!(faces.len(), 1); assert_eq!(faces[0].person_id, Some(alice.id)); } + + // ── crop_image_to_bbox ────────────────────────────────────────────── + // Pure helper used by the manual face-create handler. Generate a tiny + // image in memory, write it to a temp file, then exercise the bbox + // validation + crop math. + + fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile { + let mut img = image::RgbImage::new(w, h); + for p in img.pixels_mut() { + *p = image::Rgb([200, 200, 200]); + } + let f = tempfile::Builder::new() + .suffix(".jpg") + .tempfile() + .expect("tempfile"); + image::DynamicImage::ImageRgb8(img) + .save(f.path()) + .expect("save jpg"); + f + } + + #[test] + fn crop_rejects_invalid_bbox() { + let f = write_solid_image(64, 64); + // x out of [0,1] + assert!(crop_image_to_bbox(f.path(), -0.1, 0.0, 0.5, 0.5).is_err()); + assert!(crop_image_to_bbox(f.path(), 1.5, 0.0, 0.5, 0.5).is_err()); + // zero / negative dimensions + assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.0, 0.5).is_err()); + assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.5, -0.1).is_err()); + // overflows the image + assert!(crop_image_to_bbox(f.path(), 0.7, 0.0, 0.5, 0.5).is_err()); + } + + #[test] + fn crop_returns_decodable_jpeg() { + let f = write_solid_image(200, 200); + let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop"); + // Re-decode to confirm the pipeline produced a valid JPEG. Exact + // dimensions depend on the 10% padding clamp, so just assert + // sanity bounds rather than pinning numbers (padding math can + // legitimately drift if we tweak the heuristic later). + let img = image::load_from_memory(&bytes).expect("decode crop"); + let (w, h) = (img.width(), img.height()); + assert!((80..=200).contains(&w), "unexpected crop width: {w}"); + assert!((80..=200).contains(&h), "unexpected crop height: {h}"); + } + + #[test] + fn crop_padding_clamps_to_image_bounds() { + // A bbox right at the corner should pad inward as far as it can, + // never outside the image — otherwise we'd pass invalid coords + // to the embedding service. + let f = write_solid_image(100, 100); + let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop"); + let img = image::load_from_memory(&bytes).expect("decode corner crop"); + // Padded crop must fit within the source's 100x100. + assert!(img.width() <= 100); + assert!(img.height() <= 100); + assert!(img.width() > 0 && img.height() > 0); + } } diff --git a/src/memories.rs b/src/memories.rs index 0e2aad5..95de714 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -23,7 +23,8 @@ use crate::utils::earliest_fs_time; // Helper that encapsulates path-exclusion semantics #[derive(Debug)] -struct PathExcluder { +pub struct PathExcluder { + base: PathBuf, excluded_dirs: Vec, excluded_patterns: Vec, } @@ -34,9 +35,12 @@ impl PathExcluder { /// Rules: /// - Entries starting with '/' are interpreted as "absolute under base" /// (e.g. "/photos/private" -> base/photos/private). - /// - Entries without '/' are treated as substring patterns that match - /// anywhere in the full path string (still scoped under base). - fn new(base: &Path, raw_excluded: &[String]) -> Self { + /// - Entries without '/' are treated as path-component patterns that + /// match a directory or file name *under* `base`. The base prefix is + /// stripped before matching so a system-level component (e.g. the + /// `tmp` in `/tmp/...` when running tests) doesn't masquerade as a + /// user-defined exclude. + pub fn new(base: &Path, raw_excluded: &[String]) -> Self { let mut excluded_dirs = Vec::new(); let mut excluded_patterns = Vec::new(); @@ -53,18 +57,19 @@ impl PathExcluder { } debug!( - "PathExcluder created. dirs={:?}, patterns={:?}", - excluded_dirs, excluded_patterns + "PathExcluder created. base={:?}, dirs={:?}, patterns={:?}", + base, excluded_dirs, excluded_patterns ); Self { + base: base.to_path_buf(), excluded_dirs, excluded_patterns, } } /// Returns true if `path` should be excluded. - fn is_excluded(&self, path: &Path) -> bool { + pub fn is_excluded(&self, path: &Path) -> bool { // Directory-based exclusions for excluded in &self.excluded_dirs { if path.starts_with(excluded) { @@ -76,19 +81,24 @@ impl PathExcluder { } } - // Pattern-based exclusions: match whole path components (dir or file name), - // not substrings. - if !self.excluded_patterns.is_empty() { - for component in path.components() { - if let Some(comp_str) = component.as_os_str().to_str() - && self.excluded_patterns.iter().any(|pat| pat == comp_str) - { - trace!( - "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})", - path, comp_str, self.excluded_patterns - ); - return true; - } + if self.excluded_patterns.is_empty() { + return false; + } + + // Strip the base prefix before scanning components. Without this, + // every path component above `base` (e.g. `tmp` in `/tmp/test123` + // under tempdir, or the user's `home` in `/home/user/Pictures`) + // would match user-defined patterns and produce false positives. + let scan_root = path.strip_prefix(&self.base).unwrap_or(path); + for component in scan_root.components() { + if let Some(comp_str) = component.as_os_str().to_str() + && self.excluded_patterns.iter().any(|pat| pat == comp_str) + { + trace!( + "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})", + path, comp_str, self.excluded_patterns + ); + return true; } }