faces: fix PathExcluder false-positive + cover face_client/crop in tests
PathExcluder was iterating every component of the absolute path,
including the system prefix. Two of the existing memories tests had
been failing on master because tempdir() lives under /tmp on Linux
and a pattern like "tmp" then matched the system /tmp component
rather than anything the user actually asked to exclude. Phase 3's
file-watch hook will use the same code to skip @eaDir / .thumbnails
under each library's BASE_PATH, so the bug would hide every photo
on a host whose BASE_PATH passes through a directory named the same
as a user pattern.
Fix: store base in PathExcluder and strip it before scanning
components. A path that lives outside base falls through to the
no-match branch (defensive — nothing legit hits that today).
Also extracted the face_client error classification into a pure
classify_error_response(status, body) so the marker-row contract
with Apollo (422 → Permanent / 'failed', 5xx → Transient / defer)
is unit-testable without spinning up an HTTP server.
New tests:
memories::tests::test_path_excluder_* — 2 previously
failing tests now pass.
ai::face_client::tests::classify_* — 4 cases:
422 decode_failed → Permanent, 503 cuda_oom → Transient
(handles both string and {code:..} detail shapes), 5xx →
Transient + other 4xx → Permanent, unparseable HTML body still
classifies on status.
faces::tests::crop_* — 3 cases:
invalid bbox rejected, valid bbox round-trips through JPEG
decode, corner crop with 10% padding clamps inside source.
cargo test --lib: 165 passed / 0 failed (was 156 / 2 failed).
cargo fmt and clippy on new code clean. The remaining
sort_by clippy warnings in pre-existing files (memories.rs,
files.rs, exif.rs) are unrelated and present on master.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -257,14 +257,22 @@ impl FaceClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let body_text = resp.text().await.unwrap_or_default();
|
let body_text = resp.text().await.unwrap_or_default();
|
||||||
// Apollo encodes its error class in the JSON body's `detail`. Try
|
Err(classify_error_response(status.as_u16(), &body_text))
|
||||||
// to parse it; fall back to status-only classification.
|
}
|
||||||
let detail_code = serde_json::from_str::<serde_json::Value>(&body_text)
|
}
|
||||||
|
|
||||||
|
/// Map an Apollo HTTP error response to a FaceDetectError. Pulled out as a
|
||||||
|
/// pure function so the marker-row contract (422 → Permanent, 503 →
|
||||||
|
/// Transient) is unit-testable without spinning up an HTTP server.
|
||||||
|
fn classify_error_response(status: u16, body_text: &str) -> FaceDetectError {
|
||||||
|
// Apollo encodes its error class in the JSON body's `detail`. Try to
|
||||||
|
// parse it; fall back to status-only classification.
|
||||||
|
let detail_code = serde_json::from_str::<serde_json::Value>(body_text)
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|v| {
|
.and_then(|v| {
|
||||||
// detail can be a string ("decode_failed") or an object
|
// detail can be a string ("decode_failed") or an object
|
||||||
// ({"code": "cuda_oom", ...}) depending on the endpoint
|
// ({"code": "cuda_oom", ...}) depending on the endpoint and
|
||||||
// and Apollo's response shape — handle both.
|
// Apollo's response shape — handle both.
|
||||||
v.get("detail")
|
v.get("detail")
|
||||||
.and_then(|d| d.as_str().map(str::to_string))
|
.and_then(|d| d.as_str().map(str::to_string))
|
||||||
.or_else(|| {
|
.or_else(|| {
|
||||||
@@ -276,37 +284,87 @@ impl FaceClient {
|
|||||||
})
|
})
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
if status == reqwest::StatusCode::UNPROCESSABLE_ENTITY {
|
if status == 422 {
|
||||||
return Err(FaceDetectError::Permanent(anyhow::anyhow!(
|
return FaceDetectError::Permanent(anyhow::anyhow!(
|
||||||
"face detect 422 {}: {}",
|
"face detect 422 {}: {}",
|
||||||
detail_code,
|
detail_code,
|
||||||
body_text
|
body_text
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
if status == reqwest::StatusCode::SERVICE_UNAVAILABLE {
|
if status == 503 {
|
||||||
return Err(FaceDetectError::Transient(anyhow::anyhow!(
|
return FaceDetectError::Transient(anyhow::anyhow!(
|
||||||
"face detect 503 {}: {}",
|
"face detect 503 {}: {}",
|
||||||
detail_code,
|
detail_code,
|
||||||
body_text
|
body_text
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
// Any other 4xx: be conservative and treat as Permanent so we
|
// Any other 4xx: be conservative and treat as Permanent so we don't
|
||||||
// don't loop forever on a stable rejection. Any other 5xx:
|
// loop forever on a stable rejection. Any other 5xx: Transient —
|
||||||
// Transient — likely intermittent.
|
// likely intermittent.
|
||||||
if status.is_client_error() {
|
if (400..500).contains(&status) {
|
||||||
Err(FaceDetectError::Permanent(anyhow::anyhow!(
|
FaceDetectError::Permanent(anyhow::anyhow!(
|
||||||
"face detect {} {}: {}",
|
"face detect {} {}: {}",
|
||||||
status.as_u16(),
|
status,
|
||||||
detail_code,
|
detail_code,
|
||||||
body_text
|
body_text
|
||||||
)))
|
))
|
||||||
} else {
|
} else {
|
||||||
Err(FaceDetectError::Transient(anyhow::anyhow!(
|
FaceDetectError::Transient(anyhow::anyhow!(
|
||||||
"face detect {} {}: {}",
|
"face detect {} {}: {}",
|
||||||
status.as_u16(),
|
status,
|
||||||
detail_code,
|
detail_code,
|
||||||
body_text
|
body_text
|
||||||
)))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn is_permanent(e: &FaceDetectError) -> bool {
|
||||||
|
matches!(e, FaceDetectError::Permanent(_))
|
||||||
|
}
|
||||||
|
fn is_transient(e: &FaceDetectError) -> bool {
|
||||||
|
matches!(e, FaceDetectError::Transient(_))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_422_decode_failed_is_permanent() {
|
||||||
|
// Permanent → ImageApi marks status='failed' and stops retrying.
|
||||||
|
let e = classify_error_response(422, r#"{"detail":"decode_failed: bad bytes"}"#);
|
||||||
|
assert!(is_permanent(&e), "422 decode_failed must be Permanent");
|
||||||
|
assert!(format!("{e}").contains("decode_failed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_503_cuda_oom_is_transient() {
|
||||||
|
// Transient → ImageApi must NOT write a marker so the next scan
|
||||||
|
// retries. The detail.code is nested in an object rather than a
|
||||||
|
// bare string; the parser handles both.
|
||||||
|
let e = classify_error_response(
|
||||||
|
503,
|
||||||
|
r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#,
|
||||||
|
);
|
||||||
|
assert!(is_transient(&e), "503 cuda_oom must be Transient");
|
||||||
|
assert!(format!("{e}").contains("cuda_oom"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_500_is_transient_other_4xx_is_permanent() {
|
||||||
|
// Conservative split: 5xx defers (intermittent), other 4xx
|
||||||
|
// is treated as a stable rejection so we don't loop forever.
|
||||||
|
assert!(is_transient(&classify_error_response(500, "")));
|
||||||
|
assert!(is_transient(&classify_error_response(502, "{}")));
|
||||||
|
assert!(is_permanent(&classify_error_response(400, "{}")));
|
||||||
|
assert!(is_permanent(&classify_error_response(404, "{}")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_handles_unparseable_body() {
|
||||||
|
// Apollo can return non-JSON on misroute / proxy errors; the
|
||||||
|
// classifier must still produce a useful variant.
|
||||||
|
let e = classify_error_response(503, "<html>nginx</html>");
|
||||||
|
assert!(is_transient(&e));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
61
src/faces.rs
61
src/faces.rs
@@ -1860,4 +1860,65 @@ mod tests {
|
|||||||
assert_eq!(faces.len(), 1);
|
assert_eq!(faces.len(), 1);
|
||||||
assert_eq!(faces[0].person_id, Some(alice.id));
|
assert_eq!(faces[0].person_id, Some(alice.id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── crop_image_to_bbox ──────────────────────────────────────────────
|
||||||
|
// Pure helper used by the manual face-create handler. Generate a tiny
|
||||||
|
// image in memory, write it to a temp file, then exercise the bbox
|
||||||
|
// validation + crop math.
|
||||||
|
|
||||||
|
fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile {
|
||||||
|
let mut img = image::RgbImage::new(w, h);
|
||||||
|
for p in img.pixels_mut() {
|
||||||
|
*p = image::Rgb([200, 200, 200]);
|
||||||
|
}
|
||||||
|
let f = tempfile::Builder::new()
|
||||||
|
.suffix(".jpg")
|
||||||
|
.tempfile()
|
||||||
|
.expect("tempfile");
|
||||||
|
image::DynamicImage::ImageRgb8(img)
|
||||||
|
.save(f.path())
|
||||||
|
.expect("save jpg");
|
||||||
|
f
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn crop_rejects_invalid_bbox() {
|
||||||
|
let f = write_solid_image(64, 64);
|
||||||
|
// x out of [0,1]
|
||||||
|
assert!(crop_image_to_bbox(f.path(), -0.1, 0.0, 0.5, 0.5).is_err());
|
||||||
|
assert!(crop_image_to_bbox(f.path(), 1.5, 0.0, 0.5, 0.5).is_err());
|
||||||
|
// zero / negative dimensions
|
||||||
|
assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.0, 0.5).is_err());
|
||||||
|
assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.5, -0.1).is_err());
|
||||||
|
// overflows the image
|
||||||
|
assert!(crop_image_to_bbox(f.path(), 0.7, 0.0, 0.5, 0.5).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn crop_returns_decodable_jpeg() {
|
||||||
|
let f = write_solid_image(200, 200);
|
||||||
|
let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop");
|
||||||
|
// Re-decode to confirm the pipeline produced a valid JPEG. Exact
|
||||||
|
// dimensions depend on the 10% padding clamp, so just assert
|
||||||
|
// sanity bounds rather than pinning numbers (padding math can
|
||||||
|
// legitimately drift if we tweak the heuristic later).
|
||||||
|
let img = image::load_from_memory(&bytes).expect("decode crop");
|
||||||
|
let (w, h) = (img.width(), img.height());
|
||||||
|
assert!((80..=200).contains(&w), "unexpected crop width: {w}");
|
||||||
|
assert!((80..=200).contains(&h), "unexpected crop height: {h}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn crop_padding_clamps_to_image_bounds() {
|
||||||
|
// A bbox right at the corner should pad inward as far as it can,
|
||||||
|
// never outside the image — otherwise we'd pass invalid coords
|
||||||
|
// to the embedding service.
|
||||||
|
let f = write_solid_image(100, 100);
|
||||||
|
let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop");
|
||||||
|
let img = image::load_from_memory(&bytes).expect("decode corner crop");
|
||||||
|
// Padded crop must fit within the source's 100x100.
|
||||||
|
assert!(img.width() <= 100);
|
||||||
|
assert!(img.height() <= 100);
|
||||||
|
assert!(img.width() > 0 && img.height() > 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,8 @@ use crate::utils::earliest_fs_time;
|
|||||||
|
|
||||||
// Helper that encapsulates path-exclusion semantics
|
// Helper that encapsulates path-exclusion semantics
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct PathExcluder {
|
pub struct PathExcluder {
|
||||||
|
base: PathBuf,
|
||||||
excluded_dirs: Vec<PathBuf>,
|
excluded_dirs: Vec<PathBuf>,
|
||||||
excluded_patterns: Vec<String>,
|
excluded_patterns: Vec<String>,
|
||||||
}
|
}
|
||||||
@@ -34,9 +35,12 @@ impl PathExcluder {
|
|||||||
/// Rules:
|
/// Rules:
|
||||||
/// - Entries starting with '/' are interpreted as "absolute under base"
|
/// - Entries starting with '/' are interpreted as "absolute under base"
|
||||||
/// (e.g. "/photos/private" -> base/photos/private).
|
/// (e.g. "/photos/private" -> base/photos/private).
|
||||||
/// - Entries without '/' are treated as substring patterns that match
|
/// - Entries without '/' are treated as path-component patterns that
|
||||||
/// anywhere in the full path string (still scoped under base).
|
/// match a directory or file name *under* `base`. The base prefix is
|
||||||
fn new(base: &Path, raw_excluded: &[String]) -> Self {
|
/// stripped before matching so a system-level component (e.g. the
|
||||||
|
/// `tmp` in `/tmp/...` when running tests) doesn't masquerade as a
|
||||||
|
/// user-defined exclude.
|
||||||
|
pub fn new(base: &Path, raw_excluded: &[String]) -> Self {
|
||||||
let mut excluded_dirs = Vec::new();
|
let mut excluded_dirs = Vec::new();
|
||||||
let mut excluded_patterns = Vec::new();
|
let mut excluded_patterns = Vec::new();
|
||||||
|
|
||||||
@@ -53,18 +57,19 @@ impl PathExcluder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"PathExcluder created. dirs={:?}, patterns={:?}",
|
"PathExcluder created. base={:?}, dirs={:?}, patterns={:?}",
|
||||||
excluded_dirs, excluded_patterns
|
base, excluded_dirs, excluded_patterns
|
||||||
);
|
);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
|
base: base.to_path_buf(),
|
||||||
excluded_dirs,
|
excluded_dirs,
|
||||||
excluded_patterns,
|
excluded_patterns,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if `path` should be excluded.
|
/// Returns true if `path` should be excluded.
|
||||||
fn is_excluded(&self, path: &Path) -> bool {
|
pub fn is_excluded(&self, path: &Path) -> bool {
|
||||||
// Directory-based exclusions
|
// Directory-based exclusions
|
||||||
for excluded in &self.excluded_dirs {
|
for excluded in &self.excluded_dirs {
|
||||||
if path.starts_with(excluded) {
|
if path.starts_with(excluded) {
|
||||||
@@ -76,10 +81,16 @@ impl PathExcluder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pattern-based exclusions: match whole path components (dir or file name),
|
if self.excluded_patterns.is_empty() {
|
||||||
// not substrings.
|
return false;
|
||||||
if !self.excluded_patterns.is_empty() {
|
}
|
||||||
for component in path.components() {
|
|
||||||
|
// Strip the base prefix before scanning components. Without this,
|
||||||
|
// every path component above `base` (e.g. `tmp` in `/tmp/test123`
|
||||||
|
// under tempdir, or the user's `home` in `/home/user/Pictures`)
|
||||||
|
// would match user-defined patterns and produce false positives.
|
||||||
|
let scan_root = path.strip_prefix(&self.base).unwrap_or(path);
|
||||||
|
for component in scan_root.components() {
|
||||||
if let Some(comp_str) = component.as_os_str().to_str()
|
if let Some(comp_str) = component.as_os_str().to_str()
|
||||||
&& self.excluded_patterns.iter().any(|pat| pat == comp_str)
|
&& self.excluded_patterns.iter().any(|pat| pat == comp_str)
|
||||||
{
|
{
|
||||||
@@ -90,7 +101,6 @@ impl PathExcluder {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user