From c2c1fe5b8b07af161e9ce289cb3feacb4127254f Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Thu, 30 Apr 2026 01:06:08 +0000 Subject: [PATCH] faces: bbox crop respects EXIF orientation + pads enough for RetinaFace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two reasons manually-drawn bboxes were never resolving a face on re-detection: (1) The bbox arrives in display space (browser already applied EXIF orientation when rendering the carousel), but the `image` crate in crop_image_to_bbox opens raw pre-rotation pixels. For any phone photo with Orientation 6/8/etc., applying the bbox without rotating first crops a completely different region of the image — landing on background, hair, or empty pixels. Now reads the EXIF Orientation tag and applies it before indexing into the canonical-oriented dims. (2) Padding was 10 % on each side. A typical 200×250 face bbox + 10 % becomes ~240×300; insightface resizes that to det_size=640, so the face fills ~95 % of the input. RetinaFace's anchors expect faces at 20–60 % of input dimensions; at 95 % it routinely returns zero detections. Bumped to 50 % padding so the crop is 2× the bbox dims and the face occupies ~50 % of the input — anchor-friendly. Bbox is still clamped to image bounds, so edge-of-image cases just get less padding on the clipped side. Together these explain why bbox-edit re-embed practically always fell into the "no face detected" branch (and why, before the recent soft-fallback commit, a bbox edit would get reverted). Per-photo embedding quality also improves slightly — same face, more context, better landmarks for ArcFace. 
--- src/faces.rs | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/faces.rs b/src/faces.rs index 6bcce3a..f45afa4 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -20,6 +20,7 @@ use crate::Claims; use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError}; +use crate::exif; use crate::database::schema::{face_detections, image_exif, persons}; use crate::error::IntoHttpError; use crate::libraries::{self, Library}; @@ -2508,7 +2509,18 @@ fn crop_image_to_bbox( if nw <= 0.0 || nh <= 0.0 || nx + nw > 1.001 || ny + nh > 1.001 { return Err(anyhow!("bbox wh out of bounds or zero")); } - let img = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?; + let raw = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?; + // EXIF rotation: the bbox arrives in display space (the carousel / + // overlay are rendered post-rotation by the browser), but the + // `image` crate hands us raw pre-rotation pixels. For any phone + // photo with Orientation 6/8/etc., applying the bbox without + // rotating first lands the crop on a completely different region + // of the image — which is why manually-drawn bboxes basically + // never resolved a face on re-detection. Apply the orientation + // first, then index into the canonical-oriented dims. Photos with + // no EXIF rotation tag pay nothing (apply_orientation is a no-op). + let orientation = exif::read_orientation(abs_path).unwrap_or(1); + let img = exif::apply_orientation(raw, orientation); let (w, h) = img.dimensions(); let px = (nx * w as f32).round().clamp(0.0, w as f32 - 1.0) as u32; let py = (ny * h as f32).round().clamp(0.0, h as f32 - 1.0) as u32; @@ -2517,11 +2529,17 @@ fn crop_image_to_bbox( if pw == 0 || ph == 0 { return Err(anyhow!("crop produced zero-dim image")); } - // Pad the crop a bit so the detector has context — a tightly-drawn - // bbox often clips ears/jaw which hurts the embedding. 
10% on each - // side is a reasonable default. - let pad_x = (pw / 10).max(1); - let pad_y = (ph / 10).max(1); + // Generous padding so RetinaFace has anchor-friendly context. + // Insightface internally resizes to det_size=640 (square). A + // tightly-drawn 200×250 face bbox + 10 % padding becomes ~240×300, + // which after resize fills ~95 % of the input — near the upper + // edge of RetinaFace's anchor scales, where it routinely returns + // zero detections. Padding to 50 % on each side makes the crop + // 2× the bbox dims (face occupies ~50 % of the input), where + // anchors hit cleanly. Bbox is clamped to image bounds, so + // edge-of-image bboxes just get less padding on the clipped side. + let pad_x = (pw / 2).max(1); + let pad_y = (ph / 2).max(1); let cx = px.saturating_sub(pad_x); let cy = py.saturating_sub(pad_y); let cw = (pw + 2 * pad_x).min(w - cx);