faces: force-create path for regions the detector can't see

Adds an opt-in 'force' flag to POST /image/faces. When set, the handler
skips the Apollo embed call entirely and stores the row with a
2048-byte zero-vector embedding under the sentinel model_version
'manual_no_embed'. The row participates as a browse-by-person tag but
is excluded from clustering and auto-bind:

- face_clustering._decode_b64_embedding already filters out embeddings with norm <= 0
- cluster suggester groups by model_version, so the sentinel never
  mixes with real buffalo_l rows
- cosine_similarity against a zero vector evaluates to 0 (or NaN, which
  compares false), so it never crosses the 0.4 auto-bind threshold

Use case: tagging someone looking away from the camera, a profile shot,
or a heavily-occluded face — anywhere the detector returns no_face_in_crop
on the user's drawn region. The frontend only sets force=true after a
422 from a strict create plus an explicit operator confirmation, so
the normal "draw a centered face" UX still gets a real ArcFace
embedding.
This commit is contained in:
Cameron Cordes
2026-04-29 23:49:34 +00:00
parent 0eaf27d2d3
commit 891a9982ef

View File

@@ -287,6 +287,16 @@ pub struct CreateFaceReq {
/// box and immediately picks a name from the autocomplete. /// box and immediately picks a name from the autocomplete.
#[serde(default)] #[serde(default)]
pub person_id: Option<i32>, pub person_id: Option<i32>,
/// Skip the embedding step. Set when the user wants to tag a region
/// the detector can't find a face in (back of head, profile partly
/// occluded, etc.). The row is stored with a zero-vector embedding,
/// which the cluster suggester filters on `norm <= 0` and auto-bind
/// cosine resolves to 0 against — so the row participates only as a
/// browse-by-person tag, not in similarity matching. The frontend
/// only sets this after a 422 from a strict create plus an explicit
/// operator confirmation.
#[serde(default)]
pub force: bool,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
@@ -2023,7 +2033,10 @@ async fn create_face_handler<D: FaceDao>(
let span = global_tracer().start_with_context("faces.create_manual", &context); let span = global_tracer().start_with_context("faces.create_manual", &context);
let span_context = opentelemetry::Context::current_with_span(span); let span_context = opentelemetry::Context::current_with_span(span);
if !face_client.is_enabled() { // The force path doesn't need Apollo at all (no embed call); the
// strict path does. Surface the disabled state only when we'd
// actually use the client.
if !body.force && !face_client.is_enabled() {
return HttpResponse::ServiceUnavailable().body("face client disabled"); return HttpResponse::ServiceUnavailable().body("face client disabled");
} }
@@ -2049,56 +2062,73 @@ async fn create_face_handler<D: FaceDao>(
} }
}; };
// 2. Read full image, crop to bbox, encode as JPEG for transport. // 2 + 3. Crop + embed via Apollo (strict path), or skip both (force).
let abs_path = library.resolve(&normalized_path); //
let crop_bytes = match crop_image_to_bbox( // Force is the "tag a face the detector can't see" path — back of
&abs_path, // head, heavily-occluded profile, etc. We store a zero-vector
body.bbox.x, // embedding under a sentinel model_version so the row participates
body.bbox.y, // only as a browse-by-person tag: clustering filters norm<=0 (see
body.bbox.w, // face_clustering._decode_b64_embedding) and auto-bind cosine
body.bbox.h, // resolves to 0 / NaN, never crossing the threshold. Cluster
) { // suggester also groups by model_version so this sentinel never
Ok(b) => b, // mixes with real buffalo_l rows.
Err(e) => { let (embedding_bytes, model_version, confidence) = if body.force {
warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e); info!(
return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e)); "manual face (force): skipping detection for {:?} bbox=({},{},{},{})",
} normalized_path, body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h
}; );
(vec![0u8; 2048], "manual_no_embed".to_string(), 0.0_f32)
} else {
let abs_path = library.resolve(&normalized_path);
let crop_bytes = match crop_image_to_bbox(
&abs_path,
body.bbox.x,
body.bbox.y,
body.bbox.w,
body.bbox.h,
) {
Ok(b) => b,
Err(e) => {
warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e);
return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e));
}
};
// 3. Send the crop to Apollo for embedding extraction. let meta = DetectMeta {
let meta = DetectMeta { content_hash: hash.clone(),
content_hash: hash.clone(), library_id: library.id,
library_id: library.id, rel_path: normalized_path.clone(),
rel_path: normalized_path.clone(), orientation: None,
orientation: None, model_version: None,
model_version: None, };
}; let detect = match face_client.embed(crop_bytes, meta).await {
let detect = match face_client.embed(crop_bytes, meta).await { Ok(r) => r,
Ok(r) => r, Err(FaceDetectError::Permanent(e)) => {
Err(FaceDetectError::Permanent(e)) => { return HttpResponse::UnprocessableEntity().body(format!("{}", e));
return HttpResponse::UnprocessableEntity().body(format!("{}", e)); }
} Err(FaceDetectError::Transient(e)) => {
Err(FaceDetectError::Transient(e)) => { return HttpResponse::ServiceUnavailable().body(format!("{}", e));
return HttpResponse::ServiceUnavailable().body(format!("{}", e)); }
} Err(FaceDetectError::Disabled) => {
Err(FaceDetectError::Disabled) => { return HttpResponse::ServiceUnavailable().body("face client disabled");
return HttpResponse::ServiceUnavailable().body("face client disabled"); }
} };
};
let detected = match detect.faces.first() { let detected = match detect.faces.first() {
Some(f) => f.clone(), Some(f) => f.clone(),
None => { None => {
// Apollo would have returned 422 on no_face_in_crop; defensive. // Apollo would have returned 422 on no_face_in_crop; defensive.
return HttpResponse::UnprocessableEntity().body("no face in crop"); return HttpResponse::UnprocessableEntity().body("no face in crop");
} }
}; };
let embedding_bytes = match detected.decode_embedding() { let bytes = match detected.decode_embedding() {
Ok(b) => b, Ok(b) => b,
Err(e) => { Err(e) => {
warn!("manual face: decode embedding failed: {:?}", e); warn!("manual face: decode embedding failed: {:?}", e);
return HttpResponse::BadGateway().body("invalid embedding from face service"); return HttpResponse::BadGateway().body("invalid embedding from face service");
} }
};
(bytes, detect.model_version, detected.confidence)
}; };
// 4. Insert the manual row using the bbox the user drew (NOT the // 4. Insert the manual row using the bbox the user drew (NOT the
@@ -2114,11 +2144,11 @@ async fn create_face_handler<D: FaceDao>(
rel_path: normalized_path, rel_path: normalized_path,
bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)), bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)),
embedding: Some(embedding_bytes), embedding: Some(embedding_bytes),
confidence: Some(detected.confidence), confidence: Some(confidence),
source: "manual".to_string(), source: "manual".to_string(),
person_id: body.person_id, person_id: body.person_id,
status: "detected".to_string(), status: "detected".to_string(),
model_version: detect.model_version, model_version,
}, },
) { ) {
Ok(r) => r, Ok(r) => r,