From 1d9b9a0bc484c2e4c688472cb0ef2d853ecde187 Mon Sep 17 00:00:00 2001
From: Cameron Cordes
Date: Fri, 1 May 2026 19:00:55 -0400
Subject: [PATCH] faces: avoid 40 MB row clone in /faces/embeddings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

list_embeddings cloned the full FaceDetectionRow inside the filter_map
just to pair it with the base64-encoded embedding. The 2 KB BLOB was
already on the row — at 20k unassigned faces that's 40 MB of pointless
heap traffic per Apollo cluster-suggest run.

Move the bytes out via Option::take() so the row drops the BLOB instead
of duplicating it.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/faces.rs | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/faces.rs b/src/faces.rs
index 35c8995..fb2fb87 100644
--- a/src/faces.rs
+++ b/src/faces.rs
@@ -884,14 +884,18 @@ impl FaceDao for SqliteFaceDao {
             // Pair with the base64-encoded embedding string so the handler
             // doesn't need to know the wire format. Skip rows with NULL
             // embedding (shouldn't happen on detected rows, but defensive).
+            // `embedding.take()` moves the bytes out of the row so we can
+            // hand the (now-empty-embedding) row plus the encoded string
+            // back to the caller without cloning the whole row — at 20k
+            // rows × 2 KB that clone was 40 MB of pointless heap traffic
+            // per cluster-suggest run.
             use base64::Engine;
             Ok(rows
                 .into_iter()
-                .filter_map(|r| {
-                    r.embedding.as_ref().map(|bytes| {
-                        let b64 = base64::engine::general_purpose::STANDARD.encode(bytes);
-                        (r.clone(), b64)
-                    })
+                .filter_map(|mut r| {
+                    let bytes = r.embedding.take()?;
+                    let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
+                    Some((r, b64))
                 })
                 .collect())
         })