From ee2ed3005bf69734d7ade114499a626efd6bb6e0 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Thu, 14 May 2026 14:12:51 -0400 Subject: [PATCH] clip-search: document env knobs in .env.example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit APOLLO_CLIP_API_BASE_URL (falls back to APOLLO_API_BASE_URL), CLIP_BACKLOG_MAX_PER_TICK, CLIP_ENCODE_CONCURRENCY, and CLIP_REQUEST_TIMEOUT_SEC — all of which the code already reads. Apollo's side was documented earlier; this closes the parity gap. Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index b520940..718f6bd 100644 --- a/.env.example +++ b/.env.example @@ -54,10 +54,12 @@ AGENTIC_CHAT_MAX_ITERATIONS=6 # OPENROUTER_APP_TITLE=ImageApi # ── AI Insights — sibling services (optional) ─────────────────────────── -# Apollo (places + face inference). Single Apollo deploys typically set -# only APOLLO_API_BASE_URL and let the face client fall back to it. +# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys +# typically set only APOLLO_API_BASE_URL and let the face + CLIP +# clients fall back to it. # APOLLO_API_BASE_URL=http://apollo.lan:8000 # APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 +# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000 # SMS_API_URL=http://localhost:8000 # SMS_API_TOKEN= @@ -80,6 +82,23 @@ FACE_DETECT_TIMEOUT_SEC=60 FACE_BACKLOG_MAX_PER_TICK=64 FACE_HASH_BACKFILL_MAX_PER_TICK=2000 +# ── CLIP semantic photo search ────────────────────────────────────────── +# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text} +# to populate per-photo embeddings during the watcher's backlog drain +# and to encode user queries at /photos/search time. Disabled when +# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set. +# +# Per-watcher-tick cap on the encode drain. 
Default 32 ≈ 1 photo/sec +# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool +# is 1 on CUDA, so concurrency is bounded server-side regardless of +# our setting). Bump on a fresh deploy to clear the backlog faster. +CLIP_BACKLOG_MAX_PER_TICK=32 +# Client-side parallel encode calls per drain pass. Apollo's GPU pool +# serializes server-side; this just overlaps file-IO with inference. +CLIP_ENCODE_CONCURRENCY=4 +# Per-encode HTTP timeout (seconds). CPU-only Apollo deploys may need a higher value. +CLIP_REQUEST_TIMEOUT_SEC=60 + # ── RAG / search ──────────────────────────────────────────────────────── # Set to `1` to enable cross-encoder reranking on /search results. SEARCH_RAG_RERANK=0