# ImageApi configuration template. Copy to `.env` and fill in for your
# deploy. Comments mirror the canonical docs in CLAUDE.md — see there
# for the full picture (especially the AI-Insights / Apollo / face
# integration sections).
#
# Format: one KEY=value assignment per line; comments are full lines
# starting with `#`. Lines of the form `# KEY=value` are optional
# settings — remove the leading `# ` to enable them.

# ── Required ────────────────────────────────────────────────────────────
DATABASE_URL=./database.db
BASE_PATH=/path/to/media
THUMBNAILS=/path/to/thumbnails
VIDEO_PATH=/path/to/video/hls
GIFS_DIRECTORY=/path/to/gifs
PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips
BIND_URL=0.0.0.0:8080
CORS_ALLOWED_ORIGINS=http://localhost:3000
SECRET_KEY=replace-me-with-a-long-random-secret
RUST_LOG=info

# ── File watching ───────────────────────────────────────────────────────
# Quick scan = recently-modified-files only; full scan = comprehensive walk.
WATCH_QUICK_INTERVAL_SECONDS=60
WATCH_FULL_INTERVAL_SECONDS=3600
# Comma-separated path prefixes / component names to skip in /memories
# AND in face detection (e.g. @eaDir, .thumbnails, /private).
EXCLUDED_DIRS=

# ── Video / HLS ─────────────────────────────────────────────────────────
HLS_CONCURRENCY=2
HLS_TIMEOUT_SECONDS=900
PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400

# ── Telemetry (release builds only) ─────────────────────────────────────
# OTLP_OTLS_ENDPOINT=http://localhost:4317

# ── AI Insights — Ollama (local LLM) ────────────────────────────────────
OLLAMA_PRIMARY_URL=http://localhost:11434
OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b
# Optional fallback server tried on connection failure.
# OLLAMA_FALLBACK_URL=http://server:11434
# OLLAMA_FALLBACK_MODEL=llama3.2:3b
OLLAMA_REQUEST_TIMEOUT_SECONDS=120
# Cap on tool-calling iterations per chat turn / agentic insight.
AGENTIC_MAX_ITERATIONS=6
AGENTIC_CHAT_MAX_ITERATIONS=6

# ── AI Insights — OpenRouter (hybrid backend, optional) ─────────────────
# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays
# local on Ollama, chat routes to OpenRouter).
# OPENROUTER_API_KEY=sk-or-...
# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4
# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
# OPENROUTER_HTTP_REFERER=https://your-site.example
# OPENROUTER_APP_TITLE=ImageApi

# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
# Set LLAMA_SWAP_URL to enable the `llamacpp` chat_backend. Talks
# OpenAI-compatible /v1 to a llama-swap proxy that fronts per-slot
# llama-server instances (chat / vision / embed). Like hybrid, the
# agentic loop describes images via the vision slot then inlines the
# text into the chat slot — so the chat slot itself can be text-only.
# LLAMA_SWAP_URL=http://localhost:9292/v1
# LLAMA_SWAP_PRIMARY_MODEL=chat
# LLAMA_SWAP_VISION_MODEL=vision
# LLAMA_SWAP_EMBEDDING_MODEL=embed
# Comma-separated allowlist of model ids the /v1/models endpoint should
# advertise as vision-capable (llama-swap doesn't report modality).
# LLAMA_SWAP_VISION_MODELS=vision
# Comma-separated allowlist surfaced by /insights/llamacpp/models.
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=120
# Routes hybrid mode's vision-describe pass through llama-swap's vision
# slot instead of Ollama (chat still goes to OpenRouter). Values:
# `ollama` (default) | `llamacpp`.
# HYBRID_VISION_BACKEND=ollama

# ── AI Insights — sibling services (optional) ───────────────────────────
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
# typically set only APOLLO_API_BASE_URL and let the face + CLIP
# clients fall back to it.
# APOLLO_API_BASE_URL=http://apollo.lan:8000
# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000
# SMS_API_URL=http://localhost:8000
# SMS_API_TOKEN=
# Display name used in agentic prompts when the LLM refers to "you".
USER_NAME=

# ── Face detection (Phase 3+) ───────────────────────────────────────────
# Cosine-sim floor for auto-binding a detected face to an existing
# same-named person on detection. 0.4 ≈ moderate-confidence match.
FACE_AUTOBIND_MIN_COS=0.4
# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU
# pool serializes server-side; this just overlaps file-IO with
# inference RTT.
FACE_DETECT_CONCURRENCY=8
# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher.
FACE_DETECT_TIMEOUT_SEC=60
# Per-tick caps on the two backlog drains (independent of WATCH_*
# quick / full scans). Tune up if you have a large unscanned backlog
# and want it to clear faster; tune down if Apollo is overloaded.
FACE_BACKLOG_MAX_PER_TICK=64
FACE_HASH_BACKFILL_MAX_PER_TICK=2000

# ── CLIP semantic photo search ──────────────────────────────────────────
# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text}
# to populate per-photo embeddings during the watcher's backlog drain
# and to encode user queries at /photos/search time. Disabled when
# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set.
#
# Per-watcher-tick cap on the encode drain. Default 32 ≈ ~1 photo/sec
# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool
# is 1 on CUDA, so concurrency is bounded server-side regardless of
# our setting). Bump on a fresh deploy to clear the backlog faster.
CLIP_BACKLOG_MAX_PER_TICK=32
# Client-side parallel encode calls per drain pass. Apollo's GPU pool
# serializes server-side; this just overlaps file-IO with inference.
CLIP_ENCODE_CONCURRENCY=4
# Per-encode HTTP timeout. CPU-only Apollo deploys may need higher.
CLIP_REQUEST_TIMEOUT_SEC=60

# ── RAG / search ────────────────────────────────────────────────────────
# Set to `1` to enable cross-encoder reranking on /search results.
SEARCH_RAG_RERANK=0