diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..b520940
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,85 @@
+# ImageApi configuration template. Copy to `.env` and fill in for your
+# deploy. Comments mirror the canonical docs in CLAUDE.md — see there
+# for the full picture (especially the AI-Insights / Apollo / face
+# integration sections).
+
+# ── Required ────────────────────────────────────────────────────────────
+DATABASE_URL=./database.db
+BASE_PATH=/path/to/media
+THUMBNAILS=/path/to/thumbnails
+VIDEO_PATH=/path/to/video/hls
+GIFS_DIRECTORY=/path/to/gifs
+PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips
+BIND_URL=0.0.0.0:8080
+CORS_ALLOWED_ORIGINS=http://localhost:3000
+SECRET_KEY=replace-me-with-a-long-random-secret
+RUST_LOG=info
+
+# ── File watching ───────────────────────────────────────────────────────
+# Quick scan = recently-modified-files only; full scan = comprehensive walk.
+WATCH_QUICK_INTERVAL_SECONDS=60
+WATCH_FULL_INTERVAL_SECONDS=3600
+# Comma-separated path prefixes / component names to skip in /memories
+# AND in face detection (e.g. @eaDir, .thumbnails, /private).
+EXCLUDED_DIRS=
+
+# ── Video / HLS ─────────────────────────────────────────────────────────
+HLS_CONCURRENCY=2
+HLS_TIMEOUT_SECONDS=900
+PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400
+
+# ── Telemetry (release builds only) ─────────────────────────────────────
+# OTLP_OTLS_ENDPOINT=http://localhost:4317
+
+# ── AI Insights — Ollama (local LLM) ────────────────────────────────────
+OLLAMA_PRIMARY_URL=http://localhost:11434
+OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b
+# Optional fallback server tried on connection failure.
+# OLLAMA_FALLBACK_URL=http://server:11434
+# OLLAMA_FALLBACK_MODEL=llama3.2:3b
+OLLAMA_REQUEST_TIMEOUT_SECONDS=120
+# Cap on tool-calling iterations per chat turn / agentic insight.
+AGENTIC_MAX_ITERATIONS=6
+AGENTIC_CHAT_MAX_ITERATIONS=6
+
+# ── AI Insights — OpenRouter (hybrid backend, optional) ─────────────────
+# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays
+# local on Ollama, chat routes to OpenRouter).
+# OPENROUTER_API_KEY=sk-or-...
+# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4
+# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
+# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
+# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
+# OPENROUTER_HTTP_REFERER=https://your-site.example
+# OPENROUTER_APP_TITLE=ImageApi
+
+# ── AI Insights — sibling services (optional) ───────────────────────────
+# Apollo (places + face inference). Single Apollo deploys typically set
+# only APOLLO_API_BASE_URL and let the face client fall back to it.
+# APOLLO_API_BASE_URL=http://apollo.lan:8000
+# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
+# SMS_API_URL=http://localhost:8000
+# SMS_API_TOKEN=
+
+# Display name used in agentic prompts when the LLM refers to "you".
+USER_NAME=
+
+# ── Face detection (Phase 3+) ───────────────────────────────────────────
+# Cosine-sim floor for auto-binding a detected face to an existing
+# same-named person on detection. 0.4 ≈ moderate-confidence match.
+FACE_AUTOBIND_MIN_COS=0.4
+# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU
+# pool serializes server-side; this just overlaps file-IO with
+# inference RTT.
+FACE_DETECT_CONCURRENCY=8
+# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher.
+FACE_DETECT_TIMEOUT_SEC=60
+# Per-tick caps on the two backlog drains (independent of WATCH_*
+# quick / full scans). Tune up if you have a large unscanned backlog
+# and want it to clear faster; tune down if Apollo is overloaded.
+FACE_BACKLOG_MAX_PER_TICK=64
+FACE_HASH_BACKFILL_MAX_PER_TICK=2000
+
+# ── RAG / search ────────────────────────────────────────────────────────
+# Set to `1` to enable cross-encoder reranking on /search results.
+SEARCH_RAG_RERANK=0