llamacpp models now receive images directly instead of describe-then-inline. LLAMA_SWAP_VISION_MODEL defaults to the primary model. Document the ResolvedBackend dispatch pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
132 lines
7.0 KiB
Plaintext
132 lines
7.0 KiB
Plaintext
# ImageApi configuration template. Copy to `.env` and fill in for your
|
|
# deploy. Comments mirror the canonical docs in CLAUDE.md — see there
|
|
# for the full picture (especially the AI-Insights / Apollo / face
|
|
# integration sections).
|
|
|
|
# ── Required ────────────────────────────────────────────────────────────
|
|
DATABASE_URL=./database.db
|
|
BASE_PATH=/path/to/media
|
|
THUMBNAILS=/path/to/thumbnails
|
|
VIDEO_PATH=/path/to/video/hls
|
|
GIFS_DIRECTORY=/path/to/gifs
|
|
PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips
|
|
BIND_URL=0.0.0.0:8080
|
|
CORS_ALLOWED_ORIGINS=http://localhost:3000
|
|
SECRET_KEY=replace-me-with-a-long-random-secret
|
|
RUST_LOG=info
|
|
|
|
# ── File watching ───────────────────────────────────────────────────────
|
|
# Quick scan = recently-modified-files only; full scan = comprehensive walk.
|
|
WATCH_QUICK_INTERVAL_SECONDS=60
|
|
WATCH_FULL_INTERVAL_SECONDS=3600
|
|
# Comma-separated path prefixes / component names to skip in /memories
|
|
# AND in face detection (e.g. @eaDir, .thumbnails, /private).
|
|
EXCLUDED_DIRS=
|
|
|
|
# ── Video / HLS ─────────────────────────────────────────────────────────
|
|
HLS_CONCURRENCY=2
|
|
HLS_TIMEOUT_SECONDS=900
|
|
PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400
|
|
|
|
# ── Telemetry (release builds only) ─────────────────────────────────────
|
|
# OTLP_OTLS_ENDPOINT=http://localhost:4317
|
|
|
|
# ── AI Insights — Ollama (local LLM) ────────────────────────────────────
|
|
OLLAMA_PRIMARY_URL=http://localhost:11434
|
|
OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b
|
|
# Optional fallback server tried on connection failure.
|
|
# OLLAMA_FALLBACK_URL=http://server:11434
|
|
# OLLAMA_FALLBACK_MODEL=llama3.2:3b
|
|
OLLAMA_REQUEST_TIMEOUT_SECONDS=120
|
|
# Cap on tool-calling iterations per chat turn / agentic insight.
|
|
AGENTIC_MAX_ITERATIONS=6
|
|
AGENTIC_CHAT_MAX_ITERATIONS=6
|
|
|
|
# ── AI Insights — OpenRouter (hybrid backend, optional) ─────────────────
|
|
# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays
|
|
# local on the stack selected by LLM_BACKEND, Ollama by default; chat routes to OpenRouter).
|
|
# OPENROUTER_API_KEY=sk-or-...
|
|
# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4
|
|
# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
|
|
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
|
# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
|
|
# OPENROUTER_HTTP_REFERER=https://your-site.example
|
|
# OPENROUTER_APP_TITLE=ImageApi
|
|
|
|
# ── AI Insights — local backend switch ──────────────────────────────────
|
|
# Picks which local LLM stack the server uses for chat, vision describe,
|
|
# and embeddings. `ollama` (default) uses the OLLAMA_* settings above;
|
|
# `llamacpp` uses the LLAMA_SWAP_* settings below. The switch is global
|
|
# and applies to both `backend=local` and `backend=hybrid` (hybrid keeps
|
|
# chat on OpenRouter but still uses this stack for the describe pass).
|
|
# Don't flip mid-deploy without re-embedding existing index rows —
|
|
# mixed vector spaces break similarity search.
|
|
# LLM_BACKEND=ollama
|
|
|
|
# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
|
|
# Set LLAMA_SWAP_URL plus LLM_BACKEND=llamacpp to swap the local stack
|
|
# off Ollama. Talks OpenAI-compatible /v1 to a llama-swap proxy fronting
|
|
# per-slot llama-server instances. Chat models receive images directly
|
|
# via content-parts (vision-capable models assumed); a separate vision
|
|
# slot is used only by the describe_photo tool and describe-image utility.
|
|
# LLAMA_SWAP_URL=http://localhost:9292/v1
|
|
# LLAMA_SWAP_PRIMARY_MODEL=chat
|
|
# Optional dedicated vision slot for describe_image. Defaults to
|
|
# LLAMA_SWAP_PRIMARY_MODEL so describe_photo works without extra config.
|
|
# LLAMA_SWAP_VISION_MODEL=vision
|
|
# LLAMA_SWAP_EMBEDDING_MODEL=embed
|
|
# Comma-separated allowlist surfaced by /insights/models when
|
|
# LLM_BACKEND=llamacpp. Every listed model reports has_vision=true.
|
|
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
|
|
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
|
|
|
|
# ── AI Insights — sibling services (optional) ───────────────────────────
|
|
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
|
|
# typically set only APOLLO_API_BASE_URL and let the face + CLIP
|
|
# clients fall back to it.
|
|
# APOLLO_API_BASE_URL=http://apollo.lan:8000
|
|
# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
|
|
# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000
|
|
# SMS_API_URL=http://localhost:8000
|
|
# SMS_API_TOKEN=
|
|
|
|
# Display name used in agentic prompts when the LLM refers to "you".
|
|
USER_NAME=
|
|
|
|
# ── Face detection (Phase 3+) ───────────────────────────────────────────
|
|
# Cosine-sim floor for auto-binding a detected face to an existing
|
|
# same-named person on detection. 0.4 ≈ moderate-confidence match.
|
|
FACE_AUTOBIND_MIN_COS=0.4
|
|
# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU
|
|
# pool serializes server-side; this just overlaps file-IO with
|
|
# inference RTT.
|
|
FACE_DETECT_CONCURRENCY=8
|
|
# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher.
|
|
FACE_DETECT_TIMEOUT_SEC=60
|
|
# Per-tick caps on the two backlog drains (independent of WATCH_*
|
|
# quick / full scans). Tune up if you have a large unscanned backlog
|
|
# and want it to clear faster; tune down if Apollo is overloaded.
|
|
FACE_BACKLOG_MAX_PER_TICK=64
|
|
FACE_HASH_BACKFILL_MAX_PER_TICK=2000
|
|
|
|
# ── CLIP semantic photo search ──────────────────────────────────────────
|
|
# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text}
|
|
# to populate per-photo embeddings during the watcher's backlog drain
|
|
# and to encode user queries at /photos/search time. Disabled when
|
|
# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set.
|
|
#
|
|
# Per-watcher-tick cap on the encode drain. Default 32 ≈ ~1 photo/sec
|
|
# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool
|
|
# is 1 on CUDA, so concurrency is bounded server-side regardless of
|
|
# our setting). Bump on a fresh deploy to clear the backlog faster.
|
|
CLIP_BACKLOG_MAX_PER_TICK=32
|
|
# Client-side parallel encode calls per drain pass. Apollo's GPU pool
|
|
# serializes server-side; this just overlaps file-IO with inference.
|
|
CLIP_ENCODE_CONCURRENCY=4
|
|
# Per-encode HTTP timeout. CPU-only Apollo deploys may need higher.
|
|
CLIP_REQUEST_TIMEOUT_SEC=60
|
|
|
|
# ── RAG / search ────────────────────────────────────────────────────────
|
|
# Set to `1` to enable cross-encoder reranking on /search results.
|
|
SEARCH_RAG_RERANK=0
|