Mirrors the section added to CLAUDE.md so deploys can opt into the llamacpp backend from the template alone.
126 lines
6.5 KiB
Plaintext
126 lines
6.5 KiB
Plaintext
# ImageApi configuration template. Copy to `.env` and fill in for your
# deploy. Comments mirror the canonical docs in CLAUDE.md — see there
# for the full picture (especially the AI-Insights / Apollo / face
# integration sections).

# ── Required ────────────────────────────────────────────────────────────
DATABASE_URL=./database.db
BASE_PATH=/path/to/media
THUMBNAILS=/path/to/thumbnails
VIDEO_PATH=/path/to/video/hls
GIFS_DIRECTORY=/path/to/gifs
PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips
BIND_URL=0.0.0.0:8080
CORS_ALLOWED_ORIGINS=http://localhost:3000
# Placeholder only — replace with a long random value before deploying.
SECRET_KEY=replace-me-with-a-long-random-secret
RUST_LOG=info

# ── File watching ───────────────────────────────────────────────────────
# Quick scan = recently-modified-files only; full scan = comprehensive walk.
WATCH_QUICK_INTERVAL_SECONDS=60
WATCH_FULL_INTERVAL_SECONDS=3600
# Comma-separated path prefixes / component names to skip in /memories
# AND in face detection (e.g. @eaDir, .thumbnails, /private).
EXCLUDED_DIRS=

# ── Video / HLS ─────────────────────────────────────────────────────────
HLS_CONCURRENCY=2
HLS_TIMEOUT_SECONDS=900
PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400

# ── Telemetry (release builds only) ─────────────────────────────────────
# OTLP_OTLS_ENDPOINT=http://localhost:4317

# ── AI Insights — Ollama (local LLM) ────────────────────────────────────
OLLAMA_PRIMARY_URL=http://localhost:11434
OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b
# Optional fallback server tried on connection failure.
# OLLAMA_FALLBACK_URL=http://server:11434
# OLLAMA_FALLBACK_MODEL=llama3.2:3b
OLLAMA_REQUEST_TIMEOUT_SECONDS=120
# Cap on tool-calling iterations per chat turn / agentic insight.
AGENTIC_MAX_ITERATIONS=6
AGENTIC_CHAT_MAX_ITERATIONS=6

# ── AI Insights — OpenRouter (hybrid backend, optional) ─────────────────
# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays
# local on Ollama, chat routes to OpenRouter).
# OPENROUTER_API_KEY=sk-or-...
# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4
# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
# OPENROUTER_HTTP_REFERER=https://your-site.example
# OPENROUTER_APP_TITLE=ImageApi

# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
# Set LLAMA_SWAP_URL to enable the `llamacpp` chat_backend. Talks
# OpenAI-compatible /v1 to a llama-swap proxy that fronts per-slot
# llama-server instances (chat / vision / embed). Like hybrid, the
# agentic loop describes images via the vision slot then inlines the
# text into the chat slot — so the chat slot itself can be text-only.
# LLAMA_SWAP_URL=http://localhost:9292/v1
# LLAMA_SWAP_PRIMARY_MODEL=chat
# LLAMA_SWAP_VISION_MODEL=vision
# LLAMA_SWAP_EMBEDDING_MODEL=embed
# Comma-separated allowlist of model ids the /v1/models endpoint should
# advertise as vision-capable (llama-swap doesn't report modality).
# LLAMA_SWAP_VISION_MODELS=vision
# Comma-separated allowlist surfaced by /insights/llamacpp/models.
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=120
# Routes hybrid mode's vision-describe pass through llama-swap's vision
# slot instead of Ollama (chat still goes to OpenRouter). Values:
# `ollama` (default) | `llamacpp`.
# HYBRID_VISION_BACKEND=ollama

# ── AI Insights — sibling services (optional) ───────────────────────────
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
# typically set only APOLLO_API_BASE_URL and let the face + CLIP
# clients fall back to it.
# APOLLO_API_BASE_URL=http://apollo.lan:8000
# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000
# SMS_API_URL=http://localhost:8000
# SMS_API_TOKEN=

# Display name used in agentic prompts when the LLM refers to "you".
USER_NAME=

# ── Face detection (Phase 3+) ───────────────────────────────────────────
# Cosine-sim floor for auto-binding a detected face to an existing
# same-named person on detection. 0.4 ≈ moderate-confidence match.
FACE_AUTOBIND_MIN_COS=0.4
# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU
# pool serializes server-side; this just overlaps file-IO with
# inference RTT.
FACE_DETECT_CONCURRENCY=8
# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher.
FACE_DETECT_TIMEOUT_SEC=60
# Per-tick caps on the two backlog drains (independent of WATCH_*
# quick / full scans). Tune up if you have a large unscanned backlog
# and want it to clear faster; tune down if Apollo is overloaded.
FACE_BACKLOG_MAX_PER_TICK=64
FACE_HASH_BACKFILL_MAX_PER_TICK=2000

# ── CLIP semantic photo search ──────────────────────────────────────────
# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text}
# to populate per-photo embeddings during the watcher's backlog drain
# and to encode user queries at /photos/search time. Disabled when
# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set.
#
# Per-watcher-tick cap on the encode drain. Default 32 ≈ 1 photo/sec
# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool
# is 1 on CUDA, so concurrency is bounded server-side regardless of
# our setting). Bump on a fresh deploy to clear the backlog faster.
CLIP_BACKLOG_MAX_PER_TICK=32
# Client-side parallel encode calls per drain pass. Apollo's GPU pool
# serializes server-side; this just overlaps file-IO with inference.
CLIP_ENCODE_CONCURRENCY=4
# Per-encode HTTP timeout. CPU-only Apollo deploys may need higher.
CLIP_REQUEST_TIMEOUT_SEC=60

# ── RAG / search ────────────────────────────────────────────────────────
# Set to `1` to enable cross-encoder reranking on /search results.
SEARCH_RAG_RERANK=0