diff --git a/.env.example b/.env.example index 718f6bd..63d8436 100644 --- a/.env.example +++ b/.env.example @@ -53,6 +53,27 @@ AGENTIC_CHAT_MAX_ITERATIONS=6 # OPENROUTER_HTTP_REFERER=https://your-site.example # OPENROUTER_APP_TITLE=ImageApi +# ── AI Insights — llama.cpp / llama-swap (optional) ───────────────────── +# Set LLAMA_SWAP_URL to enable the `llamacpp` chat_backend. It speaks the +# OpenAI-compatible /v1 API to a llama-swap proxy that fronts per-slot +# llama-server instances (chat / vision / embed). Like hybrid mode, the +# agentic loop describes images via the vision slot, then inlines the +# text into the chat slot — so the chat slot itself can be text-only. +# LLAMA_SWAP_URL=http://localhost:9292/v1 +# LLAMA_SWAP_PRIMARY_MODEL=chat +# LLAMA_SWAP_VISION_MODEL=vision +# LLAMA_SWAP_EMBEDDING_MODEL=embed +# Comma-separated allowlist of model ids the /v1/models endpoint should +# advertise as vision-capable (llama-swap doesn't report modality). +# LLAMA_SWAP_VISION_MODELS=vision +# Comma-separated allowlist surfaced by /insights/llamacpp/models. +# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed +# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=120 +# Routes hybrid mode's vision-describe pass through llama-swap's vision +# slot instead of Ollama (chat still goes to OpenRouter). Values: +# `ollama` (default) | `llamacpp`. +# HYBRID_VISION_BACKEND=ollama + # ── AI Insights — sibling services (optional) ─────────────────────────── # Apollo (places, face inference, CLIP encoders). Single-Apollo deploys # typically set only APOLLO_API_BASE_URL and let the face + CLIP