From e2eefbd15652a238da9391835c5509dcf73ea0bc Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 21 Apr 2026 10:36:19 -0400 Subject: [PATCH] feat(ai): curated OpenRouter model picker for hybrid backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add OPENROUTER_ALLOWED_MODELS env var and GET /insights/openrouter/models endpoint returning the curated list verbatim. Drop the live capability precheck in hybrid mode — trust the operator's allowlist; bad ids surface as a chat-call error. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 22 ++++++++++++++++++++++ README.md | 23 +++++++++++++++++++++++ src/ai/handlers.rs | 28 ++++++++++++++++++++++++++++ src/ai/insight_generator.rs | 28 ++++------------------------ src/ai/mod.rs | 2 +- src/main.rs | 1 + src/state.rs | 20 ++++++++++++++++++++ 7 files changed, 99 insertions(+), 25 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5da2612..0849e41 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -258,6 +258,17 @@ OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b # Model for primary server (defau OLLAMA_FALLBACK_MODEL=llama3.2:3b # Model for fallback server (optional, uses primary if not set) SMS_API_URL=http://localhost:8000 # SMS message API endpoint (default: localhost:8000) SMS_API_TOKEN=your-api-token # SMS API authentication token (optional) + +# OpenRouter (Hybrid Backend) - keeps embeddings + vision local, routes chat to OpenRouter +OPENROUTER_API_KEY=sk-or-... # Required to enable hybrid backend +OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4 # Used when client doesn't pick a model +OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash + # Curated allowlist exposed to clients via + # GET /insights/openrouter/models. Empty = no picker. 
+OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 # Override base URL (optional) +OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small # Optional, embeddings stay local today +OPENROUTER_HTTP_REFERER=https://your-site.example # Optional attribution header +OPENROUTER_APP_TITLE=ImageApi # Optional attribution header ``` **AI Insights Fallback Behavior:** @@ -275,6 +286,17 @@ The `OllamaClient` provides methods to query available models: This allows runtime verification of model availability before generating insights. +**Hybrid Backend (OpenRouter):** +- Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`. +- Local Ollama still describes the image (vision); the description is inlined + into the chat prompt and the agentic loop runs on OpenRouter. +- `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that + call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`. +- No live capability precheck — the operator-curated allowlist is trusted. + A bad model id surfaces as a chat-call error. +- `GET /insights/openrouter/models` returns `{ models, default_model, configured }` + for client picker UIs. + ## Dependencies of Note - **actix-web**: HTTP framework diff --git a/README.md b/README.md index c8f1c69..1bd3c9a 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,29 @@ The following environment variables configure AI-powered photo insights and dail - `OLLAMA_URL` - Used if `OLLAMA_PRIMARY_URL` not set - `OLLAMA_MODEL` - Used if `OLLAMA_PRIMARY_MODEL` not set +#### OpenRouter Configuration (Hybrid Backend) +The hybrid agentic backend keeps embeddings + vision local (Ollama) while routing +chat + tool-calling to OpenRouter. Enabled per-request when the client sends +`backend=hybrid`. + +- `OPENROUTER_API_KEY` - OpenRouter API key. Required to enable the hybrid backend. 
+- `OPENROUTER_DEFAULT_MODEL` - Model id used when the client doesn't specify one + [default: `anthropic/claude-sonnet-4`] + - Example: `openai/gpt-4o-mini`, `google/gemini-2.5-flash` +- `OPENROUTER_ALLOWED_MODELS` - Comma-separated curated allowlist exposed to + clients via `GET /insights/openrouter/models`. The mobile picker shows only + these. Empty/unset = no picker, server default is used. + - Example: `openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash` +- `OPENROUTER_BASE_URL` - Override base URL [default: `https://openrouter.ai/api/v1`] +- `OPENROUTER_EMBEDDING_MODEL` - Embedding model for OpenRouter + [default: `openai/text-embedding-3-small`]. Only used if/when embeddings are + routed through OpenRouter (currently embeddings stay local). +- `OPENROUTER_HTTP_REFERER` - Optional `HTTP-Referer` for OpenRouter attribution +- `OPENROUTER_APP_TITLE` - Optional `X-Title` for OpenRouter attribution + +Capability checks are skipped for the curated allowlist — bad model ids surface +as a 4xx from the chat call. Pick tool-capable models. + #### SMS API Configuration - `SMS_API_URL` - URL to SMS message API [default: `http://localhost:8000`] - Used to fetch conversation data for context in insights diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index 038470d..5c4036e 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -445,6 +445,34 @@ pub async fn get_available_models_handler( HttpResponse::Ok().json(response) } +#[derive(Debug, Serialize)] +pub struct OpenRouterModelsResponse { + pub models: Vec<String>, + pub default_model: Option<String>, + pub configured: bool, +} + +/// GET /insights/openrouter/models - Curated OpenRouter model ids exposed +/// to clients for the hybrid backend. Returned verbatim from +/// `OPENROUTER_ALLOWED_MODELS`; no live call to OpenRouter. 
+#[get("/insights/openrouter/models")] +pub async fn get_openrouter_models_handler( + _claims: Claims, + app_state: web::Data<AppState>, +) -> impl Responder { + let configured = app_state.openrouter.is_some(); + let default_model = app_state + .openrouter + .as_ref() + .map(|c| c.primary_model.clone()); + let response = OpenRouterModelsResponse { + models: app_state.openrouter_allowed_models.clone(), + default_model, + configured, + }; + HttpResponse::Ok().json(response) +} + /// POST /insights/rate - Rate an insight (thumbs up/down for training data) #[post("/insights/rate")] pub async fn rate_insight_handler( diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 292324c..fcf89ee 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -2525,30 +2525,10 @@ Return ONLY the summary, nothing else."#, // 2. Verify chat model supports tool calling. // - local: existing Ollama model availability + capability check. - // - hybrid: query OpenRouter's /models for the chosen model. + // - hybrid: trust the operator's curated allowlist + // (OPENROUTER_ALLOWED_MODELS) — no live precheck. A bad model id + // surfaces as a chat-call error on the next step. let has_vision = if is_hybrid { - let or_client = openrouter_client - .as_ref() - .expect("openrouter_client constructed when is_hybrid"); - let caps = or_client - .model_capabilities(&or_client.primary_model) - .await - .map_err(|e| { - anyhow::anyhow!( - "OpenRouter capability lookup failed for '{}': {}", - or_client.primary_model, - e - ) - })?; - if !caps.has_tool_calling { - return Err(anyhow::anyhow!( - "tool calling not supported by OpenRouter model '{}'", - or_client.primary_model - )); - } - insight_cx - .span() - .set_attribute(KeyValue::new("model_has_tool_calling", true)); // In hybrid mode the chat model never sees images directly — we // describe-then-inject, so `has_vision` drives only whether we // bother loading the image to describe it, which we always do. 
@@ -2776,7 +2756,7 @@ Return ONLY the summary, nothing else."#, 3. Use recall_facts_for_photo to load any previously stored knowledge about subjects in this photo.\n\ 4. Use recall_entities to look up known people, places, or things that appear in this photo.\n\ 5. When you identify people, places, events, or notable things in this photo: use store_entity to record them and store_fact to record key facts (relationships, roles, attributes). This builds a persistent memory for future insights.\n\ - 6. Only produce your final insight AFTER you have gathered context from at least 5-12 tool calls.\n\ + 6. Only produce your final insight AFTER you have gathered context from at least 5 tool calls.\n\ 7. If a tool returns no results, that is useful information — continue calling the remaining tools anyway.", cameron_id_note = cameron_id_note ); diff --git a/src/ai/mod.rs b/src/ai/mod.rs index 5414e69..60a3f43 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -12,7 +12,7 @@ pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate} pub use handlers::{ delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler, generate_insight_handler, get_all_insights_handler, get_available_models_handler, - get_insight_handler, rate_insight_handler, + get_insight_handler, get_openrouter_models_handler, rate_insight_handler, }; pub use insight_generator::InsightGenerator; #[allow(unused_imports)] diff --git a/src/main.rs b/src/main.rs index 570cf58..2deee7e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1355,6 +1355,7 @@ fn main() -> std::io::Result<()> { .service(ai::delete_insight_handler) .service(ai::get_all_insights_handler) .service(ai::get_available_models_handler) + .service(ai::get_openrouter_models_handler) .service(ai::rate_insight_handler) .service(ai::export_training_data_handler) .service(libraries::list_libraries) diff --git a/src/state.rs b/src/state.rs index 72a509e..dd8628a 100644 --- a/src/state.rs +++ b/src/state.rs @@ 
-39,6 +39,9 @@ pub struct AppState { /// generator. #[allow(dead_code)] pub openrouter: Option<Arc<OpenRouterClient>>, + /// Curated list of OpenRouter model ids exposed to clients. Sourced from + /// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset. + pub openrouter_allowed_models: Vec<String>, pub sms_client: SmsApiClient, pub insight_generator: InsightGenerator, } @@ -70,6 +73,7 @@ impl AppState { excluded_dirs: Vec<String>, ollama: OllamaClient, openrouter: Option<Arc<OpenRouterClient>>, + openrouter_allowed_models: Vec<String>, sms_client: SmsApiClient, insight_generator: InsightGenerator, preview_dao: Arc>>, @@ -102,6 +106,7 @@ impl AppState { excluded_dirs, ollama, openrouter, + openrouter_allowed_models, sms_client, insight_generator, } @@ -138,6 +143,7 @@ impl Default for AppState { ); let openrouter = build_openrouter_from_env(); + let openrouter_allowed_models = parse_openrouter_allowed_models(); let sms_api_url = env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string()); @@ -209,6 +215,7 @@ impl Default for AppState { Self::parse_excluded_dirs(), ollama, openrouter, + openrouter_allowed_models, sms_client, insight_generator, preview_dao, @@ -235,6 +242,18 @@ fn build_openrouter_from_env() -> Option<Arc<OpenRouterClient>> { Some(Arc::new(client)) } +/// Parse `OPENROUTER_ALLOWED_MODELS` (comma-separated) into a vec. Returns +/// empty when unset, in which case `/insights/openrouter/models` reports no +/// curated picks and the server falls back to `OPENROUTER_DEFAULT_MODEL`. +fn parse_openrouter_allowed_models() -> Vec<String> { + env::var("OPENROUTER_ALLOWED_MODELS") + .unwrap_or_default() + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect() +} + #[cfg(test)] impl AppState { /// Creates an AppState instance for testing with temporary directories @@ -321,6 +340,7 @@ impl AppState { Vec::new(), // No excluded directories for test state ollama, None, + Vec::new(), sms_client, insight_generator, preview_dao,