feat(ai): curated OpenRouter model picker for hybrid backend

Add the OPENROUTER_ALLOWED_MODELS env var (comma-separated model ids) and a
GET /insights/openrouter/models endpoint that returns the curated list
verbatim. Drop the live capability precheck in hybrid mode and trust the
operator's allowlist instead; a bad model id now surfaces as a chat-call error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-21 10:36:19 -04:00
parent 3ac0cd62eb
commit e2eefbd156
7 changed files with 99 additions and 25 deletions

View File

@@ -445,6 +445,34 @@ pub async fn get_available_models_handler(
HttpResponse::Ok().json(response)
}
/// JSON response body for `GET /insights/openrouter/models`.
#[derive(Debug, Serialize)]
pub struct OpenRouterModelsResponse {
/// Curated OpenRouter model ids, copied from the app state's
/// `openrouter_allowed_models` list.
pub models: Vec<String>,
/// `primary_model` of the configured OpenRouter client; `None` when the
/// app state holds no client.
pub default_model: Option<String>,
/// `true` when the app state holds an OpenRouter client.
pub configured: bool,
}
/// GET /insights/openrouter/models - Lists the curated OpenRouter model ids
/// that clients may pick for the hybrid backend.
///
/// The list is a copy of the app state's `openrouter_allowed_models`
/// (sourced from `OPENROUTER_ALLOWED_MODELS`); this handler makes no call to
/// OpenRouter. `default_model` carries the configured client's
/// `primary_model`, and `configured` reports whether a client exists at all.
#[get("/insights/openrouter/models")]
pub async fn get_openrouter_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    // Borrow the optional client once; both derived fields read from it.
    let client = app_state.openrouter.as_ref();

    HttpResponse::Ok().json(OpenRouterModelsResponse {
        models: app_state.openrouter_allowed_models.clone(),
        default_model: client.map(|c| c.primary_model.clone()),
        configured: client.is_some(),
    })
}
/// POST /insights/rate - Rate an insight (thumbs up/down for training data)
#[post("/insights/rate")]
pub async fn rate_insight_handler(

View File

@@ -2525,30 +2525,10 @@ Return ONLY the summary, nothing else."#,
// 2. Verify chat model supports tool calling.
// - local: existing Ollama model availability + capability check.
// - hybrid: query OpenRouter's /models for the chosen model.
// - hybrid: trust the operator's curated allowlist
// (OPENROUTER_ALLOWED_MODELS) — no live precheck. A bad model id
// surfaces as a chat-call error on the next step.
let has_vision = if is_hybrid {
let or_client = openrouter_client
.as_ref()
.expect("openrouter_client constructed when is_hybrid");
let caps = or_client
.model_capabilities(&or_client.primary_model)
.await
.map_err(|e| {
anyhow::anyhow!(
"OpenRouter capability lookup failed for '{}': {}",
or_client.primary_model,
e
)
})?;
if !caps.has_tool_calling {
return Err(anyhow::anyhow!(
"tool calling not supported by OpenRouter model '{}'",
or_client.primary_model
));
}
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_tool_calling", true));
// In hybrid mode the chat model never sees images directly — we
// describe-then-inject, so `has_vision` drives only whether we
// bother loading the image to describe it, which we always do.
@@ -2776,7 +2756,7 @@ Return ONLY the summary, nothing else."#,
3. Use recall_facts_for_photo to load any previously stored knowledge about subjects in this photo.\n\
4. Use recall_entities to look up known people, places, or things that appear in this photo.\n\
5. When you identify people, places, events, or notable things in this photo: use store_entity to record them and store_fact to record key facts (relationships, roles, attributes). This builds a persistent memory for future insights.\n\
6. Only produce your final insight AFTER you have gathered context from at least 5-12 tool calls.\n\
6. Only produce your final insight AFTER you have gathered context from at least 5 tool calls.\n\
7. If a tool returns no results, that is useful information — continue calling the remaining tools anyway.",
cameron_id_note = cameron_id_note
);

View File

@@ -12,7 +12,7 @@ pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate}
pub use handlers::{
delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
generate_insight_handler, get_all_insights_handler, get_available_models_handler,
get_insight_handler, rate_insight_handler,
get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
};
pub use insight_generator::InsightGenerator;
#[allow(unused_imports)]

View File

@@ -1355,6 +1355,7 @@ fn main() -> std::io::Result<()> {
.service(ai::delete_insight_handler)
.service(ai::get_all_insights_handler)
.service(ai::get_available_models_handler)
.service(ai::get_openrouter_models_handler)
.service(ai::rate_insight_handler)
.service(ai::export_training_data_handler)
.service(libraries::list_libraries)

View File

@@ -39,6 +39,9 @@ pub struct AppState {
/// generator.
#[allow(dead_code)]
pub openrouter: Option<Arc<OpenRouterClient>>,
/// Curated list of OpenRouter model ids exposed to clients. Sourced from
/// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset.
pub openrouter_allowed_models: Vec<String>,
pub sms_client: SmsApiClient,
pub insight_generator: InsightGenerator,
}
@@ -70,6 +73,7 @@ impl AppState {
excluded_dirs: Vec<String>,
ollama: OllamaClient,
openrouter: Option<Arc<OpenRouterClient>>,
openrouter_allowed_models: Vec<String>,
sms_client: SmsApiClient,
insight_generator: InsightGenerator,
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
@@ -102,6 +106,7 @@ impl AppState {
excluded_dirs,
ollama,
openrouter,
openrouter_allowed_models,
sms_client,
insight_generator,
}
@@ -138,6 +143,7 @@ impl Default for AppState {
);
let openrouter = build_openrouter_from_env();
let openrouter_allowed_models = parse_openrouter_allowed_models();
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
@@ -209,6 +215,7 @@ impl Default for AppState {
Self::parse_excluded_dirs(),
ollama,
openrouter,
openrouter_allowed_models,
sms_client,
insight_generator,
preview_dao,
@@ -235,6 +242,18 @@ fn build_openrouter_from_env() -> Option<Arc<OpenRouterClient>> {
Some(Arc::new(client))
}
/// Read the comma-separated `OPENROUTER_ALLOWED_MODELS` env var into a list
/// of model ids.
///
/// Each entry is trimmed of surrounding whitespace and blank entries are
/// skipped; order and duplicates are kept as written. An unset (or
/// non-Unicode) variable yields an empty vec, in which case
/// `/insights/openrouter/models` reports no curated picks and the server is
/// expected to fall back to `OPENROUTER_DEFAULT_MODEL`.
fn parse_openrouter_allowed_models() -> Vec<String> {
    let raw = env::var("OPENROUTER_ALLOWED_MODELS").unwrap_or_default();

    let mut models = Vec::new();
    for entry in raw.split(',') {
        let id = entry.trim();
        if !id.is_empty() {
            models.push(id.to_string());
        }
    }
    models
}
#[cfg(test)]
impl AppState {
/// Creates an AppState instance for testing with temporary directories
@@ -321,6 +340,7 @@ impl AppState {
Vec::new(), // No excluded directories for test state
ollama,
None,
Vec::new(),
sms_client,
insight_generator,
preview_dao,