use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web}; use opentelemetry::KeyValue; use opentelemetry::trace::{Span, Status, Tracer}; use serde::{Deserialize, Serialize}; use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest}; use crate::ai::ollama::ChatMessage; use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient}; use crate::data::Claims; use crate::database::{ExifDao, InsightDao}; use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; use crate::utils::normalize_path; /// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the /// ids of approved insights whose `training_messages` should be compressed /// into trajectory form and injected into the system prompt. Empty = no /// change in behavior. Request-level `fewshot_insight_ids` overrides this /// when non-empty. // const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908]; const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[]; #[derive(Debug, Deserialize)] pub struct GeneratePhotoInsightRequest { pub file_path: String, #[serde(default)] pub model: Option, #[serde(default)] pub system_prompt: Option, #[serde(default)] pub num_ctx: Option, #[serde(default)] pub temperature: Option, #[serde(default)] pub top_p: Option, #[serde(default)] pub top_k: Option, #[serde(default)] pub min_p: Option, /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision + /// OpenRouter chat). Only respected by the agentic endpoint. #[serde(default)] pub backend: Option, /// Insight ids whose stored `training_messages` should be compressed /// into few-shot trajectories and injected into the system prompt. /// Silently truncated to the first 2. When absent/empty, the handler /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`. #[serde(default)] pub fewshot_insight_ids: Option>, } #[derive(Debug, Deserialize)] pub struct GetPhotoInsightQuery { pub path: String, /// Library context for this lookup. Used to pick the right content /// hash when the same rel_path exists under multiple roots. #[serde(default)] pub library: Option, } #[derive(Debug, Deserialize)] pub struct RateInsightRequest { pub file_path: String, pub approved: bool, } #[derive(Debug, Deserialize)] pub struct ExportTrainingDataQuery { #[serde(default)] pub approved_only: Option, } #[derive(Debug, Serialize)] pub struct PhotoInsightResponse { pub id: i32, pub file_path: String, pub title: String, pub summary: String, pub generated_at: i64, pub model_version: String, #[serde(skip_serializing_if = "Option::is_none")] pub prompt_eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub approved: Option, pub backend: String, /// True when the insight was generated agentically and a chat /// continuation can be started against it. Drives the mobile chat button. pub has_training_messages: bool, } #[derive(Debug, Serialize)] pub struct AvailableModelsResponse { pub primary: ServerModels, #[serde(skip_serializing_if = "Option::is_none")] pub fallback: Option, } #[derive(Debug, Serialize)] pub struct ServerModels { pub url: String, pub models: Vec, pub default_model: String, } /// POST /insights/generate - Generate insight for a specific photo #[post("/insights/generate")] pub async fn generate_insight_handler( http_request: HttpRequest, _claims: Claims, request: web::Json, insight_generator: web::Data, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.generate", &parent_context); let normalized_path = normalize_path(&request.file_path); span.set_attribute(KeyValue::new("file_path", normalized_path.clone())); if let Some(ref model) = request.model { span.set_attribute(KeyValue::new("model", model.clone())); } if let Some(ref prompt) = request.system_prompt { span.set_attribute(KeyValue::new("has_custom_prompt", true)); span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64)); } if let Some(ctx) = request.num_ctx { span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); } log::info!( "Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}", normalized_path, request.model, request.system_prompt.is_some(), request.num_ctx ); // Generate insight with optional custom model, system prompt, and context size let result = insight_generator .generate_insight_for_photo_with_config( &normalized_path, request.model.clone(), request.system_prompt.clone(), request.num_ctx, request.temperature, request.top_p, request.top_k, request.min_p, ) .await; match result { Ok(()) => { span.set_status(Status::Ok); HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Insight generated successfully" })) } Err(e) => { log::error!("Failed to generate insight: {:?}", e); span.set_status(Status::error(e.to_string())); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to generate insight: {:?}", e) })) } } } /// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo #[get("/insights")] pub async fn get_insight_handler( _claims: Claims, query: web::Query, app_state: web::Data, insight_dao: web::Data>>, exif_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::debug!("Fetching insight for {}", normalized_path); let otel_context = opentelemetry::Context::new(); // Expand to rel_paths sharing content so an insight generated under // library 1 still shows when the same photo is viewed from library 2. let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); let sibling_paths = { let mut exif = exif_dao.lock().expect("Unable to lock ExifDao"); exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path) .unwrap_or_else(|_| vec![normalized_path.clone()]) }; let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.get_insight_for_paths(&otel_context, &sibling_paths) { Ok(Some(insight)) => { let response = PhotoInsightResponse { id: insight.id, file_path: insight.file_path, title: insight.title, summary: insight.summary, generated_at: insight.generated_at, model_version: insight.model_version, prompt_eval_count: None, eval_count: None, approved: insight.approved, has_training_messages: insight.training_messages.is_some(), backend: insight.backend, }; HttpResponse::Ok().json(response) } Ok(None) => HttpResponse::NotFound().json(serde_json::json!({ "error": "Insight not found" })), Err(e) => { log::error!("Failed to fetch insight ({}): {:?}", &query.path, e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to fetch insight: {:?}", e) })) } } } /// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request) #[delete("/insights")] pub async fn delete_insight_handler( _claims: Claims, query: web::Query, insight_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::info!("Deleting insight for {}", normalized_path); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.delete_insight(&otel_context, &normalized_path) { Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Insight deleted successfully" })), Err(e) => { log::error!("Failed to delete insight: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to delete insight: {:?}", e) })) } } } /// GET /insights/all - Get all insights #[get("/insights/all")] pub async fn get_all_insights_handler( _claims: Claims, insight_dao: web::Data>>, ) -> impl Responder { log::debug!("Fetching all insights"); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.get_all_insights(&otel_context) { Ok(insights) => { let responses: Vec = insights .into_iter() .map(|insight| PhotoInsightResponse { id: insight.id, file_path: insight.file_path, title: insight.title, summary: insight.summary, generated_at: insight.generated_at, model_version: insight.model_version, prompt_eval_count: None, eval_count: None, approved: insight.approved, has_training_messages: insight.training_messages.is_some(), backend: insight.backend, }) .collect(); HttpResponse::Ok().json(responses) } Err(e) => { log::error!("Failed to fetch all insights: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to fetch insights: {:?}", e) })) } } } /// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop #[post("/insights/generate/agentic")] pub async fn generate_agentic_insight_handler( http_request: HttpRequest, _claims: Claims, request: web::Json, insight_generator: web::Data, insight_dao: web::Data>>, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context); let normalized_path = normalize_path(&request.file_path); span.set_attribute(KeyValue::new("file_path", normalized_path.clone())); if let Some(ref model) = request.model { span.set_attribute(KeyValue::new("model", model.clone())); } if let Some(ref prompt) = request.system_prompt { span.set_attribute(KeyValue::new("has_custom_prompt", true)); span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64)); } if let Some(ctx) = request.num_ctx { span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); } let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(12); span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64)); log::info!( "Agentic insight generation triggered for photo: {} with model: {:?}, max_iterations: {}", normalized_path, request.model, max_iterations ); if let Some(ref b) = request.backend { span.set_attribute(KeyValue::new("backend", b.clone())); } // Resolve few-shot ids: request-provided ids take precedence when // non-empty; otherwise fall back to the hardcoded defaults. let fewshot_ids: Vec = match request.fewshot_insight_ids.as_deref() { Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(), _ => DEFAULT_FEWSHOT_INSIGHT_IDS .iter() .take(2) .copied() .collect(), }; span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64)); let fewshot_examples: Vec> = { let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); fewshot_ids .iter() .filter_map(|id| { let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?; let json = insight.training_messages?; match serde_json::from_str::>(&json) { Ok(msgs) => Some(msgs), Err(e) => { log::warn!( "Few-shot insight {} has malformed training_messages: {}", id, e ); None } } }) .collect() }; let result = insight_generator .generate_agentic_insight_for_photo( &normalized_path, request.model.clone(), request.system_prompt.clone(), request.num_ctx, request.temperature, request.top_p, request.top_k, request.min_p, max_iterations, request.backend.clone(), fewshot_examples, fewshot_ids, ) .await; match result { Ok((prompt_eval_count, eval_count)) => { span.set_status(Status::Ok); // Fetch the stored insight to return it let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.get_insight(&otel_context, &normalized_path) { Ok(Some(insight)) => { let response = PhotoInsightResponse { id: insight.id, file_path: insight.file_path, title: insight.title, summary: insight.summary, generated_at: insight.generated_at, model_version: insight.model_version, prompt_eval_count, eval_count, approved: insight.approved, has_training_messages: insight.training_messages.is_some(), backend: insight.backend, }; HttpResponse::Ok().json(response) } Ok(None) => HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Agentic insight generated successfully" })), Err(e) => { log::warn!("Insight stored but failed to retrieve: {:?}", e); HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Agentic insight generated successfully" })) } } } Err(e) => { let error_msg = format!("{:?}", e); log::error!("Failed to generate agentic insight: {}", error_msg); span.set_status(Status::error(error_msg.clone())); if error_msg.contains("tool calling not supported") || error_msg.contains("model not available") { HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("Failed to generate agentic insight: {}", error_msg) })) } else if error_msg.contains("error parsing tool call") { HttpResponse::BadRequest().json(serde_json::json!({ "error": "Model is not compatible with Ollama's tool calling protocol. Try a model known to support native tool calling (e.g. llama3.1, llama3.2, qwen2.5, mistral-nemo)." })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to generate agentic insight: {}", error_msg) })) } } } } /// GET /insights/models - List available models from both servers with capabilities #[get("/insights/models")] pub async fn get_available_models_handler( _claims: Claims, app_state: web::Data, ) -> impl Responder { log::debug!("Fetching available models with capabilities"); let ollama_client = &app_state.ollama; // Fetch models with capabilities from primary server let primary_models = match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await { Ok(models) => models, Err(e) => { log::warn!("Failed to fetch models from primary server: {:?}", e); vec![] } }; let primary = ServerModels { url: ollama_client.primary_url.clone(), models: primary_models, default_model: ollama_client.primary_model.clone(), }; // Fetch models with capabilities from fallback server if configured let fallback = if let Some(fallback_url) = &ollama_client.fallback_url { match OllamaClient::list_models_with_capabilities(fallback_url).await { Ok(models) => Some(ServerModels { url: fallback_url.clone(), models, default_model: ollama_client .fallback_model .clone() .unwrap_or_else(|| ollama_client.primary_model.clone()), }), Err(e) => { log::warn!("Failed to fetch models from fallback server: {:?}", e); None } } } else { None }; let response = AvailableModelsResponse { primary, fallback }; HttpResponse::Ok().json(response) } #[derive(Debug, Serialize)] pub struct OpenRouterModelsResponse { pub models: Vec, pub default_model: Option, pub configured: bool, } /// GET /insights/openrouter/models - Curated OpenRouter model ids exposed /// to clients for the hybrid backend. Returned verbatim from /// `OPENROUTER_ALLOWED_MODELS`; no live call to OpenRouter. #[get("/insights/openrouter/models")] pub async fn get_openrouter_models_handler( _claims: Claims, app_state: web::Data, ) -> impl Responder { let configured = app_state.openrouter.is_some(); let default_model = app_state .openrouter .as_ref() .map(|c| c.primary_model.clone()); let response = OpenRouterModelsResponse { models: app_state.openrouter_allowed_models.clone(), default_model, configured, }; HttpResponse::Ok().json(response) } /// POST /insights/rate - Rate an insight (thumbs up/down for training data) #[post("/insights/rate")] pub async fn rate_insight_handler( _claims: Claims, request: web::Json, insight_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&request.file_path); log::info!( "Rating insight for {}: approved={}", normalized_path, request.approved ); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.rate_insight(&otel_context, &normalized_path, request.approved) { Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Insight rated successfully" })), Err(e) => { log::error!("Failed to rate insight: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to rate insight: {:?}", e) })) } } } /// GET /insights/training-data - Export approved training data as JSONL #[get("/insights/training-data")] pub async fn export_training_data_handler( _claims: Claims, query: web::Query, insight_dao: web::Data>>, ) -> impl Responder { let approved_only = query.approved_only.unwrap_or(true); log::info!("Exporting training data (approved_only={})", approved_only); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); let insights = if approved_only { dao.get_approved_insights(&otel_context) } else { dao.get_all_insights(&otel_context) }; match insights { Ok(insights) => { let mut jsonl = String::new(); for insight in &insights { if let Some(ref messages) = insight.training_messages { let entry = serde_json::json!({ "file_path": insight.file_path, "model_version": insight.model_version, "generated_at": insight.generated_at, "title": insight.title, "summary": insight.summary, "messages": serde_json::from_str::(messages) .unwrap_or(serde_json::Value::Null), }); jsonl.push_str(&entry.to_string()); jsonl.push('\n'); } } HttpResponse::Ok() .content_type("application/jsonl") .insert_header(( "Content-Disposition", "attachment; filename=\"training_data.jsonl\"", )) .body(jsonl) } Err(e) => { log::error!("Failed to export training data: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to export training data: {:?}", e) })) } } } #[derive(Debug, Deserialize)] pub struct ChatTurnHttpRequest { pub file_path: String, #[serde(default)] pub library: Option, pub user_message: String, #[serde(default)] pub model: Option, #[serde(default)] pub backend: Option, #[serde(default)] pub num_ctx: Option, #[serde(default)] pub temperature: Option, #[serde(default)] pub top_p: Option, #[serde(default)] pub top_k: Option, #[serde(default)] pub min_p: Option, #[serde(default)] pub max_iterations: Option, #[serde(default)] pub amend: bool, } #[derive(Debug, Serialize)] pub struct ChatTurnHttpResponse { pub assistant_message: String, pub tool_calls_made: usize, pub iterations_used: usize, pub truncated: bool, #[serde(skip_serializing_if = "Option::is_none")] pub prompt_eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub amended_insight_id: Option, pub backend: String, pub model: String, } /// POST /insights/chat — submit a follow-up turn against an existing insight. #[post("/insights/chat")] pub async fn chat_turn_handler( http_request: HttpRequest, _claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.chat", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; let chat_req = ChatTurnRequest { library_id: library.id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), backend: request.backend.clone(), num_ctx: request.num_ctx, temperature: request.temperature, top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, max_iterations: request.max_iterations, amend: request.amend, }; match app_state.insight_chat.chat_turn(chat_req).await { Ok(result) => { span.set_status(Status::Ok); HttpResponse::Ok().json(ChatTurnHttpResponse { assistant_message: result.assistant_message, tool_calls_made: result.tool_calls_made, iterations_used: result.iterations_used, truncated: result.truncated, prompt_eval_count: result.prompt_eval_count, eval_count: result.eval_count, amended_insight_id: result.amended_insight_id, backend: result.backend_used, model: result.model_used, }) } Err(e) => { let msg = format!("{}", e); log::error!("Chat turn failed: {}", msg); span.set_status(Status::error(msg.clone())); // Map well-known errors to client-facing 4xx codes. if msg.contains("no insight found") { HttpResponse::NotFound().json(serde_json::json!({ "error": msg })) } else if msg.contains("no chat history") { HttpResponse::Conflict().json(serde_json::json!({ "error": msg })) } else if msg.contains("user_message") || msg.contains("unknown backend") || msg.contains("switching from local to hybrid") || msg.contains("hybrid backend unavailable") { HttpResponse::BadRequest().json(serde_json::json!({ "error": msg })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg })) } } } } #[derive(Debug, Deserialize)] pub struct ChatHistoryQuery { pub path: String, #[serde(default)] pub library: Option, } #[derive(Debug, Serialize)] pub struct ChatHistoryHttpResponse { pub messages: Vec, pub turn_count: usize, pub model_version: String, pub backend: String, } #[derive(Debug, Serialize)] pub struct RenderedHistoryMessage { pub role: String, pub content: String, pub is_initial: bool, #[serde(skip_serializing_if = "Vec::is_empty")] pub tools: Vec, } #[derive(Debug, Serialize)] pub struct HistoryToolInvocation { pub name: String, pub arguments: serde_json::Value, pub result: String, #[serde(skip_serializing_if = "std::ops::Not::not")] pub result_truncated: bool, } #[derive(Debug, Deserialize)] pub struct ChatRewindHttpRequest { pub file_path: String, #[serde(default)] pub library: Option, /// 0-based index into the rendered transcript. The message at this /// index, and everything after it, is discarded. Must be > 0 — the /// initial user message is protected. pub discard_from_rendered_index: usize, } /// POST /insights/chat/rewind — truncate the stored conversation so the /// rendered message at `discard_from_rendered_index` (and everything after) /// is removed. Use when a user wants to retry a turn with a different /// prompt without prior replies poisoning context. #[post("/insights/chat/rewind")] pub async fn chat_rewind_handler( _claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; match app_state .insight_chat .rewind_history( library.id, &request.file_path, request.discard_from_rendered_index, ) .await { Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })), Err(e) => { let msg = format!("{}", e); log::error!("Chat rewind failed: {}", msg); if msg.contains("no insight found") { HttpResponse::NotFound().json(serde_json::json!({ "error": msg })) } else if msg.contains("no chat history") { HttpResponse::Conflict().json(serde_json::json!({ "error": msg })) } else if msg.contains("cannot discard the initial") || msg.contains("out of range") { HttpResponse::BadRequest().json(serde_json::json!({ "error": msg })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg })) } } } } /// GET /insights/chat/history — return the rendered transcript for a photo. #[get("/insights/chat/history")] pub async fn chat_history_handler( _claims: Claims, query: web::Query, app_state: web::Data, ) -> impl Responder { // library param parsed for parity with other insight endpoints, even // though load_history currently keys on file_path alone (matches the // existing get_insight DAO contract). let _library = libraries::resolve_library_param(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); match app_state.insight_chat.load_history(&query.path) { Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse { messages: view .messages .into_iter() .map(|m| RenderedHistoryMessage { role: m.role, content: m.content, is_initial: m.is_initial, tools: m .tools .into_iter() .map(|t| HistoryToolInvocation { name: t.name, arguments: t.arguments, result: t.result, result_truncated: t.result_truncated, }) .collect(), }) .collect(), turn_count: view.turn_count, model_version: view.model_version, backend: view.backend, }), Err(e) => { let msg = format!("{}", e); if msg.contains("no insight found") { HttpResponse::NotFound().json(serde_json::json!({ "error": msg })) } else if msg.contains("no chat history") { HttpResponse::Conflict().json(serde_json::json!({ "error": msg })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg })) } } } } /// POST /insights/chat/stream — streaming variant of /insights/chat. /// Returns `text/event-stream` with one event per chat stream event. #[post("/insights/chat/stream")] pub async fn chat_stream_handler( _claims: Claims, request: web::Json, app_state: web::Data, ) -> HttpResponse { let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; let chat_req = ChatTurnRequest { library_id: library.id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), backend: request.backend.clone(), num_ctx: request.num_ctx, temperature: request.temperature, top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, max_iterations: request.max_iterations, amend: request.amend, }; let service = app_state.insight_chat.clone(); let events = service.chat_turn_stream(chat_req); // Map ChatStreamEvent → SSE frame bytes. let sse_stream = futures::stream::StreamExt::map(events, |ev| { let frame = render_sse_frame(&ev); Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame)) }); HttpResponse::Ok() .content_type("text/event-stream") .insert_header(("Cache-Control", "no-cache")) .insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering .streaming(sse_stream) } fn render_sse_frame(ev: &ChatStreamEvent) -> String { let (event_name, payload) = match ev { ChatStreamEvent::IterationStart { n, max } => { ("iteration_start", serde_json::json!({ "n": n, "max": max })) } ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})), ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })), ChatStreamEvent::ToolCall { index, name, arguments, } => ( "tool_call", serde_json::json!({ "index": index, "name": name, "arguments": arguments }), ), ChatStreamEvent::ToolResult { index, name, result, result_truncated, } => ( "tool_result", serde_json::json!({ "index": index, "name": name, "result": result, "result_truncated": result_truncated, }), ), ChatStreamEvent::Done { tool_calls_made, iterations_used, truncated, prompt_eval_count, eval_count, amended_insight_id, backend_used, model_used, } => ( "done", serde_json::json!({ "tool_calls_made": tool_calls_made, "iterations_used": iterations_used, "truncated": truncated, "prompt_eval_count": prompt_eval_count, "eval_count": eval_count, "amended_insight_id": amended_insight_id, "backend": backend_used, "model": model_used, }), ), ChatStreamEvent::Error(msg) => ("error", serde_json::json!({ "message": msg })), }; let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string()); format!("event: {}\ndata: {}\n\n", event_name, data) }