use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web}; use futures::StreamExt; use opentelemetry::KeyValue; use opentelemetry::trace::{Span, Status, Tracer}; use serde::{Deserialize, Serialize}; use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest}; use crate::ai::ollama::ChatMessage; use crate::ai::{ModelCapabilities, OllamaClient}; use crate::data::Claims; use crate::database::models::{InsightGenerationType, InsightJobStatus, PhotoInsight}; use crate::database::{ExifDao, InsightDao}; use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; use crate::utils::normalize_path; /// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the /// ids of approved insights whose `training_messages` should be compressed /// into trajectory form and injected into the system prompt. Empty = no /// change in behavior. Request-level `fewshot_insight_ids` overrides this /// when non-empty. // const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908]; const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[]; #[derive(Debug, Deserialize)] pub struct GeneratePhotoInsightRequest { pub file_path: String, #[serde(default)] pub model: Option, #[serde(default)] pub system_prompt: Option, #[serde(default)] pub num_ctx: Option, #[serde(default)] pub temperature: Option, #[serde(default)] pub top_p: Option, #[serde(default)] pub top_k: Option, #[serde(default)] pub min_p: Option, /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision + /// OpenRouter chat). Only respected by the agentic endpoint. #[serde(default)] pub backend: Option, /// Insight ids whose stored `training_messages` should be compressed /// into few-shot trajectories and injected into the system prompt. /// Silently truncated to the first 2. When absent/empty, the handler /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`. #[serde(default)] pub fewshot_insight_ids: Option>, /// Active persona id for this generation. New facts are tagged with /// it (`entity_facts.persona_id`); recall during the agentic loop is /// scoped to it. Defaults to `"default"` when absent. #[serde(default)] pub persona_id: Option, } #[derive(Debug, Deserialize)] pub struct GetPhotoInsightQuery { pub path: String, /// Library context for this lookup. Used to pick the right content /// hash when the same rel_path exists under multiple roots. #[serde(default)] pub library: Option, } #[derive(Debug, Deserialize)] pub struct GenerationStatusQuery { /// If provided, look up the job by id. #[serde(default)] pub job_id: Option, /// If provided with `library`, look up the latest running job for this /// file. Used when the client doesn't have a persisted job_id. #[serde(default)] pub path: Option, #[serde(default)] pub library: Option, } /// GET /insights/generation/status - Check status of a generation job. /// Accepts either `?job_id=` or `?path=&library=`. #[get("/insights/generation/status")] pub async fn generation_status_handler( _claims: Claims, query: web::Query, app_state: web::Data, ) -> impl Responder { let ctx = opentelemetry::Context::new(); if let Some(jid) = query.job_id { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); match dao.get_job_by_id(&ctx, jid) { Ok(Some(job)) => { return HttpResponse::Ok().json(GenerationStatusResponse { job_id: job.id, status: InsightJobStatus::parse(&job.status), started_at: job.started_at, completed_at: job.completed_at, result_insight_id: job.result_insight_id, error_message: job.error_message, }); } Ok(None) => { return HttpResponse::NotFound().json(serde_json::json!({ "error": format!("Job {} not found", jid) })); } Err(e) => { log::error!("Failed to look up job {}: {:?}", jid, e); return HttpResponse::InternalServerError().json(serde_json::json!({ "error": "Failed to look up job" })); } } } if let Some(ref fp) = query.path { let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); let normalized = normalize_path(fp); let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); match dao.get_active_job(&ctx, library.id, &normalized) { Ok(Some(job)) => { return HttpResponse::Ok().json(GenerationStatusResponse { job_id: job.id, status: InsightJobStatus::parse(&job.status), started_at: job.started_at, completed_at: job.completed_at, result_insight_id: job.result_insight_id, error_message: job.error_message, }); } Ok(None) => { return HttpResponse::Ok().json(serde_json::json!({ "status": "idle", "message": "No running generation job for this file" })); } Err(e) => { log::error!("Failed to look up active job for {}: {:?}", normalized, e); return HttpResponse::InternalServerError().json(serde_json::json!({ "error": "Failed to look up active job" })); } } } HttpResponse::BadRequest().json(serde_json::json!({ "error": "Provide either job_id or path query parameter" })) } #[derive(Debug, Deserialize)] pub struct CancelGenerationRequest { /// If provided, cancel the specific job by id. #[serde(default)] pub job_id: Option, /// If provided with `library`, cancel all running jobs for this file. #[serde(default)] pub file_path: Option, #[serde(default)] pub library: Option, } /// POST /insights/generation/cancel - Cancel a running generation job. /// Accepts either `job_id` or `file_path` + optional `library` in the body. #[post("/insights/generation/cancel")] pub async fn cancel_generation_handler( _claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let ctx = opentelemetry::Context::new(); if let Some(jid) = request.job_id { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); match dao.cancel_job(&ctx, jid) { Ok(true) => { let mut handles = app_state .insight_job_handles .lock() .expect("Unable to lock InsightJobHandles"); if let Some(handle) = handles.remove(&jid) { handle.abort(); } return HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": format!("Job {} cancelled", jid) })); } Ok(false) => { return HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": format!("Job {} was not running", jid) })); } Err(e) => { log::error!("Failed to cancel job {}: {:?}", jid, e); return HttpResponse::InternalServerError().json(serde_json::json!({ "error": "Failed to cancel job" })); } } } if let Some(ref fp) = request.file_path { let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); let normalized = normalize_path(fp); // Get active job ids first, then cancel in DB, then abort tasks let active_ids: Vec = { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); let ids = dao .get_active_job(&ctx, library.id, &normalized) .ok() .flatten() .map(|j| vec![j.id]) .unwrap_or_default(); let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized); ids }; if active_ids.is_empty() { return HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "No running generation job for this file" })); } for jid in &active_ids { if let Some(handle) = app_state .insight_job_handles .lock() .expect("Unable to lock InsightJobHandles") .remove(jid) { handle.abort(); } } return HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": format!("Cancelled {} running job(s) for {}", active_ids.len(), normalized) })); } HttpResponse::BadRequest().json(serde_json::json!({ "error": "Provide either job_id or file_path in the request body" })) } #[derive(Debug, Deserialize)] pub struct RateInsightRequest { pub file_path: String, pub approved: bool, /// When set, rate this specific insight version by primary key /// (used by the per-file history view to rate superseded versions). /// When omitted, the current insight for `file_path` is rated. #[serde(default)] pub insight_id: Option, } #[derive(Debug, Deserialize)] pub struct ExportTrainingDataQuery { #[serde(default)] pub approved_only: Option, } #[derive(Debug, Serialize)] pub struct JobIdResponse { pub job_id: i32, } #[derive(Debug, Serialize)] pub struct GenerationStatusResponse { pub job_id: i32, pub status: InsightJobStatus, pub started_at: i64, #[serde(skip_serializing_if = "Option::is_none")] pub completed_at: Option, #[serde(skip_serializing_if = "Option::is_none")] pub result_insight_id: Option, #[serde(skip_serializing_if = "Option::is_none")] pub error_message: Option, } #[derive(Debug, Serialize)] pub struct PhotoInsightResponse { pub id: i32, pub file_path: String, pub title: String, pub summary: String, pub generated_at: i64, pub model_version: String, #[serde(skip_serializing_if = "Option::is_none")] pub prompt_eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub approved: Option, pub backend: String, /// True when the insight was generated agentically and a chat /// continuation can be started against it. Drives the mobile chat button. pub has_training_messages: bool, #[serde(skip_serializing_if = "Option::is_none")] pub num_ctx: Option, #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, #[serde(skip_serializing_if = "Option::is_none")] pub top_k: Option, #[serde(skip_serializing_if = "Option::is_none")] pub min_p: Option, #[serde(skip_serializing_if = "Option::is_none")] pub system_prompt: Option, #[serde(skip_serializing_if = "Option::is_none")] pub persona_id: Option, } impl From for PhotoInsightResponse { fn from(insight: PhotoInsight) -> Self { PhotoInsightResponse { id: insight.id, file_path: insight.file_path, title: insight.title, summary: insight.summary, generated_at: insight.generated_at, model_version: insight.model_version, prompt_eval_count: insight.prompt_eval_count, eval_count: insight.eval_count, approved: insight.approved, has_training_messages: insight.training_messages.is_some(), backend: insight.backend, num_ctx: insight.num_ctx, temperature: insight.temperature, top_p: insight.top_p, top_k: insight.top_k, min_p: insight.min_p, system_prompt: insight.system_prompt, persona_id: insight.persona_id, } } } #[derive(Debug, Serialize)] pub struct AvailableModelsResponse { pub primary: ServerModels, #[serde(skip_serializing_if = "Option::is_none")] pub fallback: Option, } #[derive(Debug, Serialize)] pub struct ServerModels { pub url: String, pub models: Vec, pub default_model: String, } /// POST /insights/generate - Generate insight for a specific photo (async) #[post("/insights/generate")] pub async fn generate_insight_handler( http_request: HttpRequest, _claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.generate", &parent_context); let normalized_path = normalize_path(&request.file_path); let library = app_state.primary_library(); let gen_type = InsightGenerationType::Standard; span.set_attribute(KeyValue::new("file_path", normalized_path.clone())); if let Some(ref model) = request.model { span.set_attribute(KeyValue::new("model", model.clone())); } log::info!( "Manual insight generation triggered for photo: {} with model: {:?}", normalized_path, request.model ); // Look up and abort any running job for this file, then cancel in DB let old_job_ids: Vec = { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); let ctx = opentelemetry::Context::new(); let ids = dao .get_active_job(&ctx, library.id, &normalized_path) .ok() .flatten() .map(|j| vec![j.id]) .unwrap_or_default(); let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized_path); ids }; for jid in &old_job_ids { if let Some(handle) = app_state .insight_job_handles .lock() .expect("Unable to lock InsightJobHandles") .remove(jid) { handle.abort(); } } let job_id = { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); match dao.create_job( &opentelemetry::Context::new(), library.id, &normalized_path, gen_type, ) { Ok(id) => id, Err(e) => { log::error!("Failed to create generation job: {:?}", e); span.set_status(Status::error("Failed to create generation job")); return HttpResponse::InternalServerError().json(serde_json::json!({ "error": "Failed to create generation job" })); } } }; // Spawn background task with timeout let generator = app_state.insight_generator.clone(); let job_dao = app_state.insight_job_dao.clone(); let job_handles = app_state.insight_job_handles.clone(); let path = normalized_path.clone(); let handle = tokio::spawn(async move { let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(120); let path_for_task = path.clone(); let generator_for_task = generator.clone(); let result = tokio::task::spawn(async move { tokio::time::timeout( std::time::Duration::from_secs(timeout_secs), generator_for_task.generate_insight_for_photo_with_config( &path_for_task, request.model.clone(), request.system_prompt.clone(), request.num_ctx, request.temperature, request.top_p, request.top_k, request.min_p, ), ) .await }) .await; let ctx = opentelemetry::Context::new(); let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao"); match result { Ok(Ok(Ok(()))) => { let mut insight_dao = generator .insight_dao() .lock() .expect("Unable to lock InsightDao"); let insight_id = insight_dao .get_insight(&ctx, &path) .ok() .flatten() .map(|i| i.id); if let Some(id) = insight_id { if let Err(e) = dao.complete_job(&ctx, job_id, id) { log::error!("Failed to mark job {} as completed: {:?}", job_id, e); } } else if let Err(e) = dao.fail_job(&ctx, job_id, "generation returned no insight") { log::error!("Failed to mark job {} as failed: {:?}", job_id, e); } } Ok(Ok(Err(e))) => { log::error!("Insight generation failed for {}: {:?}", path, e); if let Err(err) = dao.fail_job(&ctx, job_id, &format!("{:?}", e)) { log::error!("Failed to mark job {} as failed: {:?}", job_id, err); } } Ok(Err(_)) => { log::error!( "Insight generation timed out for {} after {}s", path, timeout_secs ); if let Err(err) = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs)) { log::error!("Failed to mark job {} as failed: {:?}", job_id, err); } } Err(_) => { log::error!("Insight generation task panicked for {}", path); if let Err(err) = dao.fail_job(&ctx, job_id, "generation task panicked") { log::error!("Failed to mark job {} as failed: {:?}", job_id, err); } } } // Remove handle from map on completion let mut handles = job_handles .lock() .expect("Unable to lock InsightJobHandles"); handles.remove(&job_id); }); // Store abort handle { let mut handles = app_state .insight_job_handles .lock() .expect("Unable to lock InsightJobHandles"); handles.insert(job_id, handle.abort_handle()); } span.set_attribute(KeyValue::new("job_id", job_id as i64)); span.set_status(Status::Ok); HttpResponse::Accepted().json(JobIdResponse { job_id }) } /// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo #[get("/insights")] pub async fn get_insight_handler( _claims: Claims, query: web::Query, app_state: web::Data, insight_dao: web::Data>>, exif_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::debug!("Fetching insight for {}", normalized_path); let otel_context = opentelemetry::Context::new(); // Expand to rel_paths sharing content so an insight generated under // library 1 still shows when the same photo is viewed from library 2. let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); let sibling_paths = { let mut exif = exif_dao.lock().expect("Unable to lock ExifDao"); exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path) .unwrap_or_else(|_| vec![normalized_path.clone()]) }; let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.get_insight_for_paths(&otel_context, &sibling_paths) { Ok(Some(insight)) => HttpResponse::Ok().json(PhotoInsightResponse::from(insight)), Ok(None) => HttpResponse::NotFound().json(serde_json::json!({ "error": "Insight not found" })), Err(e) => { log::error!("Failed to fetch insight ({}): {:?}", &query.path, e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to fetch insight: {:?}", e) })) } } } /// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request) #[delete("/insights")] pub async fn delete_insight_handler( _claims: Claims, query: web::Query, insight_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::info!("Deleting insight for {}", normalized_path); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.delete_insight(&otel_context, &normalized_path) { Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Insight deleted successfully" })), Err(e) => { log::error!("Failed to delete insight: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to delete insight: {:?}", e) })) } } } /// GET /insights/all - Get all insights #[get("/insights/all")] pub async fn get_all_insights_handler( _claims: Claims, insight_dao: web::Data>>, ) -> impl Responder { log::debug!("Fetching all insights"); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.get_all_insights(&otel_context) { Ok(insights) => { let responses: Vec = insights .into_iter() .map(PhotoInsightResponse::from) .collect(); HttpResponse::Ok().json(responses) } Err(e) => { log::error!("Failed to fetch all insights: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to fetch insights: {:?}", e) })) } } } /// GET /insights/history?path=/path/to/photo.jpg - Get all insight versions /// for a single photo (current plus previously generated/superseded ones), /// newest first. Backs the per-file insight history view. #[get("/insights/history")] pub async fn get_insight_history_handler( _claims: Claims, query: web::Query, insight_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&query.path); log::debug!("Fetching insight history for {}", normalized_path); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); match dao.get_insight_history(&otel_context, &normalized_path) { Ok(insights) => { let responses: Vec = insights .into_iter() .map(PhotoInsightResponse::from) .collect(); HttpResponse::Ok().json(responses) } Err(e) => { log::error!("Failed to fetch insight history ({}): {:?}", &query.path, e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to fetch insight history: {:?}", e) })) } } } /// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop (async) #[post("/insights/generate/agentic")] pub async fn generate_agentic_insight_handler( http_request: HttpRequest, claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context); let normalized_path = normalize_path(&request.file_path); let library = app_state.primary_library(); let gen_type = InsightGenerationType::Agentic; span.set_attribute(KeyValue::new("file_path", normalized_path.clone())); if let Some(ref model) = request.model { span.set_attribute(KeyValue::new("model", model.clone())); } if let Some(ref backend) = request.backend { span.set_attribute(KeyValue::new("backend", backend.clone())); } log::info!( "Agentic insight generation triggered for photo: {} with model: {:?}", normalized_path, request.model ); // Look up and abort any running job for this file, then cancel in DB let old_job_ids: Vec = { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); let ctx = opentelemetry::Context::new(); let ids = dao .get_active_job(&ctx, library.id, &normalized_path) .ok() .flatten() .map(|j| vec![j.id]) .unwrap_or_default(); let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized_path); ids }; for jid in &old_job_ids { if let Some(handle) = app_state .insight_job_handles .lock() .expect("Unable to lock InsightJobHandles") .remove(jid) { handle.abort(); } } let job_id = { let mut dao = app_state .insight_job_dao .lock() .expect("Unable to lock InsightJobDao"); match dao.create_job( &opentelemetry::Context::new(), library.id, &normalized_path, gen_type, ) { Ok(id) => id, Err(e) => { log::error!("Failed to create agentic generation job: {:?}", e); span.set_status(Status::error("Failed to create generation job")); return HttpResponse::InternalServerError().json(serde_json::json!({ "error": "Failed to create generation job" })); } } }; // Resolve few-shot ids for the background task let fewshot_ids: Vec = match request.fewshot_insight_ids.as_deref() { Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(), _ => DEFAULT_FEWSHOT_INSIGHT_IDS .iter() .take(2) .copied() .collect(), }; let fewshot_examples: Vec> = { let otel_context = opentelemetry::Context::new(); let mut dao = app_state .insight_chat .insight_dao() .lock() .expect("Unable to lock InsightDao"); fewshot_ids .iter() .filter_map(|id| { let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?; let json = insight.training_messages?; match serde_json::from_str::>(&json) { Ok(msgs) => Some(msgs), Err(e) => { log::warn!( "Few-shot insight {} has malformed training_messages: {}", id, e ); None } } }) .collect() }; let user_id = claims.sub.parse::().unwrap_or(1); let persona_id = request .persona_id .clone() .filter(|s| !s.trim().is_empty()) .unwrap_or_else(|| "default".to_string()); // Server-side persona resolution: an explicit client `system_prompt` // wins; otherwise the persona's stored prompt from the persona store; // otherwise None and `build_system_content` applies its neutral // default. Without the lookup, a request carrying only `persona_id` // silently generated in the default voice. let system_prompt = request .system_prompt .clone() .filter(|s| !s.trim().is_empty()) .or_else(|| { app_state .insight_generator .persona_system_prompt(user_id, &persona_id) }); let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(12); // Spawn background task with timeout let generator = app_state.insight_generator.clone(); let job_dao = app_state.insight_job_dao.clone(); let job_handles = app_state.insight_job_handles.clone(); let path = normalized_path.clone(); let handle = tokio::spawn(async move { let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(180); let path_for_task = path.clone(); let generator_for_task = generator.clone(); let result = tokio::task::spawn(async move { tokio::time::timeout( std::time::Duration::from_secs(timeout_secs), generator_for_task.generate_agentic_insight_for_photo( &path_for_task, request.model.clone(), system_prompt, request.num_ctx, request.temperature, request.top_p, request.top_k, request.min_p, max_iterations, request.backend.clone(), fewshot_examples, fewshot_ids, user_id, persona_id, ), ) .await }) .await; let ctx = opentelemetry::Context::new(); let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao"); match result { Ok(Ok(Ok((Some(insight_id), _, _)))) => { if let Err(e) = dao.complete_job(&ctx, job_id, insight_id) { log::error!("Failed to mark job {} as completed: {:?}", job_id, e); } } Ok(Ok(Ok((None, _, _)))) => { if let Err(e) = dao.fail_job(&ctx, job_id, "agentic generation returned no insight") { log::error!("Failed to mark job {} as failed: {:?}", job_id, e); } } Ok(Ok(Err(e))) => { log::error!("Agentic insight generation failed for {}: {:?}", path, e); if let Err(err) = dao.fail_job(&ctx, job_id, &format!("{:?}", e)) { log::error!("Failed to mark job {} as failed: {:?}", job_id, err); } } Ok(Err(_)) => { log::error!( "Agentic insight generation timed out for {} after {}s", path, timeout_secs ); if let Err(err) = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs)) { log::error!("Failed to mark job {} as failed: {:?}", job_id, err); } } Err(_) => { log::error!("Agentic insight generation task panicked for {}", path); if let Err(err) = dao.fail_job(&ctx, job_id, "generation task panicked") { log::error!("Failed to mark job {} as failed: {:?}", job_id, err); } } } // Remove handle from map on completion let mut handles = job_handles .lock() .expect("Unable to lock InsightJobHandles"); handles.remove(&job_id); }); // Store abort handle { let mut handles = app_state .insight_job_handles .lock() .expect("Unable to lock InsightJobHandles"); handles.insert(job_id, handle.abort_handle()); } span.set_attribute(KeyValue::new("job_id", job_id as i64)); span.set_status(Status::Ok); HttpResponse::Accepted().json(JobIdResponse { job_id }) } /// GET /insights/models - Local-backend models with capabilities. Returns /// Ollama servers when `LLM_BACKEND=ollama` (default), or llama-swap slots /// when `LLM_BACKEND=llamacpp`. Same envelope shape either way so the /// client picker doesn't have to branch on backend kind. /// /// For llama-swap: `models` comes verbatim from `LLAMA_SWAP_ALLOWED_MODELS` /// (no live `/v1/models` probe), `has_vision` is true only for the /// configured `LLAMA_SWAP_VISION_MODEL` slot id, and `has_tool_calling` is /// reported as true for every slot (llama-server is launched with `--jinja` /// by convention — a misconfigured slot surfaces as a chat-call error). #[get("/insights/models")] pub async fn get_available_models_handler( _claims: Claims, app_state: web::Data, ) -> impl Responder { log::debug!("Fetching available models with capabilities"); if crate::ai::local_backend_is_llamacpp() && let Some(lc) = app_state.llamacpp.as_ref() { let models: Vec = app_state .llamacpp_allowed_models .iter() .map(|name| ModelCapabilities { name: name.clone(), has_vision: true, has_tool_calling: true, }) .collect(); let primary = ServerModels { url: lc.base_url.clone(), models, default_model: lc.primary_model.clone(), }; return HttpResponse::Ok().json(AvailableModelsResponse { primary, fallback: None, }); } let ollama_client = &app_state.ollama; // Fetch models with capabilities from primary server let primary_models = match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await { Ok(models) => models, Err(e) => { log::warn!("Failed to fetch models from primary server: {:?}", e); vec![] } }; let primary = ServerModels { url: ollama_client.primary_url.clone(), models: primary_models, default_model: ollama_client.primary_model.clone(), }; // Fetch models with capabilities from fallback server if configured let fallback = if let Some(fallback_url) = &ollama_client.fallback_url { match OllamaClient::list_models_with_capabilities(fallback_url).await { Ok(models) => Some(ServerModels { url: fallback_url.clone(), models, default_model: ollama_client .fallback_model .clone() .unwrap_or_else(|| ollama_client.primary_model.clone()), }), Err(e) => { log::warn!("Failed to fetch models from fallback server: {:?}", e); None } } } else { None }; let response = AvailableModelsResponse { primary, fallback }; HttpResponse::Ok().json(response) } #[derive(Debug, Serialize)] pub struct OpenRouterModelsResponse { pub models: Vec, pub default_model: Option, pub configured: bool, } /// GET /insights/openrouter/models - Curated OpenRouter model ids exposed /// to clients for the hybrid backend. Returned verbatim from /// `OPENROUTER_ALLOWED_MODELS`; no live call to OpenRouter. #[get("/insights/openrouter/models")] pub async fn get_openrouter_models_handler( _claims: Claims, app_state: web::Data, ) -> impl Responder { let configured = app_state.openrouter.is_some(); let default_model = app_state .openrouter .as_ref() .map(|c| c.primary_model.clone()); let response = OpenRouterModelsResponse { models: app_state.openrouter_allowed_models.clone(), default_model, configured, }; HttpResponse::Ok().json(response) } /// POST /insights/rate - Rate an insight (thumbs up/down for training data) #[post("/insights/rate")] pub async fn rate_insight_handler( _claims: Claims, request: web::Json, insight_dao: web::Data>>, ) -> impl Responder { let normalized_path = normalize_path(&request.file_path); log::info!( "Rating insight for {} (id={:?}): approved={}", normalized_path, request.insight_id, request.approved ); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); // Rate a specific version by id when provided (history view), otherwise // rate the current insight for the path. let result = match request.insight_id { Some(id) => dao.rate_insight_by_id(&otel_context, id, request.approved), None => dao.rate_insight(&otel_context, &normalized_path, request.approved), }; match result { Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true, "message": "Insight rated successfully" })), Err(e) => { log::error!("Failed to rate insight: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to rate insight: {:?}", e) })) } } } /// GET /insights/training-data - Export approved training data as JSONL #[get("/insights/training-data")] pub async fn export_training_data_handler( _claims: Claims, query: web::Query, insight_dao: web::Data>>, ) -> impl Responder { let approved_only = query.approved_only.unwrap_or(true); log::info!("Exporting training data (approved_only={})", approved_only); let otel_context = opentelemetry::Context::new(); let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); let insights = if approved_only { dao.get_approved_insights(&otel_context) } else { dao.get_all_insights(&otel_context) }; match insights { Ok(insights) => { let mut jsonl = String::new(); for insight in &insights { if let Some(ref messages) = insight.training_messages { let entry = serde_json::json!({ "file_path": insight.file_path, "model_version": insight.model_version, "generated_at": insight.generated_at, "title": insight.title, "summary": insight.summary, "messages": serde_json::from_str::(messages) .unwrap_or(serde_json::Value::Null), }); jsonl.push_str(&entry.to_string()); jsonl.push('\n'); } } HttpResponse::Ok() .content_type("application/jsonl") .insert_header(( "Content-Disposition", "attachment; filename=\"training_data.jsonl\"", )) .body(jsonl) } Err(e) => { log::error!("Failed to export training data: {:?}", e); HttpResponse::InternalServerError().json(serde_json::json!({ "error": format!("Failed to export training data: {:?}", e) })) } } } #[derive(Debug, Deserialize)] pub struct ChatTurnHttpRequest { pub file_path: String, #[serde(default)] pub library: Option, pub user_message: String, #[serde(default)] pub model: Option, #[serde(default)] pub backend: Option, #[serde(default)] pub num_ctx: Option, #[serde(default)] pub temperature: Option, #[serde(default)] pub top_p: Option, #[serde(default)] pub top_k: Option, #[serde(default)] pub min_p: Option, #[serde(default)] pub max_iterations: Option, /// Per-turn system-prompt override. Ephemeral in append mode, /// persisted in amend / regenerate mode. See ChatTurnRequest for /// semantics. Also seeds the bootstrap path when no insight exists. #[serde(default)] pub system_prompt: Option, /// Active persona id for this turn. New facts/recalls scope to it. /// Defaults to `"default"` when missing. #[serde(default)] pub persona_id: Option, #[serde(default)] pub amend: bool, /// When true, force the bootstrap path even if an insight already /// exists: flip the existing row(s) to `is_current=false` and create /// a new insight row from this turn. Takes precedence over `amend`. /// Collapses to a normal bootstrap when no insight exists. #[serde(default)] pub regenerate: bool, } #[derive(Debug, Serialize)] pub struct ChatTurnHttpResponse { pub assistant_message: String, pub tool_calls_made: usize, pub iterations_used: usize, pub truncated: bool, #[serde(skip_serializing_if = "Option::is_none")] pub prompt_eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub amended_insight_id: Option, pub backend: String, pub model: String, } /// POST /insights/chat — submit a follow-up turn against an existing insight. #[post("/insights/chat")] pub async fn chat_turn_handler( http_request: HttpRequest, claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.chat", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; // Service-token claims (sub: "service:apollo") fall through to // user_id=1 — the operator convention. Mobile/web clients have a // numeric sub. Required for the entity_facts composite FK. let user_id = claims.sub.parse::().unwrap_or(1); let chat_req = ChatTurnRequest { library_id: library.id, user_id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), backend: request.backend.clone(), num_ctx: request.num_ctx, temperature: request.temperature, top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), amend: request.amend, regenerate: request.regenerate, }; match app_state.insight_chat.chat_turn(chat_req).await { Ok(result) => { span.set_status(Status::Ok); HttpResponse::Ok().json(ChatTurnHttpResponse { assistant_message: result.assistant_message, tool_calls_made: result.tool_calls_made, iterations_used: result.iterations_used, truncated: result.truncated, prompt_eval_count: result.prompt_eval_count, eval_count: result.eval_count, amended_insight_id: result.amended_insight_id, backend: result.backend_used, model: result.model_used, }) } Err(e) => { let msg = format!("{}", e); log::error!("Chat turn failed: {}", msg); span.set_status(Status::error(msg.clone())); // Map well-known errors to client-facing 4xx codes. if msg.contains("no insight found") { HttpResponse::NotFound().json(serde_json::json!({ "error": msg })) } else if msg.contains("no chat history") { HttpResponse::Conflict().json(serde_json::json!({ "error": msg })) } else if msg.contains("user_message") || msg.contains("unknown backend") || msg.contains("switching from local to hybrid") || msg.contains("hybrid backend unavailable") { HttpResponse::BadRequest().json(serde_json::json!({ "error": msg })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg })) } } } } #[derive(Debug, Deserialize)] pub struct ChatHistoryQuery { pub path: String, #[serde(default)] pub library: Option, } #[derive(Debug, Serialize)] pub struct ChatHistoryHttpResponse { pub messages: Vec, pub turn_count: usize, pub model_version: String, pub backend: String, } #[derive(Debug, Serialize)] pub struct RenderedHistoryMessage { pub role: String, pub content: String, pub is_initial: bool, #[serde(skip_serializing_if = "Vec::is_empty")] pub tools: Vec, } #[derive(Debug, Serialize)] pub struct HistoryToolInvocation { pub name: String, pub arguments: serde_json::Value, pub result: String, #[serde(skip_serializing_if = "std::ops::Not::not")] pub result_truncated: bool, } #[derive(Debug, Deserialize)] pub struct ChatRewindHttpRequest { pub file_path: String, #[serde(default)] pub library: Option, /// 0-based index into the rendered transcript. The message at this /// index, and everything after it, is discarded. Must be > 0 — the /// initial user message is protected. pub discard_from_rendered_index: usize, } /// POST /insights/chat/rewind — truncate the stored conversation so the /// rendered message at `discard_from_rendered_index` (and everything after) /// is removed. Use when a user wants to retry a turn with a different /// prompt without prior replies poisoning context. #[post("/insights/chat/rewind")] pub async fn chat_rewind_handler( _claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; match app_state .insight_chat .rewind_history( library.id, &request.file_path, request.discard_from_rendered_index, ) .await { Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })), Err(e) => { let msg = format!("{}", e); log::error!("Chat rewind failed: {}", msg); if msg.contains("no insight found") { HttpResponse::NotFound().json(serde_json::json!({ "error": msg })) } else if msg.contains("no chat history") { HttpResponse::Conflict().json(serde_json::json!({ "error": msg })) } else if msg.contains("cannot discard the initial") || msg.contains("out of range") { HttpResponse::BadRequest().json(serde_json::json!({ "error": msg })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg })) } } } } /// GET /insights/chat/history — return the rendered transcript for a photo. #[get("/insights/chat/history")] pub async fn chat_history_handler( _claims: Claims, query: web::Query, app_state: web::Data, ) -> impl Responder { // library_id scopes the lookup so a regenerate on this library // isn't shadowed by an untouched is_current=true row in another // library for the same rel_path. load_history falls back to the // cross-library lookup when the scoped one misses, so a photo // with no insight in this library but one in another still // surfaces (the "show this photo's primary insight" merge case). let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); match app_state.insight_chat.load_history(library.id, &query.path) { Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse { messages: view .messages .into_iter() .map(|m| RenderedHistoryMessage { role: m.role, content: m.content, is_initial: m.is_initial, tools: m .tools .into_iter() .map(|t| HistoryToolInvocation { name: t.name, arguments: t.arguments, result: t.result, result_truncated: t.result_truncated, }) .collect(), }) .collect(), turn_count: view.turn_count, model_version: view.model_version, backend: view.backend, }), Err(e) => { let msg = format!("{}", e); if msg.contains("no insight found") { HttpResponse::NotFound().json(serde_json::json!({ "error": msg })) } else if msg.contains("no chat history") { HttpResponse::Conflict().json(serde_json::json!({ "error": msg })) } else { HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg })) } } } } /// POST /insights/chat/stream — streaming variant of /insights/chat. /// Returns `text/event-stream` with one event per chat stream event. #[post("/insights/chat/stream")] pub async fn chat_stream_handler( claims: Claims, request: web::Json, app_state: web::Data, ) -> HttpResponse { let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; // Service-token sub falls through to user_id=1 (see chat_turn_handler). let user_id = claims.sub.parse::().unwrap_or(1); let chat_req = ChatTurnRequest { library_id: library.id, user_id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), backend: request.backend.clone(), num_ctx: request.num_ctx, temperature: request.temperature, top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), amend: request.amend, regenerate: request.regenerate, }; let service = app_state.insight_chat.clone(); let events = service.chat_turn_stream(chat_req); // Map ChatStreamEvent → SSE frame bytes. let sse_stream = futures::stream::StreamExt::map(events, |ev| { let frame = render_sse_frame(&ev); Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame)) }); HttpResponse::Ok() .content_type("text/event-stream") .insert_header(("Cache-Control", "no-cache")) .insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering .streaming(sse_stream) } fn render_sse_frame(ev: &ChatStreamEvent) -> String { let (event_name, payload) = sse_event_payload(ev); let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string()); format!("event: {}\ndata: {}\n\n", event_name, data) } /// Like `render_sse_frame`, but stamps the event's absolute sequence number /// (`seq`) into the payload so reconnecting replay clients can compute /// `skip_before` precisely. `seq` is distinct from the tool-pairing `index` /// already carried by `tool_call`/`tool_result`. fn render_indexed_frame(ev: &ChatStreamEvent, seq: u32) -> String { let (event_name, mut payload) = sse_event_payload(ev); if let serde_json::Value::Object(map) = &mut payload { map.insert("seq".to_string(), serde_json::json!(seq)); } let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string()); format!("event: {}\ndata: {}\n\n", event_name, data) } fn sse_event_payload(ev: &ChatStreamEvent) -> (&'static str, serde_json::Value) { match ev { ChatStreamEvent::IterationStart { n, max } => { ("iteration_start", serde_json::json!({ "n": n, "max": max })) } ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})), ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })), ChatStreamEvent::ToolCall { index, name, arguments, } => ( "tool_call", serde_json::json!({ "index": index, "name": name, "arguments": arguments }), ), ChatStreamEvent::ToolResult { index, name, result, result_truncated, } => ( "tool_result", serde_json::json!({ "index": index, "name": name, "result": result, "result_truncated": result_truncated, }), ), ChatStreamEvent::Done { tool_calls_made, iterations_used, truncated, prompt_tokens, eval_tokens, num_ctx, amended_insight_id, backend_used, model_used, cancelled, } => ( "done", serde_json::json!({ "tool_calls_made": tool_calls_made, "iterations_used": iterations_used, "truncated": truncated, "prompt_tokens": prompt_tokens, "eval_tokens": eval_tokens, "num_ctx": num_ctx, "amended_insight_id": amended_insight_id, "backend": backend_used, "model": model_used, "cancelled": cancelled, }), ), // Apollo's frontend SSE consumer (and its free-chat backend, which // is the de-facto convention) listens for `error_message`. Emitting // `error` here meant any failure on the photo-chat path (e.g. // "no insight found for path") was silently dropped, leaving an // empty assistant bubble with no clue why the turn died. ChatStreamEvent::Error(msg) => ("error_message", serde_json::json!({ "message": msg })), } } /// POST /insights/chat/turn — async turn dispatch. Returns turn_id immediately, /// client then polls GET /insights/chat/turn/{turn_id} for SSE replay. #[post("/insights/chat/turn")] pub async fn turn_async_handler( http_request: HttpRequest, claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(e) => { return HttpResponse::BadRequest().json(serde_json::json!({ "error": format!("invalid library: {}", e) })); } }; let user_id = claims.sub.parse::().unwrap_or(1); let chat_req = ChatTurnRequest { library_id: library.id, user_id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), backend: request.backend.clone(), num_ctx: request.num_ctx, temperature: request.temperature, top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), amend: request.amend, regenerate: request.regenerate, }; let service = app_state.insight_chat.clone(); let registry = app_state.turn_registry.clone(); let turn_id = service.chat_turn_async(registry, chat_req).await; span.set_attribute(KeyValue::new("turn_id", turn_id.clone())); span.set_status(Status::Ok); HttpResponse::Accepted().json(serde_json::json!({ "turn_id": turn_id, "status": "running" })) } /// Query params for the SSE replay stream. #[derive(Debug, Deserialize)] pub struct ReplayQuery { /// Replay events from this absolute sequence number (`seq`) onward. /// Absent or 0 replays from the beginning. On reconnect the client sends /// the `seq` of the last event it applied, plus one. pub skip_before: Option, } /// GET /insights/chat/turn/{turn_id} — SSE replay stream. #[get("/insights/chat/turn/{turn_id}")] pub async fn turn_replay_handler( http_request: HttpRequest, path: web::Path, query: web::Query, app_state: web::Data, ) -> HttpResponse { use crate::ai::turn_registry::ReplayOutcome; let turn_id = path.into_inner(); let skip_before = query.skip_before.unwrap_or(0); let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("ai.chat.turn.replay", &parent_context); span.set_attribute(KeyValue::new("turn_id", turn_id.clone())); span.set_attribute(KeyValue::new("skip_before", skip_before as i64)); let registry = app_state.turn_registry.clone(); let entry = match registry.get(&turn_id).await { Some(e) => e, None => { span.set_status(Status::error("turn not found")); return HttpResponse::NotFound().json(serde_json::json!({ "error": format!("turn {} not found", turn_id) })); } }; let info = entry.info().await; span.set_attribute(KeyValue::new("status", info.status.as_str())); span.set_attribute(KeyValue::new( "event_count", info.total_events_pushed as i64, )); let turn_info_frame = render_turn_info_frame(&info); // Initial buffered batch: events produced before this connection attached. // Stamp each frame with its absolute `seq` so the client can track // `skip_before` precisely across reconnects. let (initial_frames, start_skip) = match entry.replay_from(skip_before).await { ReplayOutcome::Gone => { span.set_status(Status::error("buffer evicted")); return HttpResponse::Gone().json(serde_json::json!({ "error": "turn history has expired (buffer evicted)" })); } ReplayOutcome::CaughtUp { next_skip } => (Vec::new(), next_skip), ReplayOutcome::Events { events, next_skip } => { let frames: Vec = events .into_iter() .enumerate() .map(|(i, ev)| { actix_web::web::Bytes::from(render_indexed_frame(&ev, skip_before + i as u32)) }) .collect(); (frames, next_skip) } }; span.set_status(Status::Ok); let running = entry.is_running(); // Head: the `turn_info` event followed by any already-buffered events. let head = futures::stream::once(async move { Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(turn_info_frame)) }) .chain(futures::stream::iter( initial_frames.into_iter().map(Ok::<_, actix_web::Error>), )); if !running { // Completed turn: every event — including the terminal Done/Error — is // already in the buffered batch above. Emit it and close. return HttpResponse::Ok() .content_type("text/event-stream") .insert_header(("Cache-Control", "no-cache")) .insert_header(("X-Accel-Buffering", "no")) .streaming(head); } // In-progress turn: after the head, wait for new events. `next_batch` // drains every buffered event (including the terminal one) before it // reports the turn finished, so the final Done/Error is never dropped; // CaughtUp then closes the stream by returning None. let tail = futures::stream::unfold( ( entry, start_skip, Vec::::new(), false, ), |(entry, skip, pending, finished)| async move { // Flush queued frames from a previous multi-event batch first. if let Some((first, rest)) = pending.split_first() { return Some((Ok(first.clone()), (entry, skip, rest.to_vec(), finished))); } if finished { return None; } match entry.next_batch(skip).await { ReplayOutcome::Events { events, next_skip } => { let frames: Vec = events .into_iter() .enumerate() .map(|(i, ev)| { actix_web::web::Bytes::from(render_indexed_frame(&ev, skip + i as u32)) }) .collect(); // next_batch only returns Events for a non-empty batch. let (first, rest) = frames.split_first().expect("non-empty batch"); Some((Ok(first.clone()), (entry, next_skip, rest.to_vec(), false))) } // Terminal reached and fully drained — close the connection. ReplayOutcome::CaughtUp { .. } => None, ReplayOutcome::Gone => { // Evicted mid-stream: emit one error frame, then close. let gone = actix_web::web::Bytes::from(render_sse_frame(&ChatStreamEvent::Error( "turn history has expired (buffer evicted)".to_string(), ))); Some((Ok(gone), (entry, skip, Vec::new(), true))) } } }, ); HttpResponse::Ok() .content_type("text/event-stream") .insert_header(("Cache-Control", "no-cache")) .insert_header(("X-Accel-Buffering", "no")) .streaming(head.chain(tail)) } fn render_turn_info_frame(info: &crate::ai::turn_registry::TurnInfo) -> String { let payload = serde_json::json!({ "turn_id": info.turn_id, "file_path": info.file_path, "library_id": info.library_id, "status": info.status.as_str(), "total_events_pushed": info.total_events_pushed, "buffered_count": info.buffered_count, }); let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string()); format!("event: turn_info\ndata: {}\n\n", data) } /// DELETE /insights/chat/turn/{turn_id} — cancel a running turn. #[delete("/insights/chat/turn/{turn_id}")] pub async fn cancel_turn_handler( http_request: HttpRequest, path: web::Path, app_state: web::Data, ) -> impl Responder { let turn_id = path.into_inner(); let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("ai.chat.turn.cancel", &parent_context); span.set_attribute(KeyValue::new("turn_id", turn_id.clone())); let registry = app_state.turn_registry.clone(); let entry = match registry.get(&turn_id).await { Some(e) => e, None => { span.set_status(Status::error("turn not found")); return HttpResponse::NotFound().json(serde_json::json!({ "error": format!("turn {} not found", turn_id) })); } }; // Abort the spawned task so it stops producing events promptly. The loop // also checks `is_running()` at each iteration boundary as a graceful // backstop in case the abort lands between await points. let aborted = entry.abort(); span.set_attribute(KeyValue::new("aborted", aborted)); // Push the terminal event BEFORE flipping status: a replay reader treats a // terminal status with no buffered tail as "closed", so the Done must be // buffered first for in-progress connections to receive it. let _ = entry .push_event(ChatStreamEvent::Done { tool_calls_made: 0, iterations_used: 0, truncated: false, prompt_tokens: None, eval_tokens: None, num_ctx: None, amended_insight_id: None, backend_used: "cancelled".to_string(), model_used: "cancelled".to_string(), cancelled: true, }) .await; entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Cancelled); span.set_status(Status::Ok); HttpResponse::Ok().json(serde_json::json!({ "cancelled": true })) } #[cfg(test)] mod turn_replay_tests { use super::{cancel_turn_handler, render_indexed_frame, turn_replay_handler}; use crate::ai::insight_chat::ChatStreamEvent; use crate::ai::turn_registry::{TurnEntry, TurnStatus}; use crate::state::AppState; use actix_web::test as actix_test; use actix_web::{App, web::Data}; use std::sync::Arc; /// Serialize `AppState::test_state()` construction across the parallel /// tests in this module: each build opens ~10 DAO connections to the one /// shared `DATABASE_URL` file, and doing several at once races the WAL /// `journal_mode` switch into a spurious "database is locked". The test /// bodies themselves still run in parallel; only the open is gated. static DB_INIT: std::sync::Mutex<()> = std::sync::Mutex::new(()); fn build_state() -> Data { let _guard = DB_INIT.lock().unwrap_or_else(|p| p.into_inner()); Data::new(AppState::test_state()) } fn done(cancelled: bool) -> ChatStreamEvent { ChatStreamEvent::Done { tool_calls_made: 0, iterations_used: 1, truncated: false, prompt_tokens: Some(10), eval_tokens: Some(20), num_ctx: None, amended_insight_id: None, backend_used: "local".into(), model_used: "m".into(), cancelled, } } /// Seed a completed turn (events + terminal Done) directly in the registry. async fn seed_completed(state: &AppState, id: &str, text_events: usize) { let entry = Arc::new(TurnEntry::new(id.into(), "/p.jpg".into(), 1)); for i in 0..text_events { entry .push_event(ChatStreamEvent::TextDelta(format!("d{i}"))) .await; } entry.push_event(done(false)).await; entry.set_terminal_status(TurnStatus::Done); state.turn_registry.insert(entry).await; } #[test] fn indexed_frame_stamps_seq_without_clobbering_tool_index() { // tool_call carries its own pairing `index`; `seq` must be additive. let frame = render_indexed_frame( &ChatStreamEvent::ToolCall { index: 3, name: "geo".into(), arguments: serde_json::json!({}), }, 42, ); assert!(frame.contains("event: tool_call")); assert!(frame.contains("\"index\":3")); assert!(frame.contains("\"seq\":42")); } #[actix_rt::test] async fn replay_unknown_turn_is_404() { let state = build_state(); let app = actix_test::init_service( App::new() .service(turn_replay_handler) .app_data(state.clone()), ) .await; let req = actix_test::TestRequest::get() .uri("/insights/chat/turn/nope") .to_request(); let resp = actix_test::call_service(&app, req).await; assert_eq!(resp.status(), 404); } #[actix_rt::test] async fn replay_completed_turn_emits_turn_info_and_done_with_seq() { let state = build_state(); seed_completed(&state, "t1", 2).await; let app = actix_test::init_service( App::new() .service(turn_replay_handler) .app_data(state.clone()), ) .await; let req = actix_test::TestRequest::get() .uri("/insights/chat/turn/t1") .to_request(); let resp = actix_test::call_service(&app, req).await; assert_eq!(resp.status(), 200); let body = String::from_utf8(actix_test::read_body(resp).await.to_vec()).unwrap(); assert!(body.contains("event: turn_info")); assert!(body.contains("event: text")); assert!(body.contains("event: done")); // Events are seq-stamped 0,1 (text) and 2 (done). assert!(body.contains("\"seq\":0")); assert!(body.contains("\"seq\":2")); // Done payload carries the renamed token fields the client reads. assert!(body.contains("\"prompt_tokens\":10")); } #[actix_rt::test] async fn replay_skip_before_query_skips_applied_events() { let state = build_state(); seed_completed(&state, "t2", 3).await; // seqs 0,1,2 text; 3 done let app = actix_test::init_service( App::new() .service(turn_replay_handler) .app_data(state.clone()), ) .await; let req = actix_test::TestRequest::get() .uri("/insights/chat/turn/t2?skip_before=2") .to_request(); let resp = actix_test::call_service(&app, req).await; assert_eq!(resp.status(), 200); let body = String::from_utf8(actix_test::read_body(resp).await.to_vec()).unwrap(); // Only seq 2 (last text) and seq 3 (done) should be present. assert!(body.contains("\"seq\":2")); assert!(body.contains("\"seq\":3")); assert!(!body.contains("\"seq\":0")); assert!(!body.contains("\"seq\":1")); } #[actix_rt::test] async fn replay_evicted_index_is_410() { let state = build_state(); let entry = Arc::new(TurnEntry::new("t3".into(), "/p.jpg".into(), 1)); // Push past the cap so the front is evicted and base advances. for i in 0..600 { entry .push_event(ChatStreamEvent::TextDelta(format!("d{i}"))) .await; } entry.set_terminal_status(TurnStatus::Done); state.turn_registry.insert(entry).await; let app = actix_test::init_service( App::new() .service(turn_replay_handler) .app_data(state.clone()), ) .await; let req = actix_test::TestRequest::get() .uri("/insights/chat/turn/t3?skip_before=0") .to_request(); let resp = actix_test::call_service(&app, req).await; assert_eq!(resp.status(), 410); } #[actix_rt::test] async fn cancel_unknown_turn_is_404() { let state = build_state(); let app = actix_test::init_service( App::new() .service(cancel_turn_handler) .app_data(state.clone()), ) .await; let req = actix_test::TestRequest::delete() .uri("/insights/chat/turn/nope") .to_request(); let resp = actix_test::call_service(&app, req).await; assert_eq!(resp.status(), 404); } #[actix_rt::test] async fn cancel_running_turn_marks_cancelled_and_buffers_terminal() { let state = build_state(); let entry = Arc::new(TurnEntry::new("t4".into(), "/p.jpg".into(), 1)); entry .push_event(ChatStreamEvent::TextDelta("partial".into())) .await; state.turn_registry.insert(entry.clone()).await; let app = actix_test::init_service( App::new() .service(cancel_turn_handler) .app_data(state.clone()), ) .await; let req = actix_test::TestRequest::delete() .uri("/insights/chat/turn/t4") .to_request(); let resp = actix_test::call_service(&app, req).await; assert_eq!(resp.status(), 200); // Status flipped to Cancelled and a terminal Done(cancelled) buffered // after the existing event, so a late replay reader still completes. assert_eq!( TurnStatus::from(entry.status.load(std::sync::atomic::Ordering::Relaxed)), TurnStatus::Cancelled ); let info = entry.info().await; assert_eq!(info.total_events_pushed, 2); } }