ImageApi/src/ai/handlers.rs

use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web};
use futures::StreamExt;
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};

use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
use crate::ai::ollama::ChatMessage;
use crate::ai::{ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::models::{InsightGenerationType, InsightJobStatus, PhotoInsight};
use crate::database::{ExifDao, InsightDao};
use crate::libraries;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
use crate::utils::normalize_path;

/// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
/// ids of approved insights whose `training_messages` should be compressed
/// into trajectory form and injected into the system prompt. Empty = no
/// change in behavior. Request-level `fewshot_insight_ids` overrides this
/// when non-empty.
// const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908];
const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[];

#[derive(Debug, Deserialize)]
pub struct GeneratePhotoInsightRequest {
    pub file_path: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub system_prompt: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
    /// OpenRouter chat). Only respected by the agentic endpoint.
    #[serde(default)]
    pub backend: Option<String>,
    /// Insight ids whose stored `training_messages` should be compressed
    /// into few-shot trajectories and injected into the system prompt.
    /// Silently truncated to the first 2. When absent/empty, the handler
    /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
    #[serde(default)]
    pub fewshot_insight_ids: Option<Vec<i32>>,
    /// Active persona id for this generation. New facts are tagged with
    /// it (`entity_facts.persona_id`); recall during the agentic loop is
    /// scoped to it. Defaults to `"default"` when absent.
    #[serde(default)]
    pub persona_id: Option<String>,
}

#[derive(Debug, Deserialize)]
pub struct GetPhotoInsightQuery {
    pub path: String,
    /// Library context for this lookup. Used to pick the right content
    /// hash when the same rel_path exists under multiple roots.
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Deserialize)]
pub struct GenerationStatusQuery {
    /// If provided, look up the job by id.
    #[serde(default)]
    pub job_id: Option<i32>,
    /// If provided with `library`, look up the latest running job for this
    /// file. Used when the client doesn't have a persisted job_id.
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub library: Option<String>,
}

/// GET /insights/generation/status - Check status of a generation job.
/// Accepts either `?job_id=<id>` or `?path=<path>&library=<name>`.
#[get("/insights/generation/status")]
pub async fn generation_status_handler(
    _claims: Claims,
    query: web::Query<GenerationStatusQuery>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let ctx = opentelemetry::Context::new();

    if let Some(jid) = query.job_id {
        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        match dao.get_job_by_id(&ctx, jid) {
            Ok(Some(job)) => {
                return HttpResponse::Ok().json(GenerationStatusResponse {
                    job_id: job.id,
                    status: InsightJobStatus::parse(&job.status),
                    started_at: job.started_at,
                    completed_at: job.completed_at,
                    result_insight_id: job.result_insight_id,
                    error_message: job.error_message,
                });
            }
            Ok(None) => {
                return HttpResponse::NotFound().json(serde_json::json!({
                    "error": format!("Job {} not found", jid)
                }));
            }
            Err(e) => {
                log::error!("Failed to look up job {}: {:?}", jid, e);
                return HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": "Failed to look up job"
                }));
            }
        }
    }

    if let Some(ref fp) = query.path {
        let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
            .ok()
            .flatten()
            .unwrap_or_else(|| app_state.primary_library());
        let normalized = normalize_path(fp);

        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        match dao.get_active_job(&ctx, library.id, &normalized) {
            Ok(Some(job)) => {
                return HttpResponse::Ok().json(GenerationStatusResponse {
                    job_id: job.id,
                    status: InsightJobStatus::parse(&job.status),
                    started_at: job.started_at,
                    completed_at: job.completed_at,
                    result_insight_id: job.result_insight_id,
                    error_message: job.error_message,
                });
            }
            Ok(None) => {
                return HttpResponse::Ok().json(serde_json::json!({
                    "status": "idle",
                    "message": "No running generation job for this file"
                }));
            }
            Err(e) => {
                log::error!("Failed to look up active job for {}: {:?}", normalized, e);
                return HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": "Failed to look up active job"
                }));
            }
        }
    }

    HttpResponse::BadRequest().json(serde_json::json!({
        "error": "Provide either job_id or path query parameter"
    }))
}

#[derive(Debug, Deserialize)]
pub struct CancelGenerationRequest {
    /// If provided, cancel the specific job by id.
    #[serde(default)]
    pub job_id: Option<i32>,
    /// If provided with `library`, cancel all running jobs for this file.
    #[serde(default)]
    pub file_path: Option<String>,
    #[serde(default)]
    pub library: Option<String>,
}

/// POST /insights/generation/cancel - Cancel a running generation job.
/// Accepts either `job_id` or `file_path` + optional `library` in the body.
#[post("/insights/generation/cancel")]
pub async fn cancel_generation_handler(
    _claims: Claims,
    request: web::Json<CancelGenerationRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let ctx = opentelemetry::Context::new();

    if let Some(jid) = request.job_id {
        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        match dao.cancel_job(&ctx, jid) {
            Ok(true) => {
                let mut handles = app_state
                    .insight_job_handles
                    .lock()
                    .expect("Unable to lock InsightJobHandles");
                if let Some(handle) = handles.remove(&jid) {
                    handle.abort();
                }
                return HttpResponse::Ok().json(serde_json::json!({
                    "success": true,
                    "message": format!("Job {} cancelled", jid)
                }));
            }
            Ok(false) => {
                return HttpResponse::Ok().json(serde_json::json!({
                    "success": true,
                    "message": format!("Job {} was not running", jid)
                }));
            }
            Err(e) => {
                log::error!("Failed to cancel job {}: {:?}", jid, e);
                return HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": "Failed to cancel job"
                }));
            }
        }
    }

    if let Some(ref fp) = request.file_path {
        let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
            .ok()
            .flatten()
            .unwrap_or_else(|| app_state.primary_library());
        let normalized = normalize_path(fp);

        // Get active job ids first, then cancel in DB, then abort tasks
        let active_ids: Vec<i32> = {
            let mut dao = app_state
                .insight_job_dao
                .lock()
                .expect("Unable to lock InsightJobDao");
            let ids = dao
                .get_active_job(&ctx, library.id, &normalized)
                .ok()
                .flatten()
                .map(|j| vec![j.id])
                .unwrap_or_default();
            let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized);
            ids
        };

        if active_ids.is_empty() {
            return HttpResponse::Ok().json(serde_json::json!({
                "success": true,
                "message": "No running generation job for this file"
            }));
        }

        for jid in &active_ids {
            if let Some(handle) = app_state
                .insight_job_handles
                .lock()
                .expect("Unable to lock InsightJobHandles")
                .remove(jid)
            {
                handle.abort();
            }
        }

        return HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": format!("Cancelled {} running job(s) for {}", active_ids.len(), normalized)
        }));
    }

    HttpResponse::BadRequest().json(serde_json::json!({
        "error": "Provide either job_id or file_path in the request body"
    }))
}

#[derive(Debug, Deserialize)]
pub struct RateInsightRequest {
    pub file_path: String,
    pub approved: bool,
    /// When set, rate this specific insight version by primary key
    /// (used by the per-file history view to rate superseded versions).
    /// When omitted, the current insight for `file_path` is rated.
    #[serde(default)]
    pub insight_id: Option<i32>,
}

#[derive(Debug, Deserialize)]
pub struct ExportTrainingDataQuery {
    #[serde(default)]
    pub approved_only: Option<bool>,
}

#[derive(Debug, Serialize)]
pub struct JobIdResponse {
    pub job_id: i32,
}

#[derive(Debug, Serialize)]
pub struct GenerationStatusResponse {
    pub job_id: i32,
    pub status: InsightJobStatus,
    pub started_at: i64,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub completed_at: Option<i64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result_insight_id: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error_message: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct PhotoInsightResponse {
    pub id: i32,
    pub file_path: String,
    pub title: String,
    pub summary: String,
    pub generated_at: i64,
    pub model_version: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub approved: Option<bool>,
    pub backend: String,
    /// True when the insight was generated agentically and a chat
    /// continuation can be started against it. Drives the mobile chat button.
    pub has_training_messages: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_ctx: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_prompt: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub persona_id: Option<String>,
}

impl From<PhotoInsight> for PhotoInsightResponse {
    fn from(insight: PhotoInsight) -> Self {
        PhotoInsightResponse {
            id: insight.id,
            file_path: insight.file_path,
            title: insight.title,
            summary: insight.summary,
            generated_at: insight.generated_at,
            model_version: insight.model_version,
            prompt_eval_count: insight.prompt_eval_count,
            eval_count: insight.eval_count,
            approved: insight.approved,
            has_training_messages: insight.training_messages.is_some(),
            backend: insight.backend,
            num_ctx: insight.num_ctx,
            temperature: insight.temperature,
            top_p: insight.top_p,
            top_k: insight.top_k,
            min_p: insight.min_p,
            system_prompt: insight.system_prompt,
            persona_id: insight.persona_id,
        }
    }
}

#[derive(Debug, Serialize)]
pub struct AvailableModelsResponse {
    pub primary: ServerModels,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback: Option<ServerModels>,
}

#[derive(Debug, Serialize)]
pub struct ServerModels {
    pub url: String,
    pub models: Vec<ModelCapabilities>,
    pub default_model: String,
}

/// POST /insights/generate - Generate insight for a specific photo (async)
#[post("/insights/generate")]
pub async fn generate_insight_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate", &parent_context);

    let normalized_path = normalize_path(&request.file_path);
    let library = app_state.primary_library();
    let gen_type = InsightGenerationType::Standard;

    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
    if let Some(ref model) = request.model {
        span.set_attribute(KeyValue::new("model", model.clone()));
    }

    log::info!(
        "Manual insight generation triggered for photo: {} with model: {:?}",
        normalized_path,
        request.model
    );

    // Look up and abort any running job for this file, then cancel in DB
    let old_job_ids: Vec<i32> = {
        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        let ctx = opentelemetry::Context::new();
        let ids = dao
            .get_active_job(&ctx, library.id, &normalized_path)
            .ok()
            .flatten()
            .map(|j| vec![j.id])
            .unwrap_or_default();
        let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized_path);
        ids
    };
    for jid in &old_job_ids {
        if let Some(handle) = app_state
            .insight_job_handles
            .lock()
            .expect("Unable to lock InsightJobHandles")
            .remove(jid)
        {
            handle.abort();
        }
    }

    let job_id = {
        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        match dao.create_job(
            &opentelemetry::Context::new(),
            library.id,
            &normalized_path,
            gen_type,
        ) {
            Ok(id) => id,
            Err(e) => {
                log::error!("Failed to create generation job: {:?}", e);
                span.set_status(Status::error("Failed to create generation job"));
                return HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": "Failed to create generation job"
                }));
            }
        }
    };

    // Spawn background task with timeout
    let generator = app_state.insight_generator.clone();
    let job_dao = app_state.insight_job_dao.clone();
    let job_handles = app_state.insight_job_handles.clone();
    let path = normalized_path.clone();

    let handle = tokio::spawn(async move {
        let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
            .ok()
            .and_then(|v| v.parse().ok())
            .unwrap_or(120);

        let path_for_task = path.clone();
        let generator_for_task = generator.clone();
        let result = tokio::task::spawn(async move {
            tokio::time::timeout(
                std::time::Duration::from_secs(timeout_secs),
                generator_for_task.generate_insight_for_photo_with_config(
                    &path_for_task,
                    request.model.clone(),
                    request.system_prompt.clone(),
                    request.num_ctx,
                    request.temperature,
                    request.top_p,
                    request.top_k,
                    request.min_p,
                ),
            )
            .await
        })
        .await;

        let ctx = opentelemetry::Context::new();
        let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");

        match result {
            Ok(Ok(Ok(()))) => {
                let mut insight_dao = generator
                    .insight_dao()
                    .lock()
                    .expect("Unable to lock InsightDao");
                let insight_id = insight_dao
                    .get_insight(&ctx, &path)
                    .ok()
                    .flatten()
                    .map(|i| i.id);
                if let Some(id) = insight_id {
                    if let Err(e) = dao.complete_job(&ctx, job_id, id) {
                        log::error!("Failed to mark job {} as completed: {:?}", job_id, e);
                    }
                } else if let Err(e) = dao.fail_job(&ctx, job_id, "generation returned no insight")
                {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, e);
                }
            }
            Ok(Ok(Err(e))) => {
                log::error!("Insight generation failed for {}: {:?}", path, e);
                if let Err(err) = dao.fail_job(&ctx, job_id, &format!("{:?}", e)) {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
                }
            }
            Ok(Err(_)) => {
                log::error!(
                    "Insight generation timed out for {} after {}s",
                    path,
                    timeout_secs
                );
                if let Err(err) =
                    dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs))
                {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
                }
            }
            Err(_) => {
                log::error!("Insight generation task panicked for {}", path);
                if let Err(err) = dao.fail_job(&ctx, job_id, "generation task panicked") {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
                }
            }
        }

        // Remove handle from map on completion
        let mut handles = job_handles
            .lock()
            .expect("Unable to lock InsightJobHandles");
        handles.remove(&job_id);
    });

    // Store abort handle
    {
        let mut handles = app_state
            .insight_job_handles
            .lock()
            .expect("Unable to lock InsightJobHandles");
        handles.insert(job_id, handle.abort_handle());
    }

    span.set_attribute(KeyValue::new("job_id", job_id as i64));
    span.set_status(Status::Ok);
    HttpResponse::Accepted().json(JobIdResponse { job_id })
}

/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
#[get("/insights")]
pub async fn get_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    app_state: web::Data<AppState>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
    exif_dao: web::Data<std::sync::Mutex<Box<dyn ExifDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::debug!("Fetching insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();

    // Expand to rel_paths sharing content so an insight generated under
    // library 1 still shows when the same photo is viewed from library 2.
    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
    let sibling_paths = {
        let mut exif = exif_dao.lock().expect("Unable to lock ExifDao");
        exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path)
            .unwrap_or_else(|_| vec![normalized_path.clone()])
    };

    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_insight_for_paths(&otel_context, &sibling_paths) {
        Ok(Some(insight)) => HttpResponse::Ok().json(PhotoInsightResponse::from(insight)),
        Ok(None) => HttpResponse::NotFound().json(serde_json::json!({
            "error": "Insight not found"
        })),
        Err(e) => {
            log::error!("Failed to fetch insight ({}): {:?}", &query.path, e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insight: {:?}", e)
            }))
        }
    }
}

/// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request)
#[delete("/insights")]
pub async fn delete_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::info!("Deleting insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.delete_insight(&otel_context, &normalized_path) {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": "Insight deleted successfully"
        })),
        Err(e) => {
            log::error!("Failed to delete insight: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to delete insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights/all - Get all insights
#[get("/insights/all")]
pub async fn get_all_insights_handler(
    _claims: Claims,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    log::debug!("Fetching all insights");

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_all_insights(&otel_context) {
        Ok(insights) => {
            let responses: Vec<PhotoInsightResponse> = insights
                .into_iter()
                .map(PhotoInsightResponse::from)
                .collect();

            HttpResponse::Ok().json(responses)
        }
        Err(e) => {
            log::error!("Failed to fetch all insights: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insights: {:?}", e)
            }))
        }
    }
}

/// GET /insights/history?path=/path/to/photo.jpg - Get all insight versions
/// for a single photo (current plus previously generated/superseded ones),
/// newest first. Backs the per-file insight history view.
#[get("/insights/history")]
pub async fn get_insight_history_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::debug!("Fetching insight history for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_insight_history(&otel_context, &normalized_path) {
        Ok(insights) => {
            let responses: Vec<PhotoInsightResponse> = insights
                .into_iter()
                .map(PhotoInsightResponse::from)
                .collect();

            HttpResponse::Ok().json(responses)
        }
        Err(e) => {
            log::error!("Failed to fetch insight history ({}): {:?}", &query.path, e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insight history: {:?}", e)
            }))
        }
    }
}

/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop (async)
#[post("/insights/generate/agentic")]
pub async fn generate_agentic_insight_handler(
    http_request: HttpRequest,
    claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);

    let normalized_path = normalize_path(&request.file_path);
    let library = app_state.primary_library();
    let gen_type = InsightGenerationType::Agentic;

    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
    if let Some(ref model) = request.model {
        span.set_attribute(KeyValue::new("model", model.clone()));
    }
    if let Some(ref backend) = request.backend {
        span.set_attribute(KeyValue::new("backend", backend.clone()));
    }

    log::info!(
        "Agentic insight generation triggered for photo: {} with model: {:?}",
        normalized_path,
        request.model
    );

    // Look up and abort any running job for this file, then cancel in DB
    let old_job_ids: Vec<i32> = {
        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        let ctx = opentelemetry::Context::new();
        let ids = dao
            .get_active_job(&ctx, library.id, &normalized_path)
            .ok()
            .flatten()
            .map(|j| vec![j.id])
            .unwrap_or_default();
        let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized_path);
        ids
    };
    for jid in &old_job_ids {
        if let Some(handle) = app_state
            .insight_job_handles
            .lock()
            .expect("Unable to lock InsightJobHandles")
            .remove(jid)
        {
            handle.abort();
        }
    }

    let job_id = {
        let mut dao = app_state
            .insight_job_dao
            .lock()
            .expect("Unable to lock InsightJobDao");
        match dao.create_job(
            &opentelemetry::Context::new(),
            library.id,
            &normalized_path,
            gen_type,
        ) {
            Ok(id) => id,
            Err(e) => {
                log::error!("Failed to create agentic generation job: {:?}", e);
                span.set_status(Status::error("Failed to create generation job"));
                return HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": "Failed to create generation job"
                }));
            }
        }
    };

    // Resolve few-shot ids for the background task
    let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
        Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
        _ => DEFAULT_FEWSHOT_INSIGHT_IDS
            .iter()
            .take(2)
            .copied()
            .collect(),
    };

    let fewshot_examples: Vec<Vec<ChatMessage>> = {
        let otel_context = opentelemetry::Context::new();
        let mut dao = app_state
            .insight_chat
            .insight_dao()
            .lock()
            .expect("Unable to lock InsightDao");
        fewshot_ids
            .iter()
            .filter_map(|id| {
                let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
                let json = insight.training_messages?;
                match serde_json::from_str::<Vec<ChatMessage>>(&json) {
                    Ok(msgs) => Some(msgs),
                    Err(e) => {
                        log::warn!(
                            "Few-shot insight {} has malformed training_messages: {}",
                            id,
                            e
                        );
                        None
                    }
                }
            })
            .collect()
    };

    let user_id = claims.sub.parse::<i32>().unwrap_or(1);
    let persona_id = request
        .persona_id
        .clone()
        .filter(|s| !s.trim().is_empty())
        .unwrap_or_else(|| "default".to_string());

    // Server-side persona resolution: an explicit client `system_prompt`
    // wins; otherwise the persona's stored prompt from the persona store;
    // otherwise None and `build_system_content` applies its neutral
    // default. Without the lookup, a request carrying only `persona_id`
    // silently generated in the default voice.
    let system_prompt = request
        .system_prompt
        .clone()
        .filter(|s| !s.trim().is_empty())
        .or_else(|| {
            app_state
                .insight_generator
                .persona_system_prompt(user_id, &persona_id)
        });

    let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(12);

    // Spawn background task with timeout
    let generator = app_state.insight_generator.clone();
    let job_dao = app_state.insight_job_dao.clone();
    let job_handles = app_state.insight_job_handles.clone();
    let path = normalized_path.clone();

    let handle = tokio::spawn(async move {
        let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
            .ok()
            .and_then(|v| v.parse().ok())
            .unwrap_or(180);

        let path_for_task = path.clone();
        let generator_for_task = generator.clone();
        let result = tokio::task::spawn(async move {
            tokio::time::timeout(
                std::time::Duration::from_secs(timeout_secs),
                generator_for_task.generate_agentic_insight_for_photo(
                    &path_for_task,
                    request.model.clone(),
                    system_prompt,
                    request.num_ctx,
                    request.temperature,
                    request.top_p,
                    request.top_k,
                    request.min_p,
                    max_iterations,
                    request.backend.clone(),
                    fewshot_examples,
                    fewshot_ids,
                    user_id,
                    persona_id,
                ),
            )
            .await
        })
        .await;

        let ctx = opentelemetry::Context::new();
        let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");

        match result {
            Ok(Ok(Ok((Some(insight_id), _, _)))) => {
                if let Err(e) = dao.complete_job(&ctx, job_id, insight_id) {
                    log::error!("Failed to mark job {} as completed: {:?}", job_id, e);
                }
            }
            Ok(Ok(Ok((None, _, _)))) => {
                if let Err(e) = dao.fail_job(&ctx, job_id, "agentic generation returned no insight")
                {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, e);
                }
            }
            Ok(Ok(Err(e))) => {
                log::error!("Agentic insight generation failed for {}: {:?}", path, e);
                if let Err(err) = dao.fail_job(&ctx, job_id, &format!("{:?}", e)) {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
                }
            }
            Ok(Err(_)) => {
                log::error!(
                    "Agentic insight generation timed out for {} after {}s",
                    path,
                    timeout_secs
                );
                if let Err(err) =
                    dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs))
                {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
                }
            }
            Err(_) => {
                log::error!("Agentic insight generation task panicked for {}", path);
                if let Err(err) = dao.fail_job(&ctx, job_id, "generation task panicked") {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
                }
            }
        }

        // Remove handle from map on completion
        let mut handles = job_handles
            .lock()
            .expect("Unable to lock InsightJobHandles");
        handles.remove(&job_id);
    });

    // Store abort handle
    {
        let mut handles = app_state
            .insight_job_handles
            .lock()
            .expect("Unable to lock InsightJobHandles");
        handles.insert(job_id, handle.abort_handle());
    }

    span.set_attribute(KeyValue::new("job_id", job_id as i64));
    span.set_status(Status::Ok);
    HttpResponse::Accepted().json(JobIdResponse { job_id })
}

/// GET /insights/models - Local-backend models with capabilities. Returns
/// Ollama servers when `LLM_BACKEND=ollama` (default), or llama-swap slots
/// when `LLM_BACKEND=llamacpp`. Same envelope shape either way so the
/// client picker doesn't have to branch on backend kind.
///
/// For llama-swap: `models` comes verbatim from `LLAMA_SWAP_ALLOWED_MODELS`
/// (no live `/v1/models` probe), `has_vision` is true only for the
/// configured `LLAMA_SWAP_VISION_MODEL` slot id, and `has_tool_calling` is
/// reported as true for every slot (llama-server is launched with `--jinja`
/// by convention — a misconfigured slot surfaces as a chat-call error).
#[get("/insights/models")]
pub async fn get_available_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    log::debug!("Fetching available models with capabilities");

    if crate::ai::local_backend_is_llamacpp()
        && let Some(lc) = app_state.llamacpp.as_ref()
    {
        let models: Vec<ModelCapabilities> = app_state
            .llamacpp_allowed_models
            .iter()
            .map(|name| ModelCapabilities {
                name: name.clone(),
                has_vision: true,
                has_tool_calling: true,
            })
            .collect();
        let primary = ServerModels {
            url: lc.base_url.clone(),
            models,
            default_model: lc.primary_model.clone(),
        };
        return HttpResponse::Ok().json(AvailableModelsResponse {
            primary,
            fallback: None,
        });
    }

    let ollama_client = &app_state.ollama;

    // Fetch models with capabilities from primary server
    let primary_models =
        match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await {
            Ok(models) => models,
            Err(e) => {
                log::warn!("Failed to fetch models from primary server: {:?}", e);
                vec![]
            }
        };

    let primary = ServerModels {
        url: ollama_client.primary_url.clone(),
        models: primary_models,
        default_model: ollama_client.primary_model.clone(),
    };

    // Fetch models with capabilities from fallback server if configured
    let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
        match OllamaClient::list_models_with_capabilities(fallback_url).await {
            Ok(models) => Some(ServerModels {
                url: fallback_url.clone(),
                models,
                default_model: ollama_client
                    .fallback_model
                    .clone()
                    .unwrap_or_else(|| ollama_client.primary_model.clone()),
            }),
            Err(e) => {
                log::warn!("Failed to fetch models from fallback server: {:?}", e);
                None
            }
        }
    } else {
        None
    };

    let response = AvailableModelsResponse { primary, fallback };

    HttpResponse::Ok().json(response)
}

#[derive(Debug, Serialize)]
pub struct OpenRouterModelsResponse {
    pub models: Vec<String>,
    pub default_model: Option<String>,
    pub configured: bool,
}

/// GET /insights/openrouter/models - Curated OpenRouter model ids exposed
/// to clients for the hybrid backend. Returned verbatim from
/// `OPENROUTER_ALLOWED_MODELS`; no live call to OpenRouter.
#[get("/insights/openrouter/models")]
pub async fn get_openrouter_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    let configured = app_state.openrouter.is_some();
    let default_model = app_state
        .openrouter
        .as_ref()
        .map(|c| c.primary_model.clone());
    let response = OpenRouterModelsResponse {
        models: app_state.openrouter_allowed_models.clone(),
        default_model,
        configured,
    };
    HttpResponse::Ok().json(response)
}

/// POST /insights/rate - Rate an insight (thumbs up/down for training data)
#[post("/insights/rate")]
pub async fn rate_insight_handler(
    _claims: Claims,
    request: web::Json<RateInsightRequest>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&request.file_path);
    log::info!(
        "Rating insight for {} (id={:?}): approved={}",
        normalized_path,
        request.insight_id,
        request.approved
    );

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    // Rate a specific version by id when provided (history view), otherwise
    // rate the current insight for the path.
    let result = match request.insight_id {
        Some(id) => dao.rate_insight_by_id(&otel_context, id, request.approved),
        None => dao.rate_insight(&otel_context, &normalized_path, request.approved),
    };

    match result {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": "Insight rated successfully"
        })),
        Err(e) => {
            log::error!("Failed to rate insight: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to rate insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights/training-data - Export approved training data as JSONL
#[get("/insights/training-data")]
pub async fn export_training_data_handler(
    _claims: Claims,
    query: web::Query<ExportTrainingDataQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let approved_only = query.approved_only.unwrap_or(true);
    log::info!("Exporting training data (approved_only={})", approved_only);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    let insights = if approved_only {
        dao.get_approved_insights(&otel_context)
    } else {
        dao.get_all_insights(&otel_context)
    };

    match insights {
        Ok(insights) => {
            let mut jsonl = String::new();
            for insight in &insights {
                if let Some(ref messages) = insight.training_messages {
                    let entry = serde_json::json!({
                        "file_path": insight.file_path,
                        "model_version": insight.model_version,
                        "generated_at": insight.generated_at,
                        "title": insight.title,
                        "summary": insight.summary,
                        "messages": serde_json::from_str::<serde_json::Value>(messages)
                            .unwrap_or(serde_json::Value::Null),
                    });
                    jsonl.push_str(&entry.to_string());
                    jsonl.push('\n');
                }
            }

            HttpResponse::Ok()
                .content_type("application/jsonl")
                .insert_header((
                    "Content-Disposition",
                    "attachment; filename=\"training_data.jsonl\"",
                ))
                .body(jsonl)
        }
        Err(e) => {
            log::error!("Failed to export training data: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to export training data: {:?}", e)
            }))
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ChatTurnHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    pub user_message: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub backend: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    #[serde(default)]
    pub max_iterations: Option<usize>,
    /// Per-turn system-prompt override. Ephemeral in append mode,
    /// persisted in amend / regenerate mode. See ChatTurnRequest for
    /// semantics. Also seeds the bootstrap path when no insight exists.
    #[serde(default)]
    pub system_prompt: Option<String>,
    /// Active persona id for this turn. New facts/recalls scope to it.
    /// Defaults to `"default"` when missing.
    #[serde(default)]
    pub persona_id: Option<String>,
    #[serde(default)]
    pub amend: bool,
    /// When true, force the bootstrap path even if an insight already
    /// exists: flip the existing row(s) to `is_current=false` and create
    /// a new insight row from this turn. Takes precedence over `amend`.
    /// Collapses to a normal bootstrap when no insight exists.
    #[serde(default)]
    pub regenerate: bool,
}

#[derive(Debug, Serialize)]
pub struct ChatTurnHttpResponse {
    pub assistant_message: String,
    pub tool_calls_made: usize,
    pub iterations_used: usize,
    pub truncated: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub amended_insight_id: Option<i32>,
    pub backend: String,
    pub model: String,
}

/// POST /insights/chat — submit a follow-up turn against an existing insight.
#[post("/insights/chat")]
pub async fn chat_turn_handler(
    http_request: HttpRequest,
    claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
    span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));

    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    // Service-token claims (sub: "service:apollo") fall through to
    // user_id=1 — the operator convention. Mobile/web clients have a
    // numeric sub. Required for the entity_facts composite FK.
    let user_id = claims.sub.parse::<i32>().unwrap_or(1);

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        user_id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        system_prompt: request.system_prompt.clone(),
        persona_id: request.persona_id.clone(),
        amend: request.amend,
        regenerate: request.regenerate,
    };

    match app_state.insight_chat.chat_turn(chat_req).await {
        Ok(result) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(ChatTurnHttpResponse {
                assistant_message: result.assistant_message,
                tool_calls_made: result.tool_calls_made,
                iterations_used: result.iterations_used,
                truncated: result.truncated,
                prompt_eval_count: result.prompt_eval_count,
                eval_count: result.eval_count,
                amended_insight_id: result.amended_insight_id,
                backend: result.backend_used,
                model: result.model_used,
            })
        }
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat turn failed: {}", msg);
            span.set_status(Status::error(msg.clone()));

            // Map well-known errors to client-facing 4xx codes.
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("user_message")
                || msg.contains("unknown backend")
                || msg.contains("switching from local to hybrid")
                || msg.contains("hybrid backend unavailable")
            {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ChatHistoryQuery {
    pub path: String,
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct ChatHistoryHttpResponse {
    pub messages: Vec<RenderedHistoryMessage>,
    pub turn_count: usize,
    pub model_version: String,
    pub backend: String,
}

#[derive(Debug, Serialize)]
pub struct RenderedHistoryMessage {
    pub role: String,
    pub content: String,
    pub is_initial: bool,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<HistoryToolInvocation>,
}

#[derive(Debug, Serialize)]
pub struct HistoryToolInvocation {
    pub name: String,
    pub arguments: serde_json::Value,
    pub result: String,
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub result_truncated: bool,
}

#[derive(Debug, Deserialize)]
pub struct ChatRewindHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    /// 0-based index into the rendered transcript. The message at this
    /// index, and everything after it, is discarded. Must be > 0 — the
    /// initial user message is protected.
    pub discard_from_rendered_index: usize,
}

/// POST /insights/chat/rewind — truncate the stored conversation so the
/// rendered message at `discard_from_rendered_index` (and everything after)
/// is removed. Use when a user wants to retry a turn with a different
/// prompt without prior replies poisoning context.
#[post("/insights/chat/rewind")]
pub async fn chat_rewind_handler(
    _claims: Claims,
    request: web::Json<ChatRewindHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    match app_state
        .insight_chat
        .rewind_history(
            library.id,
            &request.file_path,
            request.discard_from_rendered_index,
        )
        .await
    {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })),
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat rewind failed: {}", msg);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("cannot discard the initial") || msg.contains("out of range") {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

/// GET /insights/chat/history — return the rendered transcript for a photo.
#[get("/insights/chat/history")]
pub async fn chat_history_handler(
    _claims: Claims,
    query: web::Query<ChatHistoryQuery>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    // library_id scopes the lookup so a regenerate on this library
    // isn't shadowed by an untouched is_current=true row in another
    // library for the same rel_path. load_history falls back to the
    // cross-library lookup when the scoped one misses, so a photo
    // with no insight in this library but one in another still
    // surfaces (the "show this photo's primary insight" merge case).
    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());

    match app_state.insight_chat.load_history(library.id, &query.path) {
        Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
            messages: view
                .messages
                .into_iter()
                .map(|m| RenderedHistoryMessage {
                    role: m.role,
                    content: m.content,
                    is_initial: m.is_initial,
                    tools: m
                        .tools
                        .into_iter()
                        .map(|t| HistoryToolInvocation {
                            name: t.name,
                            arguments: t.arguments,
                            result: t.result,
                            result_truncated: t.result_truncated,
                        })
                        .collect(),
                })
                .collect(),
            turn_count: view.turn_count,
            model_version: view.model_version,
            backend: view.backend,
        }),
        Err(e) => {
            let msg = format!("{}", e);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

/// POST /insights/chat/stream — streaming variant of /insights/chat.
/// Returns `text/event-stream` with one event per chat stream event.
#[post("/insights/chat/stream")]
pub async fn chat_stream_handler(
    claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> HttpResponse {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    // Service-token sub falls through to user_id=1 (see chat_turn_handler).
    let user_id = claims.sub.parse::<i32>().unwrap_or(1);

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        user_id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        system_prompt: request.system_prompt.clone(),
        persona_id: request.persona_id.clone(),
        amend: request.amend,
        regenerate: request.regenerate,
    };

    let service = app_state.insight_chat.clone();
    let events = service.chat_turn_stream(chat_req);

    // Map ChatStreamEvent → SSE frame bytes.
    let sse_stream = futures::stream::StreamExt::map(events, |ev| {
        let frame = render_sse_frame(&ev);
        Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame))
    });

    HttpResponse::Ok()
        .content_type("text/event-stream")
        .insert_header(("Cache-Control", "no-cache"))
        .insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering
        .streaming(sse_stream)
}

fn render_sse_frame(ev: &ChatStreamEvent) -> String {
    let (event_name, payload) = sse_event_payload(ev);
    let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
    format!("event: {}\ndata: {}\n\n", event_name, data)
}

/// Like `render_sse_frame`, but stamps the event's absolute sequence number
/// (`seq`) into the payload so reconnecting replay clients can compute
/// `skip_before` precisely. `seq` is distinct from the tool-pairing `index`
/// already carried by `tool_call`/`tool_result`.
fn render_indexed_frame(ev: &ChatStreamEvent, seq: u32) -> String {
    let (event_name, mut payload) = sse_event_payload(ev);
    if let serde_json::Value::Object(map) = &mut payload {
        map.insert("seq".to_string(), serde_json::json!(seq));
    }
    let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
    format!("event: {}\ndata: {}\n\n", event_name, data)
}

fn sse_event_payload(ev: &ChatStreamEvent) -> (&'static str, serde_json::Value) {
    match ev {
        ChatStreamEvent::IterationStart { n, max } => {
            ("iteration_start", serde_json::json!({ "n": n, "max": max }))
        }
        ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})),
        ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })),
        ChatStreamEvent::ToolCall {
            index,
            name,
            arguments,
        } => (
            "tool_call",
            serde_json::json!({ "index": index, "name": name, "arguments": arguments }),
        ),
        ChatStreamEvent::ToolResult {
            index,
            name,
            result,
            result_truncated,
        } => (
            "tool_result",
            serde_json::json!({
                "index": index,
                "name": name,
                "result": result,
                "result_truncated": result_truncated,
            }),
        ),
        ChatStreamEvent::Done {
            tool_calls_made,
            iterations_used,
            truncated,
            prompt_tokens,
            eval_tokens,
            num_ctx,
            amended_insight_id,
            backend_used,
            model_used,
            cancelled,
        } => (
            "done",
            serde_json::json!({
                "tool_calls_made": tool_calls_made,
                "iterations_used": iterations_used,
                "truncated": truncated,
                "prompt_tokens": prompt_tokens,
                "eval_tokens": eval_tokens,
                "num_ctx": num_ctx,
                "amended_insight_id": amended_insight_id,
                "backend": backend_used,
                "model": model_used,
                "cancelled": cancelled,
            }),
        ),
        // Apollo's frontend SSE consumer (and its free-chat backend, which
        // is the de-facto convention) listens for `error_message`. Emitting
        // `error` here meant any failure on the photo-chat path (e.g.
        // "no insight found for path") was silently dropped, leaving an
        // empty assistant bubble with no clue why the turn died.
        ChatStreamEvent::Error(msg) => ("error_message", serde_json::json!({ "message": msg })),
    }
}

/// POST /insights/chat/turn — async turn dispatch. Returns turn_id immediately,
/// client then polls GET /insights/chat/turn/{turn_id} for SSE replay.
#[post("/insights/chat/turn")]
pub async fn turn_async_handler(
    http_request: HttpRequest,
    claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context);
    span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));

    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    let user_id = claims.sub.parse::<i32>().unwrap_or(1);

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        user_id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        system_prompt: request.system_prompt.clone(),
        persona_id: request.persona_id.clone(),
        amend: request.amend,
        regenerate: request.regenerate,
    };

    let service = app_state.insight_chat.clone();
    let registry = app_state.turn_registry.clone();

    let turn_id = service.chat_turn_async(registry, chat_req).await;
    span.set_attribute(KeyValue::new("turn_id", turn_id.clone()));
    span.set_status(Status::Ok);
    HttpResponse::Accepted().json(serde_json::json!({
        "turn_id": turn_id,
        "status": "running"
    }))
}

/// Query params for the SSE replay stream.
#[derive(Debug, Deserialize)]
pub struct ReplayQuery {
    /// Replay events from this absolute sequence number (`seq`) onward.
    /// Absent or 0 replays from the beginning. On reconnect the client sends
    /// the `seq` of the last event it applied, plus one.
    pub skip_before: Option<u32>,
}

/// GET /insights/chat/turn/{turn_id} — SSE replay stream.
#[get("/insights/chat/turn/{turn_id}")]
pub async fn turn_replay_handler(
    http_request: HttpRequest,
    path: web::Path<String>,
    query: web::Query<ReplayQuery>,
    app_state: web::Data<AppState>,
) -> HttpResponse {
    use crate::ai::turn_registry::ReplayOutcome;

    let turn_id = path.into_inner();
    let skip_before = query.skip_before.unwrap_or(0);

    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("ai.chat.turn.replay", &parent_context);
    span.set_attribute(KeyValue::new("turn_id", turn_id.clone()));
    span.set_attribute(KeyValue::new("skip_before", skip_before as i64));

    let registry = app_state.turn_registry.clone();
    let entry = match registry.get(&turn_id).await {
        Some(e) => e,
        None => {
            span.set_status(Status::error("turn not found"));
            return HttpResponse::NotFound().json(serde_json::json!({
                "error": format!("turn {} not found", turn_id)
            }));
        }
    };

    let info = entry.info().await;
    span.set_attribute(KeyValue::new("status", info.status.as_str()));
    span.set_attribute(KeyValue::new(
        "event_count",
        info.total_events_pushed as i64,
    ));
    let turn_info_frame = render_turn_info_frame(&info);

    // Initial buffered batch: events produced before this connection attached.
    // Stamp each frame with its absolute `seq` so the client can track
    // `skip_before` precisely across reconnects.
    let (initial_frames, start_skip) = match entry.replay_from(skip_before).await {
        ReplayOutcome::Gone => {
            span.set_status(Status::error("buffer evicted"));
            return HttpResponse::Gone().json(serde_json::json!({
                "error": "turn history has expired (buffer evicted)"
            }));
        }
        ReplayOutcome::CaughtUp { next_skip } => (Vec::new(), next_skip),
        ReplayOutcome::Events { events, next_skip } => {
            let frames: Vec<actix_web::web::Bytes> = events
                .into_iter()
                .enumerate()
                .map(|(i, ev)| {
                    actix_web::web::Bytes::from(render_indexed_frame(&ev, skip_before + i as u32))
                })
                .collect();
            (frames, next_skip)
        }
    };

    span.set_status(Status::Ok);
    let running = entry.is_running();

    // Head: the `turn_info` event followed by any already-buffered events.
    let head = futures::stream::once(async move {
        Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(turn_info_frame))
    })
    .chain(futures::stream::iter(
        initial_frames.into_iter().map(Ok::<_, actix_web::Error>),
    ));

    if !running {
        // Completed turn: every event — including the terminal Done/Error — is
        // already in the buffered batch above. Emit it and close.
        return HttpResponse::Ok()
            .content_type("text/event-stream")
            .insert_header(("Cache-Control", "no-cache"))
            .insert_header(("X-Accel-Buffering", "no"))
            .streaming(head);
    }

    // In-progress turn: after the head, wait for new events. `next_batch`
    // drains every buffered event (including the terminal one) before it
    // reports the turn finished, so the final Done/Error is never dropped;
    // CaughtUp then closes the stream by returning None.
    let tail = futures::stream::unfold(
        (
            entry,
            start_skip,
            Vec::<actix_web::web::Bytes>::new(),
            false,
        ),
        |(entry, skip, pending, finished)| async move {
            // Flush queued frames from a previous multi-event batch first.
            if let Some((first, rest)) = pending.split_first() {
                return Some((Ok(first.clone()), (entry, skip, rest.to_vec(), finished)));
            }
            if finished {
                return None;
            }

            match entry.next_batch(skip).await {
                ReplayOutcome::Events { events, next_skip } => {
                    let frames: Vec<actix_web::web::Bytes> = events
                        .into_iter()
                        .enumerate()
                        .map(|(i, ev)| {
                            actix_web::web::Bytes::from(render_indexed_frame(&ev, skip + i as u32))
                        })
                        .collect();
                    // next_batch only returns Events for a non-empty batch.
                    let (first, rest) = frames.split_first().expect("non-empty batch");
                    Some((Ok(first.clone()), (entry, next_skip, rest.to_vec(), false)))
                }
                // Terminal reached and fully drained — close the connection.
                ReplayOutcome::CaughtUp { .. } => None,
                ReplayOutcome::Gone => {
                    // Evicted mid-stream: emit one error frame, then close.
                    let gone =
                        actix_web::web::Bytes::from(render_sse_frame(&ChatStreamEvent::Error(
                            "turn history has expired (buffer evicted)".to_string(),
                        )));
                    Some((Ok(gone), (entry, skip, Vec::new(), true)))
                }
            }
        },
    );

    HttpResponse::Ok()
        .content_type("text/event-stream")
        .insert_header(("Cache-Control", "no-cache"))
        .insert_header(("X-Accel-Buffering", "no"))
        .streaming(head.chain(tail))
}

fn render_turn_info_frame(info: &crate::ai::turn_registry::TurnInfo) -> String {
    let payload = serde_json::json!({
        "turn_id": info.turn_id,
        "file_path": info.file_path,
        "library_id": info.library_id,
        "status": info.status.as_str(),
        "total_events_pushed": info.total_events_pushed,
        "buffered_count": info.buffered_count,
    });
    let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
    format!("event: turn_info\ndata: {}\n\n", data)
}

/// DELETE /insights/chat/turn/{turn_id} — cancel a running turn.
#[delete("/insights/chat/turn/{turn_id}")]
pub async fn cancel_turn_handler(
    http_request: HttpRequest,
    path: web::Path<String>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let turn_id = path.into_inner();

    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("ai.chat.turn.cancel", &parent_context);
    span.set_attribute(KeyValue::new("turn_id", turn_id.clone()));

    let registry = app_state.turn_registry.clone();
    let entry = match registry.get(&turn_id).await {
        Some(e) => e,
        None => {
            span.set_status(Status::error("turn not found"));
            return HttpResponse::NotFound().json(serde_json::json!({
                "error": format!("turn {} not found", turn_id)
            }));
        }
    };

    // Abort the spawned task so it stops producing events promptly. The loop
    // also checks `is_running()` at each iteration boundary as a graceful
    // backstop in case the abort lands between await points.
    let aborted = entry.abort();
    span.set_attribute(KeyValue::new("aborted", aborted));

    // Push the terminal event BEFORE flipping status: a replay reader treats a
    // terminal status with no buffered tail as "closed", so the Done must be
    // buffered first for in-progress connections to receive it.
    let _ = entry
        .push_event(ChatStreamEvent::Done {
            tool_calls_made: 0,
            iterations_used: 0,
            truncated: false,
            prompt_tokens: None,
            eval_tokens: None,
            num_ctx: None,
            amended_insight_id: None,
            backend_used: "cancelled".to_string(),
            model_used: "cancelled".to_string(),
            cancelled: true,
        })
        .await;
    entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Cancelled);
    span.set_status(Status::Ok);

    HttpResponse::Ok().json(serde_json::json!({
        "cancelled": true
    }))
}

#[cfg(test)]
mod turn_replay_tests {
    use super::{cancel_turn_handler, render_indexed_frame, turn_replay_handler};
    use crate::ai::insight_chat::ChatStreamEvent;
    use crate::ai::turn_registry::{TurnEntry, TurnStatus};
    use crate::state::AppState;
    use actix_web::test as actix_test;
    use actix_web::{App, web::Data};
    use std::sync::Arc;

    /// Serialize `AppState::test_state()` construction across the parallel
    /// tests in this module: each build opens ~10 DAO connections to the one
    /// shared `DATABASE_URL` file, and doing several at once races the WAL
    /// `journal_mode` switch into a spurious "database is locked". The test
    /// bodies themselves still run in parallel; only the open is gated.
    static DB_INIT: std::sync::Mutex<()> = std::sync::Mutex::new(());

    fn build_state() -> Data<AppState> {
        let _guard = DB_INIT.lock().unwrap_or_else(|p| p.into_inner());
        Data::new(AppState::test_state())
    }

    fn done(cancelled: bool) -> ChatStreamEvent {
        ChatStreamEvent::Done {
            tool_calls_made: 0,
            iterations_used: 1,
            truncated: false,
            prompt_tokens: Some(10),
            eval_tokens: Some(20),
            num_ctx: None,
            amended_insight_id: None,
            backend_used: "local".into(),
            model_used: "m".into(),
            cancelled,
        }
    }

    /// Seed a completed turn (events + terminal Done) directly in the registry.
    async fn seed_completed(state: &AppState, id: &str, text_events: usize) {
        let entry = Arc::new(TurnEntry::new(id.into(), "/p.jpg".into(), 1));
        for i in 0..text_events {
            entry
                .push_event(ChatStreamEvent::TextDelta(format!("d{i}")))
                .await;
        }
        entry.push_event(done(false)).await;
        entry.set_terminal_status(TurnStatus::Done);
        state.turn_registry.insert(entry).await;
    }

    #[test]
    fn indexed_frame_stamps_seq_without_clobbering_tool_index() {
        // tool_call carries its own pairing `index`; `seq` must be additive.
        let frame = render_indexed_frame(
            &ChatStreamEvent::ToolCall {
                index: 3,
                name: "geo".into(),
                arguments: serde_json::json!({}),
            },
            42,
        );
        assert!(frame.contains("event: tool_call"));
        assert!(frame.contains("\"index\":3"));
        assert!(frame.contains("\"seq\":42"));
    }

    #[actix_rt::test]
    async fn replay_unknown_turn_is_404() {
        let state = build_state();
        let app = actix_test::init_service(
            App::new()
                .service(turn_replay_handler)
                .app_data(state.clone()),
        )
        .await;

        let req = actix_test::TestRequest::get()
            .uri("/insights/chat/turn/nope")
            .to_request();
        let resp = actix_test::call_service(&app, req).await;
        assert_eq!(resp.status(), 404);
    }

    #[actix_rt::test]
    async fn replay_completed_turn_emits_turn_info_and_done_with_seq() {
        let state = build_state();
        seed_completed(&state, "t1", 2).await;

        let app = actix_test::init_service(
            App::new()
                .service(turn_replay_handler)
                .app_data(state.clone()),
        )
        .await;

        let req = actix_test::TestRequest::get()
            .uri("/insights/chat/turn/t1")
            .to_request();
        let resp = actix_test::call_service(&app, req).await;
        assert_eq!(resp.status(), 200);

        let body = String::from_utf8(actix_test::read_body(resp).await.to_vec()).unwrap();
        assert!(body.contains("event: turn_info"));
        assert!(body.contains("event: text"));
        assert!(body.contains("event: done"));
        // Events are seq-stamped 0,1 (text) and 2 (done).
        assert!(body.contains("\"seq\":0"));
        assert!(body.contains("\"seq\":2"));
        // Done payload carries the renamed token fields the client reads.
        assert!(body.contains("\"prompt_tokens\":10"));
    }

    #[actix_rt::test]
    async fn replay_skip_before_query_skips_applied_events() {
        let state = build_state();
        seed_completed(&state, "t2", 3).await; // seqs 0,1,2 text; 3 done

        let app = actix_test::init_service(
            App::new()
                .service(turn_replay_handler)
                .app_data(state.clone()),
        )
        .await;

        let req = actix_test::TestRequest::get()
            .uri("/insights/chat/turn/t2?skip_before=2")
            .to_request();
        let resp = actix_test::call_service(&app, req).await;
        assert_eq!(resp.status(), 200);

        let body = String::from_utf8(actix_test::read_body(resp).await.to_vec()).unwrap();
        // Only seq 2 (last text) and seq 3 (done) should be present.
        assert!(body.contains("\"seq\":2"));
        assert!(body.contains("\"seq\":3"));
        assert!(!body.contains("\"seq\":0"));
        assert!(!body.contains("\"seq\":1"));
    }

    #[actix_rt::test]
    async fn replay_evicted_index_is_410() {
        let state = build_state();
        let entry = Arc::new(TurnEntry::new("t3".into(), "/p.jpg".into(), 1));
        // Push past the cap so the front is evicted and base advances.
        for i in 0..600 {
            entry
                .push_event(ChatStreamEvent::TextDelta(format!("d{i}")))
                .await;
        }
        entry.set_terminal_status(TurnStatus::Done);
        state.turn_registry.insert(entry).await;

        let app = actix_test::init_service(
            App::new()
                .service(turn_replay_handler)
                .app_data(state.clone()),
        )
        .await;

        let req = actix_test::TestRequest::get()
            .uri("/insights/chat/turn/t3?skip_before=0")
            .to_request();
        let resp = actix_test::call_service(&app, req).await;
        assert_eq!(resp.status(), 410);
    }

    #[actix_rt::test]
    async fn cancel_unknown_turn_is_404() {
        let state = build_state();
        let app = actix_test::init_service(
            App::new()
                .service(cancel_turn_handler)
                .app_data(state.clone()),
        )
        .await;

        let req = actix_test::TestRequest::delete()
            .uri("/insights/chat/turn/nope")
            .to_request();
        let resp = actix_test::call_service(&app, req).await;
        assert_eq!(resp.status(), 404);
    }

    #[actix_rt::test]
    async fn cancel_running_turn_marks_cancelled_and_buffers_terminal() {
        let state = build_state();
        let entry = Arc::new(TurnEntry::new("t4".into(), "/p.jpg".into(), 1));
        entry
            .push_event(ChatStreamEvent::TextDelta("partial".into()))
            .await;
        state.turn_registry.insert(entry.clone()).await;

        let app = actix_test::init_service(
            App::new()
                .service(cancel_turn_handler)
                .app_data(state.clone()),
        )
        .await;

        let req = actix_test::TestRequest::delete()
            .uri("/insights/chat/turn/t4")
            .to_request();
        let resp = actix_test::call_service(&app, req).await;
        assert_eq!(resp.status(), 200);

        // Status flipped to Cancelled and a terminal Done(cancelled) buffered
        // after the existing event, so a late replay reader still completes.
        assert_eq!(
            TurnStatus::from(entry.status.load(std::sync::atomic::Ordering::Relaxed)),
            TurnStatus::Cancelled
        );
        let info = entry.info().await;
        assert_eq!(info.total_events_pushed, 2);
    }
}