use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};

use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
use crate::ai::ollama::ChatMessage;
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::{ExifDao, InsightDao};
use crate::libraries;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
use crate::utils::normalize_path;

/// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
/// ids of approved insights whose `training_messages` should be compressed
/// into trajectory form and injected into the system prompt. Empty = no
/// change in behavior. Request-level `fewshot_insight_ids` overrides this
/// when non-empty.
// const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908];
const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[];

#[derive(Debug, Deserialize)]
pub struct GeneratePhotoInsightRequest {
    pub file_path: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub system_prompt: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
    /// OpenRouter chat). Only respected by the agentic endpoint.
    #[serde(default)]
    pub backend: Option<String>,
    /// Insight ids whose stored `training_messages` should be compressed
    /// into few-shot trajectories and injected into the system prompt.
    /// Silently truncated to the first 2. When absent/empty, the handler
    /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
    #[serde(default)]
    pub fewshot_insight_ids: Option<Vec<i32>>,
}
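
// Illustrative JSON body for `GeneratePhotoInsightRequest` above. Only the
// field names come from the struct; the values are hypothetical (num_ctx,
// temperature, ids), "qwen2.5" is just one model name mentioned elsewhere
// in this file, and "hybrid" is one of the documented backend values.
//
// {
//   "file_path": "/photos/example.jpg",
//   "model": "qwen2.5",
//   "num_ctx": 8192,
//   "temperature": 0.7,
//   "backend": "hybrid",
//   "fewshot_insight_ids": [101, 102]
// }
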
#[derive(Debug, Deserialize)]
pub struct GetPhotoInsightQuery {
    pub path: String,
    /// Library context for this lookup. Used to pick the right content
    /// hash when the same rel_path exists under multiple roots.
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Deserialize)]
pub struct RateInsightRequest {
    pub file_path: String,
    pub approved: bool,
}

#[derive(Debug, Deserialize)]
pub struct ExportTrainingDataQuery {
    #[serde(default)]
    pub approved_only: Option<bool>,
}

#[derive(Debug, Serialize)]
pub struct PhotoInsightResponse {
    pub id: i32,
    pub file_path: String,
    pub title: String,
    pub summary: String,
    pub generated_at: i64,
    pub model_version: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub approved: Option<bool>,
    pub backend: String,
    /// True when the insight was generated agentically and a chat
    /// continuation can be started against it. Drives the mobile chat button.
    pub has_training_messages: bool,
}

#[derive(Debug, Serialize)]
pub struct AvailableModelsResponse {
    pub primary: ServerModels,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback: Option<ServerModels>,
}

#[derive(Debug, Serialize)]
pub struct ServerModels {
    pub url: String,
    pub models: Vec<ModelCapabilities>,
    pub default_model: String,
}

/// POST /insights/generate - Generate insight for a specific photo
#[post("/insights/generate")]
pub async fn generate_insight_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    insight_generator: web::Data<InsightGenerator>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate", &parent_context);

    let normalized_path = normalize_path(&request.file_path);

    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
    if let Some(ref model) = request.model {
        span.set_attribute(KeyValue::new("model", model.clone()));
    }
    if let Some(ref prompt) = request.system_prompt {
        span.set_attribute(KeyValue::new("has_custom_prompt", true));
        span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
    }
    if let Some(ctx) = request.num_ctx {
        span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
    }

    log::info!(
        "Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
        normalized_path,
        request.model,
        request.system_prompt.is_some(),
        request.num_ctx
    );

    // Generate insight with optional custom model, system prompt, and context size
    let result = insight_generator
        .generate_insight_for_photo_with_config(
            &normalized_path,
            request.model.clone(),
            request.system_prompt.clone(),
            request.num_ctx,
            request.temperature,
            request.top_p,
            request.top_k,
            request.min_p,
        )
        .await;

    match result {
        Ok(()) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(serde_json::json!({
                "success": true,
                "message": "Insight generated successfully"
            }))
        }
        Err(e) => {
            log::error!("Failed to generate insight: {:?}", e);
            span.set_status(Status::error(e.to_string()));
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to generate insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
#[get("/insights")]
pub async fn get_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    app_state: web::Data<AppState>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
    exif_dao: web::Data<std::sync::Mutex<Box<dyn ExifDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::debug!("Fetching insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();

    // Expand to rel_paths sharing content so an insight generated under
    // library 1 still shows when the same photo is viewed from library 2.
    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
    let sibling_paths = {
        let mut exif = exif_dao.lock().expect("Unable to lock ExifDao");
        exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path)
            .unwrap_or_else(|_| vec![normalized_path.clone()])
    };

    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_insight_for_paths(&otel_context, &sibling_paths) {
        Ok(Some(insight)) => {
            let response = PhotoInsightResponse {
                id: insight.id,
                file_path: insight.file_path,
                title: insight.title,
                summary: insight.summary,
                generated_at: insight.generated_at,
                model_version: insight.model_version,
                prompt_eval_count: None,
                eval_count: None,
                approved: insight.approved,
                has_training_messages: insight.training_messages.is_some(),
                backend: insight.backend,
            };
            HttpResponse::Ok().json(response)
        }
        Ok(None) => HttpResponse::NotFound().json(serde_json::json!({
            "error": "Insight not found"
        })),
        Err(e) => {
            log::error!("Failed to fetch insight ({}): {:?}", &query.path, e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insight: {:?}", e)
            }))
        }
    }
}

/// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request)
#[delete("/insights")]
pub async fn delete_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::info!("Deleting insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.delete_insight(&otel_context, &normalized_path) {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": "Insight deleted successfully"
        })),
        Err(e) => {
            log::error!("Failed to delete insight: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to delete insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights/all - Get all insights
#[get("/insights/all")]
pub async fn get_all_insights_handler(
    _claims: Claims,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    log::debug!("Fetching all insights");

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_all_insights(&otel_context) {
        Ok(insights) => {
            let responses: Vec<PhotoInsightResponse> = insights
                .into_iter()
                .map(|insight| PhotoInsightResponse {
                    id: insight.id,
                    file_path: insight.file_path,
                    title: insight.title,
                    summary: insight.summary,
                    generated_at: insight.generated_at,
                    model_version: insight.model_version,
                    prompt_eval_count: None,
                    eval_count: None,
                    approved: insight.approved,
                    has_training_messages: insight.training_messages.is_some(),
                    backend: insight.backend,
                })
                .collect();

            HttpResponse::Ok().json(responses)
        }
        Err(e) => {
            log::error!("Failed to fetch all insights: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insights: {:?}", e)
            }))
        }
    }
}

/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop
#[post("/insights/generate/agentic")]
pub async fn generate_agentic_insight_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    insight_generator: web::Data<InsightGenerator>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);

    let normalized_path = normalize_path(&request.file_path);

    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
    if let Some(ref model) = request.model {
        span.set_attribute(KeyValue::new("model", model.clone()));
    }
    if let Some(ref prompt) = request.system_prompt {
        span.set_attribute(KeyValue::new("has_custom_prompt", true));
        span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
    }
    if let Some(ctx) = request.num_ctx {
        span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
    }

    let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(12);

    span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));

    log::info!(
        "Agentic insight generation triggered for photo: {} with model: {:?}, max_iterations: {}",
        normalized_path,
        request.model,
        max_iterations
    );

    if let Some(ref b) = request.backend {
        span.set_attribute(KeyValue::new("backend", b.clone()));
    }

    // Resolve few-shot ids: request-provided ids take precedence when
    // non-empty; otherwise fall back to the hardcoded defaults.
    let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
        Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
        _ => DEFAULT_FEWSHOT_INSIGHT_IDS
            .iter()
            .take(2)
            .copied()
            .collect(),
    };
    span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));

    let fewshot_examples: Vec<Vec<ChatMessage>> = {
        let otel_context = opentelemetry::Context::new();
        let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
        fewshot_ids
            .iter()
            .filter_map(|id| {
                let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
                let json = insight.training_messages?;
                match serde_json::from_str::<Vec<ChatMessage>>(&json) {
                    Ok(msgs) => Some(msgs),
                    Err(e) => {
                        log::warn!(
                            "Few-shot insight {} has malformed training_messages: {}",
                            id,
                            e
                        );
                        None
                    }
                }
            })
            .collect()
    };

    let result = insight_generator
        .generate_agentic_insight_for_photo(
            &normalized_path,
            request.model.clone(),
            request.system_prompt.clone(),
            request.num_ctx,
            request.temperature,
            request.top_p,
            request.top_k,
            request.min_p,
            max_iterations,
            request.backend.clone(),
            fewshot_examples,
            fewshot_ids,
        )
        .await;

    match result {
        Ok((prompt_eval_count, eval_count)) => {
            span.set_status(Status::Ok);
            // Fetch the stored insight to return it
            let otel_context = opentelemetry::Context::new();
            let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
            match dao.get_insight(&otel_context, &normalized_path) {
                Ok(Some(insight)) => {
                    let response = PhotoInsightResponse {
                        id: insight.id,
                        file_path: insight.file_path,
                        title: insight.title,
                        summary: insight.summary,
                        generated_at: insight.generated_at,
                        model_version: insight.model_version,
                        prompt_eval_count,
                        eval_count,
                        approved: insight.approved,
                        has_training_messages: insight.training_messages.is_some(),
                        backend: insight.backend,
                    };
                    HttpResponse::Ok().json(response)
                }
                Ok(None) => HttpResponse::Ok().json(serde_json::json!({
                    "success": true,
                    "message": "Agentic insight generated successfully"
                })),
                Err(e) => {
                    log::warn!("Insight stored but failed to retrieve: {:?}", e);
                    HttpResponse::Ok().json(serde_json::json!({
                        "success": true,
                        "message": "Agentic insight generated successfully"
                    }))
                }
            }
        }
        Err(e) => {
            let error_msg = format!("{:?}", e);
            log::error!("Failed to generate agentic insight: {}", error_msg);
            span.set_status(Status::error(error_msg.clone()));

            if error_msg.contains("tool calling not supported")
                || error_msg.contains("model not available")
            {
                HttpResponse::BadRequest().json(serde_json::json!({
                    "error": format!("Failed to generate agentic insight: {}", error_msg)
                }))
            } else if error_msg.contains("error parsing tool call") {
                HttpResponse::BadRequest().json(serde_json::json!({
                    "error": "Model is not compatible with Ollama's tool calling protocol. Try a model known to support native tool calling (e.g. llama3.1, llama3.2, qwen2.5, mistral-nemo)."
                }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": format!("Failed to generate agentic insight: {}", error_msg)
                }))
            }
        }
    }
}

/// GET /insights/models - List available models from both servers with capabilities
#[get("/insights/models")]
pub async fn get_available_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    log::debug!("Fetching available models with capabilities");

    let ollama_client = &app_state.ollama;

    // Fetch models with capabilities from primary server
    let primary_models =
        match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await {
            Ok(models) => models,
            Err(e) => {
                log::warn!("Failed to fetch models from primary server: {:?}", e);
                vec![]
            }
        };

    let primary = ServerModels {
        url: ollama_client.primary_url.clone(),
        models: primary_models,
        default_model: ollama_client.primary_model.clone(),
    };

    // Fetch models with capabilities from fallback server if configured
    let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
        match OllamaClient::list_models_with_capabilities(fallback_url).await {
            Ok(models) => Some(ServerModels {
                url: fallback_url.clone(),
                models,
                default_model: ollama_client
                    .fallback_model
                    .clone()
                    .unwrap_or_else(|| ollama_client.primary_model.clone()),
            }),
            Err(e) => {
                log::warn!("Failed to fetch models from fallback server: {:?}", e);
                None
            }
        }
    } else {
        None
    };

    let response = AvailableModelsResponse { primary, fallback };

    HttpResponse::Ok().json(response)
}

#[derive(Debug, Serialize)]
pub struct OpenRouterModelsResponse {
    pub models: Vec<String>,
    pub default_model: Option<String>,
    pub configured: bool,
}

/// GET /insights/openrouter/models - Curated OpenRouter model ids exposed
/// to clients for the hybrid backend. Returned verbatim from
/// `OPENROUTER_ALLOWED_MODELS`; no live call to OpenRouter.
#[get("/insights/openrouter/models")]
pub async fn get_openrouter_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    let configured = app_state.openrouter.is_some();
    let default_model = app_state
        .openrouter
        .as_ref()
        .map(|c| c.primary_model.clone());
    let response = OpenRouterModelsResponse {
        models: app_state.openrouter_allowed_models.clone(),
        default_model,
        configured,
    };
    HttpResponse::Ok().json(response)
}

/// POST /insights/rate - Rate an insight (thumbs up/down for training data)
#[post("/insights/rate")]
pub async fn rate_insight_handler(
    _claims: Claims,
    request: web::Json<RateInsightRequest>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&request.file_path);
    log::info!(
        "Rating insight for {}: approved={}",
        normalized_path,
        request.approved
    );

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.rate_insight(&otel_context, &normalized_path, request.approved) {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": "Insight rated successfully"
        })),
        Err(e) => {
            log::error!("Failed to rate insight: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to rate insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights/training-data - Export approved training data as JSONL
#[get("/insights/training-data")]
pub async fn export_training_data_handler(
    _claims: Claims,
    query: web::Query<ExportTrainingDataQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let approved_only = query.approved_only.unwrap_or(true);
    log::info!("Exporting training data (approved_only={})", approved_only);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    let insights = if approved_only {
        dao.get_approved_insights(&otel_context)
    } else {
        dao.get_all_insights(&otel_context)
    };

    match insights {
        Ok(insights) => {
            let mut jsonl = String::new();
            for insight in &insights {
                if let Some(ref messages) = insight.training_messages {
                    let entry = serde_json::json!({
                        "file_path": insight.file_path,
                        "model_version": insight.model_version,
                        "generated_at": insight.generated_at,
                        "title": insight.title,
                        "summary": insight.summary,
                        "messages": serde_json::from_str::<serde_json::Value>(messages)
                            .unwrap_or(serde_json::Value::Null),
                    });
                    jsonl.push_str(&entry.to_string());
                    jsonl.push('\n');
                }
            }

            HttpResponse::Ok()
                .content_type("application/jsonl")
                .insert_header((
                    "Content-Disposition",
                    "attachment; filename=\"training_data.jsonl\"",
                ))
                .body(jsonl)
        }
        Err(e) => {
            log::error!("Failed to export training data: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to export training data: {:?}", e)
            }))
        }
    }
}
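
// Shape of one line emitted by `export_training_data_handler` above, built
// from the `entry` fields; values are elided, and `messages` is whatever
// JSON was stored in the insight's `training_messages` column. Each export
// entry occupies a single line in the downloaded file (wrapped here only
// for readability).
//
// {"file_path":"…","model_version":"…","generated_at":…,
//  "title":"…","summary":"…","messages":[…]}
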
#[derive(Debug, Deserialize)]
pub struct ChatTurnHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    pub user_message: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub backend: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    #[serde(default)]
    pub max_iterations: Option<usize>,
    #[serde(default)]
    pub amend: bool,
}
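
// Illustrative body for `ChatTurnHttpRequest` above (values hypothetical).
// Only `file_path` and `user_message` are required; every other field is
// `#[serde(default)]` and may be omitted.
//
// { "file_path": "/photos/example.jpg", "user_message": "What lens was this shot on?" }
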
#[derive(Debug, Serialize)]
pub struct ChatTurnHttpResponse {
    pub assistant_message: String,
    pub tool_calls_made: usize,
    pub iterations_used: usize,
    pub truncated: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub amended_insight_id: Option<i32>,
    pub backend: String,
    pub model: String,
}

/// POST /insights/chat — submit a follow-up turn against an existing insight.
#[post("/insights/chat")]
pub async fn chat_turn_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
    span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));

    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };

    match app_state.insight_chat.chat_turn(chat_req).await {
        Ok(result) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(ChatTurnHttpResponse {
                assistant_message: result.assistant_message,
                tool_calls_made: result.tool_calls_made,
                iterations_used: result.iterations_used,
                truncated: result.truncated,
                prompt_eval_count: result.prompt_eval_count,
                eval_count: result.eval_count,
                amended_insight_id: result.amended_insight_id,
                backend: result.backend_used,
                model: result.model_used,
            })
        }
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat turn failed: {}", msg);
            span.set_status(Status::error(msg.clone()));

            // Map well-known errors to client-facing 4xx codes.
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("user_message")
                || msg.contains("unknown backend")
                || msg.contains("switching from local to hybrid")
                || msg.contains("hybrid backend unavailable")
            {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ChatHistoryQuery {
    pub path: String,
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct ChatHistoryHttpResponse {
    pub messages: Vec<RenderedHistoryMessage>,
    pub turn_count: usize,
    pub model_version: String,
    pub backend: String,
}

#[derive(Debug, Serialize)]
pub struct RenderedHistoryMessage {
    pub role: String,
    pub content: String,
    pub is_initial: bool,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<HistoryToolInvocation>,
}

#[derive(Debug, Serialize)]
pub struct HistoryToolInvocation {
    pub name: String,
    pub arguments: serde_json::Value,
    pub result: String,
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub result_truncated: bool,
}

#[derive(Debug, Deserialize)]
pub struct ChatRewindHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    /// 0-based index into the rendered transcript. The message at this
    /// index, and everything after it, is discarded. Must be > 0 — the
    /// initial user message is protected.
    pub discard_from_rendered_index: usize,
}
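
// Illustrative body for `ChatRewindHttpRequest` above (values hypothetical).
// Per the field doc, discarding from rendered index 3 keeps messages 0–2
// and drops everything from index 3 onward.
//
// { "file_path": "/photos/example.jpg", "discard_from_rendered_index": 3 }
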
/// POST /insights/chat/rewind — truncate the stored conversation so the
/// rendered message at `discard_from_rendered_index` (and everything after)
/// is removed. Use when a user wants to retry a turn with a different
/// prompt without prior replies poisoning context.
#[post("/insights/chat/rewind")]
pub async fn chat_rewind_handler(
    _claims: Claims,
    request: web::Json<ChatRewindHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    match app_state
        .insight_chat
        .rewind_history(
            library.id,
            &request.file_path,
            request.discard_from_rendered_index,
        )
        .await
    {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })),
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat rewind failed: {}", msg);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("cannot discard the initial") || msg.contains("out of range") {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

/// GET /insights/chat/history — return the rendered transcript for a photo.
#[get("/insights/chat/history")]
pub async fn chat_history_handler(
    _claims: Claims,
    query: web::Query<ChatHistoryQuery>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    // library param parsed for parity with other insight endpoints, even
    // though load_history currently keys on file_path alone (matches the
    // existing get_insight DAO contract).
    let _library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());

    match app_state.insight_chat.load_history(&query.path) {
        Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
            messages: view
                .messages
                .into_iter()
                .map(|m| RenderedHistoryMessage {
                    role: m.role,
                    content: m.content,
                    is_initial: m.is_initial,
                    tools: m
                        .tools
                        .into_iter()
                        .map(|t| HistoryToolInvocation {
                            name: t.name,
                            arguments: t.arguments,
                            result: t.result,
                            result_truncated: t.result_truncated,
                        })
                        .collect(),
                })
                .collect(),
            turn_count: view.turn_count,
            model_version: view.model_version,
            backend: view.backend,
        }),
        Err(e) => {
            let msg = format!("{}", e);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

/// POST /insights/chat/stream — streaming variant of /insights/chat.
/// Returns `text/event-stream` with one SSE frame per `ChatStreamEvent`.
#[post("/insights/chat/stream")]
pub async fn chat_stream_handler(
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> HttpResponse {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };

    let service = app_state.insight_chat.clone();
    let events = service.chat_turn_stream(chat_req);

    // Map ChatStreamEvent → SSE frame bytes.
    let sse_stream = futures::stream::StreamExt::map(events, |ev| {
        let frame = render_sse_frame(&ev);
        Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame))
    });

    HttpResponse::Ok()
        .content_type("text/event-stream")
        .insert_header(("Cache-Control", "no-cache"))
        .insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering
        .streaming(sse_stream)
}

fn render_sse_frame(ev: &ChatStreamEvent) -> String {
    let (event_name, payload) = match ev {
        ChatStreamEvent::IterationStart { n, max } => {
            ("iteration_start", serde_json::json!({ "n": n, "max": max }))
        }
        ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})),
        ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })),
        ChatStreamEvent::ToolCall {
            index,
            name,
            arguments,
        } => (
            "tool_call",
            serde_json::json!({ "index": index, "name": name, "arguments": arguments }),
        ),
        ChatStreamEvent::ToolResult {
            index,
            name,
            result,
            result_truncated,
        } => (
            "tool_result",
            serde_json::json!({
                "index": index,
                "name": name,
                "result": result,
                "result_truncated": result_truncated,
            }),
        ),
        ChatStreamEvent::Done {
            tool_calls_made,
            iterations_used,
            truncated,
            prompt_eval_count,
            eval_count,
            amended_insight_id,
            backend_used,
            model_used,
        } => (
            "done",
            serde_json::json!({
                "tool_calls_made": tool_calls_made,
                "iterations_used": iterations_used,
                "truncated": truncated,
                "prompt_eval_count": prompt_eval_count,
                "eval_count": eval_count,
                "amended_insight_id": amended_insight_id,
                "backend": backend_used,
                "model": model_used,
            }),
        ),
        ChatStreamEvent::Error(msg) => ("error", serde_json::json!({ "message": msg })),
    };
    let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
    format!("event: {}\ndata: {}\n\n", event_name, data)
}
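
// Example of the wire format produced by `render_sse_frame` for a
// `TextDelta` event (the delta text itself is hypothetical); each frame is
// an SSE event name line, a data line, and a blank separator line:
//
// event: text
// data: {"delta":"The photo was taken at"}
//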