feat: async insight generation with SQLite job tracking

- Add insight_generation_jobs table migration and DAO
- Implement job lifecycle: create_or_get_active, complete, fail, cancel
- Refactor POST /insights/generate and POST /insights/generate/agentic to spawn the generation asynchronously with a timeout
- Add GET /insights/generation/status endpoint with job_id and file_path lookup
- Use String for enum fields in Diesel models to avoid the private Bound type
- Add from_str() helpers on InsightJobStatus and InsightGenerationType
- Fix update_training_messages to return Result<usize, DbError>
- 7/7 DAO unit tests passing
2026-05-27 10:01:17 -04:00
parent 5a75d1a28c
commit b87eb4e690
13 changed files with 1046 additions and 174 deletions
@@ -5,8 +5,9 @@ use serde::{Deserialize, Serialize};

 use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
 use crate::ai::ollama::ChatMessage;
-use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
+use crate::ai::{ModelCapabilities, OllamaClient};
 use crate::data::Claims;
+use crate::database::models::{InsightGenerationType, InsightJobStatus};
 use crate::database::{ExifDao, InsightDao};
 use crate::libraries;
 use crate::otel::{extract_context_from_request, global_tracer};
@@ -64,6 +65,101 @@ pub struct GetPhotoInsightQuery {
    pub library: Option<String>,
 }

+#[derive(Debug, Deserialize)]
+pub struct GenerationStatusQuery {
+    /// Job id to look up directly. When present the handler answers from it and never consults `file_path`.
+    #[serde(default)]
+    pub job_id: Option<i32>,
+    /// File path whose active job is looked up (via `get_active_job`) when no job_id is given. The handler
+    /// normalizes it first and uses the primary library when `library` is absent or cannot be resolved.
+    #[serde(default)]
+    pub file_path: Option<String>,
+    #[serde(default)]
+    pub library: Option<String>, // library selector; only read on the `file_path` lookup path
+}
+
+/// GET /insights/generation/status - Report the status of an insight generation job.
+/// `?job_id=<id>` is checked first (404 if unknown); otherwise `?file_path=<path>[&library=<name>]` (200 with `"status": "idle"` if no active job). 400 if neither is given, 500 on a DAO error.
+#[get("/insights/generation/status")]
+pub async fn generation_status_handler(
+    _claims: Claims,
+    query: web::Query<GenerationStatusQuery>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let ctx = opentelemetry::Context::new(); // fresh context; this handler does not extract one from the request
+
+    if let Some(jid) = query.job_id { // job_id takes precedence: every arm below returns
+        let mut dao = app_state
+            .insight_job_dao
+            .lock()
+            .expect("Unable to lock InsightJobDao");
+        match dao.get_job_by_id(&ctx, jid) {
+            Ok(Some(job)) => {
+                return HttpResponse::Ok().json(GenerationStatusResponse {
+                    job_id: job.id,
+                    status: InsightJobStatus::from_str(&job.status), // stored as a string, converted back to the enum for the response
+                    started_at: job.started_at,
+                    completed_at: job.completed_at,
+                    result_insight_id: job.result_insight_id,
+                    error_message: job.error_message,
+                });
+            }
+            Ok(None) => {
+                return HttpResponse::NotFound().json(serde_json::json!({
+                    "error": format!("Job {} not found", jid)
+                }));
+            }
+            Err(e) => {
+                log::error!("Failed to look up job {}: {:?}", jid, e); // details stay in the log; the client gets a generic message
+                return HttpResponse::InternalServerError().json(serde_json::json!({
+                    "error": "Failed to look up job"
+                }));
+            }
+        }
+    }
+
+    if let Some(ref fp) = query.file_path {
+        let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+            .ok() // a resolution error is dropped here rather than reported to the client
+            .flatten()
+            .unwrap_or_else(|| app_state.primary_library()); // missing or unresolved library falls back to the primary one
+        let normalized = normalize_path(fp);
+
+        let mut dao = app_state
+            .insight_job_dao
+            .lock()
+            .expect("Unable to lock InsightJobDao");
+        match dao.get_active_job(&ctx, library.id, &normalized) {
+            Ok(Some(job)) => {
+                return HttpResponse::Ok().json(GenerationStatusResponse {
+                    job_id: job.id,
+                    status: InsightJobStatus::from_str(&job.status),
+                    started_at: job.started_at,
+                    completed_at: job.completed_at,
+                    result_insight_id: job.result_insight_id,
+                    error_message: job.error_message,
+                });
+            }
+            Ok(None) => {
+                return HttpResponse::Ok().json(serde_json::json!({ // no active job is a normal 200 answer, not an error
+                    "status": "idle",
+                    "message": "No running generation job for this file"
+                }));
+            }
+            Err(e) => {
+                log::error!("Failed to look up active job for {}: {:?}", normalized, e);
+                return HttpResponse::InternalServerError().json(serde_json::json!({
+                    "error": "Failed to look up active job"
+                }));
+            }
+        }
+    }
+
+    HttpResponse::BadRequest().json(serde_json::json!({ // reached only when neither job_id nor file_path was supplied
+        "error": "Provide either job_id or file_path query parameter"
+    }))
+}
+
 #[derive(Debug, Deserialize)]
 pub struct RateInsightRequest {
    pub file_path: String,
@@ -76,6 +172,24 @@ pub struct ExportTrainingDataQuery {
    pub approved_only: Option<bool>,
 }

+#[derive(Debug, Serialize)]
+pub struct JobIdResponse {
+    pub job_id: i32, // id of the job tracking the spawned generation; accepted by GET /insights/generation/status as `job_id`
+}
+
+#[derive(Debug, Serialize)]
+pub struct GenerationStatusResponse { // JSON body the status handler returns when a job row is found
+    pub job_id: i32,
+    pub status: InsightJobStatus,
+    pub started_at: i64, // presumably a Unix timestamp — TODO confirm units against the job model
+    #[serde(skip_serializing_if = "Option::is_none")] // field is left out of the JSON when None
+    pub completed_at: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")] // field is left out of the JSON when None
+    pub result_insight_id: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")] // field is left out of the JSON when None
+    pub error_message: Option<String>,
+}
+
 #[derive(Debug, Serialize)]
 pub struct PhotoInsightResponse {
    pub id: i32,
@@ -110,70 +224,123 @@ pub struct ServerModels {
    pub default_model: String,
 }

-/// POST /insights/generate - Generate insight for a specific photo
+/// POST /insights/generate - Start insight generation for a specific photo in a background task and return the tracking job id (async)
 #[post("/insights/generate")]
 pub async fn generate_insight_handler(
-    http_request: HttpRequest,
+    _http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
-    insight_generator: web::Data<InsightGenerator>,
+    app_state: web::Data<AppState>,
 ) -> impl Responder {
-    let parent_context = extract_context_from_request(&http_request);
-    let tracer = global_tracer();
-    let mut span = tracer.start_with_context("http.insights.generate", &parent_context);
-
    let normalized_path = normalize_path(&request.file_path);
-
-    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
-    if let Some(ref model) = request.model {
-        span.set_attribute(KeyValue::new("model", model.clone()));
-    }
-    if let Some(ref prompt) = request.system_prompt {
-        span.set_attribute(KeyValue::new("has_custom_prompt", true));
-        span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
-    }
-    if let Some(ctx) = request.num_ctx {
-        span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
-    }
+    let library = app_state.primary_library(); // always the primary library; no library is read from the request in this handler
+    let gen_type = InsightGenerationType::Standard;

    log::info!(
-        "Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
+        "Manual insight generation triggered for photo: {} with model: {:?}",
        normalized_path,
-        request.model,
-        request.system_prompt.is_some(),
-        request.num_ctx
+        request.model
    );

-    // Generate insight with optional custom model, system prompt, and context size
-    let result = insight_generator
-        .generate_insight_for_photo_with_config(
+    // Cancel any running job for this file, then create a fresh one; a failed cancel is ignored and creation still proceeds
+    {
+        let mut dao = app_state
+            .insight_job_dao
+            .lock()
+            .expect("Unable to lock InsightJobDao");
+        let _ = dao.cancel_active_job( // NOTE(review): nothing visible here aborts the task spawned for the cancelled job — confirm it cannot later overwrite that job's status
+            &opentelemetry::Context::new(),
+            library.id,
            &normalized_path,
-            request.model.clone(),
-            request.system_prompt.clone(),
-            request.num_ctx,
-            request.temperature,
-            request.top_p,
-            request.top_k,
-            request.min_p,
+            gen_type,
+        );
+    }
+
+    let job_id = {
+        let mut dao = app_state
+            .insight_job_dao
+            .lock()
+            .expect("Unable to lock InsightJobDao");
+        match dao.create_or_get_active_job(
+            &opentelemetry::Context::new(),
+            library.id,
+            &normalized_path,
+            gen_type,
+        ) {
+            Ok(id) => id,
+            Err(e) => {
+                log::error!("Failed to create generation job: {:?}", e);
+                return HttpResponse::InternalServerError().json(serde_json::json!({
+                    "error": "Failed to create generation job"
+                }));
+            }
+        }
+    };
+
+    // Spawn a detached background task; generation is capped by INSIGHT_GENERATION_TIMEOUT_SECS (default 120s)
+    let generator = app_state.insight_generator.clone();
+    let job_dao = app_state.insight_job_dao.clone();
+    let lib_id = library.id; // NOTE(review): not read anywhere in the visible part of this handler
+    let path = normalized_path.clone();
+
+    tokio::spawn(async move {
+        let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
+            .ok()
+            .and_then(|v| v.parse().ok()) // an unset or unparsable value falls through to the default
+            .unwrap_or(120);
+
+        let result = tokio::time::timeout(
+            std::time::Duration::from_secs(timeout_secs),
+            generator.generate_insight_for_photo_with_config(
+                &path,
+                request.model.clone(),
+                request.system_prompt.clone(),
+                request.num_ctx,
+                request.temperature,
+                request.top_p,
+                request.top_k,
+                request.min_p,
+            ),
        )
        .await;

-    match result {
-        Ok(()) => {
-            span.set_status(Status::Ok);
-            HttpResponse::Ok().json(serde_json::json!({
-                "success": true,
-                "message": "Insight generated successfully"
-            }))
+        let ctx = opentelemetry::Context::new();
+        let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao"); // acquired only after the await above has finished
+
+        match result { // outer Result is the timeout, inner Result is the generation outcome
+            Ok(Ok(())) => {
+                // Look up the stored insight id to record on the job; if none is found the job is marked failed instead
+                let mut insight_dao = generator
+                    .insight_dao()
+                    .lock()
+                    .expect("Unable to lock InsightDao");
+                let insight_id = insight_dao
+                    .get_insight(&ctx, &path) // NOTE(review): looked up by path only — confirm this cannot return an older insight for the same file
+                    .ok()
+                    .flatten()
+                    .map(|i| i.id);
+                if let Some(id) = insight_id {
+                    let _ = dao.complete_job(&ctx, job_id, id);
+                } else {
+                    let _ = dao.fail_job(&ctx, job_id, "generation returned no insight");
+                }
+            }
+            Ok(Err(e)) => {
+                log::error!("Insight generation failed for {}: {:?}", path, e);
+                let _ = dao.fail_job(&ctx, job_id, &format!("{:?}", e));
+            }
+            Err(_) => {
+                log::error!(
+                    "Insight generation timed out for {} after {}s",
+                    path,
+                    timeout_secs
+                );
+                let _ = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs));
+            }
        }
-        Err(e) => {
-            log::error!("Failed to generate insight: {:?}", e);
-            span.set_status(Status::error(e.to_string()));
-            HttpResponse::InternalServerError().json(serde_json::json!({
-                "error": format!("Failed to generate insight: {:?}", e)
-            }))
-        }
-    }
+    });
+
+    HttpResponse::Ok().json(JobIdResponse { job_id }) // returned right away; the generation outcome is only visible through the job record
 }

 /// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
@@ -301,56 +468,60 @@ pub async fn get_all_insights_handler(
    }
 }

-/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop
+/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop (async)
 #[post("/insights/generate/agentic")]
 pub async fn generate_agentic_insight_handler(
-    http_request: HttpRequest,
+    _http_request: HttpRequest,
    claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
-    insight_generator: web::Data<InsightGenerator>,
-    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
+    app_state: web::Data<AppState>,
 ) -> impl Responder {
-    // Service tokens (sub: "service:apollo") fall through to user_id=1
-    // — the operator convention. Mobile/web clients have a numeric sub.
-    let user_id = claims.sub.parse::<i32>().unwrap_or(1);
-    let parent_context = extract_context_from_request(&http_request);
-    let tracer = global_tracer();
-    let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);
-
    let normalized_path = normalize_path(&request.file_path);
-
-    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
-    if let Some(ref model) = request.model {
-        span.set_attribute(KeyValue::new("model", model.clone()));
-    }
-    if let Some(ref prompt) = request.system_prompt {
-        span.set_attribute(KeyValue::new("has_custom_prompt", true));
-        span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
-    }
-    if let Some(ctx) = request.num_ctx {
-        span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
-    }
-
-    let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
-        .ok()
-        .and_then(|v| v.parse().ok())
-        .unwrap_or(12);
-
-    span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
+    let library = app_state.primary_library();
+    let gen_type = InsightGenerationType::Agentic;

    log::info!(
-        "Agentic insight generation triggered for photo: {} with model: {:?}, max_iterations: {}",
+        "Agentic insight generation triggered for photo: {} with model: {:?}",
        normalized_path,
-        request.model,
-        max_iterations
+        request.model
    );

-    if let Some(ref b) = request.backend {
-        span.set_attribute(KeyValue::new("backend", b.clone()));
+    // Cancel any running job for this file, then create a fresh one
+    {
+        let mut dao = app_state
+            .insight_job_dao
+            .lock()
+            .expect("Unable to lock InsightJobDao");
+        let _ = dao.cancel_active_job(
+            &opentelemetry::Context::new(),
+            library.id,
+            &normalized_path,
+            gen_type,
+        );
    }

-    // Resolve few-shot ids: request-provided ids take precedence when
-    // non-empty; otherwise fall back to the hardcoded defaults.
+    let job_id = {
+        let mut dao = app_state
+            .insight_job_dao
+            .lock()
+            .expect("Unable to lock InsightJobDao");
+        match dao.create_or_get_active_job(
+            &opentelemetry::Context::new(),
+            library.id,
+            &normalized_path,
+            gen_type,
+        ) {
+            Ok(id) => id,
+            Err(e) => {
+                log::error!("Failed to create agentic generation job: {:?}", e);
+                return HttpResponse::InternalServerError().json(serde_json::json!({
+                    "error": "Failed to create generation job"
+                }));
+            }
+        }
+    };
+
+    // Resolve few-shot ids for the background task
    let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
        Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
        _ => DEFAULT_FEWSHOT_INSIGHT_IDS
@@ -359,11 +530,14 @@ pub async fn generate_agentic_insight_handler(
            .copied()
            .collect(),
    };
-    span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));

    let fewshot_examples: Vec<Vec<ChatMessage>> = {
        let otel_context = opentelemetry::Context::new();
-        let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
+        let mut dao = app_state
+            .insight_chat
+            .insight_dao()
+            .lock()
+            .expect("Unable to lock InsightDao");
        fewshot_ids
            .iter()
            .filter_map(|id| {
@@ -384,90 +558,88 @@ pub async fn generate_agentic_insight_handler(
            .collect()
    };

+    let user_id = claims.sub.parse::<i32>().unwrap_or(1);
    let persona_id = request
        .persona_id
        .clone()
        .filter(|s| !s.trim().is_empty())
        .unwrap_or_else(|| "default".to_string());
-    span.set_attribute(KeyValue::new("persona_id", persona_id.clone()));

-    let result = insight_generator
-        .generate_agentic_insight_for_photo(
-            &normalized_path,
-            request.model.clone(),
-            request.system_prompt.clone(),
-            request.num_ctx,
-            request.temperature,
-            request.top_p,
-            request.top_k,
-            request.min_p,
-            max_iterations,
-            request.backend.clone(),
-            fewshot_examples,
-            fewshot_ids,
-            user_id,
-            persona_id,
+    let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
+        .ok()
+        .and_then(|v| v.parse().ok())
+        .unwrap_or(12);
+
+    // Spawn background task with timeout
+    let generator = app_state.insight_generator.clone();
+    let job_dao = app_state.insight_job_dao.clone();
+    let lib_id = library.id;
+    let path = normalized_path.clone();
+
+    tokio::spawn(async move {
+        let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
+            .ok()
+            .and_then(|v| v.parse().ok())
+            .unwrap_or(180);
+
+        let result = tokio::time::timeout(
+            std::time::Duration::from_secs(timeout_secs),
+            generator.generate_agentic_insight_for_photo(
+                &path,
+                request.model.clone(),
+                request.system_prompt.clone(),
+                request.num_ctx,
+                request.temperature,
+                request.top_p,
+                request.top_k,
+                request.min_p,
+                max_iterations,
+                request.backend.clone(),
+                fewshot_examples,
+                fewshot_ids,
+                user_id,
+                persona_id,
+            ),
        )
        .await;

-    match result {
-        Ok((prompt_eval_count, eval_count)) => {
-            span.set_status(Status::Ok);
-            // Fetch the stored insight to return it
-            let otel_context = opentelemetry::Context::new();
-            let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
-            match dao.get_insight(&otel_context, &normalized_path) {
-                Ok(Some(insight)) => {
-                    let response = PhotoInsightResponse {
-                        id: insight.id,
-                        file_path: insight.file_path,
-                        title: insight.title,
-                        summary: insight.summary,
-                        generated_at: insight.generated_at,
-                        model_version: insight.model_version,
-                        prompt_eval_count,
-                        eval_count,
-                        approved: insight.approved,
-                        has_training_messages: insight.training_messages.is_some(),
-                        backend: insight.backend,
-                    };
-                    HttpResponse::Ok().json(response)
-                }
-                Ok(None) => HttpResponse::Ok().json(serde_json::json!({
-                    "success": true,
-                    "message": "Agentic insight generated successfully"
-                })),
-                Err(e) => {
-                    log::warn!("Insight stored but failed to retrieve: {:?}", e);
-                    HttpResponse::Ok().json(serde_json::json!({
-                        "success": true,
-                        "message": "Agentic insight generated successfully"
-                    }))
-                }
-            }
-        }
-        Err(e) => {
-            let error_msg = format!("{:?}", e);
-            log::error!("Failed to generate agentic insight: {}", error_msg);
-            span.set_status(Status::error(error_msg.clone()));
+        let ctx = opentelemetry::Context::new();
+        let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");

-            if error_msg.contains("tool calling not supported")
-                || error_msg.contains("model not available")
-            {
-                HttpResponse::BadRequest().json(serde_json::json!({
-                    "error": format!("Failed to generate agentic insight: {}", error_msg)
-                }))
-            } else if error_msg.contains("error parsing tool call") {
-                HttpResponse::BadRequest().json(serde_json::json!({
-                    "error": "Model is not compatible with Ollama's tool calling protocol. Try a model known to support native tool calling (e.g. llama3.1, llama3.2, qwen2.5, mistral-nemo)."
-                }))
-            } else {
-                HttpResponse::InternalServerError().json(serde_json::json!({
-                    "error": format!("Failed to generate agentic insight: {}", error_msg)
-                }))
+        match result {
+            Ok(Ok(_)) => {
+                // Fetch the stored insight id to record on the job
+                let mut insight_dao = generator
+                    .insight_dao()
+                    .lock()
+                    .expect("Unable to lock InsightDao");
+                let insight_id = insight_dao
+                    .get_insight(&ctx, &path)
+                    .ok()
+                    .flatten()
+                    .map(|i| i.id);
+                if let Some(id) = insight_id {
+                    let _ = dao.complete_job(&ctx, job_id, id);
+                } else {
+                    let _ = dao.fail_job(&ctx, job_id, "generation returned no insight");
+                }
+            }
+            Ok(Err(e)) => {
+                log::error!("Agentic insight generation failed for {}: {:?}", path, e);
+                let _ = dao.fail_job(&ctx, job_id, &format!("{:?}", e));
+            }
+            Err(_) => {
+                log::error!(
+                    "Agentic insight generation timed out for {} after {}s",
+                    path,
+                    timeout_secs
+                );
+                let _ = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs));
            }
        }
-    }
+    });
+
+    HttpResponse::Ok().json(JobIdResponse { job_id })
 }

 /// GET /insights/models - Local-backend models with capabilities. Returns