feat: async insight generation with SQLite job tracking

- Add insight_generation_jobs table migration and DAO
- Implement job lifecycle: create_or_get_active, complete, fail, cancel
- Refactor POST /insights/generate and POST /insights/generate/agentic to spawn a background task with a timeout and return a job_id immediately
- Add GET /insights/generation/status endpoint with job_id and file_path lookup
- Use String for enum fields in Diesel models to avoid private Bound type
- Add from_str() helpers on InsightJobStatus and InsightGenerationType
- Fix update_training_messages to return Result<usize, DbError>
- 7/7 DAO unit tests passing
This commit is contained in:
Cameron Cordes
2026-05-27 10:01:17 -04:00
parent 5a75d1a28c
commit b87eb4e690
13 changed files with 1046 additions and 174 deletions
+332 -160
View File
@@ -5,8 +5,9 @@ use serde::{Deserialize, Serialize};
use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
use crate::ai::ollama::ChatMessage;
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::ai::{ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::models::{InsightGenerationType, InsightJobStatus};
use crate::database::{ExifDao, InsightDao};
use crate::libraries;
use crate::otel::{extract_context_from_request, global_tracer};
@@ -64,6 +65,101 @@ pub struct GetPhotoInsightQuery {
pub library: Option<String>,
}
/// Query parameters accepted by `GET /insights/generation/status`.
///
/// The handler checks `job_id` first and only consults `file_path` when
/// `job_id` is absent. If neither is present the request is rejected with
/// 400 Bad Request.
#[derive(Debug, Deserialize)]
pub struct GenerationStatusQuery {
/// If provided, look up the job by id. Takes precedence over `file_path`.
#[serde(default)]
pub job_id: Option<i32>,
/// If provided (and `job_id` is not), look up the active job for this
/// file. Used when the client doesn't have a persisted job_id. The path is
/// normalized by the handler before the lookup.
#[serde(default)]
pub file_path: Option<String>,
/// Library used to scope the `file_path` lookup. Optional: when it is
/// missing or cannot be resolved, the handler falls back to the primary
/// library. Not used for `job_id` lookups.
#[serde(default)]
pub library: Option<String>,
}
/// GET /insights/generation/status - Report the state of a generation job.
///
/// Two lookup modes, evaluated in this order:
/// - `?job_id=<id>`: fetch that job; 404 when no job has that id.
/// - `?file_path=<path>[&library=<name>]`: fetch the active job for the file
///   (primary library when `library` is absent or does not resolve); 200 with
///   `"status": "idle"` when the file has no active job.
///
/// Responds 400 when neither parameter is supplied and 500 when the DAO
/// lookup itself fails.
#[get("/insights/generation/status")]
pub async fn generation_status_handler(
    _claims: Claims,
    query: web::Query<GenerationStatusQuery>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let otel_ctx = opentelemetry::Context::new();

    // Each arm either yields the job row to report or returns early with the
    // appropriate non-success / idle response.
    let found = match (query.job_id, query.file_path.as_ref()) {
        // job_id wins whenever it is present, even if file_path is also set.
        (Some(jid), _) => {
            let mut job_dao = app_state
                .insight_job_dao
                .lock()
                .expect("Unable to lock InsightJobDao");
            let lookup = job_dao.get_job_by_id(&otel_ctx, jid);
            match lookup {
                Ok(Some(row)) => row,
                Ok(None) => {
                    return HttpResponse::NotFound().json(serde_json::json!({
                        "error": format!("Job {} not found", jid)
                    }));
                }
                Err(e) => {
                    log::error!("Failed to look up job {}: {:?}", jid, e);
                    return HttpResponse::InternalServerError().json(serde_json::json!({
                        "error": "Failed to look up job"
                    }));
                }
            }
        }
        (None, Some(fp)) => {
            // An unknown or missing library name silently resolves to the
            // primary library rather than producing an error.
            let resolved = libraries::resolve_library_param(&app_state, query.library.as_deref())
                .ok()
                .flatten();
            let library = resolved.unwrap_or_else(|| app_state.primary_library());
            let normalized = normalize_path(fp);

            let mut job_dao = app_state
                .insight_job_dao
                .lock()
                .expect("Unable to lock InsightJobDao");
            let lookup = job_dao.get_active_job(&otel_ctx, library.id, &normalized);
            match lookup {
                Ok(Some(row)) => row,
                Ok(None) => {
                    return HttpResponse::Ok().json(serde_json::json!({
                        "status": "idle",
                        "message": "No running generation job for this file"
                    }));
                }
                Err(e) => {
                    log::error!("Failed to look up active job for {}: {:?}", normalized, e);
                    return HttpResponse::InternalServerError().json(serde_json::json!({
                        "error": "Failed to look up active job"
                    }));
                }
            }
        }
        (None, None) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": "Provide either job_id or file_path query parameter"
            }));
        }
    };

    // The status column is stored as a string; convert it back to the enum
    // for serialization.
    let status = InsightJobStatus::from_str(&found.status);
    HttpResponse::Ok().json(GenerationStatusResponse {
        job_id: found.id,
        status,
        started_at: found.started_at,
        completed_at: found.completed_at,
        result_insight_id: found.result_insight_id,
        error_message: found.error_message,
    })
}
#[derive(Debug, Deserialize)]
pub struct RateInsightRequest {
pub file_path: String,
@@ -76,6 +172,24 @@ pub struct ExportTrainingDataQuery {
pub approved_only: Option<bool>,
}
/// JSON body returned by the generation POST handlers once a job has been
/// created and its background task spawned.
#[derive(Debug, Serialize)]
pub struct JobIdResponse {
/// Id of the generation job; clients can pass it as `job_id` to
/// `GET /insights/generation/status`.
pub job_id: i32,
}
/// JSON body returned by `GET /insights/generation/status` when a job row is
/// found. Optional fields are omitted from the JSON entirely when `None`.
#[derive(Debug, Serialize)]
pub struct GenerationStatusResponse {
/// Id of the job row being reported.
pub job_id: i32,
/// Job state, parsed from the job row's string `status` column via
/// `InsightJobStatus::from_str`.
pub status: InsightJobStatus,
/// Copied from the job row's `started_at`.
/// NOTE(review): presumably a Unix timestamp — confirm units against the DAO.
pub started_at: i64,
/// Copied from the job row's `completed_at`; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub completed_at: Option<i64>,
/// Copied from the job row's `result_insight_id`; omitted when `None`.
/// NOTE(review): presumably the insight id passed to `complete_job` — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub result_insight_id: Option<i32>,
/// Copied from the job row's `error_message`; omitted when `None`.
/// NOTE(review): presumably the message passed to `fail_job` — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub error_message: Option<String>,
}
#[derive(Debug, Serialize)]
pub struct PhotoInsightResponse {
pub id: i32,
@@ -110,70 +224,123 @@ pub struct ServerModels {
pub default_model: String,
}
/// POST /insights/generate - Generate insight for a specific photo
/// POST /insights/generate - Generate insight for a specific photo (async)
#[post("/insights/generate")]
pub async fn generate_insight_handler(
http_request: HttpRequest,
_http_request: HttpRequest,
_claims: Claims,
request: web::Json<GeneratePhotoInsightRequest>,
insight_generator: web::Data<InsightGenerator>,
app_state: web::Data<AppState>,
) -> impl Responder {
let parent_context = extract_context_from_request(&http_request);
let tracer = global_tracer();
let mut span = tracer.start_with_context("http.insights.generate", &parent_context);
let normalized_path = normalize_path(&request.file_path);
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
if let Some(ref model) = request.model {
span.set_attribute(KeyValue::new("model", model.clone()));
}
if let Some(ref prompt) = request.system_prompt {
span.set_attribute(KeyValue::new("has_custom_prompt", true));
span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
}
if let Some(ctx) = request.num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
}
let library = app_state.primary_library();
let gen_type = InsightGenerationType::Standard;
log::info!(
"Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
"Manual insight generation triggered for photo: {} with model: {:?}",
normalized_path,
request.model,
request.system_prompt.is_some(),
request.num_ctx
request.model
);
// Generate insight with optional custom model, system prompt, and context size
let result = insight_generator
.generate_insight_for_photo_with_config(
// Cancel any running job for this file, then create a fresh one
{
let mut dao = app_state
.insight_job_dao
.lock()
.expect("Unable to lock InsightJobDao");
let _ = dao.cancel_active_job(
&opentelemetry::Context::new(),
library.id,
&normalized_path,
request.model.clone(),
request.system_prompt.clone(),
request.num_ctx,
request.temperature,
request.top_p,
request.top_k,
request.min_p,
gen_type,
);
}
let job_id = {
let mut dao = app_state
.insight_job_dao
.lock()
.expect("Unable to lock InsightJobDao");
match dao.create_or_get_active_job(
&opentelemetry::Context::new(),
library.id,
&normalized_path,
gen_type,
) {
Ok(id) => id,
Err(e) => {
log::error!("Failed to create generation job: {:?}", e);
return HttpResponse::InternalServerError().json(serde_json::json!({
"error": "Failed to create generation job"
}));
}
}
};
// Spawn background task with timeout
let generator = app_state.insight_generator.clone();
let job_dao = app_state.insight_job_dao.clone();
let lib_id = library.id;
let path = normalized_path.clone();
tokio::spawn(async move {
let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(120);
let result = tokio::time::timeout(
std::time::Duration::from_secs(timeout_secs),
generator.generate_insight_for_photo_with_config(
&path,
request.model.clone(),
request.system_prompt.clone(),
request.num_ctx,
request.temperature,
request.top_p,
request.top_k,
request.min_p,
),
)
.await;
match result {
Ok(()) => {
span.set_status(Status::Ok);
HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Insight generated successfully"
}))
let ctx = opentelemetry::Context::new();
let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");
match result {
Ok(Ok(())) => {
// Look up the stored insight id to record on the job
let mut insight_dao = generator
.insight_dao()
.lock()
.expect("Unable to lock InsightDao");
let insight_id = insight_dao
.get_insight(&ctx, &path)
.ok()
.flatten()
.map(|i| i.id);
if let Some(id) = insight_id {
let _ = dao.complete_job(&ctx, job_id, id);
} else {
let _ = dao.fail_job(&ctx, job_id, "generation returned no insight");
}
}
Ok(Err(e)) => {
log::error!("Insight generation failed for {}: {:?}", path, e);
let _ = dao.fail_job(&ctx, job_id, &format!("{:?}", e));
}
Err(_) => {
log::error!(
"Insight generation timed out for {} after {}s",
path,
timeout_secs
);
let _ = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs));
}
}
Err(e) => {
log::error!("Failed to generate insight: {:?}", e);
span.set_status(Status::error(e.to_string()));
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to generate insight: {:?}", e)
}))
}
}
});
HttpResponse::Ok().json(JobIdResponse { job_id })
}
/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
@@ -301,56 +468,60 @@ pub async fn get_all_insights_handler(
}
}
/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop
/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop (async)
#[post("/insights/generate/agentic")]
pub async fn generate_agentic_insight_handler(
http_request: HttpRequest,
_http_request: HttpRequest,
claims: Claims,
request: web::Json<GeneratePhotoInsightRequest>,
insight_generator: web::Data<InsightGenerator>,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
app_state: web::Data<AppState>,
) -> impl Responder {
// Service tokens (sub: "service:apollo") fall through to user_id=1
// — the operator convention. Mobile/web clients have a numeric sub.
let user_id = claims.sub.parse::<i32>().unwrap_or(1);
let parent_context = extract_context_from_request(&http_request);
let tracer = global_tracer();
let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);
let normalized_path = normalize_path(&request.file_path);
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
if let Some(ref model) = request.model {
span.set_attribute(KeyValue::new("model", model.clone()));
}
if let Some(ref prompt) = request.system_prompt {
span.set_attribute(KeyValue::new("has_custom_prompt", true));
span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
}
if let Some(ctx) = request.num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
}
let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(12);
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
let library = app_state.primary_library();
let gen_type = InsightGenerationType::Agentic;
log::info!(
"Agentic insight generation triggered for photo: {} with model: {:?}, max_iterations: {}",
"Agentic insight generation triggered for photo: {} with model: {:?}",
normalized_path,
request.model,
max_iterations
request.model
);
if let Some(ref b) = request.backend {
span.set_attribute(KeyValue::new("backend", b.clone()));
// Cancel any running job for this file, then create a fresh one
{
let mut dao = app_state
.insight_job_dao
.lock()
.expect("Unable to lock InsightJobDao");
let _ = dao.cancel_active_job(
&opentelemetry::Context::new(),
library.id,
&normalized_path,
gen_type,
);
}
// Resolve few-shot ids: request-provided ids take precedence when
// non-empty; otherwise fall back to the hardcoded defaults.
let job_id = {
let mut dao = app_state
.insight_job_dao
.lock()
.expect("Unable to lock InsightJobDao");
match dao.create_or_get_active_job(
&opentelemetry::Context::new(),
library.id,
&normalized_path,
gen_type,
) {
Ok(id) => id,
Err(e) => {
log::error!("Failed to create agentic generation job: {:?}", e);
return HttpResponse::InternalServerError().json(serde_json::json!({
"error": "Failed to create generation job"
}));
}
}
};
// Resolve few-shot ids for the background task
let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
_ => DEFAULT_FEWSHOT_INSIGHT_IDS
@@ -359,11 +530,14 @@ pub async fn generate_agentic_insight_handler(
.copied()
.collect(),
};
span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));
let fewshot_examples: Vec<Vec<ChatMessage>> = {
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
let mut dao = app_state
.insight_chat
.insight_dao()
.lock()
.expect("Unable to lock InsightDao");
fewshot_ids
.iter()
.filter_map(|id| {
@@ -384,90 +558,88 @@ pub async fn generate_agentic_insight_handler(
.collect()
};
let user_id = claims.sub.parse::<i32>().unwrap_or(1);
let persona_id = request
.persona_id
.clone()
.filter(|s| !s.trim().is_empty())
.unwrap_or_else(|| "default".to_string());
span.set_attribute(KeyValue::new("persona_id", persona_id.clone()));
let result = insight_generator
.generate_agentic_insight_for_photo(
&normalized_path,
request.model.clone(),
request.system_prompt.clone(),
request.num_ctx,
request.temperature,
request.top_p,
request.top_k,
request.min_p,
max_iterations,
request.backend.clone(),
fewshot_examples,
fewshot_ids,
user_id,
persona_id,
let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(12);
// Spawn background task with timeout
let generator = app_state.insight_generator.clone();
let job_dao = app_state.insight_job_dao.clone();
let lib_id = library.id;
let path = normalized_path.clone();
tokio::spawn(async move {
let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(180);
let result = tokio::time::timeout(
std::time::Duration::from_secs(timeout_secs),
generator.generate_agentic_insight_for_photo(
&path,
request.model.clone(),
request.system_prompt.clone(),
request.num_ctx,
request.temperature,
request.top_p,
request.top_k,
request.min_p,
max_iterations,
request.backend.clone(),
fewshot_examples,
fewshot_ids,
user_id,
persona_id,
),
)
.await;
match result {
Ok((prompt_eval_count, eval_count)) => {
span.set_status(Status::Ok);
// Fetch the stored insight to return it
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.get_insight(&otel_context, &normalized_path) {
Ok(Some(insight)) => {
let response = PhotoInsightResponse {
id: insight.id,
file_path: insight.file_path,
title: insight.title,
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
prompt_eval_count,
eval_count,
approved: insight.approved,
has_training_messages: insight.training_messages.is_some(),
backend: insight.backend,
};
HttpResponse::Ok().json(response)
}
Ok(None) => HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Agentic insight generated successfully"
})),
Err(e) => {
log::warn!("Insight stored but failed to retrieve: {:?}", e);
HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Agentic insight generated successfully"
}))
}
}
}
Err(e) => {
let error_msg = format!("{:?}", e);
log::error!("Failed to generate agentic insight: {}", error_msg);
span.set_status(Status::error(error_msg.clone()));
let ctx = opentelemetry::Context::new();
let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");
if error_msg.contains("tool calling not supported")
|| error_msg.contains("model not available")
{
HttpResponse::BadRequest().json(serde_json::json!({
"error": format!("Failed to generate agentic insight: {}", error_msg)
}))
} else if error_msg.contains("error parsing tool call") {
HttpResponse::BadRequest().json(serde_json::json!({
"error": "Model is not compatible with Ollama's tool calling protocol. Try a model known to support native tool calling (e.g. llama3.1, llama3.2, qwen2.5, mistral-nemo)."
}))
} else {
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to generate agentic insight: {}", error_msg)
}))
match result {
Ok(Ok(_)) => {
// Fetch the stored insight id to record on the job
let mut insight_dao = generator
.insight_dao()
.lock()
.expect("Unable to lock InsightDao");
let insight_id = insight_dao
.get_insight(&ctx, &path)
.ok()
.flatten()
.map(|i| i.id);
if let Some(id) = insight_id {
let _ = dao.complete_job(&ctx, job_id, id);
} else {
let _ = dao.fail_job(&ctx, job_id, "generation returned no insight");
}
}
Ok(Err(e)) => {
log::error!("Agentic insight generation failed for {}: {:?}", path, e);
let _ = dao.fail_job(&ctx, job_id, &format!("{:?}", e));
}
Err(_) => {
log::error!(
"Agentic insight generation timed out for {} after {}s",
path,
timeout_secs
);
let _ = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs));
}
}
}
});
HttpResponse::Ok().json(JobIdResponse { job_id })
}
/// GET /insights/models - Local-backend models with capabilities. Returns