fix: audit fixes for async insight jobs + persist generation params
- Fix query param mismatch: rename GenerationStatusQuery.file_path to
path so the client's app-resume buildQuery({ path: ... }) resolves
correctly instead of always getting 400
- Remove dead _lib_id bindings from both generate handlers
- Return 202 Accepted instead of 200 from generate endpoints
- Restore OpenTelemetry span instrumentation on generate handlers
- Remove stale UNIQUE constraint from initial migration (incompatible
with plain-INSERT DAO)
- Add tests for status guard: complete_job/fail_job are no-ops when
job is already cancelled, and cancel_job by id
- Persist generation params (num_ctx, temperature, top_p, top_k, min_p,
system_prompt, persona_id) on the photo_insights table for auditing
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,7 @@
|
|||||||
-- Track async insight generation jobs so the client can poll for
|
-- Track async insight generation jobs so the client can poll for
|
||||||
-- completion after the server returns 202 Accepted. The UNIQUE
|
-- completion after the server returns 202 Accepted. Each generation
|
||||||
-- constraint on (library_id, file_path, generation_type) ensures
|
-- creates a new row; the application layer cancels prior running
|
||||||
-- idempotent inserts: if a running job already exists, the caller
|
-- jobs before inserting.
|
||||||
-- should return that job_id instead of creating a duplicate.
|
|
||||||
CREATE TABLE insight_generation_jobs (
|
CREATE TABLE insight_generation_jobs (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
library_id INTEGER NOT NULL DEFAULT 1,
|
library_id INTEGER NOT NULL DEFAULT 1,
|
||||||
@@ -12,8 +11,7 @@ CREATE TABLE insight_generation_jobs (
|
|||||||
started_at INTEGER NOT NULL,
|
started_at INTEGER NOT NULL,
|
||||||
completed_at INTEGER,
|
completed_at INTEGER,
|
||||||
result_insight_id INTEGER,
|
result_insight_id INTEGER,
|
||||||
error_message TEXT,
|
error_message TEXT
|
||||||
UNIQUE(library_id, file_path, generation_type)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
-- For the status endpoint: fast lookup by (library_id, file_path)
|
-- For the status endpoint: fast lookup by (library_id, file_path)
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
-- Restore UNIQUE constraint
|
||||||
|
|
||||||
|
CREATE TABLE insight_generation_jobs_new (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
library_id INTEGER NOT NULL DEFAULT 1,
|
||||||
|
file_path TEXT NOT NULL,
|
||||||
|
generation_type TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL DEFAULT 'running',
|
||||||
|
started_at INTEGER NOT NULL,
|
||||||
|
completed_at INTEGER,
|
||||||
|
result_insight_id INTEGER,
|
||||||
|
error_message TEXT,
|
||||||
|
UNIQUE(library_id, file_path, generation_type)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO insight_generation_jobs_new
|
||||||
|
SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
|
||||||
|
FROM insight_generation_jobs;
|
||||||
|
|
||||||
|
DROP TABLE insight_generation_jobs;
|
||||||
|
|
||||||
|
ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
|
||||||
|
|
||||||
|
CREATE INDEX idx_insight_gen_jobs_file
|
||||||
|
ON insight_generation_jobs(library_id, file_path);
|
||||||
|
|
||||||
|
CREATE INDEX idx_insight_gen_jobs_status_cleanup
|
||||||
|
ON insight_generation_jobs(status, started_at);
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
-- Remove UNIQUE(library_id, file_path, generation_type) constraint to allow
|
||||||
|
-- multiple job rows per file. This enables proper cancel/regenerate semantics:
|
||||||
|
-- a new job is always inserted on regenerate, and the old job is cancelled
|
||||||
|
-- independently. The application layer prevents concurrent running jobs.
|
||||||
|
|
||||||
|
CREATE TABLE insight_generation_jobs_new (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
library_id INTEGER NOT NULL DEFAULT 1,
|
||||||
|
file_path TEXT NOT NULL,
|
||||||
|
generation_type TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL DEFAULT 'running',
|
||||||
|
started_at INTEGER NOT NULL,
|
||||||
|
completed_at INTEGER,
|
||||||
|
result_insight_id INTEGER,
|
||||||
|
error_message TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO insight_generation_jobs_new
|
||||||
|
SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
|
||||||
|
FROM insight_generation_jobs;
|
||||||
|
|
||||||
|
DROP TABLE insight_generation_jobs;
|
||||||
|
|
||||||
|
ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
|
||||||
|
|
||||||
|
CREATE INDEX idx_insight_gen_jobs_file
|
||||||
|
ON insight_generation_jobs(library_id, file_path);
|
||||||
|
|
||||||
|
CREATE INDEX idx_insight_gen_jobs_status_cleanup
|
||||||
|
ON insight_generation_jobs(status, started_at);
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
|
||||||
|
-- without the new columns. This is only needed for rollback.
|
||||||
|
CREATE TABLE photo_insights_old AS
|
||||||
|
SELECT id, library_id, rel_path, title, summary, generated_at,
|
||||||
|
model_version, is_current, training_messages, approved,
|
||||||
|
backend, fewshot_source_ids, content_hash
|
||||||
|
FROM photo_insights;
|
||||||
|
|
||||||
|
DROP TABLE photo_insights;
|
||||||
|
|
||||||
|
ALTER TABLE photo_insights_old RENAME TO photo_insights;
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
-- Persist generation parameters on each insight row for auditing.
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN num_ctx INTEGER;
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN temperature REAL;
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN top_p REAL;
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN top_k INTEGER;
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN min_p REAL;
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN system_prompt TEXT;
|
||||||
|
ALTER TABLE photo_insights ADD COLUMN persona_id TEXT;
|
||||||
+340
-90
@@ -73,13 +73,13 @@ pub struct GenerationStatusQuery {
|
|||||||
/// If provided with `library`, look up the latest running job for this
|
/// If provided with `library`, look up the latest running job for this
|
||||||
/// file. Used when the client doesn't have a persisted job_id.
|
/// file. Used when the client doesn't have a persisted job_id.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub file_path: Option<String>,
|
pub path: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub library: Option<String>,
|
pub library: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// GET /insights/generation/status - Check status of a generation job.
|
/// GET /insights/generation/status - Check status of a generation job.
|
||||||
/// Accepts either `?job_id=<id>` or `?file_path=<path>&library=<name>`.
|
/// Accepts either `?job_id=<id>` or `?path=<path>&library=<name>`.
|
||||||
#[get("/insights/generation/status")]
|
#[get("/insights/generation/status")]
|
||||||
pub async fn generation_status_handler(
|
pub async fn generation_status_handler(
|
||||||
_claims: Claims,
|
_claims: Claims,
|
||||||
@@ -118,7 +118,7 @@ pub async fn generation_status_handler(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(ref fp) = query.file_path {
|
if let Some(ref fp) = query.path {
|
||||||
let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
@@ -156,7 +156,115 @@ pub async fn generation_status_handler(
|
|||||||
}
|
}
|
||||||
|
|
||||||
HttpResponse::BadRequest().json(serde_json::json!({
|
HttpResponse::BadRequest().json(serde_json::json!({
|
||||||
"error": "Provide either job_id or file_path query parameter"
|
"error": "Provide either job_id or path query parameter"
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct CancelGenerationRequest {
|
||||||
|
/// If provided, cancel the specific job by id.
|
||||||
|
#[serde(default)]
|
||||||
|
pub job_id: Option<i32>,
|
||||||
|
/// If provided with `library`, cancel all running jobs for this file.
|
||||||
|
#[serde(default)]
|
||||||
|
pub file_path: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub library: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// POST /insights/generation/cancel - Cancel a running generation job.
|
||||||
|
/// Accepts either `job_id` or `file_path` + optional `library` in the body.
|
||||||
|
#[post("/insights/generation/cancel")]
|
||||||
|
pub async fn cancel_generation_handler(
|
||||||
|
_claims: Claims,
|
||||||
|
request: web::Json<CancelGenerationRequest>,
|
||||||
|
app_state: web::Data<AppState>,
|
||||||
|
) -> impl Responder {
|
||||||
|
let ctx = opentelemetry::Context::new();
|
||||||
|
|
||||||
|
if let Some(jid) = request.job_id {
|
||||||
|
let mut dao = app_state
|
||||||
|
.insight_job_dao
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobDao");
|
||||||
|
match dao.cancel_job(&ctx, jid) {
|
||||||
|
Ok(true) => {
|
||||||
|
let mut handles = app_state
|
||||||
|
.insight_job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles");
|
||||||
|
if let Some(handle) = handles.remove(&jid) {
|
||||||
|
handle.abort();
|
||||||
|
}
|
||||||
|
return HttpResponse::Ok().json(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"message": format!("Job {} cancelled", jid)
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
Ok(false) => {
|
||||||
|
return HttpResponse::Ok().json(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"message": format!("Job {} was not running", jid)
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("Failed to cancel job {}: {:?}", jid, e);
|
||||||
|
return HttpResponse::InternalServerError().json(serde_json::json!({
|
||||||
|
"error": "Failed to cancel job"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ref fp) = request.file_path {
|
||||||
|
let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
|
||||||
|
.ok()
|
||||||
|
.flatten()
|
||||||
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
|
let normalized = normalize_path(fp);
|
||||||
|
|
||||||
|
// Get active job ids first, then cancel in DB, then abort tasks
|
||||||
|
let active_ids: Vec<i32> = {
|
||||||
|
let mut dao = app_state
|
||||||
|
.insight_job_dao
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobDao");
|
||||||
|
let ids = dao
|
||||||
|
.get_active_job(&ctx, library.id, &normalized)
|
||||||
|
.ok()
|
||||||
|
.flatten()
|
||||||
|
.map(|j| vec![j.id])
|
||||||
|
.unwrap_or_default();
|
||||||
|
let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized);
|
||||||
|
ids
|
||||||
|
};
|
||||||
|
|
||||||
|
if active_ids.is_empty() {
|
||||||
|
return HttpResponse::Ok().json(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"message": "No running generation job for this file"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
for jid in &active_ids {
|
||||||
|
if let Some(handle) = app_state
|
||||||
|
.insight_job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles")
|
||||||
|
.remove(jid)
|
||||||
|
{
|
||||||
|
handle.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return HttpResponse::Ok().json(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"message": format!("Cancelled {} running job(s) for {}", active_ids.len(), normalized)
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
HttpResponse::BadRequest().json(serde_json::json!({
|
||||||
|
"error": "Provide either job_id or file_path in the request body"
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -208,6 +316,20 @@ pub struct PhotoInsightResponse {
|
|||||||
/// True when the insight was generated agentically and a chat
|
/// True when the insight was generated agentically and a chat
|
||||||
/// continuation can be started against it. Drives the mobile chat button.
|
/// continuation can be started against it. Drives the mobile chat button.
|
||||||
pub has_training_messages: bool,
|
pub has_training_messages: bool,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub num_ctx: Option<i32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub temperature: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub top_p: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub top_k: Option<i32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub min_p: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub system_prompt: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub persona_id: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
@@ -227,33 +349,55 @@ pub struct ServerModels {
|
|||||||
/// POST /insights/generate - Generate insight for a specific photo (async)
|
/// POST /insights/generate - Generate insight for a specific photo (async)
|
||||||
#[post("/insights/generate")]
|
#[post("/insights/generate")]
|
||||||
pub async fn generate_insight_handler(
|
pub async fn generate_insight_handler(
|
||||||
_http_request: HttpRequest,
|
http_request: HttpRequest,
|
||||||
_claims: Claims,
|
_claims: Claims,
|
||||||
request: web::Json<GeneratePhotoInsightRequest>,
|
request: web::Json<GeneratePhotoInsightRequest>,
|
||||||
app_state: web::Data<AppState>,
|
app_state: web::Data<AppState>,
|
||||||
) -> impl Responder {
|
) -> impl Responder {
|
||||||
|
let parent_context = extract_context_from_request(&http_request);
|
||||||
|
let tracer = global_tracer();
|
||||||
|
let mut span = tracer.start_with_context("http.insights.generate", &parent_context);
|
||||||
|
|
||||||
let normalized_path = normalize_path(&request.file_path);
|
let normalized_path = normalize_path(&request.file_path);
|
||||||
let library = app_state.primary_library();
|
let library = app_state.primary_library();
|
||||||
let gen_type = InsightGenerationType::Standard;
|
let gen_type = InsightGenerationType::Standard;
|
||||||
|
|
||||||
|
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
|
||||||
|
if let Some(ref model) = request.model {
|
||||||
|
span.set_attribute(KeyValue::new("model", model.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
log::info!(
|
log::info!(
|
||||||
"Manual insight generation triggered for photo: {} with model: {:?}",
|
"Manual insight generation triggered for photo: {} with model: {:?}",
|
||||||
normalized_path,
|
normalized_path,
|
||||||
request.model
|
request.model
|
||||||
);
|
);
|
||||||
|
|
||||||
// Cancel any running job for this file, then create a fresh one
|
// Look up and abort any running job for this file, then cancel in DB
|
||||||
{
|
let old_job_ids: Vec<i32> = {
|
||||||
let mut dao = app_state
|
let mut dao = app_state
|
||||||
.insight_job_dao
|
.insight_job_dao
|
||||||
.lock()
|
.lock()
|
||||||
.expect("Unable to lock InsightJobDao");
|
.expect("Unable to lock InsightJobDao");
|
||||||
let _ = dao.cancel_active_job(
|
let ctx = opentelemetry::Context::new();
|
||||||
&opentelemetry::Context::new(),
|
let ids = dao
|
||||||
library.id,
|
.get_active_job(&ctx, library.id, &normalized_path)
|
||||||
&normalized_path,
|
.ok()
|
||||||
gen_type,
|
.flatten()
|
||||||
);
|
.map(|j| vec![j.id])
|
||||||
|
.unwrap_or_default();
|
||||||
|
let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized_path);
|
||||||
|
ids
|
||||||
|
};
|
||||||
|
for jid in &old_job_ids {
|
||||||
|
if let Some(handle) = app_state
|
||||||
|
.insight_job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles")
|
||||||
|
.remove(jid)
|
||||||
|
{
|
||||||
|
handle.abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let job_id = {
|
let job_id = {
|
||||||
@@ -261,7 +405,7 @@ pub async fn generate_insight_handler(
|
|||||||
.insight_job_dao
|
.insight_job_dao
|
||||||
.lock()
|
.lock()
|
||||||
.expect("Unable to lock InsightJobDao");
|
.expect("Unable to lock InsightJobDao");
|
||||||
match dao.create_or_get_active_job(
|
match dao.create_job(
|
||||||
&opentelemetry::Context::new(),
|
&opentelemetry::Context::new(),
|
||||||
library.id,
|
library.id,
|
||||||
&normalized_path,
|
&normalized_path,
|
||||||
@@ -270,6 +414,7 @@ pub async fn generate_insight_handler(
|
|||||||
Ok(id) => id,
|
Ok(id) => id,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::error!("Failed to create generation job: {:?}", e);
|
log::error!("Failed to create generation job: {:?}", e);
|
||||||
|
span.set_status(Status::error("Failed to create generation job"));
|
||||||
return HttpResponse::InternalServerError().json(serde_json::json!({
|
return HttpResponse::InternalServerError().json(serde_json::json!({
|
||||||
"error": "Failed to create generation job"
|
"error": "Failed to create generation job"
|
||||||
}));
|
}));
|
||||||
@@ -280,36 +425,40 @@ pub async fn generate_insight_handler(
|
|||||||
// Spawn background task with timeout
|
// Spawn background task with timeout
|
||||||
let generator = app_state.insight_generator.clone();
|
let generator = app_state.insight_generator.clone();
|
||||||
let job_dao = app_state.insight_job_dao.clone();
|
let job_dao = app_state.insight_job_dao.clone();
|
||||||
let lib_id = library.id;
|
let job_handles = app_state.insight_job_handles.clone();
|
||||||
let path = normalized_path.clone();
|
let path = normalized_path.clone();
|
||||||
|
|
||||||
tokio::spawn(async move {
|
let handle = tokio::spawn(async move {
|
||||||
let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
|
let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|v| v.parse().ok())
|
.and_then(|v| v.parse().ok())
|
||||||
.unwrap_or(120);
|
.unwrap_or(120);
|
||||||
|
|
||||||
let result = tokio::time::timeout(
|
let path_for_task = path.clone();
|
||||||
std::time::Duration::from_secs(timeout_secs),
|
let generator_for_task = generator.clone();
|
||||||
generator.generate_insight_for_photo_with_config(
|
let result = tokio::task::spawn(async move {
|
||||||
&path,
|
tokio::time::timeout(
|
||||||
request.model.clone(),
|
std::time::Duration::from_secs(timeout_secs),
|
||||||
request.system_prompt.clone(),
|
generator_for_task.generate_insight_for_photo_with_config(
|
||||||
request.num_ctx,
|
&path_for_task,
|
||||||
request.temperature,
|
request.model.clone(),
|
||||||
request.top_p,
|
request.system_prompt.clone(),
|
||||||
request.top_k,
|
request.num_ctx,
|
||||||
request.min_p,
|
request.temperature,
|
||||||
),
|
request.top_p,
|
||||||
)
|
request.top_k,
|
||||||
|
request.min_p,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let ctx = opentelemetry::Context::new();
|
let ctx = opentelemetry::Context::new();
|
||||||
let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");
|
let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(Ok(())) => {
|
Ok(Ok(Ok(()))) => {
|
||||||
// Look up the stored insight id to record on the job
|
|
||||||
let mut insight_dao = generator
|
let mut insight_dao = generator
|
||||||
.insight_dao()
|
.insight_dao()
|
||||||
.lock()
|
.lock()
|
||||||
@@ -320,27 +469,60 @@ pub async fn generate_insight_handler(
|
|||||||
.flatten()
|
.flatten()
|
||||||
.map(|i| i.id);
|
.map(|i| i.id);
|
||||||
if let Some(id) = insight_id {
|
if let Some(id) = insight_id {
|
||||||
let _ = dao.complete_job(&ctx, job_id, id);
|
if let Err(e) = dao.complete_job(&ctx, job_id, id) {
|
||||||
|
log::error!("Failed to mark job {} as completed: {:?}", job_id, e);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let _ = dao.fail_job(&ctx, job_id, "generation returned no insight");
|
if let Err(e) = dao.fail_job(&ctx, job_id, "generation returned no insight") {
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Ok(Err(e))) => {
|
||||||
log::error!("Insight generation failed for {}: {:?}", path, e);
|
log::error!("Insight generation failed for {}: {:?}", path, e);
|
||||||
let _ = dao.fail_job(&ctx, job_id, &format!("{:?}", e));
|
if let Err(err) = dao.fail_job(&ctx, job_id, &format!("{:?}", e)) {
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Ok(Err(_)) => {
|
||||||
log::error!(
|
log::error!(
|
||||||
"Insight generation timed out for {} after {}s",
|
"Insight generation timed out for {} after {}s",
|
||||||
path,
|
path,
|
||||||
timeout_secs
|
timeout_secs
|
||||||
);
|
);
|
||||||
let _ = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs));
|
if let Err(err) =
|
||||||
|
dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs))
|
||||||
|
{
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
log::error!("Insight generation task panicked for {}", path);
|
||||||
|
if let Err(err) = dao.fail_job(&ctx, job_id, "generation task panicked") {
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove handle from map on completion
|
||||||
|
let mut handles = job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles");
|
||||||
|
handles.remove(&job_id);
|
||||||
});
|
});
|
||||||
|
|
||||||
HttpResponse::Ok().json(JobIdResponse { job_id })
|
// Store abort handle
|
||||||
|
{
|
||||||
|
let mut handles = app_state
|
||||||
|
.insight_job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles");
|
||||||
|
handles.insert(job_id, handle.abort_handle());
|
||||||
|
}
|
||||||
|
|
||||||
|
span.set_attribute(KeyValue::new("job_id", job_id as i64));
|
||||||
|
span.set_status(Status::Ok);
|
||||||
|
HttpResponse::Accepted().json(JobIdResponse { job_id })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
|
/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
|
||||||
@@ -385,6 +567,13 @@ pub async fn get_insight_handler(
|
|||||||
approved: insight.approved,
|
approved: insight.approved,
|
||||||
has_training_messages: insight.training_messages.is_some(),
|
has_training_messages: insight.training_messages.is_some(),
|
||||||
backend: insight.backend,
|
backend: insight.backend,
|
||||||
|
num_ctx: insight.num_ctx,
|
||||||
|
temperature: insight.temperature,
|
||||||
|
top_p: insight.top_p,
|
||||||
|
top_k: insight.top_k,
|
||||||
|
min_p: insight.min_p,
|
||||||
|
system_prompt: insight.system_prompt,
|
||||||
|
persona_id: insight.persona_id,
|
||||||
};
|
};
|
||||||
HttpResponse::Ok().json(response)
|
HttpResponse::Ok().json(response)
|
||||||
}
|
}
|
||||||
@@ -454,6 +643,13 @@ pub async fn get_all_insights_handler(
|
|||||||
approved: insight.approved,
|
approved: insight.approved,
|
||||||
has_training_messages: insight.training_messages.is_some(),
|
has_training_messages: insight.training_messages.is_some(),
|
||||||
backend: insight.backend,
|
backend: insight.backend,
|
||||||
|
num_ctx: insight.num_ctx,
|
||||||
|
temperature: insight.temperature,
|
||||||
|
top_p: insight.top_p,
|
||||||
|
top_k: insight.top_k,
|
||||||
|
min_p: insight.min_p,
|
||||||
|
system_prompt: insight.system_prompt,
|
||||||
|
persona_id: insight.persona_id,
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@@ -471,33 +667,58 @@ pub async fn get_all_insights_handler(
|
|||||||
/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop (async)
|
/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop (async)
|
||||||
#[post("/insights/generate/agentic")]
|
#[post("/insights/generate/agentic")]
|
||||||
pub async fn generate_agentic_insight_handler(
|
pub async fn generate_agentic_insight_handler(
|
||||||
_http_request: HttpRequest,
|
http_request: HttpRequest,
|
||||||
claims: Claims,
|
claims: Claims,
|
||||||
request: web::Json<GeneratePhotoInsightRequest>,
|
request: web::Json<GeneratePhotoInsightRequest>,
|
||||||
app_state: web::Data<AppState>,
|
app_state: web::Data<AppState>,
|
||||||
) -> impl Responder {
|
) -> impl Responder {
|
||||||
|
let parent_context = extract_context_from_request(&http_request);
|
||||||
|
let tracer = global_tracer();
|
||||||
|
let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);
|
||||||
|
|
||||||
let normalized_path = normalize_path(&request.file_path);
|
let normalized_path = normalize_path(&request.file_path);
|
||||||
let library = app_state.primary_library();
|
let library = app_state.primary_library();
|
||||||
let gen_type = InsightGenerationType::Agentic;
|
let gen_type = InsightGenerationType::Agentic;
|
||||||
|
|
||||||
|
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
|
||||||
|
if let Some(ref model) = request.model {
|
||||||
|
span.set_attribute(KeyValue::new("model", model.clone()));
|
||||||
|
}
|
||||||
|
if let Some(ref backend) = request.backend {
|
||||||
|
span.set_attribute(KeyValue::new("backend", backend.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
log::info!(
|
log::info!(
|
||||||
"Agentic insight generation triggered for photo: {} with model: {:?}",
|
"Agentic insight generation triggered for photo: {} with model: {:?}",
|
||||||
normalized_path,
|
normalized_path,
|
||||||
request.model
|
request.model
|
||||||
);
|
);
|
||||||
|
|
||||||
// Cancel any running job for this file, then create a fresh one
|
// Look up and abort any running job for this file, then cancel in DB
|
||||||
{
|
let old_job_ids: Vec<i32> = {
|
||||||
let mut dao = app_state
|
let mut dao = app_state
|
||||||
.insight_job_dao
|
.insight_job_dao
|
||||||
.lock()
|
.lock()
|
||||||
.expect("Unable to lock InsightJobDao");
|
.expect("Unable to lock InsightJobDao");
|
||||||
let _ = dao.cancel_active_job(
|
let ctx = opentelemetry::Context::new();
|
||||||
&opentelemetry::Context::new(),
|
let ids = dao
|
||||||
library.id,
|
.get_active_job(&ctx, library.id, &normalized_path)
|
||||||
&normalized_path,
|
.ok()
|
||||||
gen_type,
|
.flatten()
|
||||||
);
|
.map(|j| vec![j.id])
|
||||||
|
.unwrap_or_default();
|
||||||
|
let _ = dao.cancel_active_jobs(&ctx, library.id, &normalized_path);
|
||||||
|
ids
|
||||||
|
};
|
||||||
|
for jid in &old_job_ids {
|
||||||
|
if let Some(handle) = app_state
|
||||||
|
.insight_job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles")
|
||||||
|
.remove(jid)
|
||||||
|
{
|
||||||
|
handle.abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let job_id = {
|
let job_id = {
|
||||||
@@ -505,7 +726,7 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
.insight_job_dao
|
.insight_job_dao
|
||||||
.lock()
|
.lock()
|
||||||
.expect("Unable to lock InsightJobDao");
|
.expect("Unable to lock InsightJobDao");
|
||||||
match dao.create_or_get_active_job(
|
match dao.create_job(
|
||||||
&opentelemetry::Context::new(),
|
&opentelemetry::Context::new(),
|
||||||
library.id,
|
library.id,
|
||||||
&normalized_path,
|
&normalized_path,
|
||||||
@@ -514,6 +735,7 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
Ok(id) => id,
|
Ok(id) => id,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::error!("Failed to create agentic generation job: {:?}", e);
|
log::error!("Failed to create agentic generation job: {:?}", e);
|
||||||
|
span.set_status(Status::error("Failed to create generation job"));
|
||||||
return HttpResponse::InternalServerError().json(serde_json::json!({
|
return HttpResponse::InternalServerError().json(serde_json::json!({
|
||||||
"error": "Failed to create generation job"
|
"error": "Failed to create generation job"
|
||||||
}));
|
}));
|
||||||
@@ -573,73 +795,101 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
// Spawn background task with timeout
|
// Spawn background task with timeout
|
||||||
let generator = app_state.insight_generator.clone();
|
let generator = app_state.insight_generator.clone();
|
||||||
let job_dao = app_state.insight_job_dao.clone();
|
let job_dao = app_state.insight_job_dao.clone();
|
||||||
let lib_id = library.id;
|
let job_handles = app_state.insight_job_handles.clone();
|
||||||
let path = normalized_path.clone();
|
let path = normalized_path.clone();
|
||||||
|
|
||||||
tokio::spawn(async move {
|
let handle = tokio::spawn(async move {
|
||||||
let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
|
let timeout_secs: u64 = std::env::var("INSIGHT_GENERATION_TIMEOUT_SECS")
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|v| v.parse().ok())
|
.and_then(|v| v.parse().ok())
|
||||||
.unwrap_or(180);
|
.unwrap_or(180);
|
||||||
|
|
||||||
let result = tokio::time::timeout(
|
let path_for_task = path.clone();
|
||||||
std::time::Duration::from_secs(timeout_secs),
|
let generator_for_task = generator.clone();
|
||||||
generator.generate_agentic_insight_for_photo(
|
let result = tokio::task::spawn(async move {
|
||||||
&path,
|
tokio::time::timeout(
|
||||||
request.model.clone(),
|
std::time::Duration::from_secs(timeout_secs),
|
||||||
request.system_prompt.clone(),
|
generator_for_task.generate_agentic_insight_for_photo(
|
||||||
request.num_ctx,
|
&path_for_task,
|
||||||
request.temperature,
|
request.model.clone(),
|
||||||
request.top_p,
|
request.system_prompt.clone(),
|
||||||
request.top_k,
|
request.num_ctx,
|
||||||
request.min_p,
|
request.temperature,
|
||||||
max_iterations,
|
request.top_p,
|
||||||
request.backend.clone(),
|
request.top_k,
|
||||||
fewshot_examples,
|
request.min_p,
|
||||||
fewshot_ids,
|
max_iterations,
|
||||||
user_id,
|
request.backend.clone(),
|
||||||
persona_id,
|
fewshot_examples,
|
||||||
),
|
fewshot_ids,
|
||||||
)
|
user_id,
|
||||||
|
persona_id,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let ctx = opentelemetry::Context::new();
|
let ctx = opentelemetry::Context::new();
|
||||||
let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");
|
let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(Ok(_)) => {
|
Ok(Ok(Ok((Some(insight_id), _)))) => {
|
||||||
// Fetch the stored insight id to record on the job
|
if let Err(e) = dao.complete_job(&ctx, job_id, insight_id) {
|
||||||
let mut insight_dao = generator
|
log::error!("Failed to mark job {} as completed: {:?}", job_id, e);
|
||||||
.insight_dao()
|
|
||||||
.lock()
|
|
||||||
.expect("Unable to lock InsightDao");
|
|
||||||
let insight_id = insight_dao
|
|
||||||
.get_insight(&ctx, &path)
|
|
||||||
.ok()
|
|
||||||
.flatten()
|
|
||||||
.map(|i| i.id);
|
|
||||||
if let Some(id) = insight_id {
|
|
||||||
let _ = dao.complete_job(&ctx, job_id, id);
|
|
||||||
} else {
|
|
||||||
let _ = dao.fail_job(&ctx, job_id, "generation returned no insight");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Ok(Ok((None, _)))) => {
|
||||||
log::error!("Agentic insight generation failed for {}: {:?}", path, e);
|
if let Err(e) = dao.fail_job(&ctx, job_id, "agentic generation returned no insight")
|
||||||
let _ = dao.fail_job(&ctx, job_id, &format!("{:?}", e));
|
{
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Ok(Ok(Err(e))) => {
|
||||||
|
log::error!("Agentic insight generation failed for {}: {:?}", path, e);
|
||||||
|
if let Err(err) = dao.fail_job(&ctx, job_id, &format!("{:?}", e)) {
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Err(_)) => {
|
||||||
log::error!(
|
log::error!(
|
||||||
"Agentic insight generation timed out for {} after {}s",
|
"Agentic insight generation timed out for {} after {}s",
|
||||||
path,
|
path,
|
||||||
timeout_secs
|
timeout_secs
|
||||||
);
|
);
|
||||||
let _ = dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs));
|
if let Err(err) =
|
||||||
|
dao.fail_job(&ctx, job_id, &format!("timeout after {}s", timeout_secs))
|
||||||
|
{
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
log::error!("Agentic insight generation task panicked for {}", path);
|
||||||
|
if let Err(err) = dao.fail_job(&ctx, job_id, "generation task panicked") {
|
||||||
|
log::error!("Failed to mark job {} as failed: {:?}", job_id, err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove handle from map on completion
|
||||||
|
let mut handles = job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles");
|
||||||
|
handles.remove(&job_id);
|
||||||
});
|
});
|
||||||
|
|
||||||
HttpResponse::Ok().json(JobIdResponse { job_id })
|
// Store abort handle
|
||||||
|
{
|
||||||
|
let mut handles = app_state
|
||||||
|
.insight_job_handles
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightJobHandles");
|
||||||
|
handles.insert(job_id, handle.abort_handle());
|
||||||
|
}
|
||||||
|
|
||||||
|
span.set_attribute(KeyValue::new("job_id", job_id as i64));
|
||||||
|
span.set_status(Status::Ok);
|
||||||
|
HttpResponse::Accepted().json(JobIdResponse { job_id })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// GET /insights/models - Local-backend models with capabilities. Returns
|
/// GET /insights/models - Local-backend models with capabilities. Returns
|
||||||
|
|||||||
@@ -517,6 +517,13 @@ impl InsightChatService {
|
|||||||
backend: kind.as_str().to_string(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
|
num_ctx: req.num_ctx,
|
||||||
|
temperature: req.temperature,
|
||||||
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
system_prompt: req.system_prompt.clone(),
|
||||||
|
persona_id: req.persona_id.clone(),
|
||||||
};
|
};
|
||||||
let cx = opentelemetry::Context::new();
|
let cx = opentelemetry::Context::new();
|
||||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||||
@@ -864,6 +871,13 @@ impl InsightChatService {
|
|||||||
backend: kind.as_str().to_string(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
|
num_ctx: req.num_ctx,
|
||||||
|
temperature: req.temperature,
|
||||||
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
system_prompt: req.system_prompt.clone(),
|
||||||
|
persona_id: req.persona_id.clone(),
|
||||||
};
|
};
|
||||||
let cx = opentelemetry::Context::new();
|
let cx = opentelemetry::Context::new();
|
||||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||||
@@ -1052,6 +1066,13 @@ impl InsightChatService {
|
|||||||
backend: kind.as_str().to_string(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
|
num_ctx: req.num_ctx,
|
||||||
|
temperature: req.temperature,
|
||||||
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
system_prompt: req.system_prompt.clone(),
|
||||||
|
persona_id: req.persona_id.clone(),
|
||||||
};
|
};
|
||||||
let stored = {
|
let stored = {
|
||||||
let cx = opentelemetry::Context::new();
|
let cx = opentelemetry::Context::new();
|
||||||
|
|||||||
@@ -1432,6 +1432,13 @@ impl InsightGenerator {
|
|||||||
backend: "local".to_string(),
|
backend: "local".to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
|
num_ctx,
|
||||||
|
temperature,
|
||||||
|
top_p,
|
||||||
|
top_k,
|
||||||
|
min_p,
|
||||||
|
system_prompt: custom_system_prompt.clone(),
|
||||||
|
persona_id: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||||
@@ -4176,6 +4183,13 @@ Return ONLY the summary, nothing else."#,
|
|||||||
backend: kind.as_str().to_string(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: fewshot_source_ids_json,
|
fewshot_source_ids: fewshot_source_ids_json,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
|
num_ctx,
|
||||||
|
temperature,
|
||||||
|
top_p,
|
||||||
|
top_k,
|
||||||
|
min_p,
|
||||||
|
system_prompt: custom_system_prompt.clone(),
|
||||||
|
persona_id: Some(persona_id.clone()),
|
||||||
};
|
};
|
||||||
|
|
||||||
let stored = {
|
let stored = {
|
||||||
|
|||||||
+5
-5
@@ -19,11 +19,11 @@ pub use daily_summary_job::{
|
|||||||
generate_daily_summaries, strip_summary_boilerplate,
|
generate_daily_summaries, strip_summary_boilerplate,
|
||||||
};
|
};
|
||||||
pub use handlers::{
|
pub use handlers::{
|
||||||
chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler,
|
cancel_generation_handler, chat_history_handler, chat_rewind_handler, chat_stream_handler,
|
||||||
delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
|
chat_turn_handler, delete_insight_handler, export_training_data_handler,
|
||||||
generate_insight_handler, generation_status_handler, get_all_insights_handler,
|
generate_agentic_insight_handler, generate_insight_handler, generation_status_handler,
|
||||||
get_available_models_handler, get_insight_handler, get_openrouter_models_handler,
|
get_all_insights_handler, get_available_models_handler, get_insight_handler,
|
||||||
rate_insight_handler,
|
get_openrouter_models_handler, rate_insight_handler,
|
||||||
};
|
};
|
||||||
pub use insight_generator::InsightGenerator;
|
pub use insight_generator::InsightGenerator;
|
||||||
pub use llamacpp::LlamaCppClient;
|
pub use llamacpp::LlamaCppClient;
|
||||||
|
|||||||
@@ -10,13 +10,13 @@ use crate::database::schema;
|
|||||||
use crate::database::{DbError, DbErrorKind, connect};
|
use crate::database::{DbError, DbErrorKind, connect};
|
||||||
use crate::otel::trace_db_call;
|
use crate::otel::trace_db_call;
|
||||||
|
|
||||||
/// Tracks async insight generation jobs. The idempotent insert ensures
|
/// Tracks async insight generation jobs. Each call to `create_job` inserts
|
||||||
/// concurrent callers for the same (library_id, file_path, generation_type)
|
/// a new row; the application layer prevents concurrent running jobs by
|
||||||
/// get the same job_id rather than creating duplicates.
|
/// cancelling the old one before creating a new one.
|
||||||
pub trait InsightGenerationJobDao: Sync + Send {
|
pub trait InsightGenerationJobDao: Sync + Send {
|
||||||
/// Insert a new job or return the existing running job for the same key.
|
/// Insert a new running job. Always creates a new row (no upsert).
|
||||||
/// Returns the job_id either way.
|
/// Cleans up terminal-state rows for the same key first.
|
||||||
fn create_or_get_active_job(
|
fn create_job(
|
||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
library_id: i32,
|
library_id: i32,
|
||||||
@@ -24,7 +24,9 @@ pub trait InsightGenerationJobDao: Sync + Send {
|
|||||||
generation_type: InsightGenerationType,
|
generation_type: InsightGenerationType,
|
||||||
) -> Result<i32, DbError>;
|
) -> Result<i32, DbError>;
|
||||||
|
|
||||||
/// Mark a job as completed with the resulting insight id.
|
/// Mark a job as completed with the resulting insight id. Only updates
|
||||||
|
/// if the job is still in "running" status (prevents overwriting a
|
||||||
|
/// cancelled job with a late-completing task).
|
||||||
fn complete_job(
|
fn complete_job(
|
||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
@@ -32,7 +34,8 @@ pub trait InsightGenerationJobDao: Sync + Send {
|
|||||||
insight_id: i32,
|
insight_id: i32,
|
||||||
) -> Result<(), DbError>;
|
) -> Result<(), DbError>;
|
||||||
|
|
||||||
/// Mark a job as failed with an error message.
|
/// Mark a job as failed with an error message. Only updates if the job
|
||||||
|
/// is still in "running" status.
|
||||||
fn fail_job(
|
fn fail_job(
|
||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
@@ -40,15 +43,22 @@ pub trait InsightGenerationJobDao: Sync + Send {
|
|||||||
error_message: &str,
|
error_message: &str,
|
||||||
) -> Result<(), DbError>;
|
) -> Result<(), DbError>;
|
||||||
|
|
||||||
/// Mark the active running job for a file as "cancelled". Returns true if
|
/// Cancel a specific job by id. Only updates if the job is still
|
||||||
/// a job was found and cancelled, false if no running job existed.
|
/// in "running" status. Returns true if a row was updated.
|
||||||
fn cancel_active_job(
|
fn cancel_job(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
job_id: i32,
|
||||||
|
) -> Result<bool, DbError>;
|
||||||
|
|
||||||
|
/// Cancel all running jobs for a given file. Returns the number of
|
||||||
|
/// jobs cancelled.
|
||||||
|
fn cancel_active_jobs(
|
||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
library_id: i32,
|
library_id: i32,
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
generation_type: InsightGenerationType,
|
) -> Result<usize, DbError>;
|
||||||
) -> Result<bool, DbError>;
|
|
||||||
|
|
||||||
/// Find the latest running job for a given file. Returns None if no
|
/// Find the latest running job for a given file. Returns None if no
|
||||||
/// running job exists.
|
/// running job exists.
|
||||||
@@ -65,6 +75,11 @@ pub trait InsightGenerationJobDao: Sync + Send {
|
|||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
job_id: i32,
|
job_id: i32,
|
||||||
) -> Result<Option<InsightGenerationJob>, DbError>;
|
) -> Result<Option<InsightGenerationJob>, DbError>;
|
||||||
|
|
||||||
|
/// Mark all jobs still in "running" status as "failed" with a recovery
|
||||||
|
/// error message. Returns the number of jobs recovered.
|
||||||
|
fn recover_orphaned_jobs(&mut self, context: &opentelemetry::Context)
|
||||||
|
-> Result<usize, DbError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct SqliteInsightGenerationJobDao {
|
pub struct SqliteInsightGenerationJobDao {
|
||||||
@@ -91,14 +106,14 @@ impl SqliteInsightGenerationJobDao {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
||||||
fn create_or_get_active_job(
|
fn create_job(
|
||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
library_id: i32,
|
library_id: i32,
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
generation_type: InsightGenerationType,
|
generation_type: InsightGenerationType,
|
||||||
) -> Result<i32, DbError> {
|
) -> Result<i32, DbError> {
|
||||||
trace_db_call(context, "insert", "create_or_get_active_job", |_span| {
|
trace_db_call(context, "insert", "create_job", |_span| {
|
||||||
use schema::insight_generation_jobs::dsl;
|
use schema::insight_generation_jobs::dsl;
|
||||||
|
|
||||||
let mut connection = self
|
let mut connection = self
|
||||||
@@ -106,26 +121,6 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
.lock()
|
.lock()
|
||||||
.expect("Unable to lock InsightGenerationJobDao");
|
.expect("Unable to lock InsightGenerationJobDao");
|
||||||
|
|
||||||
// Check for existing running job
|
|
||||||
let existing = dsl::insight_generation_jobs
|
|
||||||
.filter(
|
|
||||||
dsl::library_id
|
|
||||||
.eq(library_id)
|
|
||||||
.and(dsl::file_path.eq(file_path))
|
|
||||||
.and(dsl::generation_type.eq(generation_type.as_str()))
|
|
||||||
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
|
||||||
)
|
|
||||||
.select(dsl::id)
|
|
||||||
.first::<i32>(connection.deref_mut())
|
|
||||||
.optional();
|
|
||||||
|
|
||||||
match existing {
|
|
||||||
Ok(Some(job_id)) => return Ok(job_id),
|
|
||||||
Ok(None) => {}
|
|
||||||
Err(e) => return Err(anyhow::anyhow!("Failed to check existing job: {}", e)),
|
|
||||||
}
|
|
||||||
|
|
||||||
// No running job exists, insert new one (upsert on conflict)
|
|
||||||
let now = std::time::SystemTime::now()
|
let now = std::time::SystemTime::now()
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
.expect("Time went backwards")
|
.expect("Time went backwards")
|
||||||
@@ -141,22 +136,16 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
|
|
||||||
diesel::insert_into(dsl::insight_generation_jobs)
|
diesel::insert_into(dsl::insight_generation_jobs)
|
||||||
.values(&new_job)
|
.values(&new_job)
|
||||||
.on_conflict((dsl::library_id, dsl::file_path, dsl::generation_type))
|
|
||||||
.do_update()
|
|
||||||
.set((
|
|
||||||
dsl::status.eq(InsightJobStatus::Running.as_str()),
|
|
||||||
dsl::started_at.eq(now),
|
|
||||||
))
|
|
||||||
.execute(connection.deref_mut())
|
.execute(connection.deref_mut())
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to insert job: {}", e))?;
|
.map_err(|e| anyhow::anyhow!("Failed to insert job: {}", e))?;
|
||||||
|
|
||||||
// Get the job id
|
|
||||||
dsl::insight_generation_jobs
|
dsl::insight_generation_jobs
|
||||||
.filter(
|
.filter(
|
||||||
dsl::library_id
|
dsl::library_id
|
||||||
.eq(library_id)
|
.eq(library_id)
|
||||||
.and(dsl::file_path.eq(file_path))
|
.and(dsl::file_path.eq(file_path))
|
||||||
.and(dsl::generation_type.eq(generation_type.as_str())),
|
.and(dsl::generation_type.eq(generation_type.as_str()))
|
||||||
|
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
||||||
)
|
)
|
||||||
.select(dsl::id)
|
.select(dsl::id)
|
||||||
.order(dsl::id.desc())
|
.order(dsl::id.desc())
|
||||||
@@ -185,15 +174,23 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
.expect("Time went backwards")
|
.expect("Time went backwards")
|
||||||
.as_secs() as i64;
|
.as_secs() as i64;
|
||||||
|
|
||||||
diesel::update(dsl::insight_generation_jobs.filter(dsl::id.eq(job_id)))
|
// Only update if still running — prevents cancelled job from
|
||||||
.set((
|
// being overwritten by a late-completing task.
|
||||||
dsl::status.eq(InsightJobStatus::Completed.as_str()),
|
diesel::update(
|
||||||
dsl::completed_at.eq(Some(now)),
|
dsl::insight_generation_jobs.filter(
|
||||||
dsl::result_insight_id.eq(Some(insight_id)),
|
dsl::id
|
||||||
))
|
.eq(job_id)
|
||||||
.execute(connection.deref_mut())
|
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
||||||
.map(|_| ())
|
),
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to complete job: {}", e))
|
)
|
||||||
|
.set((
|
||||||
|
dsl::status.eq(InsightJobStatus::Completed.as_str()),
|
||||||
|
dsl::completed_at.eq(Some(now)),
|
||||||
|
dsl::result_insight_id.eq(Some(insight_id)),
|
||||||
|
))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map(|_| ())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to complete job: {}", e))
|
||||||
})
|
})
|
||||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||||
}
|
}
|
||||||
@@ -217,27 +214,71 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
.expect("Time went backwards")
|
.expect("Time went backwards")
|
||||||
.as_secs() as i64;
|
.as_secs() as i64;
|
||||||
|
|
||||||
diesel::update(dsl::insight_generation_jobs.filter(dsl::id.eq(job_id)))
|
// Only update if still running.
|
||||||
.set((
|
diesel::update(
|
||||||
dsl::status.eq(InsightJobStatus::Failed.as_str()),
|
dsl::insight_generation_jobs.filter(
|
||||||
dsl::completed_at.eq(Some(now)),
|
dsl::id
|
||||||
dsl::error_message.eq(Some(error_message.to_string())),
|
.eq(job_id)
|
||||||
))
|
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
||||||
.execute(connection.deref_mut())
|
),
|
||||||
.map(|_| ())
|
)
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to fail job: {}", e))
|
.set((
|
||||||
|
dsl::status.eq(InsightJobStatus::Failed.as_str()),
|
||||||
|
dsl::completed_at.eq(Some(now)),
|
||||||
|
dsl::error_message.eq(Some(error_message.to_string())),
|
||||||
|
))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map(|_| ())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to fail job: {}", e))
|
||||||
})
|
})
|
||||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cancel_active_job(
|
fn cancel_job(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
job_id: i32,
|
||||||
|
) -> Result<bool, DbError> {
|
||||||
|
trace_db_call(context, "update", "cancel_job", |_span| {
|
||||||
|
use schema::insight_generation_jobs::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightGenerationJobDao");
|
||||||
|
|
||||||
|
let now = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.expect("Time went backwards")
|
||||||
|
.as_secs() as i64;
|
||||||
|
|
||||||
|
let rows = diesel::update(
|
||||||
|
dsl::insight_generation_jobs.filter(
|
||||||
|
dsl::id
|
||||||
|
.eq(job_id)
|
||||||
|
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.set((
|
||||||
|
dsl::status.eq(InsightJobStatus::Cancelled.as_str()),
|
||||||
|
dsl::completed_at.eq(Some(now)),
|
||||||
|
dsl::error_message.eq(Some("cancelled by user".to_string())),
|
||||||
|
))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to cancel job: {}", e))?;
|
||||||
|
|
||||||
|
Ok(rows > 0)
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cancel_active_jobs(
|
||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
library_id: i32,
|
library_id: i32,
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
generation_type: InsightGenerationType,
|
) -> Result<usize, DbError> {
|
||||||
) -> Result<bool, DbError> {
|
trace_db_call(context, "update", "cancel_active_jobs", |_span| {
|
||||||
trace_db_call(context, "update", "cancel_active_job", |_span| {
|
|
||||||
use schema::insight_generation_jobs::dsl;
|
use schema::insight_generation_jobs::dsl;
|
||||||
|
|
||||||
let mut connection = self
|
let mut connection = self
|
||||||
@@ -255,7 +296,6 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
dsl::library_id
|
dsl::library_id
|
||||||
.eq(library_id)
|
.eq(library_id)
|
||||||
.and(dsl::file_path.eq(file_path))
|
.and(dsl::file_path.eq(file_path))
|
||||||
.and(dsl::generation_type.eq(generation_type.as_str()))
|
|
||||||
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
@@ -265,9 +305,9 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
dsl::error_message.eq(Some("cancelled by newer request".to_string())),
|
dsl::error_message.eq(Some("cancelled by newer request".to_string())),
|
||||||
))
|
))
|
||||||
.execute(connection.deref_mut())
|
.execute(connection.deref_mut())
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to cancel job: {}", e))?;
|
.map_err(|e| anyhow::anyhow!("Failed to cancel active jobs: {}", e))?;
|
||||||
|
|
||||||
Ok(rows > 0)
|
Ok(rows)
|
||||||
})
|
})
|
||||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||||
}
|
}
|
||||||
@@ -322,6 +362,40 @@ impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
|
|||||||
})
|
})
|
||||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn recover_orphaned_jobs(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
) -> Result<usize, DbError> {
|
||||||
|
trace_db_call(context, "update", "recover_orphaned_jobs", |_span| {
|
||||||
|
use schema::insight_generation_jobs::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock InsightGenerationJobDao");
|
||||||
|
|
||||||
|
let now = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.expect("Time went backwards")
|
||||||
|
.as_secs() as i64;
|
||||||
|
|
||||||
|
let rows = diesel::update(
|
||||||
|
dsl::insight_generation_jobs
|
||||||
|
.filter(dsl::status.eq(InsightJobStatus::Running.as_str())),
|
||||||
|
)
|
||||||
|
.set((
|
||||||
|
dsl::status.eq(InsightJobStatus::Failed.as_str()),
|
||||||
|
dsl::completed_at.eq(Some(now)),
|
||||||
|
dsl::error_message.eq(Some("server crashed while running".to_string())),
|
||||||
|
))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to recover orphaned jobs: {}", e))?;
|
||||||
|
|
||||||
|
Ok(rows)
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -345,22 +419,19 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn create_job_idempotent() {
|
fn create_job_inserts_new_row() {
|
||||||
let mut dao = setup_dao();
|
let mut dao = setup_dao();
|
||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id_1 = dao
|
let job_id_1 = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let job_id_2 = dao
|
let job_id_2 = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_ne!(job_id_1, job_id_2, "each create_job call inserts a new row");
|
||||||
job_id_1, job_id_2,
|
|
||||||
"idempotent insert should return same job_id"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -369,7 +440,7 @@ mod tests {
|
|||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id = dao
|
let job_id = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
dao.complete_job(&ctx, job_id, 42).unwrap();
|
dao.complete_job(&ctx, job_id, 42).unwrap();
|
||||||
@@ -386,7 +457,7 @@ mod tests {
|
|||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id = dao
|
let job_id = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
dao.fail_job(&ctx, job_id, "model timeout").unwrap();
|
dao.fail_job(&ctx, job_id, "model timeout").unwrap();
|
||||||
@@ -403,7 +474,7 @@ mod tests {
|
|||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id = dao
|
let job_id = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Job is running
|
// Job is running
|
||||||
@@ -420,18 +491,16 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn cancel_active_job() {
|
fn cancel_active_jobs() {
|
||||||
let mut dao = setup_dao();
|
let mut dao = setup_dao();
|
||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id = dao
|
let job_id = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let cancelled = dao
|
let cancelled = dao.cancel_active_jobs(&ctx, 1, "photos/test.jpg").unwrap();
|
||||||
.cancel_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
assert_eq!(cancelled, 1, "should cancel 1 running job");
|
||||||
.unwrap();
|
|
||||||
assert!(cancelled, "should cancel existing running job");
|
|
||||||
|
|
||||||
// Job is no longer active
|
// Job is no longer active
|
||||||
let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
|
let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
|
||||||
@@ -441,11 +510,9 @@ mod tests {
|
|||||||
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
|
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
|
||||||
assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
|
assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
|
||||||
|
|
||||||
// Cancelling again returns false (nothing to cancel)
|
// Cancelling again returns 0 (nothing to cancel)
|
||||||
let cancelled2 = dao
|
let cancelled2 = dao.cancel_active_jobs(&ctx, 1, "photos/test.jpg").unwrap();
|
||||||
.cancel_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
assert_eq!(cancelled2, 0, "should return 0 when no running job");
|
||||||
.unwrap();
|
|
||||||
assert!(!cancelled2, "should return false when no running job");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -454,11 +521,11 @@ mod tests {
|
|||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id_1 = dao
|
let job_id_1 = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let job_id_2 = dao
|
let job_id_2 = dao
|
||||||
.create_or_get_active_job(&ctx, 2, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 2, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_ne!(
|
assert_ne!(
|
||||||
@@ -485,7 +552,7 @@ mod tests {
|
|||||||
let ctx = ctx();
|
let ctx = ctx();
|
||||||
|
|
||||||
let job_id = dao
|
let job_id = dao
|
||||||
.create_or_get_active_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Find while running
|
// Find while running
|
||||||
@@ -500,4 +567,115 @@ mod tests {
|
|||||||
assert_eq!(job.status, InsightJobStatus::Completed.as_str());
|
assert_eq!(job.status, InsightJobStatus::Completed.as_str());
|
||||||
assert_eq!(job.result_insight_id, Some(99));
|
assert_eq!(job.result_insight_id, Some(99));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn recover_orphaned_jobs() {
|
||||||
|
let mut dao = setup_dao();
|
||||||
|
let ctx = ctx();
|
||||||
|
|
||||||
|
// Create two running jobs
|
||||||
|
let job_id_1 = dao
|
||||||
|
.create_job(&ctx, 1, "photos/a.jpg", InsightGenerationType::Standard)
|
||||||
|
.unwrap();
|
||||||
|
let job_id_2 = dao
|
||||||
|
.create_job(&ctx, 1, "photos/b.jpg", InsightGenerationType::Agentic)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Complete one
|
||||||
|
dao.complete_job(&ctx, job_id_1, 1).unwrap();
|
||||||
|
|
||||||
|
// Recover should only affect the running job
|
||||||
|
let recovered = dao.recover_orphaned_jobs(&ctx).unwrap();
|
||||||
|
assert_eq!(recovered, 1, "should recover exactly 1 running job");
|
||||||
|
|
||||||
|
// job_id_1 is still completed
|
||||||
|
let job1 = dao.get_job_by_id(&ctx, job_id_1).unwrap().unwrap();
|
||||||
|
assert_eq!(job1.status, InsightJobStatus::Completed.as_str());
|
||||||
|
|
||||||
|
// job_id_2 is now failed with recovery message
|
||||||
|
let job2 = dao.get_job_by_id(&ctx, job_id_2).unwrap().unwrap();
|
||||||
|
assert_eq!(job2.status, InsightJobStatus::Failed.as_str());
|
||||||
|
assert_eq!(
|
||||||
|
job2.error_message.as_deref(),
|
||||||
|
Some("server crashed while running")
|
||||||
|
);
|
||||||
|
|
||||||
|
// Second recovery is a no-op
|
||||||
|
let recovered2 = dao.recover_orphaned_jobs(&ctx).unwrap();
|
||||||
|
assert_eq!(recovered2, 0, "no running jobs remain");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn complete_job_noop_when_cancelled() {
|
||||||
|
let mut dao = setup_dao();
|
||||||
|
let ctx = ctx();
|
||||||
|
|
||||||
|
let job_id = dao
|
||||||
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
dao.cancel_job(&ctx, job_id).unwrap();
|
||||||
|
|
||||||
|
// Late-completing task tries to mark as completed — should be a no-op
|
||||||
|
dao.complete_job(&ctx, job_id, 42).unwrap();
|
||||||
|
|
||||||
|
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
job.status,
|
||||||
|
InsightJobStatus::Cancelled.as_str(),
|
||||||
|
"cancelled status must not be overwritten by late complete"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
job.result_insight_id, None,
|
||||||
|
"insight_id must stay None when complete is a no-op"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fail_job_noop_when_cancelled() {
|
||||||
|
let mut dao = setup_dao();
|
||||||
|
let ctx = ctx();
|
||||||
|
|
||||||
|
let job_id = dao
|
||||||
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
dao.cancel_job(&ctx, job_id).unwrap();
|
||||||
|
|
||||||
|
// Late-failing task tries to mark as failed — should be a no-op
|
||||||
|
dao.fail_job(&ctx, job_id, "timeout after 120s").unwrap();
|
||||||
|
|
||||||
|
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
job.status,
|
||||||
|
InsightJobStatus::Cancelled.as_str(),
|
||||||
|
"cancelled status must not be overwritten by late fail"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
job.error_message.as_deref(),
|
||||||
|
Some("cancelled by user"),
|
||||||
|
"error_message must reflect the cancel, not the late fail"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cancel_job_by_id() {
|
||||||
|
let mut dao = setup_dao();
|
||||||
|
let ctx = ctx();
|
||||||
|
|
||||||
|
let job_id = dao
|
||||||
|
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let cancelled = dao.cancel_job(&ctx, job_id).unwrap();
|
||||||
|
assert!(cancelled, "should cancel running job");
|
||||||
|
|
||||||
|
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
|
||||||
|
assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
|
||||||
|
assert!(job.completed_at.is_some());
|
||||||
|
|
||||||
|
// Cancelling again is a no-op
|
||||||
|
let cancelled2 = dao.cancel_job(&ctx, job_id).unwrap();
|
||||||
|
assert!(!cancelled2, "already cancelled job should return false");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+21
-1
@@ -38,7 +38,13 @@ impl InsightJobStatus {
|
|||||||
"completed" => Self::Completed,
|
"completed" => Self::Completed,
|
||||||
"failed" => Self::Failed,
|
"failed" => Self::Failed,
|
||||||
"cancelled" => Self::Cancelled,
|
"cancelled" => Self::Cancelled,
|
||||||
_ => Self::Failed,
|
other => {
|
||||||
|
log::warn!(
|
||||||
|
"Unknown InsightJobStatus value: {:?}, treating as failed",
|
||||||
|
other
|
||||||
|
);
|
||||||
|
Self::Failed
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -224,6 +230,13 @@ pub struct InsertPhotoInsight {
|
|||||||
/// inserted before the hash is available stay null and the
|
/// inserted before the hash is available stay null and the
|
||||||
/// reconciliation pass backfills them.
|
/// reconciliation pass backfills them.
|
||||||
pub content_hash: Option<String>,
|
pub content_hash: Option<String>,
|
||||||
|
pub num_ctx: Option<i32>,
|
||||||
|
pub temperature: Option<f32>,
|
||||||
|
pub top_p: Option<f32>,
|
||||||
|
pub top_k: Option<i32>,
|
||||||
|
pub min_p: Option<f32>,
|
||||||
|
pub system_prompt: Option<String>,
|
||||||
|
pub persona_id: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||||
@@ -243,6 +256,13 @@ pub struct PhotoInsight {
|
|||||||
pub backend: String,
|
pub backend: String,
|
||||||
pub fewshot_source_ids: Option<String>,
|
pub fewshot_source_ids: Option<String>,
|
||||||
pub content_hash: Option<String>,
|
pub content_hash: Option<String>,
|
||||||
|
pub num_ctx: Option<i32>,
|
||||||
|
pub temperature: Option<f32>,
|
||||||
|
pub top_p: Option<f32>,
|
||||||
|
pub top_k: Option<i32>,
|
||||||
|
pub min_p: Option<f32>,
|
||||||
|
pub system_prompt: Option<String>,
|
||||||
|
pub persona_id: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Libraries ---
|
// --- Libraries ---
|
||||||
|
|||||||
@@ -216,6 +216,13 @@ diesel::table! {
|
|||||||
backend -> Text,
|
backend -> Text,
|
||||||
fewshot_source_ids -> Nullable<Text>,
|
fewshot_source_ids -> Nullable<Text>,
|
||||||
content_hash -> Nullable<Text>,
|
content_hash -> Nullable<Text>,
|
||||||
|
num_ctx -> Nullable<Integer>,
|
||||||
|
temperature -> Nullable<Float>,
|
||||||
|
top_p -> Nullable<Float>,
|
||||||
|
top_k -> Nullable<Integer>,
|
||||||
|
min_p -> Nullable<Float>,
|
||||||
|
system_prompt -> Nullable<Text>,
|
||||||
|
persona_id -> Nullable<Text>,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+17
@@ -75,6 +75,22 @@ fn main() -> std::io::Result<()> {
|
|||||||
|
|
||||||
run_migrations(&mut connect()).expect("Failed to run migrations");
|
run_migrations(&mut connect()).expect("Failed to run migrations");
|
||||||
|
|
||||||
|
// Recover orphaned insight generation jobs from a previous crash.
|
||||||
|
{
|
||||||
|
use crate::database::{InsightGenerationJobDao, SqliteInsightGenerationJobDao};
|
||||||
|
let mut dao = SqliteInsightGenerationJobDao::new();
|
||||||
|
let ctx = opentelemetry::Context::new();
|
||||||
|
match dao.recover_orphaned_jobs(&ctx) {
|
||||||
|
Ok(n) if n > 0 => {
|
||||||
|
info!("Recovered {} orphaned insight generation jobs", n);
|
||||||
|
}
|
||||||
|
Ok(_) => {}
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Failed to recover orphaned insight jobs: {:?}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// One-shot retirement of the pre-content-hash HLS layout. Idempotent
|
// One-shot retirement of the pre-content-hash HLS layout. Idempotent
|
||||||
// — a second boot finds nothing and reports zero deletions, so it's
|
// — a second boot finds nothing and reports zero deletions, so it's
|
||||||
// safe to leave wired in until the module is removed in a later
|
// safe to leave wired in until the module is removed in a later
|
||||||
@@ -309,6 +325,7 @@ fn main() -> std::io::Result<()> {
|
|||||||
.service(ai::generate_insight_handler)
|
.service(ai::generate_insight_handler)
|
||||||
.service(ai::generate_agentic_insight_handler)
|
.service(ai::generate_agentic_insight_handler)
|
||||||
.service(ai::generation_status_handler)
|
.service(ai::generation_status_handler)
|
||||||
|
.service(ai::cancel_generation_handler)
|
||||||
.service(ai::get_insight_handler)
|
.service(ai::get_insight_handler)
|
||||||
.service(ai::delete_insight_handler)
|
.service(ai::delete_insight_handler)
|
||||||
.service(ai::get_all_insights_handler)
|
.service(ai::get_all_insights_handler)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ use crate::video::actors::{
|
|||||||
PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager,
|
PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager,
|
||||||
};
|
};
|
||||||
use actix::{Actor, Addr};
|
use actix::{Actor, Addr};
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::sync::{Arc, Mutex, RwLock};
|
use std::sync::{Arc, Mutex, RwLock};
|
||||||
|
|
||||||
@@ -88,6 +89,9 @@ pub struct AppState {
|
|||||||
pub clip_client: ClipClient,
|
pub clip_client: ClipClient,
|
||||||
/// Tracks async insight generation jobs (spawned by generate endpoints).
|
/// Tracks async insight generation jobs (spawned by generate endpoints).
|
||||||
pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
||||||
|
/// In-memory map from job_id → tokio AbortHandle for running tasks.
|
||||||
|
/// Used to abort server-side tasks on cancel or regenerate.
|
||||||
|
pub insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AppState {
|
impl AppState {
|
||||||
@@ -127,6 +131,7 @@ impl AppState {
|
|||||||
face_client: FaceClient,
|
face_client: FaceClient,
|
||||||
clip_client: ClipClient,
|
clip_client: ClipClient,
|
||||||
insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
||||||
|
insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
assert!(
|
assert!(
|
||||||
!libraries_vec.is_empty(),
|
!libraries_vec.is_empty(),
|
||||||
@@ -169,6 +174,7 @@ impl AppState {
|
|||||||
face_client,
|
face_client,
|
||||||
clip_client,
|
clip_client,
|
||||||
insight_job_dao,
|
insight_job_dao,
|
||||||
|
insight_job_handles,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -260,6 +266,8 @@ impl Default for AppState {
|
|||||||
// Initialize insight generation job DAO (async generation tracking)
|
// Initialize insight generation job DAO (async generation tracking)
|
||||||
let insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>> =
|
let insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>> =
|
||||||
Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new())));
|
Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new())));
|
||||||
|
let insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>> =
|
||||||
|
Arc::new(Mutex::new(HashMap::new()));
|
||||||
|
|
||||||
// Load base path and ensure the primary library row reflects it.
|
// Load base path and ensure the primary library row reflects it.
|
||||||
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
|
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
|
||||||
@@ -328,6 +336,7 @@ impl Default for AppState {
|
|||||||
face_client,
|
face_client,
|
||||||
clip_client,
|
clip_client,
|
||||||
insight_job_dao,
|
insight_job_dao,
|
||||||
|
insight_job_handles,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -513,6 +522,7 @@ impl AppState {
|
|||||||
FaceClient::new(None), // disabled in test
|
FaceClient::new(None), // disabled in test
|
||||||
ClipClient::new(None), // disabled in test
|
ClipClient::new(None), // disabled in test
|
||||||
Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
|
Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
|
||||||
|
Arc::new(Mutex::new(HashMap::new())), // placeholder for test
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user