From 9654d256f4a674d71d17fc0f11969f7ddc32df95 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Wed, 27 May 2026 13:47:57 -0400 Subject: [PATCH] fix: persist token counts and fix agentic insight_id mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add prompt_eval_count and eval_count columns to photo_insights so token usage from llama-swap/Ollama is stored and returned by the API - Fix agentic generator return: was (prompt_eval_count, eval_count), handler destructured first element as insight_id — now returns (insight_id, prompt_eval_count, eval_count) - Wire prompt_eval_count/eval_count from DB into PhotoInsightResponse instead of hardcoded None Co-Authored-By: Claude Opus 4.7 (1M context) --- .../up.sql | 4 +++- src/ai/handlers.rs | 12 ++++++------ src/ai/insight_chat.rs | 6 ++++++ src/ai/insight_generator.rs | 12 ++++++++++-- src/database/models.rs | 4 ++++ src/database/schema.rs | 2 ++ 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/migrations/2026-05-27-000002_add_insight_generation_params/up.sql b/migrations/2026-05-27-000002_add_insight_generation_params/up.sql index 1313fde..c44f568 100644 --- a/migrations/2026-05-27-000002_add_insight_generation_params/up.sql +++ b/migrations/2026-05-27-000002_add_insight_generation_params/up.sql @@ -1,4 +1,4 @@ --- Persist generation parameters on each insight row for auditing. +-- Persist generation parameters and token usage on each insight row. 
ALTER TABLE photo_insights ADD COLUMN num_ctx INTEGER; ALTER TABLE photo_insights ADD COLUMN temperature REAL; ALTER TABLE photo_insights ADD COLUMN top_p REAL; @@ -6,3 +6,5 @@ ALTER TABLE photo_insights ADD COLUMN top_k INTEGER; ALTER TABLE photo_insights ADD COLUMN min_p REAL; ALTER TABLE photo_insights ADD COLUMN system_prompt TEXT; ALTER TABLE photo_insights ADD COLUMN persona_id TEXT; +ALTER TABLE photo_insights ADD COLUMN prompt_eval_count INTEGER; +ALTER TABLE photo_insights ADD COLUMN eval_count INTEGER; diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index c902432..bb599c2 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -561,8 +561,8 @@ pub async fn get_insight_handler( summary: insight.summary, generated_at: insight.generated_at, model_version: insight.model_version, - prompt_eval_count: None, - eval_count: None, + prompt_eval_count: insight.prompt_eval_count, + eval_count: insight.eval_count, approved: insight.approved, has_training_messages: insight.training_messages.is_some(), backend: insight.backend, @@ -637,8 +637,8 @@ pub async fn get_all_insights_handler( summary: insight.summary, generated_at: insight.generated_at, model_version: insight.model_version, - prompt_eval_count: None, - eval_count: None, + prompt_eval_count: insight.prompt_eval_count, + eval_count: insight.eval_count, approved: insight.approved, has_training_messages: insight.training_messages.is_some(), backend: insight.backend, @@ -833,12 +833,12 @@ pub async fn generate_agentic_insight_handler( let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao"); match result { - Ok(Ok(Ok((Some(insight_id), _)))) => { + Ok(Ok(Ok((Some(insight_id), _, _)))) => { if let Err(e) = dao.complete_job(&ctx, job_id, insight_id) { log::error!("Failed to mark job {} as completed: {:?}", job_id, e); } } - Ok(Ok(Ok((None, _)))) => { + Ok(Ok(Ok((None, _, _)))) => { if let Err(e) = dao.fail_job(&ctx, job_id, "agentic generation returned no insight") { log::error!("Failed to mark 
job {} as failed: {:?}", job_id, e); diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 2131efc..d2b153b 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -524,6 +524,8 @@ impl InsightChatService { min_p: req.min_p, system_prompt: req.system_prompt.clone(), persona_id: req.persona_id.clone(), + prompt_eval_count: None, + eval_count: None, }; let cx = opentelemetry::Context::new(); let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); @@ -878,6 +880,8 @@ impl InsightChatService { min_p: req.min_p, system_prompt: req.system_prompt.clone(), persona_id: req.persona_id.clone(), + prompt_eval_count: None, + eval_count: None, }; let cx = opentelemetry::Context::new(); let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); @@ -1073,6 +1077,8 @@ impl InsightChatService { min_p: req.min_p, system_prompt: req.system_prompt.clone(), persona_id: req.persona_id.clone(), + prompt_eval_count: None, + eval_count: None, }; let stored = { let cx = opentelemetry::Context::new(); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index d5542dc..4f15ef4 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1439,6 +1439,8 @@ impl InsightGenerator { min_p, system_prompt: custom_system_prompt.clone(), persona_id: None, + prompt_eval_count: None, + eval_count: None, }; let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); @@ -3808,7 +3810,7 @@ Return ONLY the summary, nothing else."#, fewshot_source_ids: Vec, user_id: i32, persona_id: String, - ) -> Result<(Option, Option)> { + ) -> Result<(Option, Option, Option)> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.insight.generate_agentic", &current_cx); @@ -4190,6 +4192,8 @@ Return ONLY the summary, nothing else."#, min_p, system_prompt: custom_system_prompt.clone(), persona_id: Some(persona_id.clone()), + prompt_eval_count:
last_prompt_eval_count, + eval_count: last_eval_count, }; let stored = { @@ -4227,7 +4231,11 @@ Return ONLY the summary, nothing else."#, } } - Ok((last_prompt_eval_count, last_eval_count)) + Ok(( + Some(stored_insight.id), + last_prompt_eval_count, + last_eval_count, + )) } /// Reverse geocode GPS coordinates to human-readable place names diff --git a/src/database/models.rs b/src/database/models.rs index 75e4e60..62274e2 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -225,6 +225,8 @@ pub struct InsertPhotoInsight { pub min_p: Option, pub system_prompt: Option, pub persona_id: Option, + pub prompt_eval_count: Option, + pub eval_count: Option, } #[derive(Serialize, Queryable, Clone, Debug)] @@ -251,6 +253,8 @@ pub struct PhotoInsight { pub min_p: Option, pub system_prompt: Option, pub persona_id: Option, + pub prompt_eval_count: Option, + pub eval_count: Option, } // --- Libraries --- diff --git a/src/database/schema.rs b/src/database/schema.rs index 9f7efe5..bf5791b 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -223,6 +223,8 @@ diesel::table! { min_p -> Nullable, system_prompt -> Nullable, persona_id -> Nullable, + prompt_eval_count -> Nullable, + eval_count -> Nullable, } }