fix: persist token counts and fix agentic insight_id mapping

- Add prompt_eval_count and eval_count columns to photo_insights so token usage reported by llama-swap/Ollama is stored and returned by the API.
- Fix the agentic generator's return value: it previously returned (prompt_eval_count, eval_count), but the handler destructured the first element as insight_id. It now returns (insight_id, prompt_eval_count, eval_count).
- Wire prompt_eval_count/eval_count from the DB into PhotoInsightResponse instead of the hardcoded None.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:47:57 -04:00
parent 449ce1fda1
commit 9654d256f4
6 changed files with 31 additions and 9 deletions
@@ -561,8 +561,8 @@ pub async fn get_insight_handler(
                summary: insight.summary,
                generated_at: insight.generated_at,
                model_version: insight.model_version,
-                prompt_eval_count: None,
-                eval_count: None,
+                prompt_eval_count: insight.prompt_eval_count,
+                eval_count: insight.eval_count,
                approved: insight.approved,
                has_training_messages: insight.training_messages.is_some(),
                backend: insight.backend,
@@ -637,8 +637,8 @@ pub async fn get_all_insights_handler(
                    summary: insight.summary,
                    generated_at: insight.generated_at,
                    model_version: insight.model_version,
-                    prompt_eval_count: None,
-                    eval_count: None,
+                    prompt_eval_count: insight.prompt_eval_count,
+                    eval_count: insight.eval_count,
                    approved: insight.approved,
                    has_training_messages: insight.training_messages.is_some(),
                    backend: insight.backend,
@@ -833,12 +833,12 @@ pub async fn generate_agentic_insight_handler(
        let mut dao = job_dao.lock().expect("Unable to lock InsightJobDao");

        match result {
-            Ok(Ok(Ok((Some(insight_id), _)))) => {
+            Ok(Ok(Ok((Some(insight_id), _, _)))) => {
                if let Err(e) = dao.complete_job(&ctx, job_id, insight_id) {
                    log::error!("Failed to mark job {} as completed: {:?}", job_id, e);
                }
            }
-            Ok(Ok(Ok((None, _)))) => {
+            Ok(Ok(Ok((None, _, _)))) => {
                if let Err(e) = dao.fail_job(&ctx, job_id, "agentic generation returned no insight")
                {
                    log::error!("Failed to mark job {} as failed: {:?}", job_id, e);
@@ -524,6 +524,8 @@ impl InsightChatService {
                min_p: req.min_p,
                system_prompt: req.system_prompt.clone(),
                persona_id: req.persona_id.clone(),
+                prompt_eval_count: None,
+                eval_count: None,
            };
            let cx = opentelemetry::Context::new();
            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
@@ -878,6 +880,8 @@ impl InsightChatService {
                min_p: req.min_p,
                system_prompt: req.system_prompt.clone(),
                persona_id: req.persona_id.clone(),
+                prompt_eval_count: None,
+                eval_count: None,
            };
            let cx = opentelemetry::Context::new();
            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
@@ -1073,6 +1077,8 @@ impl InsightChatService {
            min_p: req.min_p,
            system_prompt: req.system_prompt.clone(),
            persona_id: req.persona_id.clone(),
+            prompt_eval_count: None,
+            eval_count: None,
        };
        let stored = {
            let cx = opentelemetry::Context::new();
@@ -1439,6 +1439,8 @@ impl InsightGenerator {
            min_p,
            system_prompt: custom_system_prompt.clone(),
            persona_id: None,
+            prompt_eval_count: None,
+            eval_count: None,
        };

        let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
@@ -3808,7 +3810,7 @@ Return ONLY the summary, nothing else."#,
        fewshot_source_ids: Vec<i32>,
        user_id: i32,
        persona_id: String,
-    ) -> Result<(Option<i32>, Option<i32>)> {
+    ) -> Result<(Option<i32>, Option<i32>, Option<i32>)> {
        let tracer = global_tracer();
        let current_cx = opentelemetry::Context::current();
        let mut span = tracer.start_with_context("ai.insight.generate_agentic", &current_cx);
@@ -4190,6 +4192,8 @@ Return ONLY the summary, nothing else."#,
            min_p,
            system_prompt: custom_system_prompt.clone(),
            persona_id: Some(persona_id.clone()),
+            prompt_eval_count: last_prompt_eval_count,
+            eval_count: last_eval_count,
        };

        let stored = {
@@ -4227,7 +4231,11 @@ Return ONLY the summary, nothing else."#,
            }
        }

-        Ok((last_prompt_eval_count, last_eval_count))
+        Ok((
+            Some(stored_insight.id),
+            last_prompt_eval_count,
+            last_eval_count,
+        ))
    }

    /// Reverse geocode GPS coordinates to human-readable place names
@@ -225,6 +225,8 @@ pub struct InsertPhotoInsight {
    pub min_p: Option<f32>,
    pub system_prompt: Option<String>,
    pub persona_id: Option<String>,
+    pub prompt_eval_count: Option<i32>,
+    pub eval_count: Option<i32>,
 }

 #[derive(Serialize, Queryable, Clone, Debug)]
@@ -251,6 +253,8 @@ pub struct PhotoInsight {
    pub min_p: Option<f32>,
    pub system_prompt: Option<String>,
    pub persona_id: Option<String>,
+    pub prompt_eval_count: Option<i32>,
+    pub eval_count: Option<i32>,
 }

 // --- Libraries ---
@@ -223,6 +223,8 @@ diesel::table! {
        min_p -> Nullable<Float>,
        system_prompt -> Nullable<Text>,
        persona_id -> Nullable<Text>,
+        prompt_eval_count -> Nullable<Integer>,
+        eval_count -> Nullable<Integer>,
    }
 }