Pass image as additional Insight context

@@ -1,9 +1,12 @@
 use anyhow::Result;
+use base64::Engine as _;
 use chrono::{DateTime, Utc};
+use image::ImageFormat;
 use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
 use serde::Deserialize;
 use std::fs::File;
+use std::io::Cursor;
 use std::sync::{Arc, Mutex};
 
 use crate::ai::ollama::OllamaClient;
@@ -92,6 +95,51 @@ impl InsightGenerator {
         None
     }
 
+    /// Load image file, resize it, and encode as base64 for vision models
+    /// Resizes to max 1024px on longest edge to reduce context usage
+    fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
+        use image::imageops::FilterType;
+        use std::path::Path;
+
+        let full_path = Path::new(&self.base_path).join(file_path);
+
+        log::debug!("Loading image for vision model: {:?}", full_path);
+
+        // Open and decode the image
+        let img = image::open(&full_path)
+            .map_err(|e| anyhow::anyhow!("Failed to open image file: {}", e))?;
+
+        let (original_width, original_height) = (img.width(), img.height());
+
+        // Resize to max 1024px on longest edge
+        let resized = img.resize(1024, 1024, FilterType::Lanczos3);
+
+        log::debug!(
+            "Resized image from {}x{} to {}x{}",
+            original_width,
+            original_height,
+            resized.width(),
+            resized.height()
+        );
+
+        // Encode as JPEG (default encoder quality)
+        let mut buffer = Vec::new();
+        let mut cursor = Cursor::new(&mut buffer);
+        resized
+            .write_to(&mut cursor, ImageFormat::Jpeg)
+            .map_err(|e| anyhow::anyhow!("Failed to encode image as JPEG: {}", e))?;
+
+        let base64_string = base64::engine::general_purpose::STANDARD.encode(&buffer);
+
+        log::debug!(
+            "Encoded image as base64 ({} bytes -> {} chars)",
+            buffer.len(),
+            base64_string.len()
+        );
+
+        Ok(base64_string)
+    }
+
     /// Find relevant messages using RAG, excluding recent messages (>30 days ago)
     /// This prevents RAG from returning messages already in the immediate time window
     async fn find_relevant_messages_rag_historical(
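
Note: vision-capable Ollama models (llava and similar) accept images as base64 strings in the `images` array of a /api/generate request, which is why the file is re-encoded here instead of being passed as a path. A minimal sketch of such a payload, assuming serde_json is available; the model name and prompt are invented, and only the `images` field name comes from the public Ollama API:

use serde_json::json;

fn main() {
    // Stand-in for the string returned by load_image_as_base64.
    let image_base64 = "aGVsbG8=";

    // Illustrative /api/generate body: `images` carries one base64
    // string per attached image.
    let body = json!({
        "model": "llava:13b",
        "prompt": "Describe this photo.",
        "images": [image_base64],
        "stream": false
    });
    println!("{body}");
}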
@@ -564,10 +612,23 @@ impl InsightGenerator {
     }
 
     /// Generate AI insight for a single photo with optional custom model
+    /// (Deprecated: Use generate_insight_for_photo_with_config instead)
     pub async fn generate_insight_for_photo_with_model(
         &self,
         file_path: &str,
         custom_model: Option<String>,
     ) -> Result<()> {
+        self.generate_insight_for_photo_with_config(file_path, custom_model, None, None)
+            .await
+    }
+
+    /// Generate AI insight for a single photo with custom configuration
+    pub async fn generate_insight_for_photo_with_config(
+        &self,
+        file_path: &str,
+        custom_model: Option<String>,
+        custom_system_prompt: Option<String>,
+        num_ctx: Option<i32>,
+    ) -> Result<()> {
         let tracer = global_tracer();
         let current_cx = opentelemetry::Context::current();
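
Note: the old entry point is now a thin delegating wrapper, so existing callers keep working while new callers can thread a system prompt and context size through. A hypothetical call site, with every argument value invented for illustration:

// Sketch only: `generator` is an InsightGenerator from this crate.
async fn example(generator: &InsightGenerator) -> anyhow::Result<()> {
    generator
        .generate_insight_for_photo_with_config(
            "2024/06/beach.jpg",                    // file_path
            Some("llava:13b".to_string()),          // custom_model
            Some("Be brief and warm.".to_string()), // custom_system_prompt
            Some(8192),                             // num_ctx
        )
        .await
}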
@@ -580,7 +641,7 @@ impl InsightGenerator {
         span.set_attribute(KeyValue::new("file_path", file_path.clone()));
 
         // Create custom Ollama client if model is specified
-        let ollama_client = if let Some(model) = custom_model {
+        let mut ollama_client = if let Some(model) = custom_model {
             log::info!("Using custom model: {}", model);
             span.set_attribute(KeyValue::new("custom_model", model.clone()));
             OllamaClient::new(
@@ -594,6 +655,13 @@ impl InsightGenerator {
             self.ollama.clone()
         };
 
+        // Set context size if specified
+        if let Some(ctx) = num_ctx {
+            log::info!("Using custom context size: {}", ctx);
+            span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
+            ollama_client.set_num_ctx(Some(ctx));
+        }
+
         // Create context with this span for child operations
         let insight_cx = current_cx.with_span(span);
 
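
Note: `num_ctx` is a standard key in Ollama's per-request `options` object and sets the model's context window in tokens, so set_num_ctx presumably folds the value into the outgoing request roughly like this (sketch assuming serde_json; model and prompt invented):

use serde_json::json;

fn main() {
    let num_ctx: Option<i32> = Some(8192); // as configured above

    let mut body = json!({
        "model": "llava:13b",
        "prompt": "Describe this photo."
    });
    // `options.num_ctx` is the documented Ollama knob for context size.
    if let Some(ctx) = num_ctx {
        body["options"] = json!({ "num_ctx": ctx });
    }
    println!("{body}");
}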
@@ -740,12 +808,20 @@ impl InsightGenerator {
 
         // Step 4: Summarize contexts separately, then combine
         let immediate_summary = self
-            .summarize_context_from_messages(&immediate_messages, &ollama_client)
+            .summarize_context_from_messages(
+                &immediate_messages,
+                &ollama_client,
+                custom_system_prompt.as_deref(),
+            )
             .await
             .unwrap_or_else(|| String::from("No immediate context"));
 
         let historical_summary = self
-            .summarize_messages(&historical_messages, &ollama_client)
+            .summarize_messages(
+                &historical_messages,
+                &ollama_client,
+                custom_system_prompt.as_deref(),
+            )
             .await
             .unwrap_or_else(|| String::from("No historical context"));
 
@@ -759,13 +835,21 @@ impl InsightGenerator {
                 // RAG found no historical matches, just use immediate context
                 log::info!("No historical RAG matches, using immediate context only");
                 sms_summary = self
-                    .summarize_context_from_messages(&immediate_messages, &ollama_client)
+                    .summarize_context_from_messages(
+                        &immediate_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
                     .await;
             }
             Err(e) => {
                 log::warn!("Historical RAG failed, using immediate context only: {}", e);
                 sms_summary = self
-                    .summarize_context_from_messages(&immediate_messages, &ollama_client)
+                    .summarize_context_from_messages(
+                        &immediate_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
                     .await;
             }
         }
@@ -778,7 +862,13 @@ impl InsightGenerator {
         {
             Ok(rag_messages) if !rag_messages.is_empty() => {
                 used_rag = true;
-                sms_summary = self.summarize_messages(&rag_messages, &ollama_client).await;
+                sms_summary = self
+                    .summarize_messages(
+                        &rag_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
+                    .await;
             }
             _ => {}
         }
@@ -882,13 +972,37 @@ impl InsightGenerator {
             combined_context.len()
         );
 
-        // 8. Generate title and summary with Ollama (using multi-source context)
+        // 8. Load image and encode as base64 for vision models
+        let image_base64 = match self.load_image_as_base64(&file_path) {
+            Ok(b64) => {
+                log::info!("Successfully loaded image for vision model");
+                Some(b64)
+            }
+            Err(e) => {
+                log::warn!("Failed to load image for vision model: {}", e);
+                None
+            }
+        };
+
+        // 9. Generate title and summary with Ollama (using multi-source context + image)
         let title = ollama_client
-            .generate_photo_title(date_taken, location.as_deref(), Some(&combined_context))
+            .generate_photo_title(
+                date_taken,
+                location.as_deref(),
+                Some(&combined_context),
+                custom_system_prompt.as_deref(),
+                image_base64.clone(),
+            )
             .await?;
 
         let summary = ollama_client
-            .generate_photo_summary(date_taken, location.as_deref(), Some(&combined_context))
+            .generate_photo_summary(
+                date_taken,
+                location.as_deref(),
+                Some(&combined_context),
+                custom_system_prompt.as_deref(),
+                image_base64,
+            )
             .await?;
 
         log::info!("Generated title: {}", title);
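
Note: in the hunk above the title call takes `image_base64.clone()` while the summary call consumes the original Option<String>, so the encoded image is duplicated once in memory rather than re-read and re-encoded for the second call. The ownership pattern in miniature (helper name invented):

// First consumer gets a clone, the last gets the original: exactly one
// extra allocation, no second trip through the image pipeline.
fn consume(image: Option<String>) -> usize {
    image.map(|b64| b64.len()).unwrap_or(0)
}

fn main() {
    let image_base64 = Some(String::from("aGVsbG8=")); // stand-in payload
    let title_len = consume(image_base64.clone());
    let summary_len = consume(image_base64);
    println!("{title_len} {summary_len}");
}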
@@ -1037,6 +1151,7 @@ Return ONLY the comma-separated list, nothing else."#,
         &self,
         messages: &[String],
         ollama: &OllamaClient,
+        custom_system: Option<&str>,
     ) -> Option<String> {
         if messages.is_empty() {
             return None;
@@ -1054,13 +1169,10 @@ Return ONLY the summary, nothing else."#,
             messages_text
         );
 
-        match ollama
-            .generate(
-                &prompt,
-                Some("You are a context summarization assistant. Be concise and factual."),
-            )
-            .await
-        {
+        let system = custom_system
+            .unwrap_or("You are a context summarization assistant. Be concise and factual.");
+
+        match ollama.generate(&prompt, Some(system)).await {
             Ok(summary) => Some(summary),
             Err(e) => {
                 log::warn!("Failed to summarize messages: {}", e);
@@ -1075,6 +1187,7 @@ Return ONLY the summary, nothing else."#,
         &self,
         messages: &[crate::ai::SmsMessage],
         ollama: &OllamaClient,
+        custom_system: Option<&str>,
     ) -> Option<String> {
         if messages.is_empty() {
             return None;
@@ -1111,13 +1224,11 @@ Return ONLY the summary, nothing else."#,
             messages_text
         );
 
-        match ollama
-            .generate(
-                &prompt,
-                Some("You are a context summarization assistant. Be detailed and factual, preserving important context."),
-            )
-            .await
-        {
+        let system = custom_system.unwrap_or(
+            "You are a context summarization assistant. Be detailed and factual, preserving important context.",
+        );
+
+        match ollama.generate(&prompt, Some(system)).await {
             Ok(summary) => Some(summary),
             Err(e) => {
                 log::warn!("Failed to summarize immediate context: {}", e);