Pass image as additional Insight context

Cameron
2026-01-10 11:30:01 -05:00
parent 084994e0b5
commit b2cc617bc2
9 changed files with 295 additions and 56 deletions


@@ -1,9 +1,12 @@
 use anyhow::Result;
+use base64::Engine as _;
 use chrono::{DateTime, Utc};
+use image::ImageFormat;
 use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
 use serde::Deserialize;
 use std::fs::File;
+use std::io::Cursor;
 use std::sync::{Arc, Mutex};
 
 use crate::ai::ollama::OllamaClient;
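Note: the anonymous trait import `use base64::Engine as _;` is what lets the method call `STANDARD.encode(...)` later in the file resolve without naming the trait. A standalone illustration (assuming base64 0.21+):

    use base64::Engine as _; // bring the trait's methods into scope without naming it

    let b64 = base64::engine::general_purpose::STANDARD.encode(b"hello");
    assert_eq!(b64, "aGVsbG8=");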
@@ -92,6 +95,51 @@ impl InsightGenerator {
         None
     }
 
+    /// Load image file, resize it, and encode as base64 for vision models
+    /// Resizes so the longest edge is at most 1024px to reduce context usage
+    fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
+        use image::imageops::FilterType;
+        use std::path::Path;
+
+        let full_path = Path::new(&self.base_path).join(file_path);
+        log::debug!("Loading image for vision model: {:?}", full_path);
+
+        // Open and decode the image
+        let img = image::open(&full_path)
+            .map_err(|e| anyhow::anyhow!("Failed to open image file: {}", e))?;
+        let (original_width, original_height) = (img.width(), img.height());
+
+        // Fit within 1024x1024; `resize` preserves aspect ratio (and upscales smaller images)
+        let resized = img.resize(1024, 1024, FilterType::Lanczos3);
+        log::debug!(
+            "Resized image from {}x{} to {}x{}",
+            original_width,
+            original_height,
+            resized.width(),
+            resized.height()
+        );
+
+        // Encode as JPEG (`write_to` uses the encoder's default quality)
+        let mut buffer = Vec::new();
+        let mut cursor = Cursor::new(&mut buffer);
+        resized
+            .write_to(&mut cursor, ImageFormat::Jpeg)
+            .map_err(|e| anyhow::anyhow!("Failed to encode image as JPEG: {}", e))?;
+
+        let base64_string = base64::engine::general_purpose::STANDARD.encode(&buffer);
+        log::debug!(
+            "Encoded image as base64 ({} bytes -> {} chars)",
+            buffer.len(),
+            base64_string.len()
+        );
+
+        Ok(base64_string)
+    }
+
     /// Find relevant messages using RAG, keeping only historical messages (>30 days old)
     /// This prevents RAG from returning messages already in the immediate time window
     async fn find_relevant_messages_rag_historical(
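Note that `write_to(..., ImageFormat::Jpeg)` encodes at the `image` crate's default quality rather than a specific setting such as 85%. If an explicit quality is wanted, a minimal sketch (assuming image 0.24.7+, where `DynamicImage::write_with_encoder` is available) would be:

    use image::codecs::jpeg::JpegEncoder;
    use std::io::Cursor;

    // Encode `resized` as JPEG at an explicit quality of 85.
    let mut buffer = Vec::new();
    let encoder = JpegEncoder::new_with_quality(Cursor::new(&mut buffer), 85);
    resized.write_with_encoder(encoder)?;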
@@ -564,10 +612,23 @@ impl InsightGenerator {
     }
 
     /// Generate AI insight for a single photo with optional custom model
+    /// (Deprecated: Use generate_insight_for_photo_with_config instead)
     pub async fn generate_insight_for_photo_with_model(
         &self,
         file_path: &str,
         custom_model: Option<String>,
     ) -> Result<()> {
+        self.generate_insight_for_photo_with_config(file_path, custom_model, None, None)
+            .await
+    }
+
+    /// Generate AI insight for a single photo with custom configuration
+    pub async fn generate_insight_for_photo_with_config(
+        &self,
+        file_path: &str,
+        custom_model: Option<String>,
+        custom_system_prompt: Option<String>,
+        num_ctx: Option<i32>,
+    ) -> Result<()> {
         let tracer = global_tracer();
         let current_cx = opentelemetry::Context::current();
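Since the old entry point now only delegates, the deprecation could also be surfaced as a compiler warning via Rust's built-in attribute rather than only a doc comment; a possible variant of the same wrapper:

    #[deprecated(note = "Use generate_insight_for_photo_with_config instead")]
    pub async fn generate_insight_for_photo_with_model(
        &self,
        file_path: &str,
        custom_model: Option<String>,
    ) -> Result<()> {
        self.generate_insight_for_photo_with_config(file_path, custom_model, None, None)
            .await
    }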
@@ -580,7 +641,7 @@ impl InsightGenerator {
         span.set_attribute(KeyValue::new("file_path", file_path.clone()));
 
         // Create custom Ollama client if model is specified
-        let ollama_client = if let Some(model) = custom_model {
+        let mut ollama_client = if let Some(model) = custom_model {
             log::info!("Using custom model: {}", model);
             span.set_attribute(KeyValue::new("custom_model", model.clone()));
             OllamaClient::new(
@@ -594,6 +655,13 @@ impl InsightGenerator {
             self.ollama.clone()
         };
 
+        // Set context size if specified
+        if let Some(ctx) = num_ctx {
+            log::info!("Using custom context size: {}", ctx);
+            span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
+            ollama_client.set_num_ctx(Some(ctx));
+        }
+
         // Create context with this span for child operations
         let insight_cx = current_cx.with_span(span);
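`set_num_ctx` itself is not part of this diff. Ollama accepts the context-window size as `options.num_ctx` on generate requests, so a plausible sketch of the setter and how the stored value could be merged into a request body (the `OllamaClient` fields here are assumptions, not the project's actual definition):

    // Sketch only: field names are assumed, not taken from this repository.
    pub struct OllamaClient {
        base_url: String,
        model: String,
        num_ctx: Option<i32>,
    }

    impl OllamaClient {
        pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
            self.num_ctx = num_ctx;
        }

        fn request_options(&self) -> serde_json::Value {
            // Ollama reads the context window from `options.num_ctx`
            match self.num_ctx {
                Some(ctx) => serde_json::json!({ "num_ctx": ctx }),
                None => serde_json::json!({}),
            }
        }
    }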
@@ -740,12 +808,20 @@ impl InsightGenerator {
         // Step 4: Summarize contexts separately, then combine
         let immediate_summary = self
-            .summarize_context_from_messages(&immediate_messages, &ollama_client)
+            .summarize_context_from_messages(
+                &immediate_messages,
+                &ollama_client,
+                custom_system_prompt.as_deref(),
+            )
             .await
             .unwrap_or_else(|| String::from("No immediate context"));
 
         let historical_summary = self
-            .summarize_messages(&historical_messages, &ollama_client)
+            .summarize_messages(
+                &historical_messages,
+                &ollama_client,
+                custom_system_prompt.as_deref(),
+            )
             .await
             .unwrap_or_else(|| String::from("No historical context"));
@@ -759,13 +835,21 @@ impl InsightGenerator {
                 // RAG found no historical matches, just use immediate context
                 log::info!("No historical RAG matches, using immediate context only");
                 sms_summary = self
-                    .summarize_context_from_messages(&immediate_messages, &ollama_client)
+                    .summarize_context_from_messages(
+                        &immediate_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
                     .await;
             }
             Err(e) => {
                 log::warn!("Historical RAG failed, using immediate context only: {}", e);
                 sms_summary = self
-                    .summarize_context_from_messages(&immediate_messages, &ollama_client)
+                    .summarize_context_from_messages(
+                        &immediate_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
                     .await;
             }
         }
@@ -778,7 +862,13 @@ impl InsightGenerator {
         {
             Ok(rag_messages) if !rag_messages.is_empty() => {
                 used_rag = true;
-                sms_summary = self.summarize_messages(&rag_messages, &ollama_client).await;
+                sms_summary = self
+                    .summarize_messages(
+                        &rag_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
+                    .await;
             }
             _ => {}
         }
@@ -882,13 +972,37 @@ impl InsightGenerator {
             combined_context.len()
         );
 
-        // 8. Generate title and summary with Ollama (using multi-source context)
+        // 8. Load image and encode as base64 for vision models
+        let image_base64 = match self.load_image_as_base64(&file_path) {
+            Ok(b64) => {
+                log::info!("Successfully loaded image for vision model");
+                Some(b64)
+            }
+            Err(e) => {
+                log::warn!("Failed to load image for vision model: {}", e);
+                None
+            }
+        };
+
+        // 9. Generate title and summary with Ollama (using multi-source context + image)
         let title = ollama_client
-            .generate_photo_title(date_taken, location.as_deref(), Some(&combined_context))
+            .generate_photo_title(
+                date_taken,
+                location.as_deref(),
+                Some(&combined_context),
+                custom_system_prompt.as_deref(),
+                image_base64.clone(),
+            )
             .await?;
 
         let summary = ollama_client
-            .generate_photo_summary(date_taken, location.as_deref(), Some(&combined_context))
+            .generate_photo_summary(
+                date_taken,
+                location.as_deref(),
+                Some(&combined_context),
+                custom_system_prompt.as_deref(),
+                image_base64,
+            )
             .await?;
 
         log::info!("Generated title: {}", title);
@@ -1037,6 +1151,7 @@ Return ONLY the comma-separated list, nothing else."#,
         &self,
         messages: &[String],
         ollama: &OllamaClient,
+        custom_system: Option<&str>,
     ) -> Option<String> {
         if messages.is_empty() {
             return None;
@@ -1054,13 +1169,10 @@ Return ONLY the summary, nothing else."#,
             messages_text
         );
 
-        match ollama
-            .generate(
-                &prompt,
-                Some("You are a context summarization assistant. Be concise and factual."),
-            )
-            .await
-        {
+        let system = custom_system
+            .unwrap_or("You are a context summarization assistant. Be concise and factual.");
+        match ollama.generate(&prompt, Some(system)).await {
             Ok(summary) => Some(summary),
             Err(e) => {
                 log::warn!("Failed to summarize messages: {}", e);
@@ -1075,6 +1187,7 @@ Return ONLY the summary, nothing else."#,
         &self,
         messages: &[crate::ai::SmsMessage],
         ollama: &OllamaClient,
+        custom_system: Option<&str>,
     ) -> Option<String> {
         if messages.is_empty() {
             return None;
@@ -1111,13 +1224,11 @@ Return ONLY the summary, nothing else."#,
             messages_text
         );
 
-        match ollama
-            .generate(
-                &prompt,
-                Some("You are a context summarization assistant. Be detailed and factual, preserving important context."),
-            )
-            .await
-        {
+        let system = custom_system.unwrap_or(
+            "You are a context summarization assistant. Be detailed and factual, preserving important context.",
+        );
+        match ollama.generate(&prompt, Some(system)).await {
             Ok(summary) => Some(summary),
             Err(e) => {
                 log::warn!("Failed to summarize immediate context: {}", e);