Pass image as additional Insight context

@@ -1,9 +1,12 @@
 use anyhow::Result;
+use base64::Engine as _;
 use chrono::{DateTime, Utc};
+use image::ImageFormat;
 use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
 use serde::Deserialize;
 use std::fs::File;
+use std::io::Cursor;
 use std::sync::{Arc, Mutex};
 
 use crate::ai::ollama::OllamaClient;
@@ -92,6 +95,51 @@ impl InsightGenerator {
         None
     }
 
+    /// Load image file, resize it, and encode as base64 for vision models
+    /// Resizes to max 1024px on longest edge to reduce context usage
+    fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
+        use image::imageops::FilterType;
+        use std::path::Path;
+
+        let full_path = Path::new(&self.base_path).join(file_path);
+
+        log::debug!("Loading image for vision model: {:?}", full_path);
+
+        // Open and decode the image
+        let img = image::open(&full_path)
+            .map_err(|e| anyhow::anyhow!("Failed to open image file: {}", e))?;
+
+        let (original_width, original_height) = (img.width(), img.height());
+
+        // Resize to max 1024px on longest edge
+        let resized = img.resize(1024, 1024, FilterType::Lanczos3);
+
+        log::debug!(
+            "Resized image from {}x{} to {}x{}",
+            original_width,
+            original_height,
+            resized.width(),
+            resized.height()
+        );
+
+        // Encode as JPEG (default encoder quality)
+        let mut buffer = Vec::new();
+        let mut cursor = Cursor::new(&mut buffer);
+        resized
+            .write_to(&mut cursor, ImageFormat::Jpeg)
+            .map_err(|e| anyhow::anyhow!("Failed to encode image as JPEG: {}", e))?;
+
+        let base64_string = base64::engine::general_purpose::STANDARD.encode(&buffer);
+
+        log::debug!(
+            "Encoded image as base64 ({} bytes -> {} chars)",
+            buffer.len(),
+            base64_string.len()
+        );
+
+        Ok(base64_string)
+    }
+
     /// Find relevant messages using RAG, excluding recent messages (>30 days ago)
     /// This prevents RAG from returning messages already in the immediate time window
     async fn find_relevant_messages_rag_historical(
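
Note: vision-capable Ollama models (llava and similar) accept images as base64 strings in the `images` array of a /api/generate request, which is why the file is re-encoded here instead of being passed as a path. A minimal sketch of such a payload, assuming serde_json is available; the model name and prompt are invented, and only the `images` field name comes from the public Ollama API:

use serde_json::json;

fn main() {
    // Stand-in for the string returned by load_image_as_base64.
    let image_base64 = "aGVsbG8=";

    // Illustrative /api/generate body: `images` carries one base64
    // string per attached image.
    let body = json!({
        "model": "llava:13b",
        "prompt": "Describe this photo.",
        "images": [image_base64],
        "stream": false
    });
    println!("{body}");
}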
@@ -564,10 +612,23 @@ impl InsightGenerator {
     }
 
     /// Generate AI insight for a single photo with optional custom model
+    /// (Deprecated: Use generate_insight_for_photo_with_config instead)
     pub async fn generate_insight_for_photo_with_model(
         &self,
         file_path: &str,
         custom_model: Option<String>,
     ) -> Result<()> {
+        self.generate_insight_for_photo_with_config(file_path, custom_model, None, None)
+            .await
+    }
+
+    /// Generate AI insight for a single photo with custom configuration
+    pub async fn generate_insight_for_photo_with_config(
+        &self,
+        file_path: &str,
+        custom_model: Option<String>,
+        custom_system_prompt: Option<String>,
+        num_ctx: Option<i32>,
+    ) -> Result<()> {
         let tracer = global_tracer();
         let current_cx = opentelemetry::Context::current();
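
Note: the old entry point is now a thin delegating wrapper, so existing callers keep working while new callers can thread a system prompt and context size through. A hypothetical call site, with every argument value invented for illustration:

// Sketch only: `generator` is an InsightGenerator from this crate.
async fn example(generator: &InsightGenerator) -> anyhow::Result<()> {
    generator
        .generate_insight_for_photo_with_config(
            "2024/06/beach.jpg",                    // file_path
            Some("llava:13b".to_string()),          // custom_model
            Some("Be brief and warm.".to_string()), // custom_system_prompt
            Some(8192),                             // num_ctx
        )
        .await
}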
@@ -580,7 +641,7 @@ impl InsightGenerator {
         span.set_attribute(KeyValue::new("file_path", file_path.clone()));
 
         // Create custom Ollama client if model is specified
-        let ollama_client = if let Some(model) = custom_model {
+        let mut ollama_client = if let Some(model) = custom_model {
             log::info!("Using custom model: {}", model);
             span.set_attribute(KeyValue::new("custom_model", model.clone()));
             OllamaClient::new(
@@ -594,6 +655,13 @@ impl InsightGenerator {
             self.ollama.clone()
         };
 
+        // Set context size if specified
+        if let Some(ctx) = num_ctx {
+            log::info!("Using custom context size: {}", ctx);
+            span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
+            ollama_client.set_num_ctx(Some(ctx));
+        }
+
         // Create context with this span for child operations
         let insight_cx = current_cx.with_span(span);
 
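
Note: `num_ctx` is a standard key in Ollama's per-request `options` object and sets the model's context window in tokens, so set_num_ctx presumably folds the value into the outgoing request roughly like this (sketch assuming serde_json; model and prompt invented):

use serde_json::json;

fn main() {
    let num_ctx: Option<i32> = Some(8192); // as configured above

    let mut body = json!({
        "model": "llava:13b",
        "prompt": "Describe this photo."
    });
    // `options.num_ctx` is the documented Ollama knob for context size.
    if let Some(ctx) = num_ctx {
        body["options"] = json!({ "num_ctx": ctx });
    }
    println!("{body}");
}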
@@ -740,12 +808,20 @@ impl InsightGenerator {
 
         // Step 4: Summarize contexts separately, then combine
         let immediate_summary = self
-            .summarize_context_from_messages(&immediate_messages, &ollama_client)
+            .summarize_context_from_messages(
+                &immediate_messages,
+                &ollama_client,
+                custom_system_prompt.as_deref(),
+            )
             .await
             .unwrap_or_else(|| String::from("No immediate context"));
 
         let historical_summary = self
-            .summarize_messages(&historical_messages, &ollama_client)
+            .summarize_messages(
+                &historical_messages,
+                &ollama_client,
+                custom_system_prompt.as_deref(),
+            )
             .await
             .unwrap_or_else(|| String::from("No historical context"));
 
@@ -759,13 +835,21 @@ impl InsightGenerator {
                 // RAG found no historical matches, just use immediate context
                 log::info!("No historical RAG matches, using immediate context only");
                 sms_summary = self
-                    .summarize_context_from_messages(&immediate_messages, &ollama_client)
+                    .summarize_context_from_messages(
+                        &immediate_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
                     .await;
             }
             Err(e) => {
                 log::warn!("Historical RAG failed, using immediate context only: {}", e);
                 sms_summary = self
-                    .summarize_context_from_messages(&immediate_messages, &ollama_client)
+                    .summarize_context_from_messages(
+                        &immediate_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
                     .await;
             }
         }
@@ -778,7 +862,13 @@ impl InsightGenerator {
         {
             Ok(rag_messages) if !rag_messages.is_empty() => {
                 used_rag = true;
-                sms_summary = self.summarize_messages(&rag_messages, &ollama_client).await;
+                sms_summary = self
+                    .summarize_messages(
+                        &rag_messages,
+                        &ollama_client,
+                        custom_system_prompt.as_deref(),
+                    )
+                    .await;
             }
             _ => {}
         }
@@ -882,13 +972,37 @@ impl InsightGenerator {
             combined_context.len()
         );
 
-        // 8. Generate title and summary with Ollama (using multi-source context)
+        // 8. Load image and encode as base64 for vision models
+        let image_base64 = match self.load_image_as_base64(&file_path) {
+            Ok(b64) => {
+                log::info!("Successfully loaded image for vision model");
+                Some(b64)
+            }
+            Err(e) => {
+                log::warn!("Failed to load image for vision model: {}", e);
+                None
+            }
+        };
+
+        // 9. Generate title and summary with Ollama (using multi-source context + image)
         let title = ollama_client
-            .generate_photo_title(date_taken, location.as_deref(), Some(&combined_context))
+            .generate_photo_title(
+                date_taken,
+                location.as_deref(),
+                Some(&combined_context),
+                custom_system_prompt.as_deref(),
+                image_base64.clone(),
+            )
             .await?;
 
         let summary = ollama_client
-            .generate_photo_summary(date_taken, location.as_deref(), Some(&combined_context))
+            .generate_photo_summary(
+                date_taken,
+                location.as_deref(),
+                Some(&combined_context),
+                custom_system_prompt.as_deref(),
+                image_base64,
+            )
             .await?;
 
         log::info!("Generated title: {}", title);
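
Note: in the hunk above the title call takes `image_base64.clone()` while the summary call consumes the original Option<String>, so the encoded image is duplicated once in memory rather than re-read and re-encoded for the second call. The ownership pattern in miniature (helper name invented):

// First consumer gets a clone, the last gets the original: exactly one
// extra allocation, no second trip through the image pipeline.
fn consume(image: Option<String>) -> usize {
    image.map(|b64| b64.len()).unwrap_or(0)
}

fn main() {
    let image_base64 = Some(String::from("aGVsbG8=")); // stand-in payload
    let title_len = consume(image_base64.clone());
    let summary_len = consume(image_base64);
    println!("{title_len} {summary_len}");
}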
@@ -1037,6 +1151,7 @@ Return ONLY the comma-separated list, nothing else."#,
         &self,
         messages: &[String],
         ollama: &OllamaClient,
+        custom_system: Option<&str>,
     ) -> Option<String> {
         if messages.is_empty() {
             return None;
@@ -1054,13 +1169,10 @@ Return ONLY the summary, nothing else."#,
             messages_text
         );
 
-        match ollama
-            .generate(
-                &prompt,
-                Some("You are a context summarization assistant. Be concise and factual."),
-            )
-            .await
-        {
+        let system = custom_system
+            .unwrap_or("You are a context summarization assistant. Be concise and factual.");
+
+        match ollama.generate(&prompt, Some(system)).await {
             Ok(summary) => Some(summary),
             Err(e) => {
                 log::warn!("Failed to summarize messages: {}", e);
@@ -1075,6 +1187,7 @@ Return ONLY the summary, nothing else."#,
         &self,
         messages: &[crate::ai::SmsMessage],
         ollama: &OllamaClient,
+        custom_system: Option<&str>,
     ) -> Option<String> {
         if messages.is_empty() {
             return None;
@@ -1111,13 +1224,11 @@ Return ONLY the summary, nothing else."#,
             messages_text
         );
 
-        match ollama
-            .generate(
-                &prompt,
-                Some("You are a context summarization assistant. Be detailed and factual, preserving important context."),
-            )
-            .await
-        {
+        let system = custom_system.unwrap_or(
+            "You are a context summarization assistant. Be detailed and factual, preserving important context.",
+        );
+
+        match ollama.generate(&prompt, Some(system)).await {
             Ok(summary) => Some(summary),
             Err(e) => {
                 log::warn!("Failed to summarize immediate context: {}", e);