Add check for vision capabilities

This commit is contained in:
Cameron
2026-01-11 15:22:24 -05:00
parent 5b35df4007
commit ad0bba63b4
4 changed files with 235 additions and 42 deletions

View File

@@ -961,23 +961,62 @@ impl InsightGenerator {
combined_context.len()
);
// 8. Load image and encode as base64 for vision models
let image_base64 = match self.load_image_as_base64(&file_path) {
Ok(b64) => {
log::info!("Successfully loaded image for vision model");
Some(b64)
// 8. Check if the model has vision capabilities
let model_to_check = ollama_client.primary_model.clone();
let has_vision = match OllamaClient::check_model_capabilities(
&ollama_client.primary_url,
&model_to_check,
)
.await
{
Ok(capabilities) => {
log::info!(
"Model '{}' vision capability: {}",
model_to_check,
capabilities.has_vision
);
capabilities.has_vision
}
Err(e) => {
log::warn!("Failed to load image for vision model: {}", e);
None
log::warn!(
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
model_to_check,
e
);
false
}
};
// 9. Generate title and summary with Ollama (using multi-source context + image)
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_vision", has_vision));
// 9. Load image and encode as base64 only if model supports vision
let image_base64 = if has_vision {
match self.load_image_as_base64(&file_path) {
Ok(b64) => {
log::info!("Successfully loaded image for vision-capable model '{}'", model_to_check);
Some(b64)
}
Err(e) => {
log::warn!("Failed to load image for vision model: {}", e);
None
}
}
} else {
log::info!(
"Model '{}' does not support vision, skipping image processing",
model_to_check
);
None
};
// 10. Generate title and summary with Ollama (using multi-source context + image if supported)
let title = ollama_client
.generate_photo_title(
date_taken,
location.as_deref(),
contact.as_deref(),
Some(&combined_context),
custom_system_prompt.as_deref(),
image_base64.clone(),
@@ -988,6 +1027,7 @@ impl InsightGenerator {
.generate_photo_summary(
date_taken,
location.as_deref(),
contact.as_deref(),
Some(&combined_context),
custom_system_prompt.as_deref(),
image_base64,
@@ -1004,7 +1044,7 @@ impl InsightGenerator {
.span()
.set_attribute(KeyValue::new("summary_length", summary.len() as i64));
// 9. Store in database
// 11. Store in database
let insight = InsertPhotoInsight {
file_path: file_path.to_string(),
title,