Add check for vision capabilities

2026-01-11 15:22:24 -05:00
parent 5b35df4007
commit ad0bba63b4
4 changed files with 235 additions and 42 deletions
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -961,23 +961,62 @@ impl InsightGenerator {
            combined_context.len()
        );

-        // 8. Load image and encode as base64 for vision models
-        let image_base64 = match self.load_image_as_base64(&file_path) {
-            Ok(b64) => {
-                log::info!("Successfully loaded image for vision model");
-                Some(b64)
+        // 8. Check if the model has vision capabilities
+        let model_to_check = ollama_client.primary_model.clone();
+        let has_vision = match OllamaClient::check_model_capabilities(
+            &ollama_client.primary_url,
+            &model_to_check,
+        )
+        .await
+        {
+            Ok(capabilities) => {
+                log::info!(
+                    "Model '{}' vision capability: {}",
+                    model_to_check,
+                    capabilities.has_vision
+                );
+                capabilities.has_vision
            }
            Err(e) => {
-                log::warn!("Failed to load image for vision model: {}", e);
-                None
+                log::warn!(
+                    "Failed to check vision capabilities for model '{}', assuming no vision support: {}",
+                    model_to_check,
+                    e
+                );
+                false
            }
        };

-        // 9. Generate title and summary with Ollama (using multi-source context + image)
+        insight_cx
+            .span()
+            .set_attribute(KeyValue::new("model_has_vision", has_vision));
+
+        // 9. Load image and encode as base64 only if model supports vision
+        let image_base64 = if has_vision {
+            match self.load_image_as_base64(&file_path) {
+                Ok(b64) => {
+                    log::info!("Successfully loaded image for vision-capable model '{}'", model_to_check);
+                    Some(b64)
+                }
+                Err(e) => {
+                    log::warn!("Failed to load image for vision model: {}", e);
+                    None
+                }
+            }
+        } else {
+            log::info!(
+                "Model '{}' does not support vision, skipping image processing",
+                model_to_check
+            );
+            None
+        };
+
+        // 10. Generate title and summary with Ollama (using multi-source context + image if supported)
        let title = ollama_client
            .generate_photo_title(
                date_taken,
                location.as_deref(),
+                contact.as_deref(),
                Some(&combined_context),
                custom_system_prompt.as_deref(),
                image_base64.clone(),
@@ -988,6 +1027,7 @@ impl InsightGenerator {
            .generate_photo_summary(
                date_taken,
                location.as_deref(),
+                contact.as_deref(),
                Some(&combined_context),
                custom_system_prompt.as_deref(),
                image_base64,
@@ -1004,7 +1044,7 @@ impl InsightGenerator {
            .span()
            .set_attribute(KeyValue::new("summary_length", summary.len() as i64));

-        // 9. Store in database
+        // 11. Store in database
        let insight = InsertPhotoInsight {
            file_path: file_path.to_string(),
            title,