feat: add generate_photo_description() to OllamaClient for RAG enrichment

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 16:53:34 -04:00
parent c804ee39cb
commit dd0715c081
1 changed files with 31 additions and 0 deletions
@@ -480,6 +480,22 @@ Analyze the image and use specific details from both the visual content and the
            .await
    }
    /// Produce a short visual summary of a photo for enriching RAG queries.
    ///
    /// Sends the base64-encoded image, together with a fixed prompt and system
    /// message, through `generate_with_images`. The prompt asks the model for
    /// 1-2 sentences covering the people, location, and activity in the image.
    /// Callers are expected to invoke this only when the model has vision
    /// capabilities.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `generate_with_images`.
    pub async fn generate_photo_description(&self, image_base64: String) -> Result<String> {
        const SYSTEM: &str = "You are a scene description assistant. Be concise and factual.";
        const PROMPT: &str = "Briefly describe what you see in this image in 1-2 sentences. \
                              Focus on the people, location, and activity.";

        let raw_reply = self
            .generate_with_images(PROMPT, Some(SYSTEM), Some(vec![image_base64]))
            .await?;

        // Strip surrounding whitespace from the reply before handing it back.
        Ok(raw_reply.trim().to_owned())
    }
    /// Generate an embedding vector for text using nomic-embed-text:v1.5
    /// Returns a 768-dimensional vector as Vec<f32>
    pub async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>> {
@@ -664,3 +680,18 @@ struct OllamaBatchEmbedRequest {
 /// Response payload carrying embedding vectors.
 /// NOTE(review): presumably deserialized from Ollama's embed endpoint reply;
 /// any derive attributes sit above this view — confirm.
 struct OllamaEmbedResponse {
    /// Embedding vectors as `f32` components, one inner `Vec` per embedding.
    /// NOTE(review): presumably one vector per input text, in request order — confirm.
    embeddings: Vec<Vec<f32>>,
 }
 #[cfg(test)]
 mod tests {
    use super::*;

    /// Smoke check on the photo-description prompt length.
    ///
    /// This asserts against a copy of the prompt literal; it does not invoke
    /// `generate_photo_description`, so real LLM calls still need manual
    /// integration testing.
    #[test]
    fn generate_photo_description_prompt_is_concise() {
        // Upper bound (exclusive) on the prompt length, in bytes.
        const MAX_PROMPT_LEN: usize = 200;

        let prompt_text = "Briefly describe what you see in this image in 1-2 sentences. \
                           Focus on the people, location, and activity.";
        let is_concise = prompt_text.len() < MAX_PROMPT_LEN;
        assert!(is_concise, "Prompt should be concise");
    }
 }