Add Insights Model Discovery and Fallback Handling
src/ai/ollama.rs | 168
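
A minimal usage sketch of the fallback-aware constructor and generate() path introduced in the diff below. The module path, URLs, and model names here are illustrative placeholders rather than values from this commit, and an async runtime such as tokio is assumed.

// Illustrative only: module path, URLs, and model names are placeholders;
// an async runtime such as tokio is assumed.
use crate::ai::ollama::OllamaClient;

async fn demo() -> anyhow::Result<()> {
    let client = OllamaClient::new(
        "http://localhost:11434".to_string(),          // primary server
        Some("http://backup-host:11434".to_string()),  // optional fallback server
        "llama3.1:8b".to_string(),                     // primary model
        None,                                          // reuse the primary model name on fallback
    );

    // Tries the primary server first; only falls back if that request errors.
    let text = client
        .generate("Write one sentence about today.", Some("Be brief."))
        .await?;
    println!("{}", text);
    Ok(())
}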
@@ -2,25 +2,60 @@ use anyhow::Result;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 
 use crate::memories::MemoryItem;
+use std::time::Duration;
 
 #[derive(Clone)]
 pub struct OllamaClient {
     client: Client,
-    pub base_url: String,
-    pub model: String,
+    pub primary_url: String,
+    pub fallback_url: Option<String>,
+    pub primary_model: String,
+    pub fallback_model: Option<String>,
 }
 
 impl OllamaClient {
-    pub fn new(base_url: String, model: String) -> Self {
+    pub fn new(
+        primary_url: String,
+        fallback_url: Option<String>,
+        primary_model: String,
+        fallback_model: Option<String>,
+    ) -> Self {
         Self {
-            client: Client::new(),
-            base_url,
-            model,
+            client: Client::builder()
+                .connect_timeout(Duration::from_secs(5)) // Quick connection timeout
+                .timeout(Duration::from_secs(120)) // Total request timeout for generation
+                .build()
+                .unwrap_or_else(|_| Client::new()),
+            primary_url,
+            fallback_url,
+            primary_model,
+            fallback_model,
         }
     }
 
+    /// List available models on an Ollama server
+    pub async fn list_models(url: &str) -> Result<Vec<String>> {
+        let client = Client::builder()
+            .connect_timeout(Duration::from_secs(5))
+            .timeout(Duration::from_secs(10))
+            .build()?;
+
+        let response = client.get(&format!("{}/api/tags", url)).send().await?;
+
+        if !response.status().is_success() {
+            return Err(anyhow::anyhow!("Failed to list models from {}", url));
+        }
+
+        let tags_response: OllamaTagsResponse = response.json().await?;
+        Ok(tags_response.models.into_iter().map(|m| m.name).collect())
+    }
+
+    /// Check if a model is available on a server
+    pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
+        let models = Self::list_models(url).await?;
+        Ok(models.iter().any(|m| m == model_name))
+    }
+
     /// Extract final answer from thinking model output
     /// Handles <think>...</think> tags and takes everything after
     fn extract_final_answer(&self, response: &str) -> String {
@@ -38,17 +73,15 @@ impl OllamaClient {
         response.to_string()
     }
 
-    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
-        log::debug!("=== Ollama Request ===");
-        log::debug!("Model: {}", self.model);
-        if let Some(sys) = system {
-            log::debug!("System: {}", sys);
-        }
-        log::debug!("Prompt:\n{}", prompt);
-        log::debug!("=====================");
-
+    async fn try_generate(
+        &self,
+        url: &str,
+        model: &str,
+        prompt: &str,
+        system: Option<&str>,
+    ) -> Result<String> {
         let request = OllamaRequest {
-            model: self.model.clone(),
+            model: model.to_string(),
             prompt: prompt.to_string(),
             stream: false,
             system: system.map(|s| s.to_string()),
@@ -56,7 +89,7 @@ impl OllamaClient {
 
         let response = self
             .client
-            .post(&format!("{}/api/generate", self.base_url))
+            .post(&format!("{}/api/generate", url))
            .json(&request)
            .send()
            .await?;
@@ -64,7 +97,6 @@ impl OllamaClient {
         if !response.status().is_success() {
             let status = response.status();
             let error_body = response.text().await.unwrap_or_default();
-            log::error!("Ollama request failed: {} - {}", status, error_body);
             return Err(anyhow::anyhow!(
                 "Ollama request failed: {} - {}",
                 status,
@@ -73,13 +105,77 @@ impl OllamaClient {
         }
 
         let result: OllamaResponse = response.json().await?;
+        Ok(result.response)
+    }
+
+    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
+        log::debug!("=== Ollama Request ===");
+        log::debug!("Primary model: {}", self.primary_model);
+        if let Some(sys) = system {
+            log::debug!("System: {}", sys);
+        }
+        log::debug!("Prompt:\n{}", prompt);
+        log::debug!("=====================");
+
+        // Try primary server first with primary model
+        log::info!(
+            "Attempting to generate with primary server: {} (model: {})",
+            self.primary_url,
+            self.primary_model
+        );
+        let primary_result = self
+            .try_generate(&self.primary_url, &self.primary_model, prompt, system)
+            .await;
+
+        let raw_response = match primary_result {
+            Ok(response) => {
+                log::info!("Successfully generated response from primary server");
+                response
+            }
+            Err(e) => {
+                log::warn!("Primary server failed: {}", e);
+
+                // Try fallback server if available
+                if let Some(fallback_url) = &self.fallback_url {
+                    // Use fallback model if specified, otherwise use primary model
+                    let fallback_model =
+                        self.fallback_model.as_ref().unwrap_or(&self.primary_model);
+
+                    log::info!(
+                        "Attempting to generate with fallback server: {} (model: {})",
+                        fallback_url,
+                        fallback_model
+                    );
+                    match self
+                        .try_generate(fallback_url, fallback_model, prompt, system)
+                        .await
+                    {
+                        Ok(response) => {
+                            log::info!("Successfully generated response from fallback server");
+                            response
+                        }
+                        Err(fallback_e) => {
+                            log::error!("Fallback server also failed: {}", fallback_e);
+                            return Err(anyhow::anyhow!(
+                                "Both primary and fallback servers failed. Primary: {}, Fallback: {}",
+                                e,
+                                fallback_e
+                            ));
+                        }
+                    }
+                } else {
+                    log::error!("No fallback server configured");
+                    return Err(e);
+                }
+            }
+        };
 
         log::debug!("=== Ollama Response ===");
-        log::debug!("Raw response: {}", result.response.trim());
+        log::debug!("Raw response: {}", raw_response.trim());
         log::debug!("=======================");
 
         // Extract final answer from thinking model output
-        let cleaned = self.extract_final_answer(&result.response);
+        let cleaned = self.extract_final_answer(&raw_response);
 
         log::debug!("=== Cleaned Response ===");
         log::debug!("Final answer: {}", cleaned);
@@ -99,7 +195,7 @@ impl OllamaClient {
         let sms_str = sms_summary.unwrap_or("No messages");
 
         let prompt = format!(
-            r#"Create a short title (maximum 8 words) for this photo:
+            r#"Create a short title (maximum 8 words) about this moment:
 
 Date: {}
 Location: {}
@@ -113,8 +209,7 @@ Return ONLY the title, nothing else."#,
             sms_str
         );
 
-        let system =
-            "You are a memory assistant. Use only the information provided. Do not invent details.";
+        let system = "You are my long term memory assistant. Use only the information provided. Do not invent details.";
 
         let title = self.generate(&prompt, Some(system)).await?;
         Ok(title.trim().trim_matches('"').to_string())
@@ -127,7 +222,7 @@ Return ONLY the title, nothing else."#,
         location: Option<&str>,
         sms_summary: Option<&str>,
     ) -> Result<String> {
-        let location_str = location.unwrap_or("somewhere");
+        let location_str = location.unwrap_or("Unknown");
         let sms_str = sms_summary.unwrap_or("No messages");
 
         let prompt = format!(
@@ -137,7 +232,7 @@ Date: {}
 Location: {}
 Messages: {}
 
-Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cam in a casual but fluent tone. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
+Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
             date.format("%B %d, %Y"),
             location_str,
             sms_str
@@ -147,15 +242,6 @@ Use only the specific details provided above. Mention people's names, places, or
 
         self.generate(&prompt, Some(system)).await
     }
-
 }
-
-pub struct MemoryContext {
-    pub date: NaiveDate,
-    pub photos: Vec<MemoryItem>,
-    pub sms_summary: Option<String>,
-    pub locations: Vec<String>,
-    pub cameras: Vec<String>,
-}
 
 #[derive(Serialize)]
@@ -171,3 +257,13 @@ struct OllamaRequest {
 struct OllamaResponse {
     response: String,
 }
+
+#[derive(Deserialize)]
+struct OllamaTagsResponse {
+    models: Vec<OllamaModel>,
+}
+
+#[derive(Deserialize)]
+struct OllamaModel {
+    name: String,
+}