Add Insights Model Discovery and Fallback Handling

Cameron
2026-01-03 20:27:34 -05:00
parent 1171f19845
commit cf52d4ab76
10 changed files with 419 additions and 80 deletions


@@ -2,25 +2,60 @@ use anyhow::Result;
use chrono::NaiveDate;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use crate::memories::MemoryItem;
use std::time::Duration;
#[derive(Clone)]
pub struct OllamaClient {
client: Client,
pub base_url: String,
pub model: String,
pub primary_url: String,
pub fallback_url: Option<String>,
pub primary_model: String,
pub fallback_model: Option<String>,
}
impl OllamaClient {
pub fn new(base_url: String, model: String) -> Self {
pub fn new(
primary_url: String,
fallback_url: Option<String>,
primary_model: String,
fallback_model: Option<String>,
) -> Self {
Self {
client: Client::new(),
base_url,
model,
client: Client::builder()
.connect_timeout(Duration::from_secs(5)) // Quick connection timeout
.timeout(Duration::from_secs(120)) // Total request timeout for generation
.build()
.unwrap_or_else(|_| Client::new()),
primary_url,
fallback_url,
primary_model,
fallback_model,
}
}
/// List available models on an Ollama server
pub async fn list_models(url: &str) -> Result<Vec<String>> {
let client = Client::builder()
.connect_timeout(Duration::from_secs(5))
.timeout(Duration::from_secs(10))
.build()?;
let response = client.get(&format!("{}/api/tags", url)).send().await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!("Failed to list models from {}", url));
}
let tags_response: OllamaTagsResponse = response.json().await?;
Ok(tags_response.models.into_iter().map(|m| m.name).collect())
}
/// Check if a model is available on a server
pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
let models = Self::list_models(url).await?;
Ok(models.iter().any(|m| m == model_name))
}
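A minimal usage sketch for the new discovery helpers, assuming a hypothetical startup check (the URL, model name, and pick_model helper below are illustrative, not part of this commit):

// Sketch: verify the preferred model exists before wiring it into a client,
// falling back to the first model the server reports.
async fn pick_model(url: &str, preferred: &str) -> Result<String> {
    if OllamaClient::is_model_available(url, preferred).await? {
        Ok(preferred.to_string())
    } else {
        OllamaClient::list_models(url)
            .await?
            .into_iter()
            .next()
            .ok_or_else(|| anyhow::anyhow!("no models installed on {}", url))
    }
}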
/// Extract final answer from thinking model output
/// Handles <think>...</think> tags and takes everything after
fn extract_final_answer(&self, response: &str) -> String {
@@ -38,17 +73,15 @@ impl OllamaClient {
response.to_string()
}
pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
log::debug!("=== Ollama Request ===");
log::debug!("Model: {}", self.model);
if let Some(sys) = system {
log::debug!("System: {}", sys);
}
log::debug!("Prompt:\n{}", prompt);
log::debug!("=====================");
async fn try_generate(
&self,
url: &str,
model: &str,
prompt: &str,
system: Option<&str>,
) -> Result<String> {
let request = OllamaRequest {
model: self.model.clone(),
model: model.to_string(),
prompt: prompt.to_string(),
stream: false,
system: system.map(|s| s.to_string()),
@@ -56,7 +89,7 @@ impl OllamaClient {
let response = self
.client
.post(&format!("{}/api/generate", self.base_url))
.post(&format!("{}/api/generate", url))
.json(&request)
.send()
.await?;
@@ -64,7 +97,6 @@ impl OllamaClient {
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
log::error!("Ollama request failed: {} - {}", status, error_body);
return Err(anyhow::anyhow!(
"Ollama request failed: {} - {}",
status,
@@ -73,13 +105,77 @@ impl OllamaClient {
}
let result: OllamaResponse = response.json().await?;
Ok(result.response)
}
pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
log::debug!("=== Ollama Request ===");
log::debug!("Primary model: {}", self.primary_model);
if let Some(sys) = system {
log::debug!("System: {}", sys);
}
log::debug!("Prompt:\n{}", prompt);
log::debug!("=====================");
// Try primary server first with primary model
log::info!(
"Attempting to generate with primary server: {} (model: {})",
self.primary_url,
self.primary_model
);
let primary_result = self
.try_generate(&self.primary_url, &self.primary_model, prompt, system)
.await;
let raw_response = match primary_result {
Ok(response) => {
log::info!("Successfully generated response from primary server");
response
}
Err(e) => {
log::warn!("Primary server failed: {}", e);
// Try fallback server if available
if let Some(fallback_url) = &self.fallback_url {
// Use fallback model if specified, otherwise use primary model
let fallback_model =
self.fallback_model.as_ref().unwrap_or(&self.primary_model);
log::info!(
"Attempting to generate with fallback server: {} (model: {})",
fallback_url,
fallback_model
);
match self
.try_generate(fallback_url, fallback_model, prompt, system)
.await
{
Ok(response) => {
log::info!("Successfully generated response from fallback server");
response
}
Err(fallback_e) => {
log::error!("Fallback server also failed: {}", fallback_e);
return Err(anyhow::anyhow!(
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
e,
fallback_e
));
}
}
} else {
log::error!("No fallback server configured");
return Err(e);
}
}
};
log::debug!("=== Ollama Response ===");
log::debug!("Raw response: {}", result.response.trim());
log::debug!("Raw response: {}", raw_response.trim());
log::debug!("=======================");
// Extract final answer from thinking model output
let cleaned = self.extract_final_answer(&result.response);
let cleaned = self.extract_final_answer(&raw_response);
log::debug!("=== Cleaned Response ===");
log::debug!("Final answer: {}", cleaned);
@@ -99,7 +195,7 @@ impl OllamaClient {
let sms_str = sms_summary.unwrap_or("No messages");
let prompt = format!(
r#"Create a short title (maximum 8 words) for this photo:
r#"Create a short title (maximum 8 words) about this moment:
Date: {}
Location: {}
@@ -113,8 +209,7 @@ Return ONLY the title, nothing else."#,
sms_str
);
let system =
"You are a memory assistant. Use only the information provided. Do not invent details.";
let system = "You are my long term memory assistant. Use only the information provided. Do not invent details.";
let title = self.generate(&prompt, Some(system)).await?;
Ok(title.trim().trim_matches('"').to_string())
@@ -127,7 +222,7 @@ Return ONLY the title, nothing else."#,
location: Option<&str>,
sms_summary: Option<&str>,
) -> Result<String> {
let location_str = location.unwrap_or("somewhere");
let location_str = location.unwrap_or("Unknown");
let sms_str = sms_summary.unwrap_or("No messages");
let prompt = format!(
@@ -137,7 +232,7 @@ Date: {}
Location: {}
Messages: {}
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cam in a casual but fluent tone. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
sms_str
@@ -147,15 +242,6 @@ Use only the specific details provided above. Mention people's names, places, or
self.generate(&prompt, Some(system)).await
}
}
pub struct MemoryContext {
pub date: NaiveDate,
pub photos: Vec<MemoryItem>,
pub sms_summary: Option<String>,
pub locations: Vec<String>,
pub cameras: Vec<String>,
}
#[derive(Serialize)]
@@ -171,3 +257,13 @@ struct OllamaRequest {
struct OllamaResponse {
response: String,
}
#[derive(Deserialize)]
struct OllamaTagsResponse {
models: Vec<OllamaModel>,
}
#[derive(Deserialize)]
struct OllamaModel {
name: String,
}
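These tags structs only need the model name; serde skips the other fields Ollama's /api/tags returns (modified_at, size, digest, and so on) by default. A quick sketch of the deserialization, assuming serde_json is available and an enclosing function that can use the ? operator:

// Sketch: the subset of the /api/tags payload list_models relies on.
let body = r#"{"models":[{"name":"llama3.2:latest","size":2019393189}]}"#;
let tags: OllamaTagsResponse = serde_json::from_str(body)?;
assert_eq!(tags.models[0].name, "llama3.2:latest");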