Add Insights Model Discovery and Fallback Handling
src/ai/ollama.rs | 168
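
A minimal usage sketch of the fallback-aware constructor and generate() path introduced in the diff below. The module path, URLs, and model names here are illustrative placeholders rather than values from this commit, and an async runtime such as tokio is assumed.

// Illustrative only: module path, URLs, and model names are placeholders;
// an async runtime such as tokio is assumed.
use crate::ai::ollama::OllamaClient;

async fn demo() -> anyhow::Result<()> {
    let client = OllamaClient::new(
        "http://localhost:11434".to_string(),          // primary server
        Some("http://backup-host:11434".to_string()),  // optional fallback server
        "llama3.1:8b".to_string(),                     // primary model
        None,                                          // reuse the primary model name on fallback
    );

    // Tries the primary server first; only falls back if that request errors.
    let text = client
        .generate("Write one sentence about today.", Some("Be brief."))
        .await?;
    println!("{}", text);
    Ok(())
}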
@@ -2,25 +2,60 @@ use anyhow::Result;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 
 use crate::memories::MemoryItem;
+use std::time::Duration;
 
 #[derive(Clone)]
 pub struct OllamaClient {
     client: Client,
-    pub base_url: String,
-    pub model: String,
+    pub primary_url: String,
+    pub fallback_url: Option<String>,
+    pub primary_model: String,
+    pub fallback_model: Option<String>,
 }
 
 impl OllamaClient {
-    pub fn new(base_url: String, model: String) -> Self {
+    pub fn new(
+        primary_url: String,
+        fallback_url: Option<String>,
+        primary_model: String,
+        fallback_model: Option<String>,
+    ) -> Self {
         Self {
-            client: Client::new(),
-            base_url,
-            model,
+            client: Client::builder()
+                .connect_timeout(Duration::from_secs(5)) // Quick connection timeout
+                .timeout(Duration::from_secs(120)) // Total request timeout for generation
+                .build()
+                .unwrap_or_else(|_| Client::new()),
+            primary_url,
+            fallback_url,
+            primary_model,
+            fallback_model,
         }
     }
 
+    /// List available models on an Ollama server
+    pub async fn list_models(url: &str) -> Result<Vec<String>> {
+        let client = Client::builder()
+            .connect_timeout(Duration::from_secs(5))
+            .timeout(Duration::from_secs(10))
+            .build()?;
+
+        let response = client.get(&format!("{}/api/tags", url)).send().await?;
+
+        if !response.status().is_success() {
+            return Err(anyhow::anyhow!("Failed to list models from {}", url));
+        }
+
+        let tags_response: OllamaTagsResponse = response.json().await?;
+        Ok(tags_response.models.into_iter().map(|m| m.name).collect())
+    }
+
+    /// Check if a model is available on a server
+    pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
+        let models = Self::list_models(url).await?;
+        Ok(models.iter().any(|m| m == model_name))
+    }
+
     /// Extract final answer from thinking model output
     /// Handles <think>...</think> tags and takes everything after
     fn extract_final_answer(&self, response: &str) -> String {
@@ -38,17 +73,15 @@ impl OllamaClient {
         response.to_string()
     }
 
-    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
-        log::debug!("=== Ollama Request ===");
-        log::debug!("Model: {}", self.model);
-        if let Some(sys) = system {
-            log::debug!("System: {}", sys);
-        }
-        log::debug!("Prompt:\n{}", prompt);
-        log::debug!("=====================");
-
+    async fn try_generate(
+        &self,
+        url: &str,
+        model: &str,
+        prompt: &str,
+        system: Option<&str>,
+    ) -> Result<String> {
         let request = OllamaRequest {
-            model: self.model.clone(),
+            model: model.to_string(),
             prompt: prompt.to_string(),
             stream: false,
             system: system.map(|s| s.to_string()),
@@ -56,7 +89,7 @@ impl OllamaClient {
 
         let response = self
             .client
-            .post(&format!("{}/api/generate", self.base_url))
+            .post(&format!("{}/api/generate", url))
            .json(&request)
            .send()
            .await?;
@@ -64,7 +97,6 @@ impl OllamaClient {
         if !response.status().is_success() {
             let status = response.status();
             let error_body = response.text().await.unwrap_or_default();
-            log::error!("Ollama request failed: {} - {}", status, error_body);
             return Err(anyhow::anyhow!(
                 "Ollama request failed: {} - {}",
                 status,
@@ -73,13 +105,77 @@ impl OllamaClient {
         }
 
         let result: OllamaResponse = response.json().await?;
+        Ok(result.response)
+    }
+
+    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
+        log::debug!("=== Ollama Request ===");
+        log::debug!("Primary model: {}", self.primary_model);
+        if let Some(sys) = system {
+            log::debug!("System: {}", sys);
+        }
+        log::debug!("Prompt:\n{}", prompt);
+        log::debug!("=====================");
+
+        // Try primary server first with primary model
+        log::info!(
+            "Attempting to generate with primary server: {} (model: {})",
+            self.primary_url,
+            self.primary_model
+        );
+        let primary_result = self
+            .try_generate(&self.primary_url, &self.primary_model, prompt, system)
+            .await;
+
+        let raw_response = match primary_result {
+            Ok(response) => {
+                log::info!("Successfully generated response from primary server");
+                response
+            }
+            Err(e) => {
+                log::warn!("Primary server failed: {}", e);
+
+                // Try fallback server if available
+                if let Some(fallback_url) = &self.fallback_url {
+                    // Use fallback model if specified, otherwise use primary model
+                    let fallback_model =
+                        self.fallback_model.as_ref().unwrap_or(&self.primary_model);
+
+                    log::info!(
+                        "Attempting to generate with fallback server: {} (model: {})",
+                        fallback_url,
+                        fallback_model
+                    );
+                    match self
+                        .try_generate(fallback_url, fallback_model, prompt, system)
+                        .await
+                    {
+                        Ok(response) => {
+                            log::info!("Successfully generated response from fallback server");
+                            response
+                        }
+                        Err(fallback_e) => {
+                            log::error!("Fallback server also failed: {}", fallback_e);
+                            return Err(anyhow::anyhow!(
+                                "Both primary and fallback servers failed. Primary: {}, Fallback: {}",
+                                e,
+                                fallback_e
+                            ));
+                        }
+                    }
+                } else {
+                    log::error!("No fallback server configured");
+                    return Err(e);
+                }
+            }
+        };
 
         log::debug!("=== Ollama Response ===");
-        log::debug!("Raw response: {}", result.response.trim());
+        log::debug!("Raw response: {}", raw_response.trim());
         log::debug!("=======================");
 
         // Extract final answer from thinking model output
-        let cleaned = self.extract_final_answer(&result.response);
+        let cleaned = self.extract_final_answer(&raw_response);
 
         log::debug!("=== Cleaned Response ===");
         log::debug!("Final answer: {}", cleaned);
@@ -99,7 +195,7 @@ impl OllamaClient {
         let sms_str = sms_summary.unwrap_or("No messages");
 
         let prompt = format!(
-            r#"Create a short title (maximum 8 words) for this photo:
+            r#"Create a short title (maximum 8 words) about this moment:
 
 Date: {}
 Location: {}
@@ -113,8 +209,7 @@ Return ONLY the title, nothing else."#,
             sms_str
         );
 
-        let system =
-            "You are a memory assistant. Use only the information provided. Do not invent details.";
+        let system = "You are my long term memory assistant. Use only the information provided. Do not invent details.";
 
         let title = self.generate(&prompt, Some(system)).await?;
         Ok(title.trim().trim_matches('"').to_string())
@@ -127,7 +222,7 @@ Return ONLY the title, nothing else."#,
         location: Option<&str>,
         sms_summary: Option<&str>,
     ) -> Result<String> {
-        let location_str = location.unwrap_or("somewhere");
+        let location_str = location.unwrap_or("Unknown");
         let sms_str = sms_summary.unwrap_or("No messages");
 
         let prompt = format!(
@@ -137,7 +232,7 @@ Date: {}
 Location: {}
 Messages: {}
 
-Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cam in a casual but fluent tone. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
+Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
             date.format("%B %d, %Y"),
             location_str,
             sms_str
@@ -147,15 +242,6 @@ Use only the specific details provided above. Mention people's names, places, or
 
         self.generate(&prompt, Some(system)).await
     }
-
 }
-
-pub struct MemoryContext {
-    pub date: NaiveDate,
-    pub photos: Vec<MemoryItem>,
-    pub sms_summary: Option<String>,
-    pub locations: Vec<String>,
-    pub cameras: Vec<String>,
-}
 
 #[derive(Serialize)]
@@ -171,3 +257,13 @@ struct OllamaRequest {
 struct OllamaResponse {
     response: String,
 }
+
+#[derive(Deserialize)]
+struct OllamaTagsResponse {
+    models: Vec<OllamaModel>,
+}
+
+#[derive(Deserialize)]
+struct OllamaModel {
+    name: String,
+}