Add Insights Model Discovery and Fallback Handling
CLAUDE.md
@@ -250,8 +250,31 @@ Optional:
 WATCH_QUICK_INTERVAL_SECONDS=60            # Quick scan interval
 WATCH_FULL_INTERVAL_SECONDS=3600           # Full scan interval
 OTLP_OTLS_ENDPOINT=http://...              # OpenTelemetry collector (release builds)
+
+# AI Insights Configuration
+OLLAMA_PRIMARY_URL=http://desktop:11434    # Primary Ollama server (e.g., desktop)
+OLLAMA_FALLBACK_URL=http://server:11434    # Fallback Ollama server (optional, always-on)
+OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b   # Model for primary server (default: nemotron-3-nano:30b)
+OLLAMA_FALLBACK_MODEL=llama3.2:3b          # Model for fallback server (optional, uses primary if not set)
+SMS_API_URL=http://localhost:8000          # SMS message API endpoint (default: localhost:8000)
+SMS_API_TOKEN=your-api-token               # SMS API authentication token (optional)
 ```
 
+**AI Insights Fallback Behavior:**
+- The primary server is tried first with its configured model (5-second connection timeout)
+- On connection failure, generation automatically falls back to the secondary server with its model (if configured)
+- If `OLLAMA_FALLBACK_MODEL` is not set, the fallback server uses the same model as the primary
+- The total request timeout is 120 seconds to accommodate slow LLM inference
+- Logs indicate which server and model were used (info level) and failover attempts (warn level)
+- Backwards compatible: `OLLAMA_URL` and `OLLAMA_MODEL` are still supported as fallbacks
+
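The failover order above, condensed into a minimal sketch. This is illustrative only; the real implementation is the `OllamaClient::generate` rewrite later in this diff, and `try_generate` here is a stub standing in for the HTTP call:

```rust
// Stub standing in for the real POST to {url}/api/generate.
async fn try_generate(_url: &str, _model: &str, _prompt: &str) -> Result<String, String> {
    unimplemented!("stub for illustration")
}

async fn generate_with_failover(
    primary_url: &str,
    fallback_url: Option<&str>,
    primary_model: &str,
    fallback_model: Option<&str>,
    prompt: &str,
) -> Result<String, String> {
    // 1. Primary server, primary model.
    match try_generate(primary_url, primary_model, prompt).await {
        Ok(response) => Ok(response),
        // 2. On failure, try the fallback server; reuse the primary model
        //    if no fallback model is configured.
        Err(primary_err) => match fallback_url {
            Some(url) => {
                let model = fallback_model.unwrap_or(primary_model);
                try_generate(url, model, prompt).await.map_err(|fallback_err| {
                    format!("primary: {primary_err}; fallback: {fallback_err}")
                })
            }
            // 3. No fallback configured: surface the primary error.
            None => Err(primary_err),
        },
    }
}
```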
+**Model Discovery:**
+The `OllamaClient` provides methods to query available models:
+- `OllamaClient::list_models(url)` - Returns a list of all models on a server
+- `OllamaClient::is_model_available(url, model_name)` - Checks whether a specific model exists
+
+This allows runtime verification of model availability before generating insights.
+
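A hypothetical usage sketch of these two methods (assumes a Tokio async context and the `OllamaClient` re-export from `crate::ai`; `check_model` is not part of this commit):

```rust
use crate::ai::OllamaClient;

// Hypothetical helper: log whether a model exists before generating.
async fn check_model(url: &str, model: &str) -> anyhow::Result<()> {
    if OllamaClient::is_model_available(url, model).await? {
        log::info!("Model {} is available on {}", model, url);
    } else {
        let models = OllamaClient::list_models(url).await?;
        log::warn!("Model {} missing on {}; server has: {:?}", model, url, models);
    }
    Ok(())
}
```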
 ## Dependencies of Note
 
 - **actix-web**: HTTP framework

src/ai/handlers.rs
@@ -1,13 +1,16 @@
 use actix_web::{HttpResponse, Responder, delete, get, post, web};
 use serde::{Deserialize, Serialize};
 
-use crate::ai::InsightGenerator;
+use crate::ai::{InsightGenerator, OllamaClient};
 use crate::data::Claims;
 use crate::database::InsightDao;
+use crate::utils::normalize_path;
 
 #[derive(Debug, Deserialize)]
 pub struct GeneratePhotoInsightRequest {
     pub file_path: String,
+    #[serde(default)]
+    pub model: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]
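Because of `#[serde(default)]`, both of the following request bodies deserialize; a small sketch with a hypothetical file path:

```rust
#[test]
fn generate_request_model_is_optional() -> Result<(), serde_json::Error> {
    // Omitting "model" falls back to None via #[serde(default)].
    let req: GeneratePhotoInsightRequest = serde_json::from_value(serde_json::json!({
        "file_path": "Camera/2024/photo.jpg"
    }))?;
    assert!(req.model.is_none());

    // An explicit "model" overrides the configured default for this request.
    let req: GeneratePhotoInsightRequest = serde_json::from_value(serde_json::json!({
        "file_path": "Camera/2024/photo.jpg",
        "model": "llama3.2:3b"
    }))?;
    assert_eq!(req.model.as_deref(), Some("llama3.2:3b"));
    Ok(())
}
```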
@@ -25,6 +28,20 @@ pub struct PhotoInsightResponse {
     pub model_version: String,
 }
 
+#[derive(Debug, Serialize)]
+pub struct AvailableModelsResponse {
+    pub primary: ServerModels,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub fallback: Option<ServerModels>,
+}
+
+#[derive(Debug, Serialize)]
+pub struct ServerModels {
+    pub url: String,
+    pub models: Vec<String>,
+    pub default_model: String,
+}
+
 /// POST /insights/generate - Generate insight for a specific photo
 #[post("/insights/generate")]
 pub async fn generate_insight_handler(
@@ -32,14 +49,17 @@ pub async fn generate_insight_handler(
     request: web::Json<GeneratePhotoInsightRequest>,
     insight_generator: web::Data<InsightGenerator>,
 ) -> impl Responder {
+    let normalized_path = normalize_path(&request.file_path);
+
     log::info!(
-        "Manual insight generation triggered for photo: {}",
-        request.file_path
+        "Manual insight generation triggered for photo: {} with model: {:?}",
+        normalized_path,
+        request.model
     );
 
-    // Generate insight
+    // Generate insight with optional custom model
    match insight_generator
-        .generate_insight_for_photo(&request.file_path)
+        .generate_insight_for_photo_with_model(&normalized_path, request.model.clone())
         .await
     {
         Ok(()) => HttpResponse::Ok().json(serde_json::json!({
@@ -62,12 +82,13 @@ pub async fn get_insight_handler(
     query: web::Query<GetPhotoInsightQuery>,
     insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
 ) -> impl Responder {
-    log::debug!("Fetching insight for {}", query.path);
+    let normalized_path = normalize_path(&query.path);
+    log::debug!("Fetching insight for {}", normalized_path);
 
     let otel_context = opentelemetry::Context::new();
     let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
 
-    match dao.get_insight(&otel_context, &query.path) {
+    match dao.get_insight(&otel_context, &normalized_path) {
         Ok(Some(insight)) => {
             let response = PhotoInsightResponse {
                 id: insight.id,
@@ -98,12 +119,13 @@ pub async fn delete_insight_handler(
     query: web::Query<GetPhotoInsightQuery>,
     insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
 ) -> impl Responder {
-    log::info!("Deleting insight for {}", query.path);
+    let normalized_path = normalize_path(&query.path);
+    log::info!("Deleting insight for {}", normalized_path);
 
     let otel_context = opentelemetry::Context::new();
     let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
 
-    match dao.delete_insight(&otel_context, &query.path) {
+    match dao.delete_insight(&otel_context, &normalized_path) {
         Ok(()) => HttpResponse::Ok().json(serde_json::json!({
             "success": true,
             "message": "Insight deleted successfully"
@@ -152,3 +174,53 @@ pub async fn get_all_insights_handler(
         }
     }
 }
+
+/// GET /insights/models - List available models from both servers
+#[get("/insights/models")]
+pub async fn get_available_models_handler(
+    _claims: Claims,
+    app_state: web::Data<crate::state::AppState>,
+) -> impl Responder {
+    log::debug!("Fetching available models");
+
+    let ollama_client = &app_state.ollama;
+
+    // Fetch models from primary server
+    let primary_models = match OllamaClient::list_models(&ollama_client.primary_url).await {
+        Ok(models) => models,
+        Err(e) => {
+            log::warn!("Failed to fetch models from primary server: {:?}", e);
+            vec![]
+        }
+    };
+
+    let primary = ServerModels {
+        url: ollama_client.primary_url.clone(),
+        models: primary_models,
+        default_model: ollama_client.primary_model.clone(),
+    };
+
+    // Fetch models from fallback server if configured
+    let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
+        match OllamaClient::list_models(fallback_url).await {
+            Ok(models) => Some(ServerModels {
+                url: fallback_url.clone(),
+                models,
+                default_model: ollama_client
+                    .fallback_model
+                    .clone()
+                    .unwrap_or_else(|| ollama_client.primary_model.clone()),
+            }),
+            Err(e) => {
+                log::warn!("Failed to fetch models from fallback server: {:?}", e);
+                None
+            }
+        }
+    } else {
+        None
+    };
+
+    let response = AvailableModelsResponse { primary, fallback };
+
+    HttpResponse::Ok().json(response)
+}
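The response serializes along these lines (a sketch with made-up values; `fallback` is omitted entirely when unset, per `skip_serializing_if`):

```rust
#[test]
fn models_response_omits_missing_fallback() {
    let response = AvailableModelsResponse {
        primary: ServerModels {
            url: "http://desktop:11434".to_string(),
            models: vec!["nemotron-3-nano:30b".to_string(), "llama3.2:3b".to_string()],
            default_model: "nemotron-3-nano:30b".to_string(),
        },
        fallback: None,
    };
    assert_eq!(
        serde_json::to_value(&response).unwrap(),
        serde_json::json!({
            "primary": {
                "url": "http://desktop:11434",
                "models": ["nemotron-3-nano:30b", "llama3.2:3b"],
                "default_model": "nemotron-3-nano:30b"
            }
        })
    );
}
```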

src/ai/insight_generator.rs
@@ -1,6 +1,7 @@
 use anyhow::Result;
-use chrono::Utc;
+use chrono::{DateTime, Utc};
 use serde::Deserialize;
+use std::fs::File;
 use std::sync::{Arc, Mutex};
 
 use crate::ai::ollama::OllamaClient;
@@ -8,6 +9,7 @@ use crate::ai::sms_client::SmsApiClient;
 use crate::database::models::InsertPhotoInsight;
 use crate::database::{ExifDao, InsightDao};
 use crate::memories::extract_date_from_filename;
+use crate::utils::normalize_path;
 
 #[derive(Deserialize)]
 struct NominatimResponse {
@@ -20,9 +22,7 @@ struct NominatimAddress {
     city: Option<String>,
     town: Option<String>,
     village: Option<String>,
     county: Option<String>,
     state: Option<String>,
     country: Option<String>,
 }
 
 #[derive(Clone)]
@@ -31,6 +31,7 @@ pub struct InsightGenerator {
     sms_client: SmsApiClient,
     insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
     exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
+    base_path: String,
 }
 
 impl InsightGenerator {
@@ -39,12 +40,14 @@ impl InsightGenerator {
         sms_client: SmsApiClient,
         insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
         exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
+        base_path: String,
     ) -> Self {
         Self {
             ollama,
             sms_client,
             insight_dao,
             exif_dao,
             base_path,
         }
     }
 
@@ -69,16 +72,35 @@ impl InsightGenerator {
         None
     }
 
-    /// Generate AI insight for a single photo
-    pub async fn generate_insight_for_photo(&self, file_path: &str) -> Result<()> {
+    /// Generate AI insight for a single photo with optional custom model
+    pub async fn generate_insight_for_photo_with_model(
+        &self,
+        file_path: &str,
+        custom_model: Option<String>,
+    ) -> Result<()> {
+        // Normalize path to ensure consistent forward slashes in database
+        let file_path = normalize_path(file_path);
         log::info!("Generating insight for photo: {}", file_path);
 
+        // Create custom Ollama client if model is specified
+        let ollama_client = if let Some(model) = custom_model {
+            log::info!("Using custom model: {}", model);
+            OllamaClient::new(
+                self.ollama.primary_url.clone(),
+                self.ollama.fallback_url.clone(),
+                model.clone(),
+                Some(model), // Use the same custom model for fallback server
+            )
+        } else {
+            self.ollama.clone()
+        };
+
         // 1. Get EXIF data for the photo
         let otel_context = opentelemetry::Context::new();
         let exif = {
             let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
             exif_dao
-                .get_exif(&otel_context, file_path)
+                .get_exif(&otel_context, &file_path)
                 .map_err(|e| anyhow::anyhow!("Failed to get EXIF: {:?}", e))?
         };
 
@@ -88,17 +110,33 @@ impl InsightGenerator {
         } else {
             log::warn!("No date_taken in EXIF for {}, trying filename", file_path);
 
-            extract_date_from_filename(file_path)
+            extract_date_from_filename(&file_path)
                 .map(|dt| dt.timestamp())
+                .or_else(|| {
+                    // Combine base_path with file_path to get full path
+                    let full_path = std::path::Path::new(&self.base_path).join(&file_path);
+                    File::open(&full_path)
+                        .and_then(|f| f.metadata())
+                        .and_then(|m| m.created().or(m.modified()))
+                        .map(|t| DateTime::<Utc>::from(t).timestamp())
+                        .inspect_err(|e| {
+                            log::warn!(
+                                "Failed to get file timestamp for insight {}: {}",
+                                file_path,
+                                e
+                            )
+                        })
+                        .ok()
+                })
                 .unwrap_or_else(|| Utc::now().timestamp())
         };
 
-        let date_taken = chrono::DateTime::from_timestamp(timestamp, 0)
+        let date_taken = DateTime::from_timestamp(timestamp, 0)
             .map(|dt| dt.date_naive())
             .unwrap_or_else(|| Utc::now().date_naive());
 
         // 3. Extract contact name from file path
-        let contact = Self::extract_contact_from_path(file_path);
+        let contact = Self::extract_contact_from_path(&file_path);
         log::info!("Extracted contact from path: {:?}", contact);
 
         // 4. Fetch SMS messages for the contact (±1 day)
@@ -124,7 +162,7 @@ impl InsightGenerator {
         let sms_summary = if !sms_messages.is_empty() {
             match self
                 .sms_client
-                .summarize_context(&sms_messages, &self.ollama)
+                .summarize_context(&sms_messages, &ollama_client)
                 .await
             {
                 Ok(summary) => Some(summary),
@@ -157,13 +195,11 @@ impl InsightGenerator {
         );
 
         // 7. Generate title and summary with Ollama
-        let title = self
-            .ollama
+        let title = ollama_client
             .generate_photo_title(date_taken, location.as_deref(), sms_summary.as_deref())
             .await?;
 
-        let summary = self
-            .ollama
+        let summary = ollama_client
             .generate_photo_summary(date_taken, location.as_deref(), sms_summary.as_deref())
             .await?;
 
@@ -176,7 +212,7 @@ impl InsightGenerator {
             title,
             summary,
             generated_at: Utc::now().timestamp(),
-            model_version: self.ollama.model.clone(),
+            model_version: ollama_client.primary_model.clone(),
         };
 
         let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
 
src/ai/mod.rs
@@ -4,7 +4,8 @@ pub mod ollama;
 pub mod sms_client;
 
 pub use handlers::{
-    delete_insight_handler, generate_insight_handler, get_all_insights_handler, get_insight_handler,
+    delete_insight_handler, generate_insight_handler, get_all_insights_handler,
+    get_available_models_handler, get_insight_handler,
 };
 pub use insight_generator::InsightGenerator;
 pub use ollama::OllamaClient;
src/ai/ollama.rs
@@ -2,25 +2,60 @@ use anyhow::Result;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 
 use crate::memories::MemoryItem;
+use std::time::Duration;
 
 #[derive(Clone)]
 pub struct OllamaClient {
     client: Client,
-    pub base_url: String,
-    pub model: String,
+    pub primary_url: String,
+    pub fallback_url: Option<String>,
+    pub primary_model: String,
+    pub fallback_model: Option<String>,
 }
 
 impl OllamaClient {
-    pub fn new(base_url: String, model: String) -> Self {
+    pub fn new(
+        primary_url: String,
+        fallback_url: Option<String>,
+        primary_model: String,
+        fallback_model: Option<String>,
+    ) -> Self {
         Self {
-            client: Client::new(),
-            base_url,
-            model,
+            client: Client::builder()
+                .connect_timeout(Duration::from_secs(5)) // Quick connection timeout
+                .timeout(Duration::from_secs(120)) // Total request timeout for generation
+                .build()
+                .unwrap_or_else(|_| Client::new()),
+            primary_url,
+            fallback_url,
+            primary_model,
+            fallback_model,
         }
     }
 
+    /// List available models on an Ollama server
+    pub async fn list_models(url: &str) -> Result<Vec<String>> {
+        let client = Client::builder()
+            .connect_timeout(Duration::from_secs(5))
+            .timeout(Duration::from_secs(10))
+            .build()?;
+
+        let response = client.get(&format!("{}/api/tags", url)).send().await?;
+
+        if !response.status().is_success() {
+            return Err(anyhow::anyhow!("Failed to list models from {}", url));
+        }
+
+        let tags_response: OllamaTagsResponse = response.json().await?;
+        Ok(tags_response.models.into_iter().map(|m| m.name).collect())
+    }
+
+    /// Check if a model is available on a server
+    pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
+        let models = Self::list_models(url).await?;
+        Ok(models.iter().any(|m| m == model_name))
+    }
+
     /// Extract final answer from thinking model output
     /// Handles <think>...</think> tags and takes everything after
     fn extract_final_answer(&self, response: &str) -> String {
@@ -38,17 +73,15 @@ impl OllamaClient {
         response.to_string()
     }
 
-    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
-        log::debug!("=== Ollama Request ===");
-        log::debug!("Model: {}", self.model);
-        if let Some(sys) = system {
-            log::debug!("System: {}", sys);
-        }
-        log::debug!("Prompt:\n{}", prompt);
-        log::debug!("=====================");
-
+    async fn try_generate(
+        &self,
+        url: &str,
+        model: &str,
+        prompt: &str,
+        system: Option<&str>,
+    ) -> Result<String> {
         let request = OllamaRequest {
-            model: self.model.clone(),
+            model: model.to_string(),
             prompt: prompt.to_string(),
             stream: false,
             system: system.map(|s| s.to_string()),
@@ -56,7 +89,7 @@ impl OllamaClient {
 
         let response = self
             .client
-            .post(&format!("{}/api/generate", self.base_url))
+            .post(&format!("{}/api/generate", url))
             .json(&request)
             .send()
             .await?;
@@ -64,7 +97,6 @@ impl OllamaClient {
         if !response.status().is_success() {
             let status = response.status();
             let error_body = response.text().await.unwrap_or_default();
-            log::error!("Ollama request failed: {} - {}", status, error_body);
             return Err(anyhow::anyhow!(
                 "Ollama request failed: {} - {}",
                 status,
@@ -73,13 +105,77 @@ impl OllamaClient {
         }
 
         let result: OllamaResponse = response.json().await?;
+        Ok(result.response)
+    }
+
+    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
+        log::debug!("=== Ollama Request ===");
+        log::debug!("Primary model: {}", self.primary_model);
+        if let Some(sys) = system {
+            log::debug!("System: {}", sys);
+        }
+        log::debug!("Prompt:\n{}", prompt);
+        log::debug!("=====================");
+
+        // Try primary server first with primary model
+        log::info!(
+            "Attempting to generate with primary server: {} (model: {})",
+            self.primary_url,
+            self.primary_model
+        );
+        let primary_result = self
+            .try_generate(&self.primary_url, &self.primary_model, prompt, system)
+            .await;
+
+        let raw_response = match primary_result {
+            Ok(response) => {
+                log::info!("Successfully generated response from primary server");
+                response
+            }
+            Err(e) => {
+                log::warn!("Primary server failed: {}", e);
+
+                // Try fallback server if available
+                if let Some(fallback_url) = &self.fallback_url {
+                    // Use fallback model if specified, otherwise use primary model
+                    let fallback_model =
+                        self.fallback_model.as_ref().unwrap_or(&self.primary_model);
+
+                    log::info!(
+                        "Attempting to generate with fallback server: {} (model: {})",
+                        fallback_url,
+                        fallback_model
+                    );
+                    match self
+                        .try_generate(fallback_url, fallback_model, prompt, system)
+                        .await
+                    {
+                        Ok(response) => {
+                            log::info!("Successfully generated response from fallback server");
+                            response
+                        }
+                        Err(fallback_e) => {
+                            log::error!("Fallback server also failed: {}", fallback_e);
+                            return Err(anyhow::anyhow!(
+                                "Both primary and fallback servers failed. Primary: {}, Fallback: {}",
+                                e,
+                                fallback_e
+                            ));
+                        }
+                    }
+                } else {
+                    log::error!("No fallback server configured");
+                    return Err(e);
+                }
+            }
+        };
 
         log::debug!("=== Ollama Response ===");
-        log::debug!("Raw response: {}", result.response.trim());
+        log::debug!("Raw response: {}", raw_response.trim());
         log::debug!("=======================");
 
         // Extract final answer from thinking model output
-        let cleaned = self.extract_final_answer(&result.response);
+        let cleaned = self.extract_final_answer(&raw_response);
 
         log::debug!("=== Cleaned Response ===");
         log::debug!("Final answer: {}", cleaned);
@@ -99,7 +195,7 @@ impl OllamaClient {
         let sms_str = sms_summary.unwrap_or("No messages");
 
         let prompt = format!(
-            r#"Create a short title (maximum 8 words) for this photo:
+            r#"Create a short title (maximum 8 words) about this moment:
 
 Date: {}
 Location: {}
@@ -113,8 +209,7 @@ Return ONLY the title, nothing else."#,
             sms_str
         );
 
-        let system =
-            "You are a memory assistant. Use only the information provided. Do not invent details.";
+        let system = "You are my long term memory assistant. Use only the information provided. Do not invent details.";
 
         let title = self.generate(&prompt, Some(system)).await?;
         Ok(title.trim().trim_matches('"').to_string())
@@ -127,7 +222,7 @@ Return ONLY the title, nothing else."#,
         location: Option<&str>,
         sms_summary: Option<&str>,
     ) -> Result<String> {
-        let location_str = location.unwrap_or("somewhere");
+        let location_str = location.unwrap_or("Unknown");
         let sms_str = sms_summary.unwrap_or("No messages");
 
         let prompt = format!(
@@ -137,7 +232,7 @@ Date: {}
 Location: {}
 Messages: {}
 
-Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cam in a casual but fluent tone. If limited information is available, keep it simple and factual. If the location is unknown, omit it"#,
+Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown, omit it"#,
             date.format("%B %d, %Y"),
             location_str,
             sms_str
@@ -147,15 +242,6 @@ Use only the specific details provided above. Mention people's names, places, or
 
         self.generate(&prompt, Some(system)).await
     }
-
 }
 
-pub struct MemoryContext {
-    pub date: NaiveDate,
-    pub photos: Vec<MemoryItem>,
-    pub sms_summary: Option<String>,
-    pub locations: Vec<String>,
-    pub cameras: Vec<String>,
-}
-
 #[derive(Serialize)]
@@ -171,3 +257,13 @@ struct OllamaRequest {
 struct OllamaResponse {
     response: String,
 }
+
+#[derive(Deserialize)]
+struct OllamaTagsResponse {
+    models: Vec<OllamaModel>,
+}
+
+#[derive(Deserialize)]
+struct OllamaModel {
+    name: String,
+}

src/lib.rs
@@ -18,6 +18,7 @@ pub mod state;
 pub mod tags;
 #[cfg(test)]
 pub mod testhelpers;
+pub mod utils;
 pub mod video;
 
 // Re-export commonly used types

src/main.rs
@@ -46,7 +46,6 @@ use crate::tags::*;
 use crate::video::actors::{
     ProcessMessage, ScanDirectoryMessage, create_playlist, generate_video_thumbnail,
 };
 use crate::video::generate_video_gifs;
 use log::{debug, error, info, trace, warn};
 use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
 use opentelemetry::{KeyValue, global};
@@ -62,6 +61,7 @@ mod files;
 mod geo;
 mod state;
 mod tags;
+mod utils;
 mod video;
 
 mod memories;
@@ -802,6 +802,7 @@ fn main() -> std::io::Result<()> {
             .service(ai::get_insight_handler)
             .service(ai::delete_insight_handler)
             .service(ai::get_all_insights_handler)
+            .service(ai::get_available_models_handler)
             .add_feature(add_tag_services::<_, SqliteTagDao>)
             .app_data(app_data.clone())
             .app_data::<Data<RealFileSystem>>(Data::new(RealFileSystem::new(
src/state.rs
@@ -65,10 +65,21 @@ impl AppState {
 impl Default for AppState {
     fn default() -> Self {
         // Initialize AI clients
-        let ollama_url =
-            env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string());
-        let ollama_model = env::var("OLLAMA_MODEL").unwrap_or_else(|_| "llama3.2".to_string());
-        let ollama = OllamaClient::new(ollama_url, ollama_model);
+        let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
+            env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
+        });
+        let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
+        let ollama_primary_model = env::var("OLLAMA_PRIMARY_MODEL")
+            .or_else(|_| env::var("OLLAMA_MODEL"))
+            .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
+        let ollama_fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
+
+        let ollama = OllamaClient::new(
+            ollama_primary_url,
+            ollama_fallback_url,
+            ollama_primary_model,
+            ollama_fallback_model,
+        );
 
         let sms_api_url =
             env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
@@ -81,17 +92,21 @@ impl Default for AppState {
         let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
             Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
 
+        // Load base path
+        let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
+
         // Initialize InsightGenerator
         let insight_generator = InsightGenerator::new(
             ollama.clone(),
             sms_client.clone(),
             insight_dao.clone(),
             exif_dao.clone(),
+            base_path.clone(),
         );
 
         Self::new(
             Arc::new(StreamActor {}.start()),
-            env::var("BASE_PATH").expect("BASE_PATH was not set in the env"),
+            base_path,
             env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"),
             env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"),
             env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"),
@@ -119,8 +134,12 @@ impl AppState {
         let gif_path = create_test_subdir(&base_path, "gifs");
 
         // Initialize test AI clients
-        let ollama =
-            OllamaClient::new("http://localhost:11434".to_string(), "llama3.2".to_string());
+        let ollama = OllamaClient::new(
+            "http://localhost:11434".to_string(),
+            None,
+            "llama3.2".to_string(),
+            None,
+        );
         let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None);
 
         // Initialize test DAOs
@@ -130,17 +149,19 @@ impl AppState {
             Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
 
         // Initialize test InsightGenerator
+        let base_path_str = base_path.to_string_lossy().to_string();
         let insight_generator = InsightGenerator::new(
             ollama.clone(),
             sms_client.clone(),
             insight_dao.clone(),
             exif_dao.clone(),
+            base_path_str.clone(),
         );
 
         // Create the AppState with the temporary paths
         AppState::new(
             Arc::new(StreamActor {}.start()),
-            base_path.to_string_lossy().to_string(),
+            base_path_str,
             thumbnail_path.to_string_lossy().to_string(),
             video_path.to_string_lossy().to_string(),
             gif_path.to_string_lossy().to_string(),
src/tags.rs
@@ -1,5 +1,6 @@
 use crate::data::GetTagsRequest;
 use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
+use crate::utils::normalize_path;
 use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema};
 use actix_web::dev::{ServiceFactory, ServiceRequest};
 use actix_web::{App, HttpRequest, HttpResponse, Responder, web};
@@ -41,6 +42,7 @@ async fn add_tag<D: TagDao>(
     let span = tracer.start_with_context("add_tag", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
     let tag_name = body.tag_name.clone();
+    let normalized_path = normalize_path(&body.file_name);
 
     let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
 
@@ -52,12 +54,12 @@ async fn add_tag<D: TagDao>(
         } else {
             info!(
                 "Creating missing tag: '{:?}' for file: '{}'",
-                tag_name, &body.file_name
+                tag_name, &normalized_path
             );
             tag_dao.create_tag(&span_context, tag_name.trim())
         }
     })
-    .and_then(|tag| tag_dao.tag_file(&span_context, &body.file_name, tag.id))
+    .and_then(|tag| tag_dao.tag_file(&span_context, &normalized_path, tag.id))
     .map(|_| {
         span_context.span().set_status(Status::Ok);
         HttpResponse::Ok()
@@ -74,9 +76,10 @@ async fn get_tags<D: TagDao>(
     let context = extract_context_from_request(&http_request);
     let span = global_tracer().start_with_context("get_tags", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
+    let normalized_path = normalize_path(&request.path);
     let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
     tag_dao
-        .get_tags_for_path(&span_context, &request.path)
+        .get_tags_for_path(&span_context, &normalized_path)
         .map(|tags| {
             span_context.span().set_status(Status::Ok);
             HttpResponse::Ok().json(tags)
@@ -139,10 +142,11 @@ async fn remove_tagged_photo<D: TagDao>(
     let context = extract_context_from_request(&http_request);
     let span = global_tracer().start_with_context("remove_tagged_photo", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
+    let normalized_path = normalize_path(&request.file_name);
 
     let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
     tag_dao
-        .remove_tag(&span_context, &request.tag_name, &request.file_name)
+        .remove_tag(&span_context, &request.tag_name, &normalized_path)
         .map(|result| {
             span_context.span().set_status(Status::Ok);
 
@@ -165,8 +169,9 @@ async fn update_tags<D: TagDao>(
     let context = extract_context_from_request(&http_request);
     let span = global_tracer().start_with_context("update_tags", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
+    let normalized_path = normalize_path(&request.file_name);
 
-    dao.get_tags_for_path(&span_context, &request.file_name)
+    dao.get_tags_for_path(&span_context, &normalized_path)
         .and_then(|existing_tags| {
             dao.get_all_tags(&span_context, None)
                 .map(|all| (existing_tags, all))
@@ -180,9 +185,9 @@ async fn update_tags<D: TagDao>(
             for tag in tags_to_remove {
                 info!(
                     "Removing tag {:?} from file: {:?}",
-                    tag.name, request.file_name
+                    tag.name, normalized_path
                 );
-                dao.remove_tag(&span_context, &tag.name, &request.file_name)
+                dao.remove_tag(&span_context, &tag.name, &normalized_path)
                     .unwrap_or_else(|err| panic!("{:?} Unable to remove tag {:?}", err, &tag.name));
             }
 
@@ -194,14 +199,14 @@ async fn update_tags<D: TagDao>(
             for (_, new_tag) in new_tags {
                 info!(
                     "Adding tag {:?} to file: {:?}",
-                    new_tag.name, request.file_name
+                    new_tag.name, normalized_path
                 );
 
-                dao.tag_file(&span_context, &request.file_name, new_tag.id)
+                dao.tag_file(&span_context, &normalized_path, new_tag.id)
                     .with_context(|| {
                         format!(
                             "Unable to tag file {:?} with tag: {:?}",
-                            request.file_name, new_tag.name
+                            normalized_path, new_tag.name
                         )
                     })
                     .unwrap();
src/utils.rs (new file)
@@ -0,0 +1,83 @@
+/// Normalize a file path to use forward slashes for cross-platform consistency
+/// This ensures paths stored in the database always use `/` regardless of OS
+///
+/// # Examples
+/// ```
+/// use image_api::utils::normalize_path;
+///
+/// assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
+/// assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
+/// ```
+pub fn normalize_path(path: &str) -> String {
+    path.replace('\\', "/")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_normalize_path_with_backslashes() {
+        assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
+    }
+
+    #[test]
+    fn test_normalize_path_with_forward_slashes() {
+        assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
+    }
+
+    #[test]
+    fn test_normalize_path_mixed() {
+        assert_eq!(
+            normalize_path("foo\\bar/baz\\qux.jpg"),
+            "foo/bar/baz/qux.jpg"
+        );
+    }
+
+    #[test]
+    fn test_normalize_path_empty() {
+        assert_eq!(normalize_path(""), "");
+    }
+
+    #[test]
+    fn test_normalize_path_absolute_windows() {
+        assert_eq!(
+            normalize_path("C:\\Users\\Photos\\image.jpg"),
+            "C:/Users/Photos/image.jpg"
+        );
+    }
+
+    #[test]
+    fn test_normalize_path_unc_path() {
+        assert_eq!(
+            normalize_path("\\\\server\\share\\folder\\file.jpg"),
+            "//server/share/folder/file.jpg"
+        );
+    }
+
+    #[test]
+    fn test_normalize_path_single_filename() {
+        assert_eq!(normalize_path("image.jpg"), "image.jpg");
+    }
+
+    #[test]
+    fn test_normalize_path_trailing_slash() {
+        assert_eq!(normalize_path("foo\\bar\\"), "foo/bar/");
+    }
+
+    #[test]
+    fn test_normalize_path_multiple_consecutive_backslashes() {
+        assert_eq!(
+            normalize_path("foo\\\\bar\\\\\\baz.jpg"),
+            "foo//bar///baz.jpg"
+        );
+    }
+
+    #[test]
+    fn test_normalize_path_deep_nesting() {
+        assert_eq!(
+            normalize_path("a\\b\\c\\d\\e\\f\\g\\file.jpg"),
+            "a/b/c/d/e/f/g/file.jpg"
+        );
+    }
+}