From c703a47f175752b332cd9d8381c20f900be0f4f8 Mon Sep 17 00:00:00 2001 From: Cameron Date: Mon, 13 Apr 2026 09:23:40 -0400 Subject: [PATCH] Add the ability to rate insights to curate training data --- src/ai/handlers.rs | 100 +++++++++++++++++++++++++++++++++++ src/ai/insight_generator.rs | 44 +++++++++++++-- src/ai/mod.rs | 5 +- src/database/insights_dao.rs | 55 +++++++++++++++++++ src/database/models.rs | 3 ++ src/database/schema.rs | 2 + src/main.rs | 2 + 7 files changed, 204 insertions(+), 7 deletions(-) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index 2d4f905..60e0964 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -25,6 +25,18 @@ pub struct GetPhotoInsightQuery { pub path: String, } +#[derive(Debug, Deserialize)] +pub struct RateInsightRequest { + pub file_path: String, + pub approved: bool, +} + +#[derive(Debug, Deserialize)] +pub struct ExportTrainingDataQuery { + #[serde(default)] + pub approved_only: Option, +} + #[derive(Debug, Serialize)] pub struct PhotoInsightResponse { pub id: i32, @@ -37,6 +49,8 @@ pub struct PhotoInsightResponse { pub prompt_eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub eval_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub approved: Option, } #[derive(Debug, Serialize)] @@ -139,6 +153,7 @@ pub async fn get_insight_handler( model_version: insight.model_version, prompt_eval_count: None, eval_count: None, + approved: insight.approved, }; HttpResponse::Ok().json(response) } @@ -205,6 +220,7 @@ pub async fn get_all_insights_handler( model_version: insight.model_version, prompt_eval_count: None, eval_count: None, + approved: insight.approved, }) .collect(); @@ -287,6 +303,7 @@ pub async fn generate_agentic_insight_handler( model_version: insight.model_version, prompt_eval_count, eval_count, + approved: insight.approved, }; HttpResponse::Ok().json(response) } @@ -377,3 +394,86 @@ pub async fn get_available_models_handler( HttpResponse::Ok().json(response) } + +/// POST /insights/rate - Rate an insight (thumbs up/down for training data) +#[post("/insights/rate")] +pub async fn rate_insight_handler( + _claims: Claims, + request: web::Json, + insight_dao: web::Data>>, +) -> impl Responder { + let normalized_path = normalize_path(&request.file_path); + log::info!( + "Rating insight for {}: approved={}", + normalized_path, + request.approved + ); + + let otel_context = opentelemetry::Context::new(); + let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); + + match dao.rate_insight(&otel_context, &normalized_path, request.approved) { + Ok(()) => HttpResponse::Ok().json(serde_json::json!({ + "success": true, + "message": "Insight rated successfully" + })), + Err(e) => { + log::error!("Failed to rate insight: {:?}", e); + HttpResponse::InternalServerError().json(serde_json::json!({ + "error": format!("Failed to rate insight: {:?}", e) + })) + } + } +} + +/// GET /insights/training-data - Export approved training data as JSONL +#[get("/insights/training-data")] +pub async fn export_training_data_handler( + _claims: Claims, + query: web::Query, + insight_dao: web::Data>>, +) -> impl Responder { + let approved_only = query.approved_only.unwrap_or(true); + log::info!("Exporting training data (approved_only={})", approved_only); + + let otel_context = opentelemetry::Context::new(); + let mut dao = insight_dao.lock().expect("Unable to lock InsightDao"); + + let insights = if approved_only { + dao.get_approved_insights(&otel_context) + } else { + dao.get_all_insights(&otel_context) + }; + + match insights { + Ok(insights) => { + let mut jsonl = String::new(); + for insight in &insights { + if let Some(ref messages) = insight.training_messages { + let entry = serde_json::json!({ + "file_path": insight.file_path, + "model_version": insight.model_version, + "generated_at": insight.generated_at, + "title": insight.title, + "summary": insight.summary, + "messages": serde_json::from_str::(messages) + .unwrap_or(serde_json::Value::Null), + }); + jsonl.push_str(&entry.to_string()); + jsonl.push('\n'); + } + } + + HttpResponse::Ok() + .content_type("application/jsonl") + .insert_header(("Content-Disposition", "attachment; filename=\"training_data.jsonl\"")) + .body(jsonl) + } + Err(e) => { + log::error!("Failed to export training data: {:?}", e); + HttpResponse::InternalServerError().json(serde_json::json!({ + "error": format!("Failed to export training data: {:?}", e) + })) + } + } +} diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 4c0f266..603f704 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1,6 +1,6 @@ use anyhow::Result; use base64::Engine as _; -use chrono::{DateTime, NaiveDate, Utc}; +use chrono::{DateTime, Local, NaiveDate, Utc}; use image::ImageFormat; use opentelemetry::KeyValue; use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer}; @@ -1165,6 +1165,7 @@ impl InsightGenerator { generated_at: Utc::now().timestamp(), model_version: ollama_client.primary_model.clone(), is_current: true, + training_messages: None, }; let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); @@ -1367,6 +1368,7 @@ Return ONLY the summary, nothing else."#, "recall_facts_for_photo" => self.tool_recall_facts_for_photo(arguments, cx).await, "store_entity" => self.tool_store_entity(arguments, ollama, cx).await, "store_fact" => self.tool_store_fact(arguments, file_path, cx).await, + "get_current_datetime" => Self::tool_get_current_datetime(), unknown => format!("Unknown tool: {}", unknown), }; if result.starts_with("Error") || result.starts_with("No ") { @@ -2023,6 +2025,16 @@ Return ONLY the summary, nothing else."#, ) } + /// Tool: get_current_datetime — returns the current local date and time + fn tool_get_current_datetime() -> String { + let now = Local::now(); + format!( + "Current date/time: {} ({})", + now.format("%Y-%m-%d %H:%M:%S %Z"), + now.format("%A") + ) + } + // ── Agentic insight generation ────────────────────────────────────── /// Build the list of tool definitions for the agentic loop @@ -2237,6 +2249,15 @@ Return ONLY the summary, nothing else."#, }), )); + tools.push(Tool::function( + "get_current_datetime", + "Get the current date and time. Useful for understanding how long ago the photo was taken.", + serde_json::json!({ + "type": "object", + "properties": {} + }), + )); + if has_vision { tools.push(Tool::function( "describe_photo", @@ -2630,10 +2651,13 @@ Return ONLY the summary, nothing else."#, "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.", )); let (final_response, prompt_tokens, eval_tokens) = - ollama_client.chat_with_tools(messages, vec![]).await?; + ollama_client + .chat_with_tools(messages.clone(), vec![]) + .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; - final_content = final_response.content; + final_content = final_response.content.clone(); + messages.push(final_response); } loop_cx @@ -2653,7 +2677,16 @@ Return ONLY the summary, nothing else."#, &final_content[..final_content.len().min(200)] ); - // 14. Store insight (returns the persisted row including its new id) + // 14. Serialize the full message history for training data + let training_messages = match serde_json::to_string(&messages) { + Ok(json) => Some(json), + Err(e) => { + log::warn!("Failed to serialize training messages: {}", e); + None + } + }; + + // 15. Store insight (returns the persisted row including its new id) let insight = InsertPhotoInsight { file_path: file_path.to_string(), title, @@ -2661,6 +2694,7 @@ Return ONLY the summary, nothing else."#, generated_at: Utc::now().timestamp(), model_version: ollama_client.primary_model.clone(), is_current: true, + training_messages, }; let stored = { @@ -2682,7 +2716,7 @@ Return ONLY the summary, nothing else."#, let stored_insight = stored?; - // 15. Backfill source_insight_id on all facts recorded for this photo during the loop + // 16. Backfill source_insight_id on all facts recorded for this photo during the loop { let mut kdao = self .knowledge_dao diff --git a/src/ai/mod.rs b/src/ai/mod.rs index 49c6651..4e682fb 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -8,8 +8,9 @@ pub mod sms_client; #[allow(unused_imports)] pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate}; pub use handlers::{ - delete_insight_handler, generate_agentic_insight_handler, generate_insight_handler, - get_all_insights_handler, get_available_models_handler, get_insight_handler, + delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler, + generate_insight_handler, get_all_insights_handler, get_available_models_handler, + get_insight_handler, rate_insight_handler, }; pub use insight_generator::InsightGenerator; pub use ollama::{ModelCapabilities, OllamaClient}; diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs index 9dff438..473bb3c 100644 --- a/src/database/insights_dao.rs +++ b/src/database/insights_dao.rs @@ -37,6 +37,18 @@ pub trait InsightDao: Sync + Send { &mut self, context: &opentelemetry::Context, ) -> Result, DbError>; + + fn rate_insight( + &mut self, + context: &opentelemetry::Context, + file_path: &str, + approved: bool, + ) -> Result<(), DbError>; + + fn get_approved_insights( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError>; } pub struct SqliteInsightDao { @@ -169,4 +181,47 @@ impl InsightDao for SqliteInsightDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn rate_insight( + &mut self, + context: &opentelemetry::Context, + path: &str, + is_approved: bool, + ) -> Result<(), DbError> { + trace_db_call(context, "update", "rate_insight", |_span| { + use schema::photo_insights::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get InsightDao"); + + diesel::update( + photo_insights + .filter(file_path.eq(path)) + .filter(is_current.eq(true)), + ) + .set(approved.eq(Some(is_approved))) + .execute(connection.deref_mut()) + .map(|_| ()) + .map_err(|_| anyhow::anyhow!("Update error")) + }) + .map_err(|_| DbError::new(DbErrorKind::UpdateError)) + } + + fn get_approved_insights( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_approved_insights", |_span| { + use schema::photo_insights::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get InsightDao"); + + photo_insights + .filter(approved.eq(true)) + .filter(training_messages.is_not_null()) + .order(generated_at.desc()) + .load::(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } diff --git a/src/database/models.rs b/src/database/models.rs index 93309fc..237e9b4 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -86,6 +86,7 @@ pub struct InsertPhotoInsight { pub generated_at: i64, pub model_version: String, pub is_current: bool, + pub training_messages: Option, } #[derive(Serialize, Queryable, Clone, Debug)] @@ -97,6 +98,8 @@ pub struct PhotoInsight { pub generated_at: i64, pub model_version: String, pub is_current: bool, + pub training_messages: Option, + pub approved: Option, } // --- Knowledge memory models --- diff --git a/src/database/schema.rs b/src/database/schema.rs index cbcff68..bddced4 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -151,6 +151,8 @@ diesel::table! { generated_at -> BigInt, model_version -> Text, is_current -> Bool, + training_messages -> Nullable, + approved -> Nullable, } } diff --git a/src/main.rs b/src/main.rs index ab6a48d..8a95d2d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1185,6 +1185,8 @@ fn main() -> std::io::Result<()> { .service(ai::delete_insight_handler) .service(ai::get_all_insights_handler) .service(ai::get_available_models_handler) + .service(ai::rate_insight_handler) + .service(ai::export_training_data_handler) .add_feature(add_tag_services::<_, SqliteTagDao>) .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>) .app_data(app_data.clone())