Add the ability to rate insights to curate training data
This commit is contained in:
@@ -25,6 +25,18 @@ pub struct GetPhotoInsightQuery {
|
|||||||
pub path: String,
|
pub path: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct RateInsightRequest {
|
||||||
|
pub file_path: String,
|
||||||
|
pub approved: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct ExportTrainingDataQuery {
|
||||||
|
#[serde(default)]
|
||||||
|
pub approved_only: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
pub struct PhotoInsightResponse {
|
pub struct PhotoInsightResponse {
|
||||||
pub id: i32,
|
pub id: i32,
|
||||||
@@ -37,6 +49,8 @@ pub struct PhotoInsightResponse {
|
|||||||
pub prompt_eval_count: Option<i32>,
|
pub prompt_eval_count: Option<i32>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub eval_count: Option<i32>,
|
pub eval_count: Option<i32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub approved: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
@@ -139,6 +153,7 @@ pub async fn get_insight_handler(
|
|||||||
model_version: insight.model_version,
|
model_version: insight.model_version,
|
||||||
prompt_eval_count: None,
|
prompt_eval_count: None,
|
||||||
eval_count: None,
|
eval_count: None,
|
||||||
|
approved: insight.approved,
|
||||||
};
|
};
|
||||||
HttpResponse::Ok().json(response)
|
HttpResponse::Ok().json(response)
|
||||||
}
|
}
|
||||||
@@ -205,6 +220,7 @@ pub async fn get_all_insights_handler(
|
|||||||
model_version: insight.model_version,
|
model_version: insight.model_version,
|
||||||
prompt_eval_count: None,
|
prompt_eval_count: None,
|
||||||
eval_count: None,
|
eval_count: None,
|
||||||
|
approved: insight.approved,
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@@ -287,6 +303,7 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
model_version: insight.model_version,
|
model_version: insight.model_version,
|
||||||
prompt_eval_count,
|
prompt_eval_count,
|
||||||
eval_count,
|
eval_count,
|
||||||
|
approved: insight.approved,
|
||||||
};
|
};
|
||||||
HttpResponse::Ok().json(response)
|
HttpResponse::Ok().json(response)
|
||||||
}
|
}
|
||||||
@@ -377,3 +394,86 @@ pub async fn get_available_models_handler(
|
|||||||
|
|
||||||
HttpResponse::Ok().json(response)
|
HttpResponse::Ok().json(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// POST /insights/rate - Rate an insight (thumbs up/down for training data)
|
||||||
|
#[post("/insights/rate")]
|
||||||
|
pub async fn rate_insight_handler(
|
||||||
|
_claims: Claims,
|
||||||
|
request: web::Json<RateInsightRequest>,
|
||||||
|
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
|
||||||
|
) -> impl Responder {
|
||||||
|
let normalized_path = normalize_path(&request.file_path);
|
||||||
|
log::info!(
|
||||||
|
"Rating insight for {}: approved={}",
|
||||||
|
normalized_path,
|
||||||
|
request.approved
|
||||||
|
);
|
||||||
|
|
||||||
|
let otel_context = opentelemetry::Context::new();
|
||||||
|
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
|
||||||
|
|
||||||
|
match dao.rate_insight(&otel_context, &normalized_path, request.approved) {
|
||||||
|
Ok(()) => HttpResponse::Ok().json(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"message": "Insight rated successfully"
|
||||||
|
})),
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("Failed to rate insight: {:?}", e);
|
||||||
|
HttpResponse::InternalServerError().json(serde_json::json!({
|
||||||
|
"error": format!("Failed to rate insight: {:?}", e)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GET /insights/training-data - Export approved training data as JSONL
|
||||||
|
#[get("/insights/training-data")]
|
||||||
|
pub async fn export_training_data_handler(
|
||||||
|
_claims: Claims,
|
||||||
|
query: web::Query<ExportTrainingDataQuery>,
|
||||||
|
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
|
||||||
|
) -> impl Responder {
|
||||||
|
let approved_only = query.approved_only.unwrap_or(true);
|
||||||
|
log::info!("Exporting training data (approved_only={})", approved_only);
|
||||||
|
|
||||||
|
let otel_context = opentelemetry::Context::new();
|
||||||
|
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
|
||||||
|
|
||||||
|
let insights = if approved_only {
|
||||||
|
dao.get_approved_insights(&otel_context)
|
||||||
|
} else {
|
||||||
|
dao.get_all_insights(&otel_context)
|
||||||
|
};
|
||||||
|
|
||||||
|
match insights {
|
||||||
|
Ok(insights) => {
|
||||||
|
let mut jsonl = String::new();
|
||||||
|
for insight in &insights {
|
||||||
|
if let Some(ref messages) = insight.training_messages {
|
||||||
|
let entry = serde_json::json!({
|
||||||
|
"file_path": insight.file_path,
|
||||||
|
"model_version": insight.model_version,
|
||||||
|
"generated_at": insight.generated_at,
|
||||||
|
"title": insight.title,
|
||||||
|
"summary": insight.summary,
|
||||||
|
"messages": serde_json::from_str::<serde_json::Value>(messages)
|
||||||
|
.unwrap_or(serde_json::Value::Null),
|
||||||
|
});
|
||||||
|
jsonl.push_str(&entry.to_string());
|
||||||
|
jsonl.push('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HttpResponse::Ok()
|
||||||
|
.content_type("application/jsonl")
|
||||||
|
.insert_header(("Content-Disposition", "attachment; filename=\"training_data.jsonl\""))
|
||||||
|
.body(jsonl)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("Failed to export training data: {:?}", e);
|
||||||
|
HttpResponse::InternalServerError().json(serde_json::json!({
|
||||||
|
"error": format!("Failed to export training data: {:?}", e)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use base64::Engine as _;
|
use base64::Engine as _;
|
||||||
use chrono::{DateTime, NaiveDate, Utc};
|
use chrono::{DateTime, Local, NaiveDate, Utc};
|
||||||
use image::ImageFormat;
|
use image::ImageFormat;
|
||||||
use opentelemetry::KeyValue;
|
use opentelemetry::KeyValue;
|
||||||
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
|
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
|
||||||
@@ -1165,6 +1165,7 @@ impl InsightGenerator {
|
|||||||
generated_at: Utc::now().timestamp(),
|
generated_at: Utc::now().timestamp(),
|
||||||
model_version: ollama_client.primary_model.clone(),
|
model_version: ollama_client.primary_model.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
|
training_messages: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||||
@@ -1367,6 +1368,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
"recall_facts_for_photo" => self.tool_recall_facts_for_photo(arguments, cx).await,
|
"recall_facts_for_photo" => self.tool_recall_facts_for_photo(arguments, cx).await,
|
||||||
"store_entity" => self.tool_store_entity(arguments, ollama, cx).await,
|
"store_entity" => self.tool_store_entity(arguments, ollama, cx).await,
|
||||||
"store_fact" => self.tool_store_fact(arguments, file_path, cx).await,
|
"store_fact" => self.tool_store_fact(arguments, file_path, cx).await,
|
||||||
|
"get_current_datetime" => Self::tool_get_current_datetime(),
|
||||||
unknown => format!("Unknown tool: {}", unknown),
|
unknown => format!("Unknown tool: {}", unknown),
|
||||||
};
|
};
|
||||||
if result.starts_with("Error") || result.starts_with("No ") {
|
if result.starts_with("Error") || result.starts_with("No ") {
|
||||||
@@ -2023,6 +2025,16 @@ Return ONLY the summary, nothing else."#,
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tool: get_current_datetime — returns the current local date and time
|
||||||
|
fn tool_get_current_datetime() -> String {
|
||||||
|
let now = Local::now();
|
||||||
|
format!(
|
||||||
|
"Current date/time: {} ({})",
|
||||||
|
now.format("%Y-%m-%d %H:%M:%S %Z"),
|
||||||
|
now.format("%A")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// ── Agentic insight generation ──────────────────────────────────────
|
// ── Agentic insight generation ──────────────────────────────────────
|
||||||
|
|
||||||
/// Build the list of tool definitions for the agentic loop
|
/// Build the list of tool definitions for the agentic loop
|
||||||
@@ -2237,6 +2249,15 @@ Return ONLY the summary, nothing else."#,
|
|||||||
}),
|
}),
|
||||||
));
|
));
|
||||||
|
|
||||||
|
tools.push(Tool::function(
|
||||||
|
"get_current_datetime",
|
||||||
|
"Get the current date and time. Useful for understanding how long ago the photo was taken.",
|
||||||
|
serde_json::json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {}
|
||||||
|
}),
|
||||||
|
));
|
||||||
|
|
||||||
if has_vision {
|
if has_vision {
|
||||||
tools.push(Tool::function(
|
tools.push(Tool::function(
|
||||||
"describe_photo",
|
"describe_photo",
|
||||||
@@ -2630,10 +2651,13 @@ Return ONLY the summary, nothing else."#,
|
|||||||
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.",
|
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.",
|
||||||
));
|
));
|
||||||
let (final_response, prompt_tokens, eval_tokens) =
|
let (final_response, prompt_tokens, eval_tokens) =
|
||||||
ollama_client.chat_with_tools(messages, vec![]).await?;
|
ollama_client
|
||||||
|
.chat_with_tools(messages.clone(), vec![])
|
||||||
|
.await?;
|
||||||
last_prompt_eval_count = prompt_tokens;
|
last_prompt_eval_count = prompt_tokens;
|
||||||
last_eval_count = eval_tokens;
|
last_eval_count = eval_tokens;
|
||||||
final_content = final_response.content;
|
final_content = final_response.content.clone();
|
||||||
|
messages.push(final_response);
|
||||||
}
|
}
|
||||||
|
|
||||||
loop_cx
|
loop_cx
|
||||||
@@ -2653,7 +2677,16 @@ Return ONLY the summary, nothing else."#,
|
|||||||
&final_content[..final_content.len().min(200)]
|
&final_content[..final_content.len().min(200)]
|
||||||
);
|
);
|
||||||
|
|
||||||
// 14. Store insight (returns the persisted row including its new id)
|
// 14. Serialize the full message history for training data
|
||||||
|
let training_messages = match serde_json::to_string(&messages) {
|
||||||
|
Ok(json) => Some(json),
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Failed to serialize training messages: {}", e);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 15. Store insight (returns the persisted row including its new id)
|
||||||
let insight = InsertPhotoInsight {
|
let insight = InsertPhotoInsight {
|
||||||
file_path: file_path.to_string(),
|
file_path: file_path.to_string(),
|
||||||
title,
|
title,
|
||||||
@@ -2661,6 +2694,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
generated_at: Utc::now().timestamp(),
|
generated_at: Utc::now().timestamp(),
|
||||||
model_version: ollama_client.primary_model.clone(),
|
model_version: ollama_client.primary_model.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
|
training_messages,
|
||||||
};
|
};
|
||||||
|
|
||||||
let stored = {
|
let stored = {
|
||||||
@@ -2682,7 +2716,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
|
|
||||||
let stored_insight = stored?;
|
let stored_insight = stored?;
|
||||||
|
|
||||||
// 15. Backfill source_insight_id on all facts recorded for this photo during the loop
|
// 16. Backfill source_insight_id on all facts recorded for this photo during the loop
|
||||||
{
|
{
|
||||||
let mut kdao = self
|
let mut kdao = self
|
||||||
.knowledge_dao
|
.knowledge_dao
|
||||||
|
|||||||
@@ -8,8 +8,9 @@ pub mod sms_client;
|
|||||||
#[allow(unused_imports)]
|
#[allow(unused_imports)]
|
||||||
pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
|
pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
|
||||||
pub use handlers::{
|
pub use handlers::{
|
||||||
delete_insight_handler, generate_agentic_insight_handler, generate_insight_handler,
|
delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
|
||||||
get_all_insights_handler, get_available_models_handler, get_insight_handler,
|
generate_insight_handler, get_all_insights_handler, get_available_models_handler,
|
||||||
|
get_insight_handler, rate_insight_handler,
|
||||||
};
|
};
|
||||||
pub use insight_generator::InsightGenerator;
|
pub use insight_generator::InsightGenerator;
|
||||||
pub use ollama::{ModelCapabilities, OllamaClient};
|
pub use ollama::{ModelCapabilities, OllamaClient};
|
||||||
|
|||||||
@@ -37,6 +37,18 @@ pub trait InsightDao: Sync + Send {
|
|||||||
&mut self,
|
&mut self,
|
||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
) -> Result<Vec<PhotoInsight>, DbError>;
|
) -> Result<Vec<PhotoInsight>, DbError>;
|
||||||
|
|
||||||
|
fn rate_insight(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
file_path: &str,
|
||||||
|
approved: bool,
|
||||||
|
) -> Result<(), DbError>;
|
||||||
|
|
||||||
|
fn get_approved_insights(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
) -> Result<Vec<PhotoInsight>, DbError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct SqliteInsightDao {
|
pub struct SqliteInsightDao {
|
||||||
@@ -169,4 +181,47 @@ impl InsightDao for SqliteInsightDao {
|
|||||||
})
|
})
|
||||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rate_insight(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
path: &str,
|
||||||
|
is_approved: bool,
|
||||||
|
) -> Result<(), DbError> {
|
||||||
|
trace_db_call(context, "update", "rate_insight", |_span| {
|
||||||
|
use schema::photo_insights::dsl::*;
|
||||||
|
|
||||||
|
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
|
||||||
|
|
||||||
|
diesel::update(
|
||||||
|
photo_insights
|
||||||
|
.filter(file_path.eq(path))
|
||||||
|
.filter(is_current.eq(true)),
|
||||||
|
)
|
||||||
|
.set(approved.eq(Some(is_approved)))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map(|_| ())
|
||||||
|
.map_err(|_| anyhow::anyhow!("Update error"))
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_approved_insights(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
) -> Result<Vec<PhotoInsight>, DbError> {
|
||||||
|
trace_db_call(context, "query", "get_approved_insights", |_span| {
|
||||||
|
use schema::photo_insights::dsl::*;
|
||||||
|
|
||||||
|
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
|
||||||
|
|
||||||
|
photo_insights
|
||||||
|
.filter(approved.eq(true))
|
||||||
|
.filter(training_messages.is_not_null())
|
||||||
|
.order(generated_at.desc())
|
||||||
|
.load::<PhotoInsight>(connection.deref_mut())
|
||||||
|
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -86,6 +86,7 @@ pub struct InsertPhotoInsight {
|
|||||||
pub generated_at: i64,
|
pub generated_at: i64,
|
||||||
pub model_version: String,
|
pub model_version: String,
|
||||||
pub is_current: bool,
|
pub is_current: bool,
|
||||||
|
pub training_messages: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||||
@@ -97,6 +98,8 @@ pub struct PhotoInsight {
|
|||||||
pub generated_at: i64,
|
pub generated_at: i64,
|
||||||
pub model_version: String,
|
pub model_version: String,
|
||||||
pub is_current: bool,
|
pub is_current: bool,
|
||||||
|
pub training_messages: Option<String>,
|
||||||
|
pub approved: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Knowledge memory models ---
|
// --- Knowledge memory models ---
|
||||||
|
|||||||
@@ -151,6 +151,8 @@ diesel::table! {
|
|||||||
generated_at -> BigInt,
|
generated_at -> BigInt,
|
||||||
model_version -> Text,
|
model_version -> Text,
|
||||||
is_current -> Bool,
|
is_current -> Bool,
|
||||||
|
training_messages -> Nullable<Text>,
|
||||||
|
approved -> Nullable<Bool>,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1185,6 +1185,8 @@ fn main() -> std::io::Result<()> {
|
|||||||
.service(ai::delete_insight_handler)
|
.service(ai::delete_insight_handler)
|
||||||
.service(ai::get_all_insights_handler)
|
.service(ai::get_all_insights_handler)
|
||||||
.service(ai::get_available_models_handler)
|
.service(ai::get_available_models_handler)
|
||||||
|
.service(ai::rate_insight_handler)
|
||||||
|
.service(ai::export_training_data_handler)
|
||||||
.add_feature(add_tag_services::<_, SqliteTagDao>)
|
.add_feature(add_tag_services::<_, SqliteTagDao>)
|
||||||
.add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>)
|
.add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>)
|
||||||
.app_data(app_data.clone())
|
.app_data(app_data.clone())
|
||||||
|
|||||||
Reference in New Issue
Block a user