use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};

use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
use crate::ai::ollama::ChatMessage;
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::{ExifDao, InsightDao};
use crate::libraries;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
use crate::utils::normalize_path;

/// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
/// ids of approved insights whose `training_messages` should be compressed
/// into trajectory form and injected into the system prompt. Empty = no
/// change in behavior. Request-level `fewshot_insight_ids` overrides this
/// when non-empty.
// const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908];
const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[];

#[derive(Debug, Deserialize)]
pub struct GeneratePhotoInsightRequest {
    pub file_path: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub system_prompt: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
    /// OpenRouter chat). Only respected by the agentic endpoint.
    #[serde(default)]
    pub backend: Option<String>,
    /// Insight ids whose stored `training_messages` should be compressed
    /// into few-shot trajectories and injected into the system prompt.
    /// Silently truncated to the first 2. When absent/empty, the handler
    /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
    #[serde(default)]
    pub fewshot_insight_ids: Option<Vec<i32>>,
}
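
// Illustrative JSON body for `GeneratePhotoInsightRequest` above. Only the
// field names come from the struct; the values are hypothetical (num_ctx,
// temperature, ids), "qwen2.5" is just one model name mentioned elsewhere
// in this file, and "hybrid" is one of the documented backend values.
//
// {
//   "file_path": "/photos/example.jpg",
//   "model": "qwen2.5",
//   "num_ctx": 8192,
//   "temperature": 0.7,
//   "backend": "hybrid",
//   "fewshot_insight_ids": [101, 102]
// }
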
#[derive(Debug, Deserialize)]
pub struct GetPhotoInsightQuery {
    pub path: String,
    /// Library context for this lookup. Used to pick the right content
    /// hash when the same rel_path exists under multiple roots.
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Deserialize)]
pub struct RateInsightRequest {
    pub file_path: String,
    pub approved: bool,
}

#[derive(Debug, Deserialize)]
pub struct ExportTrainingDataQuery {
    #[serde(default)]
    pub approved_only: Option<bool>,
}

#[derive(Debug, Serialize)]
pub struct PhotoInsightResponse {
    pub id: i32,
    pub file_path: String,
    pub title: String,
    pub summary: String,
    pub generated_at: i64,
    pub model_version: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub approved: Option<bool>,
    pub backend: String,
    /// True when the insight was generated agentically and a chat
    /// continuation can be started against it. Drives the mobile chat button.
    pub has_training_messages: bool,
}

#[derive(Debug, Serialize)]
pub struct AvailableModelsResponse {
    pub primary: ServerModels,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback: Option<ServerModels>,
}

#[derive(Debug, Serialize)]
pub struct ServerModels {
    pub url: String,
    pub models: Vec<ModelCapabilities>,
    pub default_model: String,
}

/// POST /insights/generate - Generate insight for a specific photo
#[post("/insights/generate")]
pub async fn generate_insight_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    insight_generator: web::Data<InsightGenerator>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate", &parent_context);

    let normalized_path = normalize_path(&request.file_path);

    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
    if let Some(ref model) = request.model {
        span.set_attribute(KeyValue::new("model", model.clone()));
    }
    if let Some(ref prompt) = request.system_prompt {
        span.set_attribute(KeyValue::new("has_custom_prompt", true));
        span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
    }
    if let Some(ctx) = request.num_ctx {
        span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
    }

    log::info!(
        "Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
        normalized_path,
        request.model,
        request.system_prompt.is_some(),
        request.num_ctx
    );

    // Generate insight with optional custom model, system prompt, and context size
    let result = insight_generator
        .generate_insight_for_photo_with_config(
            &normalized_path,
            request.model.clone(),
            request.system_prompt.clone(),
            request.num_ctx,
            request.temperature,
            request.top_p,
            request.top_k,
            request.min_p,
        )
        .await;

    match result {
        Ok(()) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(serde_json::json!({
                "success": true,
                "message": "Insight generated successfully"
            }))
        }
        Err(e) => {
            log::error!("Failed to generate insight: {:?}", e);
            span.set_status(Status::error(e.to_string()));
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to generate insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
#[get("/insights")]
pub async fn get_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    app_state: web::Data<AppState>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
    exif_dao: web::Data<std::sync::Mutex<Box<dyn ExifDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::debug!("Fetching insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();

    // Expand to rel_paths sharing content so an insight generated under
    // library 1 still shows when the same photo is viewed from library 2.
    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
    let sibling_paths = {
        let mut exif = exif_dao.lock().expect("Unable to lock ExifDao");
        exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path)
            .unwrap_or_else(|_| vec![normalized_path.clone()])
    };

    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_insight_for_paths(&otel_context, &sibling_paths) {
        Ok(Some(insight)) => {
            let response = PhotoInsightResponse {
                id: insight.id,
                file_path: insight.file_path,
                title: insight.title,
                summary: insight.summary,
                generated_at: insight.generated_at,
                model_version: insight.model_version,
                prompt_eval_count: None,
                eval_count: None,
                approved: insight.approved,
                has_training_messages: insight.training_messages.is_some(),
                backend: insight.backend,
            };
            HttpResponse::Ok().json(response)
        }
        Ok(None) => HttpResponse::NotFound().json(serde_json::json!({
            "error": "Insight not found"
        })),
        Err(e) => {
            log::error!("Failed to fetch insight ({}): {:?}", &query.path, e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insight: {:?}", e)
            }))
        }
    }
}

/// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request)
#[delete("/insights")]
pub async fn delete_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::info!("Deleting insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.delete_insight(&otel_context, &normalized_path) {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": "Insight deleted successfully"
        })),
        Err(e) => {
            log::error!("Failed to delete insight: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to delete insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights/all - Get all insights
#[get("/insights/all")]
pub async fn get_all_insights_handler(
    _claims: Claims,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    log::debug!("Fetching all insights");

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.get_all_insights(&otel_context) {
        Ok(insights) => {
            let responses: Vec<PhotoInsightResponse> = insights
                .into_iter()
                .map(|insight| PhotoInsightResponse {
                    id: insight.id,
                    file_path: insight.file_path,
                    title: insight.title,
                    summary: insight.summary,
                    generated_at: insight.generated_at,
                    model_version: insight.model_version,
                    prompt_eval_count: None,
                    eval_count: None,
                    approved: insight.approved,
                    has_training_messages: insight.training_messages.is_some(),
                    backend: insight.backend,
                })
                .collect();

            HttpResponse::Ok().json(responses)
        }
        Err(e) => {
            log::error!("Failed to fetch all insights: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to fetch insights: {:?}", e)
            }))
        }
    }
}

/// POST /insights/generate/agentic - Generate insight using agentic tool-calling loop
#[post("/insights/generate/agentic")]
pub async fn generate_agentic_insight_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    insight_generator: web::Data<InsightGenerator>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);

    let normalized_path = normalize_path(&request.file_path);

    span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
    if let Some(ref model) = request.model {
        span.set_attribute(KeyValue::new("model", model.clone()));
    }
    if let Some(ref prompt) = request.system_prompt {
        span.set_attribute(KeyValue::new("has_custom_prompt", true));
        span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
    }
    if let Some(ctx) = request.num_ctx {
        span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
    }

    let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(12);

    span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));

    log::info!(
        "Agentic insight generation triggered for photo: {} with model: {:?}, max_iterations: {}",
        normalized_path,
        request.model,
        max_iterations
    );

    if let Some(ref b) = request.backend {
        span.set_attribute(KeyValue::new("backend", b.clone()));
    }

    // Resolve few-shot ids: request-provided ids take precedence when
    // non-empty; otherwise fall back to the hardcoded defaults.
    let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
        Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
        _ => DEFAULT_FEWSHOT_INSIGHT_IDS
            .iter()
            .take(2)
            .copied()
            .collect(),
    };
    span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));

    let fewshot_examples: Vec<Vec<ChatMessage>> = {
        let otel_context = opentelemetry::Context::new();
        let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
        fewshot_ids
            .iter()
            .filter_map(|id| {
                let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
                let json = insight.training_messages?;
                match serde_json::from_str::<Vec<ChatMessage>>(&json) {
                    Ok(msgs) => Some(msgs),
                    Err(e) => {
                        log::warn!(
                            "Few-shot insight {} has malformed training_messages: {}",
                            id,
                            e
                        );
                        None
                    }
                }
            })
            .collect()
    };

    let result = insight_generator
        .generate_agentic_insight_for_photo(
            &normalized_path,
            request.model.clone(),
            request.system_prompt.clone(),
            request.num_ctx,
            request.temperature,
            request.top_p,
            request.top_k,
            request.min_p,
            max_iterations,
            request.backend.clone(),
            fewshot_examples,
            fewshot_ids,
        )
        .await;

    match result {
        Ok((prompt_eval_count, eval_count)) => {
            span.set_status(Status::Ok);
            // Fetch the stored insight to return it
            let otel_context = opentelemetry::Context::new();
            let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
            match dao.get_insight(&otel_context, &normalized_path) {
                Ok(Some(insight)) => {
                    let response = PhotoInsightResponse {
                        id: insight.id,
                        file_path: insight.file_path,
                        title: insight.title,
                        summary: insight.summary,
                        generated_at: insight.generated_at,
                        model_version: insight.model_version,
                        prompt_eval_count,
                        eval_count,
                        approved: insight.approved,
                        has_training_messages: insight.training_messages.is_some(),
                        backend: insight.backend,
                    };
                    HttpResponse::Ok().json(response)
                }
                Ok(None) => HttpResponse::Ok().json(serde_json::json!({
                    "success": true,
                    "message": "Agentic insight generated successfully"
                })),
                Err(e) => {
                    log::warn!("Insight stored but failed to retrieve: {:?}", e);
                    HttpResponse::Ok().json(serde_json::json!({
                        "success": true,
                        "message": "Agentic insight generated successfully"
                    }))
                }
            }
        }
        Err(e) => {
            let error_msg = format!("{:?}", e);
            log::error!("Failed to generate agentic insight: {}", error_msg);
            span.set_status(Status::error(error_msg.clone()));

            if error_msg.contains("tool calling not supported")
                || error_msg.contains("model not available")
            {
                HttpResponse::BadRequest().json(serde_json::json!({
                    "error": format!("Failed to generate agentic insight: {}", error_msg)
                }))
            } else if error_msg.contains("error parsing tool call") {
                HttpResponse::BadRequest().json(serde_json::json!({
                    "error": "Model is not compatible with Ollama's tool calling protocol. Try a model known to support native tool calling (e.g. llama3.1, llama3.2, qwen2.5, mistral-nemo)."
                }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({
                    "error": format!("Failed to generate agentic insight: {}", error_msg)
                }))
            }
        }
    }
}

/// GET /insights/models - List available models from both servers with capabilities
#[get("/insights/models")]
pub async fn get_available_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    log::debug!("Fetching available models with capabilities");

    let ollama_client = &app_state.ollama;

    // Fetch models with capabilities from primary server
    let primary_models =
        match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await {
            Ok(models) => models,
            Err(e) => {
                log::warn!("Failed to fetch models from primary server: {:?}", e);
                vec![]
            }
        };

    let primary = ServerModels {
        url: ollama_client.primary_url.clone(),
        models: primary_models,
        default_model: ollama_client.primary_model.clone(),
    };

    // Fetch models with capabilities from fallback server if configured
    let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
        match OllamaClient::list_models_with_capabilities(fallback_url).await {
            Ok(models) => Some(ServerModels {
                url: fallback_url.clone(),
                models,
                default_model: ollama_client
                    .fallback_model
                    .clone()
                    .unwrap_or_else(|| ollama_client.primary_model.clone()),
            }),
            Err(e) => {
                log::warn!("Failed to fetch models from fallback server: {:?}", e);
                None
            }
        }
    } else {
        None
    };

    let response = AvailableModelsResponse { primary, fallback };

    HttpResponse::Ok().json(response)
}

#[derive(Debug, Serialize)]
pub struct OpenRouterModelsResponse {
    pub models: Vec<String>,
    pub default_model: Option<String>,
    pub configured: bool,
}

/// GET /insights/openrouter/models - Curated OpenRouter model ids exposed
/// to clients for the hybrid backend. Returned verbatim from
/// `OPENROUTER_ALLOWED_MODELS`; no live call to OpenRouter.
#[get("/insights/openrouter/models")]
pub async fn get_openrouter_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
    let configured = app_state.openrouter.is_some();
    let default_model = app_state
        .openrouter
        .as_ref()
        .map(|c| c.primary_model.clone());
    let response = OpenRouterModelsResponse {
        models: app_state.openrouter_allowed_models.clone(),
        default_model,
        configured,
    };
    HttpResponse::Ok().json(response)
}

/// POST /insights/rate - Rate an insight (thumbs up/down for training data)
#[post("/insights/rate")]
pub async fn rate_insight_handler(
    _claims: Claims,
    request: web::Json<RateInsightRequest>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let normalized_path = normalize_path(&request.file_path);
    log::info!(
        "Rating insight for {}: approved={}",
        normalized_path,
        request.approved
    );

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    match dao.rate_insight(&otel_context, &normalized_path, request.approved) {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({
            "success": true,
            "message": "Insight rated successfully"
        })),
        Err(e) => {
            log::error!("Failed to rate insight: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to rate insight: {:?}", e)
            }))
        }
    }
}

/// GET /insights/training-data - Export approved training data as JSONL
#[get("/insights/training-data")]
pub async fn export_training_data_handler(
    _claims: Claims,
    query: web::Query<ExportTrainingDataQuery>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
    let approved_only = query.approved_only.unwrap_or(true);
    log::info!("Exporting training data (approved_only={})", approved_only);

    let otel_context = opentelemetry::Context::new();
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

    let insights = if approved_only {
        dao.get_approved_insights(&otel_context)
    } else {
        dao.get_all_insights(&otel_context)
    };

    match insights {
        Ok(insights) => {
            let mut jsonl = String::new();
            for insight in &insights {
                if let Some(ref messages) = insight.training_messages {
                    let entry = serde_json::json!({
                        "file_path": insight.file_path,
                        "model_version": insight.model_version,
                        "generated_at": insight.generated_at,
                        "title": insight.title,
                        "summary": insight.summary,
                        "messages": serde_json::from_str::<serde_json::Value>(messages)
                            .unwrap_or(serde_json::Value::Null),
                    });
                    jsonl.push_str(&entry.to_string());
                    jsonl.push('\n');
                }
            }

            HttpResponse::Ok()
                .content_type("application/jsonl")
                .insert_header((
                    "Content-Disposition",
                    "attachment; filename=\"training_data.jsonl\"",
                ))
                .body(jsonl)
        }
        Err(e) => {
            log::error!("Failed to export training data: {:?}", e);
            HttpResponse::InternalServerError().json(serde_json::json!({
                "error": format!("Failed to export training data: {:?}", e)
            }))
        }
    }
}
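
// Shape of one line emitted by `export_training_data_handler` above, built
// from the `entry` fields; values are elided, and `messages` is whatever
// JSON was stored in the insight's `training_messages` column. Each export
// entry occupies a single line in the downloaded file (wrapped here only
// for readability).
//
// {"file_path":"…","model_version":"…","generated_at":…,
//  "title":"…","summary":"…","messages":[…]}
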
#[derive(Debug, Deserialize)]
pub struct ChatTurnHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    pub user_message: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub backend: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    #[serde(default)]
    pub max_iterations: Option<usize>,
    #[serde(default)]
    pub amend: bool,
}
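
// Illustrative body for `ChatTurnHttpRequest` above (values hypothetical).
// Only `file_path` and `user_message` are required; every other field is
// `#[serde(default)]` and may be omitted.
//
// { "file_path": "/photos/example.jpg", "user_message": "What lens was this shot on?" }
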
#[derive(Debug, Serialize)]
pub struct ChatTurnHttpResponse {
    pub assistant_message: String,
    pub tool_calls_made: usize,
    pub iterations_used: usize,
    pub truncated: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub amended_insight_id: Option<i32>,
    pub backend: String,
    pub model: String,
}

/// POST /insights/chat — submit a follow-up turn against an existing insight.
#[post("/insights/chat")]
pub async fn chat_turn_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
    span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));

    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };

    match app_state.insight_chat.chat_turn(chat_req).await {
        Ok(result) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(ChatTurnHttpResponse {
                assistant_message: result.assistant_message,
                tool_calls_made: result.tool_calls_made,
                iterations_used: result.iterations_used,
                truncated: result.truncated,
                prompt_eval_count: result.prompt_eval_count,
                eval_count: result.eval_count,
                amended_insight_id: result.amended_insight_id,
                backend: result.backend_used,
                model: result.model_used,
            })
        }
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat turn failed: {}", msg);
            span.set_status(Status::error(msg.clone()));

            // Map well-known errors to client-facing 4xx codes.
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("user_message")
                || msg.contains("unknown backend")
                || msg.contains("switching from local to hybrid")
                || msg.contains("hybrid backend unavailable")
            {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ChatHistoryQuery {
    pub path: String,
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct ChatHistoryHttpResponse {
    pub messages: Vec<RenderedHistoryMessage>,
    pub turn_count: usize,
    pub model_version: String,
    pub backend: String,
}

#[derive(Debug, Serialize)]
pub struct RenderedHistoryMessage {
    pub role: String,
    pub content: String,
    pub is_initial: bool,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<HistoryToolInvocation>,
}

#[derive(Debug, Serialize)]
pub struct HistoryToolInvocation {
    pub name: String,
    pub arguments: serde_json::Value,
    pub result: String,
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub result_truncated: bool,
}

#[derive(Debug, Deserialize)]
pub struct ChatRewindHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    /// 0-based index into the rendered transcript. The message at this
    /// index, and everything after it, is discarded. Must be > 0 — the
    /// initial user message is protected.
    pub discard_from_rendered_index: usize,
}
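
// Illustrative body for `ChatRewindHttpRequest` above (values hypothetical).
// Per the field doc, discarding from rendered index 3 keeps messages 0–2
// and drops everything from index 3 onward.
//
// { "file_path": "/photos/example.jpg", "discard_from_rendered_index": 3 }
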
/// POST /insights/chat/rewind — truncate the stored conversation so the
/// rendered message at `discard_from_rendered_index` (and everything after)
/// is removed. Use when a user wants to retry a turn with a different
/// prompt without prior replies poisoning context.
#[post("/insights/chat/rewind")]
pub async fn chat_rewind_handler(
    _claims: Claims,
    request: web::Json<ChatRewindHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    match app_state
        .insight_chat
        .rewind_history(
            library.id,
            &request.file_path,
            request.discard_from_rendered_index,
        )
        .await
    {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })),
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat rewind failed: {}", msg);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("cannot discard the initial") || msg.contains("out of range") {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

/// GET /insights/chat/history — return the rendered transcript for a photo.
#[get("/insights/chat/history")]
pub async fn chat_history_handler(
    _claims: Claims,
    query: web::Query<ChatHistoryQuery>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    // library param parsed for parity with other insight endpoints, even
    // though load_history currently keys on file_path alone (matches the
    // existing get_insight DAO contract).
    let _library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());

    match app_state.insight_chat.load_history(&query.path) {
        Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
            messages: view
                .messages
                .into_iter()
                .map(|m| RenderedHistoryMessage {
                    role: m.role,
                    content: m.content,
                    is_initial: m.is_initial,
                    tools: m
                        .tools
                        .into_iter()
                        .map(|t| HistoryToolInvocation {
                            name: t.name,
                            arguments: t.arguments,
                            result: t.result,
                            result_truncated: t.result_truncated,
                        })
                        .collect(),
                })
                .collect(),
            turn_count: view.turn_count,
            model_version: view.model_version,
            backend: view.backend,
        }),
        Err(e) => {
            let msg = format!("{}", e);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

/// POST /insights/chat/stream — streaming variant of /insights/chat.
/// Returns `text/event-stream` with one SSE frame per `ChatStreamEvent`.
#[post("/insights/chat/stream")]
pub async fn chat_stream_handler(
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> HttpResponse {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };

    let service = app_state.insight_chat.clone();
    let events = service.chat_turn_stream(chat_req);

    // Map ChatStreamEvent → SSE frame bytes.
    let sse_stream = futures::stream::StreamExt::map(events, |ev| {
        let frame = render_sse_frame(&ev);
        Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame))
    });

    HttpResponse::Ok()
        .content_type("text/event-stream")
        .insert_header(("Cache-Control", "no-cache"))
        .insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering
        .streaming(sse_stream)
}

fn render_sse_frame(ev: &ChatStreamEvent) -> String {
    let (event_name, payload) = match ev {
        ChatStreamEvent::IterationStart { n, max } => {
            ("iteration_start", serde_json::json!({ "n": n, "max": max }))
        }
        ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})),
        ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })),
        ChatStreamEvent::ToolCall {
            index,
            name,
            arguments,
        } => (
            "tool_call",
            serde_json::json!({ "index": index, "name": name, "arguments": arguments }),
        ),
        ChatStreamEvent::ToolResult {
            index,
            name,
            result,
            result_truncated,
        } => (
            "tool_result",
            serde_json::json!({
                "index": index,
                "name": name,
                "result": result,
                "result_truncated": result_truncated,
            }),
        ),
        ChatStreamEvent::Done {
            tool_calls_made,
            iterations_used,
            truncated,
            prompt_eval_count,
            eval_count,
            amended_insight_id,
            backend_used,
            model_used,
        } => (
            "done",
            serde_json::json!({
                "tool_calls_made": tool_calls_made,
                "iterations_used": iterations_used,
                "truncated": truncated,
                "prompt_eval_count": prompt_eval_count,
                "eval_count": eval_count,
                "amended_insight_id": amended_insight_id,
                "backend": backend_used,
                "model": model_used,
            }),
        ),
        ChatStreamEvent::Error(msg) => ("error", serde_json::json!({ "message": msg })),
    };
    let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
    format!("event: {}\ndata: {}\n\n", event_name, data)
}
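
// Example of the wire format produced by `render_sse_frame` for a
// `TextDelta` event (the delta text itself is hypothetical); each frame is
// an SSE event name line, a data line, and a blank separator line:
//
// event: text
// data: {"delta":"The photo was taken at"}
//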