feat(ai): chat continuation for photo insights (server v1)

Adds POST /insights/chat and GET /insights/chat/history. Replays the
stored agentic conversation through the same backend the insight was
generated with (or a per-turn override), runs a short tool-calling
loop, and persists the extended history in append or amend mode.
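
The chat loop itself lives in insight_chat (one of the other changed
files, not shown in this diff). A compressed sketch of the flow it
implements, with simplified stand-in types rather than the module's
real API:

// Sketch only: ChatMessage, ChatBackend and run_tool are invented stand-ins.
#[derive(Clone)]
struct ChatMessage {
    role: &'static str,      // "user" | "assistant" | "tool"
    content: String,
    tool_calls: Vec<String>, // simplified tool-call payloads
}

#[async_trait::async_trait]
trait ChatBackend {
    async fn chat(&self, history: &[ChatMessage]) -> anyhow::Result<ChatMessage>;
}

async fn run_tool(call: &str) -> anyhow::Result<String> {
    Ok(format!("result for {call}")) // stands in for EXIF lookups etc.
}

async fn chat_turn(
    backend: &dyn ChatBackend,
    mut history: Vec<ChatMessage>, // replayed from the stored insight
    user_message: String,
    max_iterations: usize,
) -> anyhow::Result<(String, usize)> {
    history.push(ChatMessage { role: "user", content: user_message, tool_calls: vec![] });
    let mut tool_calls_made = 0;
    for _ in 0..max_iterations {
        let reply = backend.chat(&history).await?;
        history.push(reply.clone());
        if reply.tool_calls.is_empty() {
            // No tool calls: this is the answer to persist (append or amend).
            return Ok((reply.content, tool_calls_made));
        }
        for call in &reply.tool_calls {
            tool_calls_made += 1;
            let result = run_tool(call).await?;
            history.push(ChatMessage { role: "tool", content: result, tool_calls: vec![] });
        }
    }
    anyhow::bail!("tool loop exceeded max_iterations")
}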

Backend switching: same-backend and hybrid->local turns replay the
stored history verbatim; local->hybrid is rejected in v1 (it would
require rewriting the vision descriptions on the fly).
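
The rule, roughly (enum and function are illustrative; the real check
lives in insight_chat, and the handler below maps its "switching from
local to hybrid" message to a 400):

// Illustrative only: the real check also verifies that the hybrid
// backend is actually reachable before accepting the turn.
#[derive(Clone, Copy, PartialEq)]
enum Backend { Local, Hybrid }

fn check_backend_switch(stored: Backend, requested: Backend) -> Result<(), String> {
    match (stored, requested) {
        // Same backend, or hybrid -> local: stored messages replay as-is.
        (Backend::Local, Backend::Local)
        | (Backend::Hybrid, Backend::Hybrid)
        | (Backend::Hybrid, Backend::Local) => Ok(()),
        // Local -> hybrid would need the vision descriptions rewritten
        // for the hybrid prompt format, so v1 rejects it up front.
        (Backend::Local, Backend::Hybrid) => {
            Err("switching from local to hybrid is not supported in v1".into())
        }
    }
}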

A per-(library, file) async mutex serializes concurrent turns. A soft
context budget drops the oldest tool_call+result pairs when the
serialized history exceeds num_ctx - 2048 tokens.
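
A minimal sketch of both guards, assuming tokio and a crude chars/4
token estimate (the real code can substitute backend-reported counts
where available):

use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;

// One async mutex per (library_id, file_path); concurrent turns against
// the same photo queue up instead of interleaving their histories.
type TurnLocks = Mutex<HashMap<(i64, String), Arc<Mutex<()>>>>;

async fn lock_for(locks: &TurnLocks, key: (i64, String)) -> Arc<Mutex<()>> {
    // Caller: let _guard = lock_for(&locks, key).await.lock_owned().await;
    locks.lock().await.entry(key).or_default().clone()
}

struct StoredMessage { content: String }

// Rough heuristic (~4 chars/token plus per-message overhead).
fn estimate_tokens(messages: &[StoredMessage]) -> usize {
    messages.iter().map(|m| m.content.len() / 4 + 8).sum()
}

// Soft budget: drop the oldest tool_call + result pair (two messages
// after the pinned initial exchange) until the history fits.
fn trim_history(messages: &mut Vec<StoredMessage>, num_ctx: usize, pinned: usize) {
    let budget = num_ctx.saturating_sub(2048);
    while estimate_tokens(messages) > budget && messages.len() > pinned + 2 {
        messages.drain(pinned..pinned + 2);
    }
}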

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Cameron
2026-04-21 13:00:27 -04:00
parent e2eefbd156
commit 0b9528f61e
7 changed files with 907 additions and 7 deletions


@@ -3,6 +3,7 @@ use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};
use crate::ai::insight_chat::ChatTurnRequest;
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::{ExifDao, InsightDao};
@@ -70,6 +71,9 @@ pub struct PhotoInsightResponse {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub approved: Option<bool>,
    pub backend: String,
    /// True when the insight was generated agentically and a chat
    /// continuation can be started against it. Drives the mobile chat button.
    pub has_training_messages: bool,
}

#[derive(Debug, Serialize)]
@@ -192,6 +196,7 @@ pub async fn get_insight_handler(
        prompt_eval_count: None,
        eval_count: None,
        approved: insight.approved,
        has_training_messages: insight.training_messages.is_some(),
        backend: insight.backend,
    };
    HttpResponse::Ok().json(response)
@@ -260,6 +265,7 @@ pub async fn get_all_insights_handler(
            prompt_eval_count: None,
            eval_count: None,
            approved: insight.approved,
            has_training_messages: insight.training_messages.is_some(),
            backend: insight.backend,
        })
        .collect();
@@ -353,6 +359,7 @@ pub async fn generate_agentic_insight_handler(
        prompt_eval_count,
        eval_count,
        approved: insight.approved,
        has_training_messages: insight.training_messages.is_some(),
        backend: insight.backend,
    };
    HttpResponse::Ok().json(response)
@@ -558,3 +565,186 @@ pub async fn export_training_data_handler(
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ChatTurnHttpRequest {
    pub file_path: String,
    #[serde(default)]
    pub library: Option<String>,
    pub user_message: String,
    #[serde(default)]
    pub model: Option<String>,
    #[serde(default)]
    pub backend: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    #[serde(default)]
    pub max_iterations: Option<usize>,
    #[serde(default)]
    pub amend: bool,
}

#[derive(Debug, Serialize)]
pub struct ChatTurnHttpResponse {
    pub assistant_message: String,
    pub tool_calls_made: usize,
    pub iterations_used: usize,
    pub truncated: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub amended_insight_id: Option<i32>,
    pub backend: String,
    pub model: String,
}
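
For reference, a plausible round trip against these two structs (every
value below is invented for illustration, not taken from a real run):

// Illustrative request/response bodies only.
fn example_bodies() -> (serde_json::Value, serde_json::Value) {
    let request = serde_json::json!({
        "file_path": "2026/04/IMG_1234.jpg",
        "user_message": "Why did you flag this shot as underexposed?",
        "amend": false
    });
    let response = serde_json::json!({
        "assistant_message": "The histogram tool showed most pixels below midtone ...",
        "tool_calls_made": 2,
        "iterations_used": 3,
        "truncated": false,
        "backend": "local",
        "model": "example-vision-model"
    });
    (request, response)
}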

/// POST /insights/chat — submit a follow-up turn against an existing insight.
#[post("/insights/chat")]
pub async fn chat_turn_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
    span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));

    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };

    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };

    match app_state.insight_chat.chat_turn(chat_req).await {
        Ok(result) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(ChatTurnHttpResponse {
                assistant_message: result.assistant_message,
                tool_calls_made: result.tool_calls_made,
                iterations_used: result.iterations_used,
                truncated: result.truncated,
                prompt_eval_count: result.prompt_eval_count,
                eval_count: result.eval_count,
                amended_insight_id: result.amended_insight_id,
                backend: result.backend_used,
                model: result.model_used,
            })
        }
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat turn failed: {}", msg);
            span.set_status(Status::error(msg.clone()));
            // Map well-known errors to client-facing 4xx codes.
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("user_message")
                || msg.contains("unknown backend")
                || msg.contains("switching from local to hybrid")
                || msg.contains("hybrid backend unavailable")
            {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}

#[derive(Debug, Deserialize)]
pub struct ChatHistoryQuery {
    pub path: String,
    #[serde(default)]
    pub library: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct ChatHistoryHttpResponse {
    pub messages: Vec<RenderedHistoryMessage>,
    pub turn_count: usize,
    pub model_version: String,
    pub backend: String,
}

#[derive(Debug, Serialize)]
pub struct RenderedHistoryMessage {
    pub role: String,
    pub content: String,
    pub is_initial: bool,
}

/// GET /insights/chat/history — return the rendered transcript for a photo.
#[get("/insights/chat/history")]
pub async fn chat_history_handler(
    _claims: Claims,
    query: web::Query<ChatHistoryQuery>,
    app_state: web::Data<AppState>,
) -> impl Responder {
    // library param parsed for parity with other insight endpoints, even
    // though load_history currently keys on file_path alone (matches the
    // existing get_insight DAO contract).
    let _library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());

    match app_state.insight_chat.load_history(&query.path) {
        Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
            messages: view
                .messages
                .into_iter()
                .map(|m| RenderedHistoryMessage {
                    role: m.role,
                    content: m.content,
                    is_initial: m.is_initial,
                })
                .collect(),
            turn_count: view.turn_count,
            model_version: view.model_version,
            backend: view.backend,
        }),
        Err(e) => {
            let msg = format!("{}", e);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
}
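
Registration is presumably the usual Actix wiring; the configure
function below is an assumption, only the two handler names come from
this diff:

// Assumed wiring; only chat_turn_handler/chat_history_handler are real.
pub fn configure_insight_chat(cfg: &mut actix_web::web::ServiceConfig) {
    cfg.service(chat_turn_handler).service(chat_history_handler);
}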