feat(ai): few-shot exemplars + sticky Ollama preference

- Few-shot injection on /insights/generate/agentic: compresses prior
  training_messages into trajectory blocks (tool calls + result summaries)
  and injects into the system prompt. Hardcoded default ids with optional
  request override.
- New fewshot_source_ids column on photo_insights (+ migration) to track
  which exemplars influenced a given row, for downstream training-set
  filtering. Chat amend rows stamp None with a lineage note.
- Ollama client now remembers which server (primary/fallback) most
  recently succeeded and tries it first on the next call, via a shared
  Arc<AtomicBool>. Avoids re-404ing the primary on every agent iteration
  when the chosen model only lives on the fallback.
- Demote noisy logs: daily_summary "Summary match" lines to debug;
  inner chat_with_tools non-2xx body log from error to warn (outer
  layer owns the terminal-error signal).
- Drift-guard tests for summarize_tool_result covering the success /
  empty / error / unknown shapes for every tool.
- Tidy: three pre-existing clippy warnings cleaned up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-24 13:54:06 -04:00
parent 29f32b9d22
commit f0ae9f95dc
12 changed files with 639 additions and 82 deletions

View File

@@ -4,6 +4,7 @@ use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};
use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
use crate::ai::ollama::ChatMessage;
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::{ExifDao, InsightDao};
@@ -12,6 +13,13 @@ use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
use crate::utils::normalize_path;
/// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
/// ids of approved insights whose `training_messages` should be compressed
/// into trajectory form and injected into the system prompt. Empty = no
/// change in behavior. Request-level `fewshot_insight_ids` overrides this
/// when non-empty.
///
/// Only the first two ids are used: the agentic handler applies `.take(2)`
/// when it falls back to this list, so any additional entries are ignored.
/// Ids whose insight cannot be loaded, has no `training_messages`, or whose
/// `training_messages` fail to deserialize are skipped when the examples are
/// built.
const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908];
#[derive(Debug, Deserialize)]
pub struct GeneratePhotoInsightRequest {
pub file_path: String,
@@ -33,6 +41,12 @@ pub struct GeneratePhotoInsightRequest {
/// OpenRouter chat). Only respected by the agentic endpoint.
#[serde(default)]
pub backend: Option<String>,
/// Insight ids whose stored `training_messages` should be compressed
/// into few-shot trajectories and injected into the system prompt.
/// Silently truncated to the first 2. When absent/empty, the handler
/// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
#[serde(default)]
pub fewshot_insight_ids: Option<Vec<i32>>,
}
#[derive(Debug, Deserialize)]
@@ -326,6 +340,41 @@ pub async fn generate_agentic_insight_handler(
span.set_attribute(KeyValue::new("backend", b.clone()));
}
// Resolve few-shot ids: request-provided ids take precedence when
// non-empty; otherwise fall back to the hardcoded defaults.
let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
_ => DEFAULT_FEWSHOT_INSIGHT_IDS
.iter()
.take(2)
.copied()
.collect(),
};
span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));
let fewshot_examples: Vec<Vec<ChatMessage>> = {
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
fewshot_ids
.iter()
.filter_map(|id| {
let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
let json = insight.training_messages?;
match serde_json::from_str::<Vec<ChatMessage>>(&json) {
Ok(msgs) => Some(msgs),
Err(e) => {
log::warn!(
"Few-shot insight {} has malformed training_messages: {}",
id,
e
);
None
}
}
})
.collect()
};
let result = insight_generator
.generate_agentic_insight_for_photo(
&normalized_path,
@@ -338,6 +387,8 @@ pub async fn generate_agentic_insight_handler(
request.min_p,
max_iterations,
request.backend.clone(),
fewshot_examples,
fewshot_ids,
)
.await;