feat(ai): few-shot exemplars + sticky Ollama preference
- Few-shot injection on /insights/generate/agentic: compresses prior training_messages into trajectory blocks (tool calls + result summaries) and injects them into the system prompt. Hardcoded default ids with optional request override.
- New fewshot_source_ids column on photo_insights (+ migration) to track which exemplars influenced a given row, for downstream training-set filtering. Chat amend rows stamp None with a lineage note.
- Ollama client now remembers which server (primary/fallback) most recently succeeded and tries it first on the next call, via a shared Arc<AtomicBool>. Avoids re-404ing the primary on every agent iteration when the chosen model only lives on the fallback.
- Demote noisy logs: daily_summary "Summary match" lines to debug; inner chat_with_tools non-2xx body log from error to warn (outer layer owns the terminal-error signal).
- Drift-guard tests for summarize_tool_result covering the success / empty / error / unknown shape for every tool.
- Tidy: three pre-existing clippy warnings cleaned up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ use opentelemetry::trace::{Span, Status, Tracer};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
|
||||
use crate::ai::ollama::ChatMessage;
|
||||
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
|
||||
use crate::data::Claims;
|
||||
use crate::database::{ExifDao, InsightDao};
|
||||
@@ -12,6 +13,13 @@ use crate::otel::{extract_context_from_request, global_tracer};
|
||||
use crate::state::AppState;
|
||||
use crate::utils::normalize_path;
|
||||
|
||||
/// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
|
||||
/// ids of approved insights whose `training_messages` should be compressed
|
||||
/// into trajectory form and injected into the system prompt. Empty = no
|
||||
/// change in behavior. Request-level `fewshot_insight_ids` overrides this
|
||||
/// when non-empty. The agentic handler uses at most the first 2 ids from whichever list applies.
|
||||
const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908];
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GeneratePhotoInsightRequest {
|
||||
pub file_path: String,
|
||||
@@ -33,6 +41,12 @@ pub struct GeneratePhotoInsightRequest {
|
||||
/// OpenRouter chat). Only respected by the agentic endpoint.
|
||||
#[serde(default)]
|
||||
pub backend: Option<String>,
|
||||
/// Insight ids whose stored `training_messages` should be compressed
|
||||
/// into few-shot trajectories and injected into the system prompt.
|
||||
/// Silently truncated to the first 2. When absent/empty, the handler
|
||||
/// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
|
||||
#[serde(default)]
|
||||
pub fewshot_insight_ids: Option<Vec<i32>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -326,6 +340,41 @@ pub async fn generate_agentic_insight_handler(
|
||||
span.set_attribute(KeyValue::new("backend", b.clone()));
|
||||
}
|
||||
|
||||
// Resolve few-shot ids: request-provided ids take precedence when
|
||||
// non-empty; otherwise fall back to the hardcoded defaults.
|
||||
let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
|
||||
Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
|
||||
_ => DEFAULT_FEWSHOT_INSIGHT_IDS
|
||||
.iter()
|
||||
.take(2)
|
||||
.copied()
|
||||
.collect(),
|
||||
};
|
||||
span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));
|
||||
|
||||
let fewshot_examples: Vec<Vec<ChatMessage>> = {
|
||||
let otel_context = opentelemetry::Context::new();
|
||||
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
fewshot_ids
|
||||
.iter()
|
||||
.filter_map(|id| {
|
||||
let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
|
||||
let json = insight.training_messages?;
|
||||
match serde_json::from_str::<Vec<ChatMessage>>(&json) {
|
||||
Ok(msgs) => Some(msgs),
|
||||
Err(e) => {
|
||||
log::warn!(
|
||||
"Few-shot insight {} has malformed training_messages: {}",
|
||||
id,
|
||||
e
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
let result = insight_generator
|
||||
.generate_agentic_insight_for_photo(
|
||||
&normalized_path,
|
||||
@@ -338,6 +387,8 @@ pub async fn generate_agentic_insight_handler(
|
||||
request.min_p,
|
||||
max_iterations,
|
||||
request.backend.clone(),
|
||||
fewshot_examples,
|
||||
fewshot_ids,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user