feat(ai): USER_NAME env + shared summary prompt + test-bin knobs

Introduces USER_NAME (default "Me") as the single source for the message
sender label and the first-person persona across daily summaries, SMS
context, insight generation, and chat. Eliminates the "Me:" transcript /
"what I did" ambiguity that confused smaller models, and unhardcodes
"Cameron" from prompt text + the knowledge-graph owner entity. Set
USER_NAME=Cameron in .env to preserve the existing owner entity row
(keyed on UNIQUE(name, entity_type)) — otherwise the next run creates
a fresh owner entity and orphans the existing facts/photo-links.

Also:
- search_messages redirect: when the model calls it with date/contact
  but no query, return a hint pointing at get_sms_messages instead of
  a bare missing-parameter error (prevents same-turn retry loops)
- sharpen search_messages vs get_sms_messages tool descriptions so
  content-vs-time-based intent is unambiguous
- extract build_daily_summary_prompt (+ DAILY_SUMMARY_MESSAGE_LIMIT,
  DAILY_SUMMARY_SYSTEM_PROMPT) shared by daily_summary_job and
  test_daily_summary binary — prompt tweaks now land in both
- add EMBEDDING_MODEL const; fix both insert sites that stored
  "mxbai-embed-large:335m" while generate_embeddings actually runs
  "nomic-embed-text:v1.5"
- test_daily_summary: add --num-ctx / --temperature / --top-p /
  --top-k / --min-p flags wired into OllamaClient setters, and print
  the configured knobs at the top of each run
- OllamaClient::generate now logs prompt/gen token counts and tok/s
  via log_chat_metrics (symmetric with chat_with_tools)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-22 23:39:37 -04:00
parent e4a3536f87
commit 6831f50993
6 changed files with 226 additions and 156 deletions

View File

@@ -1,7 +1,10 @@
use anyhow::Result;
use chrono::NaiveDate;
use clap::Parser;
use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
use image_api::ai::{
EMBEDDING_MODEL, OllamaClient, SmsApiClient, build_daily_summary_prompt,
strip_summary_boilerplate, user_display_name,
};
use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
use std::env;
use std::sync::{Arc, Mutex};
@@ -25,6 +28,26 @@ struct Args {
#[arg(short, long)]
model: Option<String>,
/// Context window size passed as Ollama `num_ctx`. Omit for server default.
#[arg(long)]
num_ctx: Option<i32>,
/// Sampling temperature. Omit for server default.
#[arg(long)]
temperature: Option<f32>,
/// Top-p (nucleus) sampling. Omit for server default.
#[arg(long)]
top_p: Option<f32>,
/// Top-k sampling. Omit for server default.
#[arg(long)]
top_k: Option<i32>,
/// Min-p sampling. Omit for server default.
#[arg(long)]
min_p: Option<f32>,
/// Test mode: Generate but don't save to database (shows output only)
#[arg(short = 't', long, default_value_t = false)]
test_mode: bool,
@@ -86,12 +109,28 @@ async fn main() -> Result<()> {
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
});
let ollama = OllamaClient::new(
let mut ollama = OllamaClient::new(
ollama_primary_url,
ollama_fallback_url.clone(),
model_to_use.clone(),
Some(model_to_use), // Use same model for fallback
);
if let Some(ctx) = args.num_ctx {
ollama.set_num_ctx(Some(ctx));
}
if args.temperature.is_some()
|| args.top_p.is_some()
|| args.top_k.is_some()
|| args.min_p.is_some()
{
ollama.set_sampling_params(args.temperature, args.top_p, args.top_k, args.min_p);
}
// Surface what's actually configured so comparison runs are auditable.
println!(
"num_ctx={:?} temperature={:?} top_p={:?} top_k={:?} min_p={:?}",
args.num_ctx, args.temperature, args.top_p, args.top_k, args.min_p
);
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
@@ -160,9 +199,10 @@ async fn main() -> Result<()> {
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
if args.verbose {
let user_name = user_display_name();
println!("\nMessage preview:");
for (i, msg) in messages.iter().take(3).enumerate() {
let sender = if msg.is_sent { "Me" } else { &msg.contact };
let sender: &str = if msg.is_sent { &user_name } else { &msg.contact };
let preview = msg.body.chars().take(60).collect::<String>();
println!(" {}. {}: {}...", i + 1, sender, preview);
}
@@ -172,64 +212,11 @@ async fn main() -> Result<()> {
println!();
}
// Format messages for LLM
let messages_text: String = messages
.iter()
.take(200)
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
r#"Summarize this day's conversation between me and {}.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
Date: {} ({})
Messages:
{}
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]
Keywords: [specific, unique terms]"#,
args.contact,
args.contact,
date.format("%B %d, %Y"),
weekday,
messages_text
);
let (prompt, system_prompt) = build_daily_summary_prompt(&args.contact, date, messages);
println!("Generating summary...");
let summary = ollama
.generate(
&prompt,
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
)
.await?;
let summary = ollama.generate(&prompt, Some(system_prompt)).await?;
println!("\n📝 GENERATED SUMMARY:");
println!("─────────────────────────────────────────");
@@ -256,8 +243,7 @@ Keywords: [specific, unique terms]"#,
message_count: messages.len() as i32,
embedding,
created_at: chrono::Utc::now().timestamp(),
// model_version: "nomic-embed-text:v1.5".to_string(),
model_version: "mxbai-embed-large:335m".to_string(),
model_version: EMBEDDING_MODEL.to_string(),
};
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");