From 6831f509935de54d59b7b0f43dbbf4d479bc8b62 Mon Sep 17 00:00:00 2001 From: Cameron Date: Wed, 22 Apr 2026 23:39:37 -0400 Subject: [PATCH] feat(ai): USER_NAME env + shared summary prompt + test-bin knobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces USER_NAME (default "Me") as the single source for the message sender label and the first-person persona across daily summaries, SMS context, insight generation, and chat. Eliminates the "Me:" transcript / "what I did" ambiguity that confused smaller models, and unhardcodes "Cameron" from prompt text + the knowledge-graph owner entity. Set USER_NAME=Cameron in .env to preserve the existing owner entity row (keyed on UNIQUE(name, entity_type)) — otherwise the next run creates a fresh owner entity and orphans the existing facts/photo-links. Also: - search_messages redirect: when the model calls it with date/contact but no query, return a hint pointing at get_sms_messages instead of a bare missing-parameter error (prevents same-turn retry loops) - sharpen search_messages vs get_sms_messages tool descriptions so content-vs-time-based intent is unambiguous - extract build_daily_summary_prompt (+ DAILY_SUMMARY_MESSAGE_LIMIT, DAILY_SUMMARY_SYSTEM_PROMPT) shared by daily_summary_job and test_daily_summary binary — prompt tweaks now land in both - EMBEDDING_MODEL const; fixes both insert sites that stored "mxbai-embed-large:335m" while generate_embeddings actually runs "nomic-embed-text:v1.5" - test_daily_summary: add --num-ctx / --temperature / --top-p / --top-k / --min-p flags wired into OllamaClient setters, and print the configured knobs at the top of each run - OllamaClient::generate now logs prompt/gen token counts and tok/s via log_chat_metrics (symmetric with chat_with_tools) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/daily_summary_job.rs | 136 +++++++++++++++++++--------------- src/ai/insight_generator.rs | 79 ++++++++++++++------ src/ai/mod.rs | 16 
+++- src/ai/ollama.rs | 42 ++++++++--- src/ai/sms_client.rs | 3 +- src/bin/test_daily_summary.rs | 106 ++++++++++++-------------- 6 files changed, 226 insertions(+), 156 deletions(-) diff --git a/src/ai/daily_summary_job.rs b/src/ai/daily_summary_job.rs index 9d9c9e0..18ad15c 100644 --- a/src/ai/daily_summary_job.rs +++ b/src/ai/daily_summary_job.rs @@ -6,12 +6,84 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use tokio::time::sleep; -use crate::ai::{OllamaClient, SmsApiClient, SmsMessage}; +use crate::ai::{EMBEDDING_MODEL, OllamaClient, SmsApiClient, SmsMessage, user_display_name}; use crate::database::{DailySummaryDao, InsertDailySummary}; use crate::otel::global_tracer; /// Strip boilerplate prefixes and common phrases from summaries before embedding. /// This improves embedding diversity by removing structural similarity. +/// Maximum number of messages passed to the summarizer for a single day. +/// Tuned to avoid token overflow on typical chat models; shared between +/// the production job and the test binary so they can't drift. +pub const DAILY_SUMMARY_MESSAGE_LIMIT: usize = 300; + +/// System prompt used when generating daily conversation summaries. +pub const DAILY_SUMMARY_SYSTEM_PROMPT: &str = + "You are a conversation summarizer. Create clear, factual summaries with \ + precise subject attribution AND extract distinctive keywords. Focus on \ + specific, unique terms that differentiate this conversation from others."; + +/// Build the prompt for a single day's conversation summary. Shared by the +/// production job and the test binary so prompt tweaks land in both places. +/// Returns `(prompt, system_prompt)`. 
+pub fn build_daily_summary_prompt( + contact: &str, + date: &NaiveDate, + messages: &[SmsMessage], +) -> (String, &'static str) { + let user_name = user_display_name(); + let messages_text: String = messages + .iter() + .take(DAILY_SUMMARY_MESSAGE_LIMIT) + .map(|m| { + if m.is_sent { + format!("{}: {}", user_name, m.body) + } else { + format!("{}: {}", m.contact, m.body) + } + }) + .collect::<Vec<String>>() + .join("\n"); + + let prompt = format!( + r#"Summarize this day's conversation between {user_name} and {contact}. + +CRITICAL FORMAT RULES: +- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles +- Do NOT repeat the date at the beginning +- Start DIRECTLY with the content - begin with a person's name or action +- Write in past tense, as if recording what happened + +NARRATIVE (4-8 sentences): +- What specific topics, activities, or events were discussed? +- What places, people, or organizations were mentioned? +- What plans were made or decisions discussed? 
+- Clearly distinguish between what {user_name} did versus what {contact} did + +KEYWORDS (comma-separated): +5-10 specific keywords that capture this conversation's unique content: +- Proper nouns (people, places, brands) +- Specific activities ("drum corps audition" not just "music") +- Distinctive terms that make this day unique + +Date: {month_day_year} ({weekday}) +Messages: +{messages_text} + +YOUR RESPONSE (follow this format EXACTLY): +Summary: [Start directly with content, NO preamble] + +Keywords: [specific, unique terms]"#, + user_name = user_name, + contact = contact, + month_day_year = date.format("%B %d, %Y"), + weekday = date.format("%A"), + messages_text = messages_text, + ); + + (prompt, DAILY_SUMMARY_SYSTEM_PROMPT) +} + pub fn strip_summary_boilerplate(summary: &str) -> String { let mut text = summary.trim().to_string(); @@ -290,65 +362,10 @@ async fn generate_and_store_daily_summary( span.set_attribute(KeyValue::new("contact", contact.to_string())); span.set_attribute(KeyValue::new("message_count", messages.len() as i64)); - // Format messages for LLM - let messages_text: String = messages - .iter() - .take(200) // Limit to 200 messages per day to avoid token overflow - .map(|m| { - if m.is_sent { - format!("Me: {}", m.body) - } else { - format!("{}: {}", m.contact, m.body) - } - }) - .collect::<Vec<String>>() - .join("\n"); - - let weekday = date.format("%A"); - - let prompt = format!( - r#"Summarize this day's conversation between me and {}. - -CRITICAL FORMAT RULES: -- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles -- Do NOT repeat the date at the beginning -- Start DIRECTLY with the content - begin with a person's name or action -- Write in past tense, as if recording what happened - -NARRATIVE (3-5 sentences): -- What specific topics, activities, or events were discussed? -- What places, people, or organizations were mentioned? -- What plans were made or decisions discussed? 
-- Clearly distinguish between what "I" did versus what {} did - -KEYWORDS (comma-separated): -5-10 specific keywords that capture this conversation's unique content: -- Proper nouns (people, places, brands) -- Specific activities ("drum corps audition" not just "music") -- Distinctive terms that make this day unique - -Date: {} ({}) -Messages: -{} - -YOUR RESPONSE (follow this format EXACTLY): -Summary: [Start directly with content, NO preamble] - -Keywords: [specific, unique terms]"#, - contact, - contact, - date.format("%B %d, %Y"), - weekday, - messages_text - ); + let (prompt, system_prompt) = build_daily_summary_prompt(contact, date, messages); // Generate summary with LLM - let summary = ollama - .generate( - &prompt, - Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."), - ) - .await?; + let summary = ollama.generate(&prompt, Some(system_prompt)).await?; log::debug!( "Generated summary for {}: {}", @@ -381,8 +398,7 @@ Keywords: [specific, unique terms]"#, message_count: messages.len() as i32, embedding, created_at: Utc::now().timestamp(), - // model_version: "nomic-embed-text:v1.5".to_string(), - model_version: "mxbai-embed-large:335m".to_string(), + model_version: EMBEDDING_MODEL.to_string(), }; // Create context from current span for DB operation diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index a1edd0c..bffd141 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -13,6 +13,7 @@ use crate::ai::llm_client::LlmClient; use crate::ai::ollama::{ChatMessage, OllamaClient, Tool}; use crate::ai::openrouter::OpenRouterClient; use crate::ai::sms_client::SmsApiClient; +use crate::ai::user_display_name; use crate::database::models::InsertPhotoInsight; use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, 
LocationHistoryDao, @@ -1260,10 +1261,14 @@ impl InsightGenerator { // Format a sample of messages for topic extraction let sample_size = messages.len().min(20); + let user_name = user_display_name(); let sample_text: Vec<String> = messages .iter() .take(sample_size) - .map(|m| format!("{}: {}", if m.is_sent { "Me" } else { &m.contact }, m.body)) + .map(|m| { + let sender: &str = if m.is_sent { &user_name } else { &m.contact }; + format!("{}: {}", sender, m.body) + }) .collect(); let prompt = format!( @@ -1361,10 +1366,11 @@ Return ONLY the summary, nothing else."#, } // Format messages + let user_name = user_display_name(); let formatted: Vec<String> = messages .iter() .map(|m| { - let sender = if m.is_sent { "Me" } else { &m.contact }; + let sender: &str = if m.is_sent { &user_name } else { &m.contact }; let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) @@ -1624,7 +1630,21 @@ Return ONLY the summary, nothing else."#, async fn tool_search_messages(&self, args: &serde_json::Value) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) if !q.trim().is_empty() => q.trim(), - _ => return "Error: missing required parameter 'query'".to_string(), + _ => { + // Redirect when the model reached for this tool with a + // date/contact-shaped intent — get_sms_messages is the right + // call. Without this hint, small models often just retry + // search_messages again with the same args. + let has_date = args.get("date").is_some(); + let has_contact = args.get("contact").is_some(); + if has_date || has_contact { + return "Error: search_messages needs a 'query' (keywords/phrase). \ To fetch messages around a date or from a contact, call \ get_sms_messages with { date, contact? } instead." 
+ .to_string(); + } + return "Error: missing required parameter 'query'".to_string(); + } }; if query.len() < 3 { return "Error: query must be at least 3 characters".to_string(); @@ -1662,11 +1682,12 @@ Return ONLY the summary, nothing else."#, hits.len(), mode )); + let user_name = user_display_name(); for h in hits { let date = chrono::DateTime::from_timestamp(h.date, 0) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| h.date.to_string()); - let direction = if h.type_ == 2 { "Me" } else { &h.contact_name }; + let direction: &str = if h.type_ == 2 { &user_name } else { &h.contact_name }; let score = h .similarity_score .map(|s| format!(" [score {:.2}]", s)) @@ -1726,11 +1747,12 @@ Return ONLY the summary, nothing else."#, .await { Ok(messages) if !messages.is_empty() => { + let user_name = user_display_name(); let formatted: Vec<String> = messages .iter() .take(limit) .map(|m| { - let sender = if m.is_sent { "Me" } else { &m.contact }; + let sender: &str = if m.is_sent { &user_name } else { &m.contact }; let ts = DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) @@ -2359,7 +2381,7 @@ Return ONLY the summary, nothing else."#, ), Tool::function( "search_messages", - "Keyword/semantic/hybrid search over ALL SMS message bodies (not just summaries) across all time. Prefer this for specific phrases, proper nouns, URLs, or when you don't know the date. Modes: 'fts5' (keyword, supports \"phrase\" / prefix* / AND / NEAR(w1 w2, 5)), 'semantic' (embedding similarity), 'hybrid' (recommended — merges both via reciprocal rank fusion).", + "CONTENT search over SMS message bodies by keywords/phrases/topics across all time. Use when you're looking for specific wording (phrases, proper nouns, URLs, topics) and DON'T have a date in mind. NOT for time-based queries — if you know the date or want messages around a date, call get_sms_messages instead. 
Modes: 'fts5' (keyword, supports \"phrase\" / prefix* / AND / NEAR(w1 w2, 5)), 'semantic' (embedding similarity), 'hybrid' (recommended — merges both via reciprocal rank fusion).", serde_json::json!({ "type": "object", "required": ["query"], @@ -2382,7 +2404,7 @@ Return ONLY the summary, nothing else."#, ), Tool::function( "get_sms_messages", - "Fetch SMS/text messages near a specific date. Returns the actual message conversation. Omit contact to search across all conversations.", + "TIME-BASED fetch of SMS/text messages around a specific date (and optionally from a specific contact). Returns the actual message conversation for that window. Use this whenever you know the date or want the context around a photo's timestamp. Omit contact to search across all conversations. For keyword/topic search without a date, use search_messages instead.", serde_json::json!({ "type": "object", "required": ["date"], @@ -2561,7 +2583,7 @@ Return ONLY the summary, nothing else."#, }, "object_entity_id": { "type": "integer", - "description": "Use when the object is a known entity (e.g. Cameron's entity ID for 'is_friend_of Cameron'). Takes precedence over object_value." + "description": "Use when the object is a known entity (e.g. another person's entity ID for 'is_friend_of <name>'). Takes precedence over object_value." }, "object_value": { "type": "string", @@ -2871,8 +2893,9 @@ Return ONLY the summary, nothing else."#, }; // 6. Clear existing entity-photo links for this file so the run starts fresh, - // and ensure the owner entity (Cameron) exists so the agent can reference it. - let cameron_entity_id: Option<i64> = { + // and ensure the owner entity exists so the agent can reference it. + let owner_name = user_display_name(); + let owner_entity_id: Option<i64> = { let mut kdao = self .knowledge_dao .lock() @@ -2888,9 +2911,12 @@ Return ONLY the summary, nothing else."#, // Upsert the owner entity so the agent always has a stable entity ID to reference. 
let owner = crate::database::models::InsertEntity { - name: "Cameron".to_string(), + name: owner_name.clone(), entity_type: "person".to_string(), - description: "The owner of this photo collection. All memories are written from Cameron's perspective.".to_string(), + description: format!( + "The owner of this photo collection. All memories are written from {}'s perspective.", + owner_name + ), embedding: None, confidence: 1.0, status: "active".to_string(), @@ -2899,11 +2925,11 @@ Return ONLY the summary, nothing else."#, }; match kdao.upsert_entity(&insight_cx, owner) { Ok(e) => { - log::info!("Cameron entity ID: {}", e.id); + log::info!("Owner entity '{}' ID: {}", owner_name, e.id); Some(e.id) } Err(e) => { - log::warn!("Failed to upsert Cameron entity: {:?}", e); + log::warn!("Failed to upsert owner entity '{}': {:?}", owner_name, e); None } } @@ -2953,28 +2979,30 @@ Return ONLY the summary, nothing else."#, }; // 8. Build system message - let cameron_id_note = match cameron_entity_id { + let owner_id_note = match owner_entity_id { Some(id) => format!( - "\n\nYour identity in the knowledge store: Cameron (entity ID: {}). \ - When storing facts where you (Cameron) are the object — for example, someone is your friend, \ + "\n\nYour identity in the knowledge store: {name} (entity ID: {id}). \ + When storing facts where you ({name}) are the object — for example, someone is your friend, \ sibling, or colleague — use subject_entity_id for the other person and set object_value to \ - \"Cameron\" (or use store_fact with the other person as subject). When storing facts about \ - Cameron directly, use {} as the subject_entity_id.", - id, id + \"{name}\" (or use store_fact with the other person as subject). 
When storing facts about \ + {name} directly, use {id} as the subject_entity_id.", + name = owner_name, + id = id ), None => String::new(), }; let base_system = format!( - "You are a personal photo memory assistant helping to reconstruct a memory from a photo.{cameron_id_note}\n\n\ + "You are a personal photo memory assistant helping to reconstruct a memory from a photo.{owner_id_note}\n\n\ IMPORTANT INSTRUCTIONS:\n\ 1. You MUST call multiple tools to gather context BEFORE writing any final insight. Do not produce a final answer after only one or two tool calls.\n\ - 2. When calling get_sms_messages and search_rag, always make at least one call WITHOUT a contact filter to capture what else was happening in Cameron's life around this date — other conversations, events, and activities provide important wider context even when a specific contact is known.\n\ + 2. When calling get_sms_messages and search_rag, always make at least one call WITHOUT a contact filter to capture what else was happening in {owner_name}'s life around this date — other conversations, events, and activities provide important wider context even when a specific contact is known.\n\ 3. Use recall_facts_for_photo to load any previously stored knowledge about subjects in this photo.\n\ 4. Use recall_entities to look up known people, places, or things that appear in this photo.\n\ 5. When you identify people, places, events, or notable things in this photo: use store_entity to record them and store_fact to record key facts (relationships, roles, attributes). This builds a persistent memory for future insights.\n\ 6. Only produce your final insight AFTER you have gathered context from at least 5 tool calls.\n\ 7. 
If a tool returns no results, that is useful information — continue calling the remaining tools anyway.", - cameron_id_note = cameron_id_note + owner_id_note = owner_id_note, + owner_name = owner_name ); let system_content = if let Some(ref custom) = custom_system_prompt { format!("{}\n\n{}", custom, base_system) @@ -3125,7 +3153,10 @@ Return ONLY the summary, nothing else."#, iterations_used ); messages.push(ChatMessage::user( - "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.", + &format!( + "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.", + user_display_name() + ), )); let (final_response, prompt_tokens, eval_tokens) = chat_backend .chat_with_tools(messages.clone(), vec![]) diff --git a/src/ai/mod.rs b/src/ai/mod.rs index 8e38930..94e8541 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -9,7 +9,10 @@ pub mod sms_client; // strip_summary_boilerplate is used by binaries (test_daily_summary), not the library #[allow(unused_imports)] -pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate}; +pub use daily_summary_job::{ + DAILY_SUMMARY_MESSAGE_LIMIT, DAILY_SUMMARY_SYSTEM_PROMPT, build_daily_summary_prompt, + generate_daily_summaries, strip_summary_boilerplate, +}; pub use handlers::{ chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler, delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler, @@ -21,5 +24,14 @@ pub use insight_generator::InsightGenerator; pub use llm_client::{ ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction, }; -pub use ollama::OllamaClient; +pub use ollama::{EMBEDDING_MODEL, OllamaClient}; pub use sms_client::{SmsApiClient, SmsMessage}; + +/// Display name used for the user in message transcripts and first-person +/// prompt text. 
Reads the `USER_NAME` env var; defaults to `"Me"`. Models +/// often confuse `"Me:"` in a transcript with their own role — setting +/// `USER_NAME=Cameron` (or similar) in the environment eliminates that +/// ambiguity across daily summaries, insight generation, and chat. +pub fn user_display_name() -> String { + std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string()) +} diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs index 1dc67f8..81185f0 100644 --- a/src/ai/ollama.rs +++ b/src/ai/ollama.rs @@ -19,6 +19,11 @@ pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction}; // Cache duration: 15 minutes const CACHE_DURATION_SECS: u64 = 15 * 60; +/// Embedding model used across the app. Callers that persist a +/// `model_version` alongside an embedding should read this constant so the +/// stored label always matches what `generate_embeddings` actually ran. +pub const EMBEDDING_MODEL: &str = "nomic-embed-text:v1.5"; + // Cached entry with timestamp #[derive(Clone)] struct CachedEntry { @@ -349,6 +354,12 @@ impl OllamaClient { } let result: OllamaResponse = response.json().await?; + log_chat_metrics( + result.prompt_eval_count, + result.prompt_eval_duration, + result.eval_count, + result.eval_duration, + ); Ok(result.response) } @@ -481,6 +492,7 @@ Capture the key moment or theme. Return ONLY the title, nothing else."#, ) -> Result<String> { let location_str = location.unwrap_or("Unknown"); let sms_str = sms_summary.unwrap_or("No messages"); + let user_name = crate::ai::user_display_name(); let prompt = if image_base64.is_some() { if let Some(contact_name) = contact { @@ -492,13 +504,14 @@ Location: {} Person/Contact: {} Messages: {} -Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. 
Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#, +Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#, date.format("%B %d, %Y"), location_str, contact_name, sms_str, contact_name, - contact_name + contact_name, + user_name ) } else { format!( @@ -508,10 +521,11 @@ Date: {} Location: {} Messages: {} -Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#, +Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#, date.format("%B %d, %Y"), location_str, - sms_str + sms_str, + user_name ) } } else if let Some(contact_name) = contact { @@ -523,13 +537,14 @@ Analyze the image and use specific details from both the visual content and the Person/Contact: {} Messages: {} - Use only the specific details provided above. 
The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#, + Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#, date.format("%B %d, %Y"), location_str, contact_name, sms_str, contact_name, - contact_name + contact_name, + user_name ) } else { format!( @@ -539,10 +554,11 @@ Analyze the image and use specific details from both the visual content and the Location: {} Messages: {} - Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#, + Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual. 
If the location is unknown omit it"#, date.format("%B %d, %Y"), location_str, - sms_str + sms_str, + user_name ) }; @@ -892,7 +908,7 @@ Analyze the image and use specific details from both the visual content and the /// Returns a vector of 768-dimensional vectors /// This is much more efficient than calling generate_embedding multiple times pub async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> { - let embedding_model = "nomic-embed-text:v1.5"; + let embedding_model = EMBEDDING_MODEL; log::debug!("=== Ollama Batch Embedding Request ==="); log::debug!("Model: {}", embedding_model); @@ -1145,6 +1161,14 @@ struct OllamaStreamMessage { #[derive(Deserialize)] struct OllamaResponse { response: String, + #[serde(default)] + prompt_eval_count: Option<u64>, + #[serde(default)] + prompt_eval_duration: Option<u64>, + #[serde(default)] + eval_count: Option<u64>, + #[serde(default)] + eval_duration: Option<u64>, } fn log_chat_metrics( diff --git a/src/ai/sms_client.rs b/src/ai/sms_client.rs index 57d28a1..ad6d28e 100644 --- a/src/ai/sms_client.rs +++ b/src/ai/sms_client.rs @@ -299,12 +299,13 @@ impl SmsApiClient { } // Create prompt for Ollama with sender/receiver distinction + let user_name = crate::ai::user_display_name(); let messages_text: String = messages .iter() .take(60) // Limit to avoid token overflow .map(|m| { if m.is_sent { - format!("Me: {}", m.body) + format!("{}: {}", user_name, m.body) } else { format!("{}: {}", m.contact, m.body) } diff --git a/src/bin/test_daily_summary.rs b/src/bin/test_daily_summary.rs index fbbb621..aff2790 100644 --- a/src/bin/test_daily_summary.rs +++ b/src/bin/test_daily_summary.rs @@ -1,7 +1,10 @@ use anyhow::Result; use chrono::NaiveDate; use clap::Parser; -use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate}; +use image_api::ai::{ + EMBEDDING_MODEL, OllamaClient, SmsApiClient, build_daily_summary_prompt, + strip_summary_boilerplate, user_display_name, +}; use image_api::database::{DailySummaryDao, InsertDailySummary, 
SqliteDailySummaryDao}; use std::env; use std::sync::{Arc, Mutex}; @@ -25,6 +28,26 @@ struct Args { #[arg(short, long)] model: Option<String>, + /// Context window size passed as Ollama `num_ctx`. Omit for server default. + #[arg(long)] + num_ctx: Option<u32>, + + /// Sampling temperature. Omit for server default. + #[arg(long)] + temperature: Option<f32>, + + /// Top-p (nucleus) sampling. Omit for server default. + #[arg(long)] + top_p: Option<f32>, + + /// Top-k sampling. Omit for server default. + #[arg(long)] + top_k: Option<i32>, + + /// Min-p sampling. Omit for server default. + #[arg(long)] + min_p: Option<f32>, + /// Test mode: Generate but don't save to database (shows output only) #[arg(short = 't', long, default_value_t = false)] test_mode: bool, @@ -86,12 +109,28 @@ async fn main() -> Result<()> { .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string()) }); - let ollama = OllamaClient::new( + let mut ollama = OllamaClient::new( ollama_primary_url, ollama_fallback_url.clone(), model_to_use.clone(), Some(model_to_use), // Use same model for fallback ); + if let Some(ctx) = args.num_ctx { + ollama.set_num_ctx(Some(ctx)); + } + if args.temperature.is_some() + || args.top_p.is_some() + || args.top_k.is_some() + || args.min_p.is_some() + { + ollama.set_sampling_params(args.temperature, args.top_p, args.top_k, args.min_p); + } + + // Surface what's actually configured so comparison runs are auditable. 
+ println!( + "num_ctx={:?} temperature={:?} top_p={:?} top_k={:?} min_p={:?}", + args.num_ctx, args.temperature, args.top_p, args.top_k, args.min_p + ); let sms_api_url = env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string()); @@ -160,9 +199,10 @@ async fn main() -> Result<()> { println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); if args.verbose { + let user_name = user_display_name(); println!("\nMessage preview:"); for (i, msg) in messages.iter().take(3).enumerate() { - let sender = if msg.is_sent { "Me" } else { &msg.contact }; + let sender: &str = if msg.is_sent { &user_name } else { &msg.contact }; let preview = msg.body.chars().take(60).collect::<String>(); println!(" {}. {}: {}...", i + 1, sender, preview); } @@ -172,64 +212,11 @@ async fn main() -> Result<()> { println!(); } - // Format messages for LLM - let messages_text: String = messages - .iter() - .take(200) - .map(|m| { - if m.is_sent { - format!("Me: {}", m.body) - } else { - format!("{}: {}", m.contact, m.body) - } - }) - .collect::<Vec<String>>() - .join("\n"); - - let prompt = format!( - r#"Summarize this day's conversation between me and {}. - -CRITICAL FORMAT RULES: -- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles -- Do NOT repeat the date at the beginning -- Start DIRECTLY with the content - begin with a person's name or action -- Write in past tense, as if recording what happened - -NARRATIVE (3-5 sentences): -- What specific topics, activities, or events were discussed? -- What places, people, or organizations were mentioned? -- What plans were made or decisions discussed? 
-- Clearly distinguish between what "I" did versus what {} did - -KEYWORDS (comma-separated): -5-10 specific keywords that capture this conversation's unique content: -- Proper nouns (people, places, brands) -- Specific activities ("drum corps audition" not just "music") -- Distinctive terms that make this day unique - -Date: {} ({}) -Messages: -{} - -YOUR RESPONSE (follow this format EXACTLY): -Summary: [Start directly with content, NO preamble] - -Keywords: [specific, unique terms]"#, - args.contact, - args.contact, - date.format("%B %d, %Y"), - weekday, - messages_text - ); + let (prompt, system_prompt) = build_daily_summary_prompt(&args.contact, date, messages); println!("Generating summary..."); - let summary = ollama - .generate( - &prompt, - Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."), - ) - .await?; + let summary = ollama.generate(&prompt, Some(system_prompt)).await?; println!("\n📝 GENERATED SUMMARY:"); println!("─────────────────────────────────────────"); @@ -256,8 +243,7 @@ Keywords: [specific, unique terms]"#, message_count: messages.len() as i32, embedding, created_at: chrono::Utc::now().timestamp(), - // model_version: "nomic-embed-text:v1.5".to_string(), - model_version: "mxbai-embed-large:335m".to_string(), + model_version: EMBEDDING_MODEL.to_string(), }; let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");