feat(ai): USER_NAME env + shared summary prompt + test-bin knobs

Introduces USER_NAME (default "Me") as the single source for the message
sender label and the first-person persona across daily summaries, SMS
context, insight generation, and chat. Eliminates the "Me:" transcript /
"what I did" ambiguity that confused smaller models, and unhardcodes
"Cameron" from prompt text + the knowledge-graph owner entity. Set
USER_NAME=Cameron in .env to preserve the existing owner entity row
(keyed on UNIQUE(name, entity_type)) — otherwise the next run creates
a fresh owner entity and orphans the existing facts/photo-links.

Also:
- search_messages redirect: when the model calls it with date/contact
  but no query, return a hint pointing at get_sms_messages instead of
  a bare missing-parameter error (prevents same-turn retry loops)
- sharpen search_messages vs get_sms_messages tool descriptions so
  content-vs-time-based intent is unambiguous
- extract build_daily_summary_prompt (+ DAILY_SUMMARY_MESSAGE_LIMIT,
  DAILY_SUMMARY_SYSTEM_PROMPT) shared by daily_summary_job and
  test_daily_summary binary — prompt tweaks now land in both
- add EMBEDDING_MODEL const; fix both insert sites that stored
  "mxbai-embed-large:335m" while generate_embeddings actually runs
  "nomic-embed-text:v1.5"
- test_daily_summary: add --num-ctx / --temperature / --top-p /
  --top-k / --min-p flags wired into OllamaClient setters, and print
  the configured knobs at the top of each run
- OllamaClient::generate now logs prompt/gen token counts and tok/s
  via log_chat_metrics (symmetric with chat_with_tools)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-22 23:39:37 -04:00
parent e4a3536f87
commit 6831f50993
6 changed files with 226 additions and 156 deletions

View File

@@ -1,7 +1,10 @@
use anyhow::Result;
use chrono::NaiveDate;
use clap::Parser;
use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
use image_api::ai::{
EMBEDDING_MODEL, OllamaClient, SmsApiClient, build_daily_summary_prompt,
strip_summary_boilerplate, user_display_name,
};
use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
use std::env;
use std::sync::{Arc, Mutex};
@@ -25,6 +28,26 @@ struct Args {
#[arg(short, long)]
model: Option<String>,
/// Context window size passed as Ollama `num_ctx`. Omit for server default.
#[arg(long)]
num_ctx: Option<i32>,
/// Sampling temperature. Omit for server default.
#[arg(long)]
temperature: Option<f32>,
/// Top-p (nucleus) sampling. Omit for server default.
#[arg(long)]
top_p: Option<f32>,
/// Top-k sampling. Omit for server default.
#[arg(long)]
top_k: Option<i32>,
/// Min-p sampling. Omit for server default.
#[arg(long)]
min_p: Option<f32>,
/// Test mode: Generate but don't save to database (shows output only)
#[arg(short = 't', long, default_value_t = false)]
test_mode: bool,
@@ -86,12 +109,28 @@ async fn main() -> Result<()> {
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
});
let ollama = OllamaClient::new(
let mut ollama = OllamaClient::new(
ollama_primary_url,
ollama_fallback_url.clone(),
model_to_use.clone(),
Some(model_to_use), // Use same model for fallback
);
if let Some(ctx) = args.num_ctx {
ollama.set_num_ctx(Some(ctx));
}
if args.temperature.is_some()
|| args.top_p.is_some()
|| args.top_k.is_some()
|| args.min_p.is_some()
{
ollama.set_sampling_params(args.temperature, args.top_p, args.top_k, args.min_p);
}
// Surface what's actually configured so comparison runs are auditable.
println!(
"num_ctx={:?} temperature={:?} top_p={:?} top_k={:?} min_p={:?}",
args.num_ctx, args.temperature, args.top_p, args.top_k, args.min_p
);
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
@@ -160,9 +199,10 @@ async fn main() -> Result<()> {
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
if args.verbose {
let user_name = user_display_name();
println!("\nMessage preview:");
for (i, msg) in messages.iter().take(3).enumerate() {
let sender = if msg.is_sent { "Me" } else { &msg.contact };
let sender: &str = if msg.is_sent { &user_name } else { &msg.contact };
let preview = msg.body.chars().take(60).collect::<String>();
println!(" {}. {}: {}...", i + 1, sender, preview);
}
@@ -172,64 +212,11 @@ async fn main() -> Result<()> {
println!();
}
// Format messages for LLM
let messages_text: String = messages
.iter()
.take(200)
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
r#"Summarize this day's conversation between me and {}.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
Date: {} ({})
Messages:
{}
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]
Keywords: [specific, unique terms]"#,
args.contact,
args.contact,
date.format("%B %d, %Y"),
weekday,
messages_text
);
let (prompt, system_prompt) = build_daily_summary_prompt(&args.contact, date, messages);
println!("Generating summary...");
let summary = ollama
.generate(
&prompt,
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
)
.await?;
let summary = ollama.generate(&prompt, Some(system_prompt)).await?;
println!("\n📝 GENERATED SUMMARY:");
println!("─────────────────────────────────────────");
@@ -256,8 +243,7 @@ Keywords: [specific, unique terms]"#,
message_count: messages.len() as i32,
embedding,
created_at: chrono::Utc::now().timestamp(),
// model_version: "nomic-embed-text:v1.5".to_string(),
model_version: "mxbai-embed-large:335m".to_string(),
model_version: EMBEDDING_MODEL.to_string(),
};
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");