// File-viewer header (extraction residue): Rust source, 289 lines, 10 KiB.
use anyhow::{Context, Result};
use chrono::NaiveDate;
use clap::Parser;
use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
use std::collections::BTreeMap;
use std::env;
use std::sync::{Arc, Mutex};

#[derive(Parser, Debug)]
|
|
#[command(author, version, about = "Test daily summary generation with different models and prompts", long_about = None)]
|
|
struct Args {
|
|
/// Contact name to generate summaries for
|
|
#[arg(short, long)]
|
|
contact: String,
|
|
|
|
/// Start date (YYYY-MM-DD)
|
|
#[arg(short, long)]
|
|
start: String,
|
|
|
|
/// End date (YYYY-MM-DD)
|
|
#[arg(short, long)]
|
|
end: String,
|
|
|
|
/// Optional: Override the model to use (e.g., "qwen2.5:32b", "llama3.1:30b")
|
|
#[arg(short, long)]
|
|
model: Option<String>,
|
|
|
|
/// Test mode: Generate but don't save to database (shows output only)
|
|
#[arg(short = 't', long, default_value_t = false)]
|
|
test_mode: bool,
|
|
|
|
/// Show message count and preview
|
|
#[arg(short, long, default_value_t = false)]
|
|
verbose: bool,
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
// Load .env file
|
|
dotenv::dotenv().ok();
|
|
|
|
// Initialize logging
|
|
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
|
|
|
|
let args = Args::parse();
|
|
|
|
// Parse dates
|
|
let start_date = NaiveDate::parse_from_str(&args.start, "%Y-%m-%d")
|
|
.expect("Invalid start date format. Use YYYY-MM-DD");
|
|
let end_date = NaiveDate::parse_from_str(&args.end, "%Y-%m-%d")
|
|
.expect("Invalid end date format. Use YYYY-MM-DD");
|
|
|
|
println!("========================================");
|
|
println!("Daily Summary Generation Test Tool");
|
|
println!("========================================");
|
|
println!("Contact: {}", args.contact);
|
|
println!("Date range: {} to {}", start_date, end_date);
|
|
println!("Days: {}", (end_date - start_date).num_days() + 1);
|
|
if let Some(ref model) = args.model {
|
|
println!("Model: {}", model);
|
|
} else {
|
|
println!(
|
|
"Model: {} (from env)",
|
|
env::var("OLLAMA_PRIMARY_MODEL")
|
|
.or_else(|_| env::var("OLLAMA_MODEL"))
|
|
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
|
|
);
|
|
}
|
|
if args.test_mode {
|
|
println!("⚠ TEST MODE: Results will NOT be saved to database");
|
|
}
|
|
println!("========================================");
|
|
println!();
|
|
|
|
// Initialize AI clients
|
|
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL")
|
|
.or_else(|_| env::var("OLLAMA_URL"))
|
|
.unwrap_or_else(|_| "http://localhost:11434".to_string());
|
|
|
|
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
|
|
|
|
// Use provided model or fallback to env
|
|
let model_to_use = args.model.clone().unwrap_or_else(|| {
|
|
env::var("OLLAMA_PRIMARY_MODEL")
|
|
.or_else(|_| env::var("OLLAMA_MODEL"))
|
|
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
|
|
});
|
|
|
|
let ollama = OllamaClient::new(
|
|
ollama_primary_url,
|
|
ollama_fallback_url.clone(),
|
|
model_to_use.clone(),
|
|
Some(model_to_use), // Use same model for fallback
|
|
);
|
|
|
|
let sms_api_url =
|
|
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
|
|
let sms_api_token = env::var("SMS_API_TOKEN").ok();
|
|
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
|
|
|
|
// Initialize DAO
|
|
let summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
|
|
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
|
|
|
|
// Fetch messages for contact
|
|
println!("Fetching messages for {}...", args.contact);
|
|
let all_messages = sms_client
|
|
.fetch_all_messages_for_contact(&args.contact)
|
|
.await?;
|
|
|
|
println!(
|
|
"Found {} total messages for {}",
|
|
all_messages.len(),
|
|
args.contact
|
|
);
|
|
println!();
|
|
|
|
// Filter to date range and group by date
|
|
let mut messages_by_date = std::collections::HashMap::new();
|
|
|
|
for msg in all_messages {
|
|
if let Some(dt) = chrono::DateTime::from_timestamp(msg.timestamp, 0) {
|
|
let date = dt.date_naive();
|
|
if date >= start_date && date <= end_date {
|
|
messages_by_date
|
|
.entry(date)
|
|
.or_insert_with(Vec::new)
|
|
.push(msg);
|
|
}
|
|
}
|
|
}
|
|
|
|
if messages_by_date.is_empty() {
|
|
println!("⚠ No messages found in date range");
|
|
return Ok(());
|
|
}
|
|
|
|
println!("Found {} days with messages", messages_by_date.len());
|
|
println!();
|
|
|
|
// Sort dates
|
|
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
|
|
dates.sort();
|
|
|
|
// Process each day
|
|
for (idx, date) in dates.iter().enumerate() {
|
|
let messages = messages_by_date.get(date).unwrap();
|
|
let date_str = date.format("%Y-%m-%d").to_string();
|
|
let weekday = date.format("%A");
|
|
|
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
println!(
|
|
"Day {}/{}: {} ({}) - {} messages",
|
|
idx + 1,
|
|
dates.len(),
|
|
date_str,
|
|
weekday,
|
|
messages.len()
|
|
);
|
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
|
|
if args.verbose {
|
|
println!("\nMessage preview:");
|
|
for (i, msg) in messages.iter().take(3).enumerate() {
|
|
let sender = if msg.is_sent { "Me" } else { &msg.contact };
|
|
let preview = msg.body.chars().take(60).collect::<String>();
|
|
println!(" {}. {}: {}...", i + 1, sender, preview);
|
|
}
|
|
if messages.len() > 3 {
|
|
println!(" ... and {} more", messages.len() - 3);
|
|
}
|
|
println!();
|
|
}
|
|
|
|
// Format messages for LLM
|
|
let messages_text: String = messages
|
|
.iter()
|
|
.take(200)
|
|
.map(|m| {
|
|
if m.is_sent {
|
|
format!("Me: {}", m.body)
|
|
} else {
|
|
format!("{}: {}", m.contact, m.body)
|
|
}
|
|
})
|
|
.collect::<Vec<_>>()
|
|
.join("\n");
|
|
|
|
let prompt = format!(
|
|
r#"Summarize this day's conversation between me and {}.
|
|
|
|
CRITICAL FORMAT RULES:
|
|
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
|
|
- Do NOT repeat the date at the beginning
|
|
- Start DIRECTLY with the content - begin with a person's name or action
|
|
- Write in past tense, as if recording what happened
|
|
|
|
NARRATIVE (3-5 sentences):
|
|
- What specific topics, activities, or events were discussed?
|
|
- What places, people, or organizations were mentioned?
|
|
- What plans were made or decisions discussed?
|
|
- Clearly distinguish between what "I" did versus what {} did
|
|
|
|
KEYWORDS (comma-separated):
|
|
5-10 specific keywords that capture this conversation's unique content:
|
|
- Proper nouns (people, places, brands)
|
|
- Specific activities ("drum corps audition" not just "music")
|
|
- Distinctive terms that make this day unique
|
|
|
|
Date: {} ({})
|
|
Messages:
|
|
{}
|
|
|
|
YOUR RESPONSE (follow this format EXACTLY):
|
|
Summary: [Start directly with content, NO preamble]
|
|
|
|
Keywords: [specific, unique terms]"#,
|
|
args.contact,
|
|
args.contact,
|
|
date.format("%B %d, %Y"),
|
|
weekday,
|
|
messages_text
|
|
);
|
|
|
|
println!("Generating summary...");
|
|
|
|
let summary = ollama
|
|
.generate(
|
|
&prompt,
|
|
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
|
|
)
|
|
.await?;
|
|
|
|
println!("\n📝 GENERATED SUMMARY:");
|
|
println!("─────────────────────────────────────────");
|
|
println!("{}", summary.trim());
|
|
println!("─────────────────────────────────────────");
|
|
|
|
if !args.test_mode {
|
|
println!("\nStripping boilerplate for embedding...");
|
|
let stripped = strip_summary_boilerplate(&summary);
|
|
println!(
|
|
"Stripped: {}...",
|
|
stripped.chars().take(80).collect::<String>()
|
|
);
|
|
|
|
println!("\nGenerating embedding...");
|
|
let embedding = ollama.generate_embedding(&stripped).await?;
|
|
println!("✓ Embedding generated ({} dimensions)", embedding.len());
|
|
|
|
println!("Saving to database...");
|
|
let insert = InsertDailySummary {
|
|
date: date_str.clone(),
|
|
contact: args.contact.clone(),
|
|
summary: summary.trim().to_string(),
|
|
message_count: messages.len() as i32,
|
|
embedding,
|
|
created_at: chrono::Utc::now().timestamp(),
|
|
// model_version: "nomic-embed-text:v1.5".to_string(),
|
|
model_version: "mxbai-embed-large:335m".to_string(),
|
|
};
|
|
|
|
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
|
|
let context = opentelemetry::Context::new();
|
|
|
|
match dao.store_summary(&context, insert) {
|
|
Ok(_) => println!("✓ Saved to database"),
|
|
Err(e) => println!("✗ Database error: {:?}", e),
|
|
}
|
|
} else {
|
|
println!("\n⚠ TEST MODE: Not saved to database");
|
|
}
|
|
|
|
println!();
|
|
|
|
// Rate limiting between days
|
|
if idx < dates.len() - 1 {
|
|
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
|
}
|
|
}
|
|
|
|
println!("========================================");
|
|
println!("✓ Complete!");
|
|
println!("Processed {} days", dates.len());
|
|
println!("========================================");
|
|
|
|
Ok(())
|
|
}
|