Files
ImageApi/src/ai/mod.rs
T
Cameron Cordes a410683edf fix: fail fast when LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured
Previously embed_one() silently fell back to Ollama embeddings,
which would load nomic-embed-text into VRAM alongside llama-swap —
wasting memory on an unintended model. Now returns an error with
an actionable message instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:02:42 -04:00

130 lines
4.6 KiB
Rust

pub mod apollo_client;
pub mod backend;
pub mod clip_client;
pub mod daily_summary_job;
pub mod face_client;
pub mod handlers;
pub mod insight_chat;
pub mod insight_generator;
pub mod llamacpp;
pub mod llm_client;
pub mod ollama;
pub mod openrouter;
pub mod sms_client;
// strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
#[allow(unused_imports)]
pub use daily_summary_job::{
DAILY_SUMMARY_MESSAGE_LIMIT, DAILY_SUMMARY_SYSTEM_PROMPT, build_daily_summary_prompt,
generate_daily_summaries, strip_summary_boilerplate,
};
pub use handlers::{
cancel_generation_handler, chat_history_handler, chat_rewind_handler, chat_stream_handler,
chat_turn_handler, delete_insight_handler, export_training_data_handler,
generate_agentic_insight_handler, generate_insight_handler, generation_status_handler,
get_all_insights_handler, get_available_models_handler, get_insight_handler,
get_openrouter_models_handler, rate_insight_handler,
};
pub use insight_generator::InsightGenerator;
pub use llamacpp::LlamaCppClient;
#[allow(unused_imports)]
pub use llm_client::{
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
};
pub use ollama::{EMBEDDING_MODEL, OllamaClient};
pub use sms_client::{SmsApiClient, SmsMessage};
/// Display name used for the user in message transcripts and first-person
/// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models
/// often confuse `"Me:"` in a transcript with their own role — setting
/// `USER_NAME=Cameron` (or similar) in the environment eliminates that
/// ambiguity across daily summaries, insight generation, and chat.
pub fn user_display_name() -> String {
std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string())
}
/// One switch for the "local" LLM stack: when `LLM_BACKEND=llamacpp` is
/// set, chat / vision describe / embeddings all route through llama-swap
/// instead of Ollama. Any other value (including unset, the default) is
/// Ollama. This is intentionally global — embeddings must be drawn from
/// a single source or similarity search across the index breaks (mixed
/// vector spaces, possibly mixed dims). The `backend=hybrid` per-request
/// override remains orthogonal: it always sends chat to OpenRouter, and
/// uses `LLM_BACKEND` for the describe-then-inline vision pass.
pub fn local_backend_is_llamacpp() -> bool {
matches!(
std::env::var("LLM_BACKEND")
.ok()
.as_deref()
.map(|s| s.trim().to_lowercase())
.as_deref(),
Some("llamacpp")
)
}
/// Embed one string via the configured local backend. Routes through
/// llama-swap when `LLM_BACKEND=llamacpp` (and a client is configured),
/// else Ollama. Returns the single embedding vector. See
/// [`local_backend_is_llamacpp`] for the rationale on consistency.
pub async fn embed_one(
ollama: &OllamaClient,
llamacpp: Option<&LlamaCppClient>,
text: &str,
) -> anyhow::Result<Vec<f32>> {
if local_backend_is_llamacpp() {
if let Some(lc) = llamacpp {
let mut vecs = <LlamaCppClient as LlmClient>::generate_embeddings(lc, &[text]).await?;
return vecs
.pop()
.ok_or_else(|| anyhow::anyhow!("llama-swap returned no embeddings"));
}
anyhow::bail!(
"LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
);
}
ollama.generate_embedding(text).await
}
#[cfg(test)]
mod env_dispatch_tests {
use super::*;
fn with_env<F: FnOnce()>(key: &str, val: Option<&str>, f: F) {
let prev = std::env::var(key).ok();
match val {
Some(v) => unsafe { std::env::set_var(key, v) },
None => unsafe { std::env::remove_var(key) },
}
f();
match prev {
Some(v) => unsafe { std::env::set_var(key, v) },
None => unsafe { std::env::remove_var(key) },
}
}
#[test]
fn llm_backend_defaults_to_ollama() {
with_env("LLM_BACKEND", None, || {
assert!(!local_backend_is_llamacpp());
});
}
#[test]
fn llm_backend_llamacpp_case_insensitive() {
with_env("LLM_BACKEND", Some("LlamaCpp"), || {
assert!(local_backend_is_llamacpp());
});
with_env("LLM_BACKEND", Some(" llamacpp "), || {
assert!(local_backend_is_llamacpp());
});
}
#[test]
fn llm_backend_unknown_value_is_ollama() {
with_env("LLM_BACKEND", Some("vllm"), || {
assert!(!local_backend_is_llamacpp());
});
}
}