feature/llamacpp-backend #101
27
.env.example
27
.env.example
@@ -53,6 +53,33 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
|
|||||||
# OPENROUTER_HTTP_REFERER=https://your-site.example
|
# OPENROUTER_HTTP_REFERER=https://your-site.example
|
||||||
# OPENROUTER_APP_TITLE=ImageApi
|
# OPENROUTER_APP_TITLE=ImageApi
|
||||||
|
|
||||||
|
# ── AI Insights — local backend switch ──────────────────────────────────
|
||||||
|
# Picks which local LLM stack the server uses for chat, vision describe,
|
||||||
|
# and embeddings. `ollama` (default) uses the OLLAMA_* settings above;
|
||||||
|
# `llamacpp` uses the LLAMA_SWAP_* settings below. The switch is global
|
||||||
|
# and applies to both `backend=local` and `backend=hybrid` (hybrid keeps
|
||||||
|
# chat on OpenRouter but still uses this stack for the describe pass).
|
||||||
|
# Don't flip mid-deploy without re-embedding existing index rows —
|
||||||
|
# mixed vector spaces break similarity search.
|
||||||
|
# LLM_BACKEND=ollama
|
||||||
|
|
||||||
|
# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
|
||||||
|
# Set LLAMA_SWAP_URL plus LLM_BACKEND=llamacpp to swap the local stack
|
||||||
|
# off Ollama. Talks OpenAI-compatible /v1 to a llama-swap proxy fronting
|
||||||
|
# per-slot llama-server instances. Chat models receive images directly
|
||||||
|
# via content-parts (vision-capable models assumed); a separate vision
|
||||||
|
# slot is used only by the describe_photo tool and describe-image utility.
|
||||||
|
# LLAMA_SWAP_URL=http://localhost:9292/v1
|
||||||
|
# LLAMA_SWAP_PRIMARY_MODEL=chat
|
||||||
|
# Optional dedicated vision slot for describe_image. Defaults to
|
||||||
|
# LLAMA_SWAP_PRIMARY_MODEL so describe_photo works without extra config.
|
||||||
|
# LLAMA_SWAP_VISION_MODEL=vision
|
||||||
|
# LLAMA_SWAP_EMBEDDING_MODEL=embed
|
||||||
|
# Comma-separated allowlist surfaced by /insights/models when
|
||||||
|
# LLM_BACKEND=llamacpp. All report has_vision=true.
|
||||||
|
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
|
||||||
|
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
|
||||||
|
|
||||||
# ── AI Insights — sibling services (optional) ───────────────────────────
|
# ── AI Insights — sibling services (optional) ───────────────────────────
|
||||||
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
|
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
|
||||||
# typically set only APOLLO_API_BASE_URL and let the face + CLIP
|
# typically set only APOLLO_API_BASE_URL and let the face + CLIP
|
||||||
|
|||||||
76
CLAUDE.md
76
CLAUDE.md
@@ -473,7 +473,7 @@ GET /memories?path=...&recursive=true
|
|||||||
POST /insights/generate (non-agentic single-shot)
|
POST /insights/generate (non-agentic single-shot)
|
||||||
POST /insights/generate/agentic (tool-calling loop; body: { file_path, backend?, model?, ... })
|
POST /insights/generate/agentic (tool-calling loop; body: { file_path, backend?, model?, ... })
|
||||||
GET /insights?path=...&library=...
|
GET /insights?path=...&library=...
|
||||||
GET /insights/models (local Ollama models + capabilities)
|
GET /insights/models (local-backend models + capabilities; Ollama OR llama-swap based on LLM_BACKEND)
|
||||||
GET /insights/openrouter/models (curated OpenRouter allowlist)
|
GET /insights/openrouter/models (curated OpenRouter allowlist)
|
||||||
POST /insights/rate (thumbs up/down for training data)
|
POST /insights/rate (thumbs up/down for training data)
|
||||||
|
|
||||||
@@ -631,6 +631,27 @@ OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small # Optional, embeddings
|
|||||||
OPENROUTER_HTTP_REFERER=https://your-site.example # Optional attribution header
|
OPENROUTER_HTTP_REFERER=https://your-site.example # Optional attribution header
|
||||||
OPENROUTER_APP_TITLE=ImageApi # Optional attribution header
|
OPENROUTER_APP_TITLE=ImageApi # Optional attribution header
|
||||||
|
|
||||||
|
# Local LLM backend switch. `ollama` (default) keeps the OLLAMA_* settings
|
||||||
|
# above; `llamacpp` swaps the entire local stack (chat + vision describe +
|
||||||
|
# embeddings) over to llama-swap. The switch is global and applies to
|
||||||
|
# `backend=local` requests and to `backend=hybrid`'s describe pass (hybrid
|
||||||
|
# chat still goes to OpenRouter). Don't flip mid-deploy without
|
||||||
|
# re-embedding — mixed vector spaces break similarity search.
|
||||||
|
LLM_BACKEND=ollama
|
||||||
|
|
||||||
|
# llama.cpp / llama-swap (used when LLM_BACKEND=llamacpp). OpenAI-compatible
|
||||||
|
# proxy hosting one or more llama-server processes. Chat models receive
|
||||||
|
# images directly via content-parts (all models assumed vision-capable).
|
||||||
|
LLAMA_SWAP_URL=http://localhost:9292/v1 # Required when LLM_BACKEND=llamacpp
|
||||||
|
LLAMA_SWAP_PRIMARY_MODEL=chat # Chat slot id (matches config.yaml)
|
||||||
|
LLAMA_SWAP_VISION_MODEL= # Dedicated vision slot for describe_image / describe_photo
|
||||||
|
# tool. Defaults to LLAMA_SWAP_PRIMARY_MODEL when unset.
|
||||||
|
LLAMA_SWAP_EMBEDDING_MODEL=embed # Embedding slot id
|
||||||
|
LLAMA_SWAP_ALLOWED_MODELS=chat,coder # Curated allowlist surfaced by GET /insights/models
|
||||||
|
# when LLM_BACKEND=llamacpp. All report has_vision=true.
|
||||||
|
# Empty = picker shows only the configured primary model.
|
||||||
|
LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180 # Per-request timeout; bump for slow CPU offload
|
||||||
|
|
||||||
# Insight Chat Continuation
|
# Insight Chat Continuation
|
||||||
AGENTIC_CHAT_MAX_ITERATIONS=6 # Cap on tool-calling iterations per chat turn (default 6)
|
AGENTIC_CHAT_MAX_ITERATIONS=6 # Cap on tool-calling iterations per chat turn (default 6)
|
||||||
```
|
```
|
||||||
@@ -650,10 +671,50 @@ The `OllamaClient` provides methods to query available models:
|
|||||||
|
|
||||||
This allows runtime verification of model availability before generating insights.
|
This allows runtime verification of model availability before generating insights.
|
||||||
|
|
||||||
|
**Local backend switch (`LLM_BACKEND`):**
|
||||||
|
|
||||||
|
One env var decides which "local" stack the server runs against — `ollama`
|
||||||
|
(default) or `llamacpp`. It's global on purpose: chat, vision, and
|
||||||
|
embeddings all route through the same backend, so the embedding-vector
|
||||||
|
column in SQLite stays in one vector space. Don't flip mid-deploy without
|
||||||
|
re-embedding the affected rows — similarity search will collapse.
|
||||||
|
|
||||||
|
- `LLM_BACKEND=ollama`: chat, vision, and embeddings use Ollama. Vision
|
||||||
|
capability is probed per-model via `/api/show`.
|
||||||
|
- `LLM_BACKEND=llamacpp`: chat models receive images directly via OpenAI
|
||||||
|
content-parts (all models assumed vision-capable). Embeddings hit the
|
||||||
|
`embed` slot. A dedicated `LLAMA_SWAP_VISION_MODEL` slot (defaults to
|
||||||
|
the chat model) handles `describe_image` for the `describe_photo` tool.
|
||||||
|
Requires `LLAMA_SWAP_URL`.
|
||||||
|
|
||||||
|
The per-request `backend=hybrid` override is orthogonal: it always sends
|
||||||
|
chat to OpenRouter (text-only, images are pre-described and inlined), but
|
||||||
|
the describe + embed passes still route through whichever `LLM_BACKEND`
|
||||||
|
is configured.
|
||||||
|
|
||||||
|
**Backend dispatch (`ResolvedBackend`):**
|
||||||
|
|
||||||
|
`InsightGenerator::resolve_backend(kind, overrides)` is the single entry
|
||||||
|
point that builds clients for a request. Returns a `ResolvedBackend` with
|
||||||
|
two roles: `.chat()` (the agentic/chat client) and `.local()` (local-only
|
||||||
|
utility calls: rerank, describe_image, embeddings). `BackendKind` is an
|
||||||
|
enum (`Local` | `Hybrid`) replacing the stringly-typed `"local"` /
|
||||||
|
`"hybrid"` labels. `SamplingOverrides` groups model/ctx/temp/top_p/top_k/
|
||||||
|
min_p per-request overrides. All downstream code (`execute_tool`,
|
||||||
|
`run_streaming_agentic_loop`, etc.) takes `&ResolvedBackend` rather than
|
||||||
|
individual client references.
|
||||||
|
|
||||||
|
`GET /insights/models` returns the local-backend models with capabilities
|
||||||
|
in the same envelope shape regardless of `LLM_BACKEND`: Ollama servers
|
||||||
|
when `ollama`, llama-swap slots (from `LLAMA_SWAP_ALLOWED_MODELS`) when
|
||||||
|
`llamacpp`. No `/insights/llamacpp/models` — the picker reads a single
|
||||||
|
endpoint.
|
||||||
|
|
||||||
**Hybrid Backend (OpenRouter):**
|
**Hybrid Backend (OpenRouter):**
|
||||||
- Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
|
- Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
|
||||||
- Local Ollama still describes the image (vision); the description is inlined
|
- Vision describe happens before the agentic loop; the description is inlined
|
||||||
into the chat prompt and the agentic loop runs on OpenRouter.
|
into the chat prompt and the agentic loop runs on OpenRouter. Vision
|
||||||
|
routes through whichever `LLM_BACKEND` is configured.
|
||||||
- `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
|
- `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
|
||||||
call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
|
call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
|
||||||
- No live capability precheck — the operator-curated allowlist is trusted.
|
- No live capability precheck — the operator-curated allowlist is trusted.
|
||||||
@@ -661,6 +722,15 @@ This allows runtime verification of model availability before generating insight
|
|||||||
- `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
|
- `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
|
||||||
for client picker UIs.
|
for client picker UIs.
|
||||||
|
|
||||||
|
**Cross-replay matrix (chat continuation):**
|
||||||
|
- `local → local` allowed (whether served by Ollama or llama-swap; that's
|
||||||
|
a deploy-time decision, not a request-time one).
|
||||||
|
- `hybrid → hybrid` allowed.
|
||||||
|
- `hybrid → local` allowed (the inlined description replays as text).
|
||||||
|
- `local → hybrid` rejected — the stored transcript has raw images in the
|
||||||
|
first user message and OpenRouter providers don't accept that shape
|
||||||
|
consistently. Regenerate the insight in hybrid mode instead.
|
||||||
|
|
||||||
**Insight Chat Continuation:**
|
**Insight Chat Continuation:**
|
||||||
|
|
||||||
After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
|
After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
|
||||||
|
|||||||
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -2051,7 +2051,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "image-api"
|
name = "image-api"
|
||||||
version = "1.1.0"
|
version = "1.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix",
|
"actix",
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "image-api"
|
name = "image-api"
|
||||||
version = "1.1.0"
|
version = "1.2.0"
|
||||||
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
|
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
|
|||||||
140
src/ai/backend.rs
Normal file
140
src/ai/backend.rs
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
use anyhow::{Result, anyhow};
|
||||||
|
|
||||||
|
use crate::ai::llm_client::LlmClient;
|
||||||
|
|
||||||
|
/// Per-request backend selector for insight generation and chat.
///
/// Replaces the stringly-typed `"local"` / `"hybrid"` labels. The default is
/// [`BackendKind::Local`], matching what an empty selector parses to.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum BackendKind {
    /// Chat runs on the local stack.
    #[default]
    Local,
    /// Chat runs on OpenRouter; local-only utility calls still use the local
    /// stack.
    Hybrid,
}
|
||||||
|
|
||||||
|
impl BackendKind {
|
||||||
|
pub fn parse(s: &str) -> Result<Self> {
|
||||||
|
match s.trim().to_lowercase().as_str() {
|
||||||
|
"local" | "" => Ok(Self::Local),
|
||||||
|
"hybrid" => Ok(Self::Hybrid),
|
||||||
|
other => Err(anyhow!(
|
||||||
|
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
||||||
|
other
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_str(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Local => "local",
|
||||||
|
Self::Hybrid => "hybrid",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for BackendKind {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.write_str(self.as_str())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-request overrides for model selection, context size and sampler
/// settings.
///
/// Every field is optional; `None` means "no override for this field".
/// `Default` yields a value with no overrides at all.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct SamplingOverrides {
    /// Model override.
    pub model: Option<String>,
    /// `num_ctx` override.
    pub num_ctx: Option<i32>,
    /// Sampling temperature override.
    pub temperature: Option<f32>,
    /// `top_p` override.
    pub top_p: Option<f32>,
    /// `top_k` override.
    pub top_k: Option<i32>,
    /// `min_p` override.
    pub min_p: Option<f32>,
}

impl SamplingOverrides {
    /// Returns `true` when at least one sampler setting (`temperature`,
    /// `top_p`, `top_k` or `min_p`) is set.
    ///
    /// `model` and `num_ctx` are not consulted.
    pub fn has_sampling(&self) -> bool {
        self.temperature.is_some()
            || self.top_p.is_some()
            || self.top_k.is_some()
            || self.min_p.is_some()
    }
}
|
||||||
|
|
||||||
|
pub struct ResolvedBackend {
|
||||||
|
chat: Box<dyn LlmClient>,
|
||||||
|
local: Box<dyn LlmClient>,
|
||||||
|
pub kind: BackendKind,
|
||||||
|
/// `true` when the chat model receives images directly (Ollama with
|
||||||
|
/// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
|
||||||
|
pub images_inline: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ResolvedBackend {
|
||||||
|
pub fn new(
|
||||||
|
chat: Box<dyn LlmClient>,
|
||||||
|
local: Box<dyn LlmClient>,
|
||||||
|
kind: BackendKind,
|
||||||
|
images_inline: bool,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
chat,
|
||||||
|
local,
|
||||||
|
kind,
|
||||||
|
images_inline,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn chat(&self) -> &dyn LlmClient {
|
||||||
|
self.chat.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn local(&self) -> &dyn LlmClient {
|
||||||
|
self.local.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn model(&self) -> &str {
|
||||||
|
self.chat.primary_model()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Labels are trimmed and matched case-insensitively; empty means local.
    #[test]
    fn parse_backend_kind() {
        assert_eq!(BackendKind::parse("local").unwrap(), BackendKind::Local);
        assert_eq!(BackendKind::parse("hybrid").unwrap(), BackendKind::Hybrid);
        assert_eq!(BackendKind::parse(" Local ").unwrap(), BackendKind::Local);
        assert_eq!(BackendKind::parse("HYBRID").unwrap(), BackendKind::Hybrid);
        assert_eq!(BackendKind::parse("").unwrap(), BackendKind::Local);
        assert!(BackendKind::parse("vllm").is_err());
    }

    /// The error names the normalized (trimmed, lowercased) label.
    #[test]
    fn parse_backend_kind_error_message() {
        let err = BackendKind::parse(" VLLM ").unwrap_err();
        assert_eq!(
            err.to_string(),
            "unknown backend 'vllm'; expected 'local' or 'hybrid'"
        );
    }

    #[test]
    fn backend_kind_as_str_roundtrips() {
        assert_eq!(
            BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
            BackendKind::Local
        );
        assert_eq!(
            BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
            BackendKind::Hybrid
        );
    }

    /// `Display` prints the same label as `as_str`.
    #[test]
    fn backend_kind_display_matches_as_str() {
        assert_eq!(BackendKind::Local.to_string(), "local");
        assert_eq!(BackendKind::Hybrid.to_string(), "hybrid");
    }

    #[test]
    fn sampling_overrides_has_sampling() {
        let empty = SamplingOverrides {
            model: None,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
        };
        assert!(!empty.has_sampling());

        let with_temp = SamplingOverrides {
            model: None,
            num_ctx: Some(4096),
            temperature: Some(0.7),
            top_p: None,
            top_k: None,
            min_p: None,
        };
        assert!(with_temp.has_sampling());
    }

    /// `model` and `num_ctx` on their own do not count as sampler settings.
    #[test]
    fn sampling_overrides_ignores_model_and_ctx() {
        let model_and_ctx_only = SamplingOverrides {
            model: Some("chat".to_string()),
            num_ctx: Some(4096),
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
        };
        assert!(!model_and_ctx_only.has_sampling());
    }
}
|
||||||
@@ -470,7 +470,16 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// GET /insights/models - List available models from both servers with capabilities
|
/// GET /insights/models - Local-backend models with capabilities. Returns
|
||||||
|
/// Ollama servers when `LLM_BACKEND=ollama` (default), or llama-swap slots
|
||||||
|
/// when `LLM_BACKEND=llamacpp`. Same envelope shape either way so the
|
||||||
|
/// client picker doesn't have to branch on backend kind.
|
||||||
|
///
|
||||||
|
/// For llama-swap: `models` comes verbatim from `LLAMA_SWAP_ALLOWED_MODELS`
|
||||||
|
/// (no live `/v1/models` probe), `has_vision` is reported as true for every
|
||||||
|
/// slot (all llama-swap models are assumed vision-capable), and `has_tool_calling` is
|
||||||
|
/// reported as true for every slot (llama-server is launched with `--jinja`
|
||||||
|
/// by convention — a misconfigured slot surfaces as a chat-call error).
|
||||||
#[get("/insights/models")]
|
#[get("/insights/models")]
|
||||||
pub async fn get_available_models_handler(
|
pub async fn get_available_models_handler(
|
||||||
_claims: Claims,
|
_claims: Claims,
|
||||||
@@ -478,6 +487,29 @@ pub async fn get_available_models_handler(
|
|||||||
) -> impl Responder {
|
) -> impl Responder {
|
||||||
log::debug!("Fetching available models with capabilities");
|
log::debug!("Fetching available models with capabilities");
|
||||||
|
|
||||||
|
if crate::ai::local_backend_is_llamacpp()
|
||||||
|
&& let Some(lc) = app_state.llamacpp.as_ref()
|
||||||
|
{
|
||||||
|
let models: Vec<ModelCapabilities> = app_state
|
||||||
|
.llamacpp_allowed_models
|
||||||
|
.iter()
|
||||||
|
.map(|name| ModelCapabilities {
|
||||||
|
name: name.clone(),
|
||||||
|
has_vision: true,
|
||||||
|
has_tool_calling: true,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let primary = ServerModels {
|
||||||
|
url: lc.base_url.clone(),
|
||||||
|
models,
|
||||||
|
default_model: lc.primary_model.clone(),
|
||||||
|
};
|
||||||
|
return HttpResponse::Ok().json(AvailableModelsResponse {
|
||||||
|
primary,
|
||||||
|
fallback: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
let ollama_client = &app_state.ollama;
|
let ollama_client = &app_state.ollama;
|
||||||
|
|
||||||
// Fetch models with capabilities from primary server
|
// Fetch models with capabilities from primary server
|
||||||
|
|||||||
@@ -6,10 +6,9 @@ use std::collections::HashMap;
|
|||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use tokio::sync::Mutex as TokioMutex;
|
use tokio::sync::Mutex as TokioMutex;
|
||||||
|
|
||||||
|
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
|
||||||
use crate::ai::insight_generator::InsightGenerator;
|
use crate::ai::insight_generator::InsightGenerator;
|
||||||
use crate::ai::llm_client::{ChatMessage, LlmClient, LlmStreamEvent, Tool};
|
use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
|
||||||
use crate::ai::ollama::OllamaClient;
|
|
||||||
use crate::ai::openrouter::OpenRouterClient;
|
|
||||||
use crate::database::InsightDao;
|
use crate::database::InsightDao;
|
||||||
use crate::database::models::InsertPhotoInsight;
|
use crate::database::models::InsertPhotoInsight;
|
||||||
use crate::otel::global_tracer;
|
use crate::otel::global_tracer;
|
||||||
@@ -91,8 +90,6 @@ pub struct ChatTurnResult {
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct InsightChatService {
|
pub struct InsightChatService {
|
||||||
generator: Arc<InsightGenerator>,
|
generator: Arc<InsightGenerator>,
|
||||||
ollama: OllamaClient,
|
|
||||||
openrouter: Option<Arc<OpenRouterClient>>,
|
|
||||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||||
chat_locks: ChatLockMap,
|
chat_locks: ChatLockMap,
|
||||||
}
|
}
|
||||||
@@ -100,15 +97,11 @@ pub struct InsightChatService {
|
|||||||
impl InsightChatService {
|
impl InsightChatService {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
generator: Arc<InsightGenerator>,
|
generator: Arc<InsightGenerator>,
|
||||||
ollama: OllamaClient,
|
|
||||||
openrouter: Option<Arc<OpenRouterClient>>,
|
|
||||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||||
chat_locks: ChatLockMap,
|
chat_locks: ChatLockMap,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
generator,
|
generator,
|
||||||
ollama,
|
|
||||||
openrouter,
|
|
||||||
insight_dao,
|
insight_dao,
|
||||||
chat_locks,
|
chat_locks,
|
||||||
}
|
}
|
||||||
@@ -303,24 +296,10 @@ impl InsightChatService {
|
|||||||
.map(|s| s.trim().to_lowercase())
|
.map(|s| s.trim().to_lowercase())
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.unwrap_or_else(|| stored_backend.clone());
|
.unwrap_or_else(|| stored_backend.clone());
|
||||||
if !matches!(effective_backend.as_str(), "local" | "hybrid") {
|
validate_cross_replay(&stored_backend, &effective_backend)?;
|
||||||
bail!(
|
let kind = BackendKind::parse(&effective_backend)?;
|
||||||
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
span.set_attribute(KeyValue::new("backend", kind.as_str()));
|
||||||
effective_backend
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if stored_backend == "local" && effective_backend == "hybrid" {
|
|
||||||
bail!(
|
|
||||||
"switching from local to hybrid mid-chat isn't supported yet; \
|
|
||||||
regenerate the insight in hybrid mode if you want OpenRouter chat"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let is_hybrid = effective_backend == "hybrid";
|
|
||||||
span.set_attribute(KeyValue::new("backend", effective_backend.clone()));
|
|
||||||
|
|
||||||
// 4. Build the chat backend client. Ollama in local mode, a freshly
|
|
||||||
// cloned OpenRouter client in hybrid mode (clone so per-request
|
|
||||||
// sampling/model overrides don't leak into shared state).
|
|
||||||
let max_iterations = req
|
let max_iterations = req
|
||||||
.max_iterations
|
.max_iterations
|
||||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||||
@@ -328,91 +307,38 @@ impl InsightChatService {
|
|||||||
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
||||||
|
|
||||||
let stored_model = insight.model_version.clone();
|
let stored_model = insight.model_version.clone();
|
||||||
let custom_model = req
|
let overrides = SamplingOverrides {
|
||||||
.model
|
model: req
|
||||||
.clone()
|
.model
|
||||||
.or_else(|| Some(stored_model.clone()))
|
.clone()
|
||||||
.filter(|m| !m.is_empty());
|
.or_else(|| Some(stored_model.clone()))
|
||||||
|
.filter(|m| !m.is_empty()),
|
||||||
let mut ollama_client = self.ollama.clone();
|
num_ctx: req.num_ctx,
|
||||||
let mut openrouter_client: Option<OpenRouterClient> = None;
|
temperature: req.temperature,
|
||||||
|
top_p: req.top_p,
|
||||||
if is_hybrid {
|
top_k: req.top_k,
|
||||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
min_p: req.min_p,
|
||||||
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: OpenRouterClient = (**arc).clone();
|
|
||||||
if let Some(ref m) = custom_model {
|
|
||||||
c.primary_model = m.clone();
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
openrouter_client = Some(c);
|
|
||||||
} else {
|
|
||||||
// Local-mode model swap. Build a new client when the chat model
|
|
||||||
// differs from the configured one (mirrors the agentic pattern).
|
|
||||||
if let Some(ref m) = custom_model
|
|
||||||
&& m != &self.ollama.primary_model
|
|
||||||
{
|
|
||||||
ollama_client = OllamaClient::new(
|
|
||||||
self.ollama.primary_url.clone(),
|
|
||||||
self.ollama.fallback_url.clone(),
|
|
||||||
m.clone(),
|
|
||||||
Some(m.clone()),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
ollama_client.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let chat_backend: &dyn LlmClient = if let Some(ref c) = openrouter_client {
|
|
||||||
c
|
|
||||||
} else {
|
|
||||||
&ollama_client
|
|
||||||
};
|
};
|
||||||
let model_used = chat_backend.primary_model().to_string();
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
|
let model_used = backend.model().to_string();
|
||||||
span.set_attribute(KeyValue::new("model", model_used.clone()));
|
span.set_attribute(KeyValue::new("model", model_used.clone()));
|
||||||
|
|
||||||
// 5. Decide vision + tool set. In hybrid we always omit
|
// 5. Decide vision + tool set. In hybrid (describe-then-inline) mode
|
||||||
// `describe_photo` (matches the original generation flow). In
|
// we omit `describe_photo`. Otherwise trust the stored history:
|
||||||
// local we trust the stored history's first-user shape: if it
|
// if the first user message carries images, describe_photo stays.
|
||||||
// carries `images`, the original model was vision-capable, and
|
|
||||||
// we keep `describe_photo` available.
|
|
||||||
let local_first_user_has_image = messages
|
let local_first_user_has_image = messages
|
||||||
.iter()
|
.iter()
|
||||||
.find(|m| m.role == "user")
|
.find(|m| m.role == "user")
|
||||||
.and_then(|m| m.images.as_ref())
|
.and_then(|m| m.images.as_ref())
|
||||||
.map(|imgs| !imgs.is_empty())
|
.map(|imgs| !imgs.is_empty())
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
let offer_describe_tool = !is_hybrid && local_first_user_has_image;
|
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||||
// current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision
|
|
||||||
// and probes the per-table presence flags. Pass `offer_describe_tool`
|
|
||||||
// directly — the `!is_hybrid && local_first_user_has_image` decision
|
|
||||||
// is the chat-path's vision predicate.
|
|
||||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||||
offer_describe_tool,
|
offer_describe_tool,
|
||||||
Some((req.user_id, &active_persona)),
|
Some((req.user_id, &active_persona)),
|
||||||
);
|
);
|
||||||
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
||||||
|
|
||||||
// Image base64 only needed when describe_photo is on the menu. Load
|
|
||||||
// lazily to avoid disk IO when the loop never invokes it.
|
|
||||||
let image_base64: Option<String> = if offer_describe_tool {
|
let image_base64: Option<String> = if offer_describe_tool {
|
||||||
self.generator.load_image_as_base64(&normalized).ok()
|
self.generator.load_image_as_base64(&normalized).ok()
|
||||||
} else {
|
} else {
|
||||||
@@ -461,13 +387,13 @@ impl InsightChatService {
|
|||||||
iterations_used = iteration + 1;
|
iterations_used = iteration + 1;
|
||||||
log::info!("Chat iteration {}/{}", iterations_used, max_iterations);
|
log::info!("Chat iteration {}/{}", iterations_used, max_iterations);
|
||||||
|
|
||||||
let (response, prompt_tokens, eval_tokens) = chat_backend
|
let (response, prompt_tokens, eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), tools.clone())
|
.chat_with_tools(messages.clone(), tools.clone())
|
||||||
.await?;
|
.await?;
|
||||||
last_prompt_eval_count = prompt_tokens;
|
last_prompt_eval_count = prompt_tokens;
|
||||||
last_eval_count = eval_tokens;
|
last_eval_count = eval_tokens;
|
||||||
|
|
||||||
// Ollama rejects non-object tool-call arguments on replay.
|
|
||||||
let mut response = response;
|
let mut response = response;
|
||||||
if let Some(ref mut tcs) = response.tool_calls {
|
if let Some(ref mut tcs) = response.tool_calls {
|
||||||
for tc in tcs.iter_mut() {
|
for tc in tcs.iter_mut() {
|
||||||
@@ -495,13 +421,11 @@ impl InsightChatService {
|
|||||||
.execute_tool(
|
.execute_tool(
|
||||||
&tool_call.function.name,
|
&tool_call.function.name,
|
||||||
&tool_call.function.arguments,
|
&tool_call.function.arguments,
|
||||||
&ollama_client,
|
&backend,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
&model_used,
|
|
||||||
&effective_backend,
|
|
||||||
&loop_cx,
|
&loop_cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -515,8 +439,6 @@ impl InsightChatService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if final_content.is_empty() {
|
if final_content.is_empty() {
|
||||||
// The model never produced a final answer; ask once more without
|
|
||||||
// tools to force a textual reply.
|
|
||||||
log::info!(
|
log::info!(
|
||||||
"Chat loop exhausted after {} iterations, requesting final answer",
|
"Chat loop exhausted after {} iterations, requesting final answer",
|
||||||
iterations_used
|
iterations_used
|
||||||
@@ -524,7 +446,8 @@ impl InsightChatService {
|
|||||||
messages.push(ChatMessage::user(
|
messages.push(ChatMessage::user(
|
||||||
"Please write your final answer now without calling any more tools.",
|
"Please write your final answer now without calling any more tools.",
|
||||||
));
|
));
|
||||||
let (final_response, prompt_tokens, eval_tokens) = chat_backend
|
let (final_response, prompt_tokens, eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), vec![])
|
.chat_with_tools(messages.clone(), vec![])
|
||||||
.await?;
|
.await?;
|
||||||
last_prompt_eval_count = prompt_tokens;
|
last_prompt_eval_count = prompt_tokens;
|
||||||
@@ -560,7 +483,8 @@ impl InsightChatService {
|
|||||||
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
||||||
final_content
|
final_content
|
||||||
);
|
);
|
||||||
let title_raw = chat_backend
|
let title_raw = backend
|
||||||
|
.chat()
|
||||||
.generate(
|
.generate(
|
||||||
&title_prompt,
|
&title_prompt,
|
||||||
Some(
|
Some(
|
||||||
@@ -585,7 +509,7 @@ impl InsightChatService {
|
|||||||
model_version: model_used.clone(),
|
model_version: model_used.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages: Some(json),
|
training_messages: Some(json),
|
||||||
backend: effective_backend.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
@@ -610,7 +534,7 @@ impl InsightChatService {
|
|||||||
prompt_eval_count: last_prompt_eval_count,
|
prompt_eval_count: last_prompt_eval_count,
|
||||||
eval_count: last_eval_count,
|
eval_count: last_eval_count,
|
||||||
amended_insight_id,
|
amended_insight_id,
|
||||||
backend_used: effective_backend,
|
backend_used: kind.as_str().to_string(),
|
||||||
model_used,
|
model_used,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -799,19 +723,8 @@ impl InsightChatService {
|
|||||||
.map(|s| s.trim().to_lowercase())
|
.map(|s| s.trim().to_lowercase())
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.unwrap_or_else(|| stored_backend.clone());
|
.unwrap_or_else(|| stored_backend.clone());
|
||||||
if !matches!(effective_backend.as_str(), "local" | "hybrid") {
|
let kind = BackendKind::parse(&effective_backend)?;
|
||||||
bail!(
|
validate_cross_replay(&stored_backend, kind.as_str())?;
|
||||||
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
|
||||||
effective_backend
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if stored_backend == "local" && effective_backend == "hybrid" {
|
|
||||||
bail!(
|
|
||||||
"switching from local to hybrid mid-chat isn't supported yet; \
|
|
||||||
regenerate the insight in hybrid mode if you want OpenRouter chat"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let is_hybrid = effective_backend == "hybrid";
|
|
||||||
|
|
||||||
let max_iterations = req
|
let max_iterations = req
|
||||||
.max_iterations
|
.max_iterations
|
||||||
@@ -819,27 +732,31 @@ impl InsightChatService {
|
|||||||
.clamp(1, env_max_iterations());
|
.clamp(1, env_max_iterations());
|
||||||
|
|
||||||
let stored_model = insight.model_version.clone();
|
let stored_model = insight.model_version.clone();
|
||||||
let custom_model = req
|
let overrides = SamplingOverrides {
|
||||||
.model
|
model: req
|
||||||
.clone()
|
.model
|
||||||
.or_else(|| Some(stored_model.clone()))
|
.clone()
|
||||||
.filter(|m| !m.is_empty());
|
.or_else(|| Some(stored_model.clone()))
|
||||||
|
.filter(|m| !m.is_empty()),
|
||||||
|
num_ctx: req.num_ctx,
|
||||||
|
temperature: req.temperature,
|
||||||
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
};
|
||||||
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
|
let model_used = backend.model().to_string();
|
||||||
|
|
||||||
let (chat_backend_holder, ollama_client) =
|
// Tool set — images_inline mode + first user turn carries an image →
|
||||||
self.build_chat_clients(is_hybrid, custom_model.as_deref(), &req)?;
|
// offer describe_photo. Describe-then-inline mode (hybrid only):
|
||||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
// visual description was inlined at bootstrap, no describe tool needed.
|
||||||
let model_used = chat_backend.primary_model().to_string();
|
|
||||||
|
|
||||||
// Tool set — local mode + first user turn carries an image →
|
|
||||||
// offer describe_photo. Hybrid: visual description was inlined
|
|
||||||
// when the insight was bootstrapped, no describe tool needed.
|
|
||||||
let local_first_user_has_image = messages
|
let local_first_user_has_image = messages
|
||||||
.iter()
|
.iter()
|
||||||
.find(|m| m.role == "user")
|
.find(|m| m.role == "user")
|
||||||
.and_then(|m| m.images.as_ref())
|
.and_then(|m| m.images.as_ref())
|
||||||
.map(|imgs| !imgs.is_empty())
|
.map(|imgs| !imgs.is_empty())
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
let offer_describe_tool = !is_hybrid && local_first_user_has_image;
|
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||||
offer_describe_tool,
|
offer_describe_tool,
|
||||||
Some((req.user_id, &active_persona)),
|
Some((req.user_id, &active_persona)),
|
||||||
@@ -870,16 +787,13 @@ impl InsightChatService {
|
|||||||
|
|
||||||
let outcome = self
|
let outcome = self
|
||||||
.run_streaming_agentic_loop(
|
.run_streaming_agentic_loop(
|
||||||
chat_backend,
|
&backend,
|
||||||
&ollama_client,
|
|
||||||
&mut messages,
|
&mut messages,
|
||||||
tools,
|
tools,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
&model_used,
|
|
||||||
&effective_backend,
|
|
||||||
max_iterations,
|
max_iterations,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
@@ -907,7 +821,8 @@ impl InsightChatService {
|
|||||||
|
|
||||||
let mut amended_insight_id: Option<i32> = None;
|
let mut amended_insight_id: Option<i32> = None;
|
||||||
if req.amend {
|
if req.amend {
|
||||||
let title = self.generate_title(chat_backend, &final_content).await?;
|
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
|
||||||
|
let final_content = body;
|
||||||
|
|
||||||
// Amended rows intentionally do not inherit the parent's
|
// Amended rows intentionally do not inherit the parent's
|
||||||
// `fewshot_source_ids`. The parent's few-shot influence is still
|
// `fewshot_source_ids`. The parent's few-shot influence is still
|
||||||
@@ -923,7 +838,7 @@ impl InsightChatService {
|
|||||||
model_version: model_used.clone(),
|
model_version: model_used.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages: Some(json),
|
training_messages: Some(json),
|
||||||
backend: effective_backend.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
@@ -949,7 +864,7 @@ impl InsightChatService {
|
|||||||
eval_tokens: last_eval_count,
|
eval_tokens: last_eval_count,
|
||||||
num_ctx: req.num_ctx,
|
num_ctx: req.num_ctx,
|
||||||
amended_insight_id,
|
amended_insight_id,
|
||||||
backend_used: effective_backend,
|
backend_used: kind.as_str().to_string(),
|
||||||
model_used,
|
model_used,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
@@ -975,18 +890,23 @@ impl InsightChatService {
|
|||||||
.filter(|s| !s.trim().is_empty())
|
.filter(|s| !s.trim().is_empty())
|
||||||
.unwrap_or_else(|| "default".to_string());
|
.unwrap_or_else(|| "default".to_string());
|
||||||
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
|
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
|
||||||
let is_hybrid = effective_backend == "hybrid";
|
let kind = BackendKind::parse(&effective_backend)?;
|
||||||
|
|
||||||
let max_iterations = req
|
let max_iterations = req
|
||||||
.max_iterations
|
.max_iterations
|
||||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||||
.clamp(1, env_max_iterations());
|
.clamp(1, env_max_iterations());
|
||||||
|
|
||||||
let custom_model = req.model.clone().filter(|m| !m.is_empty());
|
let overrides = SamplingOverrides {
|
||||||
let (chat_backend_holder, ollama_client) =
|
model: req.model.clone().filter(|m| !m.is_empty()),
|
||||||
self.build_chat_clients(is_hybrid, custom_model.as_deref(), &req)?;
|
num_ctx: req.num_ctx,
|
||||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
temperature: req.temperature,
|
||||||
let model_used = chat_backend.primary_model().to_string();
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
};
|
||||||
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
|
let model_used = backend.model().to_string();
|
||||||
|
|
||||||
// Load image bytes once. RAW preview fallback is handled inside
|
// Load image bytes once. RAW preview fallback is handled inside
|
||||||
// load_image_as_base64. Errors degrade silently — a chat that
|
// load_image_as_base64. Errors degrade silently — a chat that
|
||||||
@@ -1007,18 +927,17 @@ impl InsightChatService {
|
|||||||
_ => None,
|
_ => None,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Hybrid backend: pre-describe the image via local Ollama vision
|
// Describe-then-inline (hybrid only): pre-describe the image so a
|
||||||
// so OpenRouter chat models (which can't see images directly) get
|
// text-only chat model gets the visual description inline.
|
||||||
// the visual description as text. Mirrors the same pre-describe
|
// images_inline backends send images directly to the chat model.
|
||||||
// pass that `generate_agentic_insight_for_photo` does for hybrid.
|
let visual_block = if !backend.images_inline {
|
||||||
let visual_block = if is_hybrid {
|
|
||||||
match image_base64.as_deref() {
|
match image_base64.as_deref() {
|
||||||
Some(b64) => match self.ollama.describe_image(b64).await {
|
Some(b64) => match backend.local().describe_image(b64).await {
|
||||||
Ok(desc) => {
|
Ok(desc) => {
|
||||||
format!("Visual description (from local vision model):\n{}\n", desc)
|
format!("Visual description (from local vision model):\n{}\n", desc)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::warn!("hybrid bootstrap: local describe_image failed: {}", e);
|
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
|
||||||
String::new()
|
String::new()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -1028,10 +947,10 @@ impl InsightChatService {
|
|||||||
String::new()
|
String::new()
|
||||||
};
|
};
|
||||||
|
|
||||||
// Tool gates. Local + image present → expose describe_photo so
|
// Tool gates. images_inline + image present → expose describe_photo so
|
||||||
// the chat model can re-look at the photo on demand. Hybrid:
|
// the chat model can re-look at the photo on demand. Non-inline:
|
||||||
// already inlined, no tool needed.
|
// already inlined, no tool needed.
|
||||||
let offer_describe_tool = !is_hybrid && image_base64.is_some();
|
let offer_describe_tool = backend.images_inline && image_base64.is_some();
|
||||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||||
offer_describe_tool,
|
offer_describe_tool,
|
||||||
Some((req.user_id, &active_persona)),
|
Some((req.user_id, &active_persona)),
|
||||||
@@ -1057,23 +976,22 @@ impl InsightChatService {
|
|||||||
);
|
);
|
||||||
let system_msg = ChatMessage::system(system_content);
|
let system_msg = ChatMessage::system(system_content);
|
||||||
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
||||||
if !is_hybrid && let Some(ref img) = image_base64 {
|
if backend.images_inline {
|
||||||
user_msg.images = Some(vec![img.clone()]);
|
if let Some(ref img) = image_base64 {
|
||||||
|
user_msg.images = Some(vec![img.clone()]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let mut messages = vec![system_msg, user_msg];
|
let mut messages = vec![system_msg, user_msg];
|
||||||
|
|
||||||
let outcome = self
|
let outcome = self
|
||||||
.run_streaming_agentic_loop(
|
.run_streaming_agentic_loop(
|
||||||
chat_backend,
|
&backend,
|
||||||
&ollama_client,
|
|
||||||
&mut messages,
|
&mut messages,
|
||||||
tools,
|
tools,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
&model_used,
|
|
||||||
&effective_backend,
|
|
||||||
max_iterations,
|
max_iterations,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
@@ -1086,7 +1004,7 @@ impl InsightChatService {
|
|||||||
final_content,
|
final_content,
|
||||||
} = outcome;
|
} = outcome;
|
||||||
|
|
||||||
let title = self.generate_title(chat_backend, &final_content).await?;
|
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
|
||||||
|
|
||||||
let json = serde_json::to_string(&messages)
|
let json = serde_json::to_string(&messages)
|
||||||
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
||||||
@@ -1094,12 +1012,12 @@ impl InsightChatService {
|
|||||||
library_id: req.library_id,
|
library_id: req.library_id,
|
||||||
file_path: normalized.clone(),
|
file_path: normalized.clone(),
|
||||||
title,
|
title,
|
||||||
summary: final_content,
|
summary: body,
|
||||||
generated_at: Utc::now().timestamp(),
|
generated_at: Utc::now().timestamp(),
|
||||||
model_version: model_used.clone(),
|
model_version: model_used.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages: Some(json),
|
training_messages: Some(json),
|
||||||
backend: effective_backend.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
@@ -1122,7 +1040,7 @@ impl InsightChatService {
|
|||||||
eval_tokens: last_eval_count,
|
eval_tokens: last_eval_count,
|
||||||
num_ctx: req.num_ctx,
|
num_ctx: req.num_ctx,
|
||||||
amended_insight_id: Some(stored.id),
|
amended_insight_id: Some(stored.id),
|
||||||
backend_used: effective_backend,
|
backend_used: kind.as_str().to_string(),
|
||||||
model_used,
|
model_used,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
@@ -1130,105 +1048,19 @@ impl InsightChatService {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set up chat clients (Ollama + optional OpenRouter) shared by
|
|
||||||
/// bootstrap and continuation. Returns the chat-side backend client
|
|
||||||
/// (boxed because hybrid and local return different concrete types)
|
|
||||||
/// and the Ollama client used for describe-image / local tool calls.
|
|
||||||
fn build_chat_clients(
|
|
||||||
&self,
|
|
||||||
is_hybrid: bool,
|
|
||||||
custom_model: Option<&str>,
|
|
||||||
req: &ChatTurnRequest,
|
|
||||||
) -> Result<(Box<dyn LlmClient>, OllamaClient)> {
|
|
||||||
let mut ollama_client = self.ollama.clone();
|
|
||||||
|
|
||||||
if is_hybrid {
|
|
||||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
|
||||||
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: OpenRouterClient = (**arc).clone();
|
|
||||||
if let Some(m) = custom_model {
|
|
||||||
c.primary_model = m.to_string();
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
return Ok((Box::new(c), ollama_client));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(m) = custom_model
|
|
||||||
&& m != self.ollama.primary_model
|
|
||||||
{
|
|
||||||
ollama_client = OllamaClient::new(
|
|
||||||
self.ollama.primary_url.clone(),
|
|
||||||
self.ollama.fallback_url.clone(),
|
|
||||||
m.to_string(),
|
|
||||||
Some(m.to_string()),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
ollama_client.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
Ok((Box::new(ollama_client.clone()), ollama_client))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate a short title via the same chat backend so voice stays
|
|
||||||
/// consistent with the body. Mirrors generate_agentic_insight_for_photo's
|
|
||||||
/// titling pass.
|
|
||||||
async fn generate_title(
|
|
||||||
&self,
|
|
||||||
chat_backend: &dyn LlmClient,
|
|
||||||
final_content: &str,
|
|
||||||
) -> Result<String> {
|
|
||||||
let title_prompt = format!(
|
|
||||||
"Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\n\
|
|
||||||
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
|
||||||
final_content
|
|
||||||
);
|
|
||||||
let title_raw = chat_backend
|
|
||||||
.generate(
|
|
||||||
&title_prompt,
|
|
||||||
Some(
|
|
||||||
"You are my long term memory assistant. Use only the information provided. Do not invent details.",
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
Ok(title_raw.trim().trim_matches('"').to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Drive the agentic loop with streaming SSE events. Shared between
|
/// Drive the agentic loop with streaming SSE events. Shared between
|
||||||
/// bootstrap and continuation. Mutates `messages` in place (response
|
/// bootstrap and continuation. Mutates `messages` in place (response
|
||||||
/// turns + tool results are appended) and returns counters + the
|
/// turns + tool results are appended) and returns counters + the
|
||||||
/// final assistant content.
|
/// final assistant content.
|
||||||
async fn run_streaming_agentic_loop(
|
async fn run_streaming_agentic_loop(
|
||||||
&self,
|
&self,
|
||||||
chat_backend: &dyn LlmClient,
|
backend: &ResolvedBackend,
|
||||||
ollama_client: &OllamaClient,
|
|
||||||
messages: &mut Vec<ChatMessage>,
|
messages: &mut Vec<ChatMessage>,
|
||||||
tools: Vec<Tool>,
|
tools: Vec<Tool>,
|
||||||
image_base64: &Option<String>,
|
image_base64: &Option<String>,
|
||||||
normalized: &str,
|
normalized: &str,
|
||||||
user_id: i32,
|
user_id: i32,
|
||||||
active_persona: &str,
|
active_persona: &str,
|
||||||
// Provenance — stamped onto any store_fact tool call made
|
|
||||||
// during this loop. Mirrors the non-streaming chat path.
|
|
||||||
model_used: &str,
|
|
||||||
effective_backend: &str,
|
|
||||||
max_iterations: usize,
|
max_iterations: usize,
|
||||||
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
||||||
) -> Result<AgenticLoopOutcome> {
|
) -> Result<AgenticLoopOutcome> {
|
||||||
@@ -1247,7 +1079,8 @@ impl InsightChatService {
|
|||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let mut stream = chat_backend
|
let mut stream = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools_stream(messages.clone(), tools.clone())
|
.chat_with_tools_stream(messages.clone(), tools.clone())
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -1304,13 +1137,11 @@ impl InsightChatService {
|
|||||||
.execute_tool(
|
.execute_tool(
|
||||||
&tool_call.function.name,
|
&tool_call.function.name,
|
||||||
&tool_call.function.arguments,
|
&tool_call.function.arguments,
|
||||||
ollama_client,
|
backend,
|
||||||
image_base64,
|
image_base64,
|
||||||
normalized,
|
normalized,
|
||||||
user_id,
|
user_id,
|
||||||
active_persona,
|
active_persona,
|
||||||
model_used,
|
|
||||||
effective_backend,
|
|
||||||
&cx,
|
&cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -1345,7 +1176,8 @@ impl InsightChatService {
|
|||||||
messages.push(ChatMessage::user(
|
messages.push(ChatMessage::user(
|
||||||
"Please write your final answer now without calling any more tools.",
|
"Please write your final answer now without calling any more tools.",
|
||||||
));
|
));
|
||||||
let mut stream = chat_backend
|
let mut stream = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools_stream(messages.clone(), vec![])
|
.chat_with_tools_stream(messages.clone(), vec![])
|
||||||
.await?;
|
.await?;
|
||||||
let mut final_message: Option<ChatMessage> = None;
|
let mut final_message: Option<ChatMessage> = None;
|
||||||
@@ -1459,6 +1291,34 @@ fn resolve_date_taken_for_context(
|
|||||||
.map(|dt| dt.format("%Y-%m-%d").to_string())
|
.map(|dt| dt.format("%Y-%m-%d").to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Validate a stored→effective backend transition for a chat continuation.
|
||||||
|
/// Continuation runs against a transcript that was generated with a specific
|
||||||
|
/// backend; the only blocked transition is `local → hybrid`, because the
|
||||||
|
/// stored transcript has images embedded in the first user message and the
|
||||||
|
/// hybrid path (OpenRouter chat with describe-then-inline) can't replay
|
||||||
|
/// raw image bytes through OpenRouter consistently across providers.
|
||||||
|
/// `hybrid → local` is allowed (the inlined description replays verbatim
|
||||||
|
/// as text).
|
||||||
|
///
|
||||||
|
/// Whether "local" routes through Ollama or llama-swap is decided at
|
||||||
|
/// startup by `LLM_BACKEND`; both share the same transcript shape from
|
||||||
|
/// the chat-replay perspective.
|
||||||
|
fn validate_cross_replay(stored: &str, effective: &str) -> Result<()> {
|
||||||
|
if !matches!(effective, "local" | "hybrid") {
|
||||||
|
bail!(
|
||||||
|
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
||||||
|
effective
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if stored == "local" && effective == "hybrid" {
|
||||||
|
bail!(
|
||||||
|
"switching from local to hybrid mid-chat isn't supported; \
|
||||||
|
regenerate the insight in hybrid mode if you want OpenRouter chat"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Pick the backend label for bootstrap. Bootstrap has no stored insight
|
/// Pick the backend label for bootstrap. Bootstrap has no stored insight
|
||||||
/// to defer to (that's continuation's behaviour), so the default is
|
/// to defer to (that's continuation's behaviour), so the default is
|
||||||
/// `"local"`. Returns an error if the supplied label is non-empty but
|
/// `"local"`. Returns an error if the supplied label is non-empty but
|
||||||
@@ -2082,10 +1942,40 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_backend_rejects_unknown_label() {
|
fn bootstrap_backend_rejects_unknown_label() {
|
||||||
let err = resolve_bootstrap_backend(Some("openrouter")).unwrap_err();
|
// `llamacpp` is no longer a per-request backend value — it's chosen
|
||||||
let msg = format!("{}", err);
|
// at deploy time via `LLM_BACKEND`.
|
||||||
assert!(msg.contains("unknown backend"));
|
for label in &["openrouter", "llamacpp", "ollama"] {
|
||||||
assert!(msg.contains("openrouter"));
|
let err = resolve_bootstrap_backend(Some(label)).unwrap_err();
|
||||||
|
let msg = format!("{}", err);
|
||||||
|
assert!(msg.contains("unknown backend"), "label={}", label);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A transcript bootstrapped as `local` must not be continued as `hybrid`.
#[test]
fn cross_replay_rejects_local_to_hybrid() {
    let message = validate_cross_replay("local", "hybrid")
        .unwrap_err()
        .to_string();
    assert!(message.contains("local to hybrid"));
}
|
||||||
|
|
||||||
|
/// Every stored → effective pair other than `local → hybrid` is accepted.
#[test]
fn cross_replay_allows_supported_transitions() {
    let allowed = [
        ("local", "local"),
        ("hybrid", "hybrid"),
        // Hybrid → local replays the inlined description as plain text.
        ("hybrid", "local"),
    ];
    for (stored, effective) in allowed {
        assert!(
            validate_cross_replay(stored, effective).is_ok(),
            "{} -> {}",
            stored,
            effective
        );
    }
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cross_replay_rejects_unknown_effective() {
|
||||||
|
// Both "openrouter" and the former "llamacpp" value are unknown now.
|
||||||
|
for label in &["openrouter", "llamacpp"] {
|
||||||
|
let err = validate_cross_replay("local", label).unwrap_err();
|
||||||
|
assert!(
|
||||||
|
format!("{}", err).contains("unknown backend"),
|
||||||
|
"label={}",
|
||||||
|
label
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
1143
src/ai/llamacpp.rs
Normal file
1143
src/ai/llamacpp.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,10 +1,12 @@
|
|||||||
pub mod apollo_client;
|
pub mod apollo_client;
|
||||||
|
pub mod backend;
|
||||||
pub mod clip_client;
|
pub mod clip_client;
|
||||||
pub mod daily_summary_job;
|
pub mod daily_summary_job;
|
||||||
pub mod face_client;
|
pub mod face_client;
|
||||||
pub mod handlers;
|
pub mod handlers;
|
||||||
pub mod insight_chat;
|
pub mod insight_chat;
|
||||||
pub mod insight_generator;
|
pub mod insight_generator;
|
||||||
|
pub mod llamacpp;
|
||||||
pub mod llm_client;
|
pub mod llm_client;
|
||||||
pub mod ollama;
|
pub mod ollama;
|
||||||
pub mod openrouter;
|
pub mod openrouter;
|
||||||
@@ -23,6 +25,7 @@ pub use handlers::{
|
|||||||
get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
|
get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
|
||||||
};
|
};
|
||||||
pub use insight_generator::InsightGenerator;
|
pub use insight_generator::InsightGenerator;
|
||||||
|
pub use llamacpp::LlamaCppClient;
|
||||||
#[allow(unused_imports)]
|
#[allow(unused_imports)]
|
||||||
pub use llm_client::{
|
pub use llm_client::{
|
||||||
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
|
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
|
||||||
@@ -38,3 +41,87 @@ pub use sms_client::{SmsApiClient, SmsMessage};
|
|||||||
pub fn user_display_name() -> String {
|
pub fn user_display_name() -> String {
|
||||||
std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string())
|
std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One switch for the "local" LLM stack: when `LLM_BACKEND=llamacpp` is
/// set, chat / vision describe / embeddings all route through llama-swap
/// instead of Ollama. Any other value (including unset, the default) is
/// Ollama. This is intentionally global — embeddings must be drawn from
/// a single source or similarity search across the index breaks (mixed
/// vector spaces, possibly mixed dims). The `backend=hybrid` per-request
/// override remains orthogonal: it always sends chat to OpenRouter, and
/// uses `LLM_BACKEND` for the describe-then-inline vision pass.
///
/// The value is compared after trimming surrounding whitespace and without
/// regard to ASCII case, so `LlamaCpp` and ` llamacpp ` both select
/// llama-swap. An unset or non-Unicode variable reads as "not llamacpp".
pub fn local_backend_is_llamacpp() -> bool {
    // `eq_ignore_ascii_case` compares in place; lowercasing first would
    // allocate a fresh `String` on every call just to throw it away.
    std::env::var("LLM_BACKEND")
        .map(|raw| raw.trim().eq_ignore_ascii_case("llamacpp"))
        .unwrap_or(false)
}
|
||||||
|
|
||||||
|
/// Embed one string via the configured local backend. Routes through
|
||||||
|
/// llama-swap when `LLM_BACKEND=llamacpp` (and a client is configured),
|
||||||
|
/// else Ollama. Returns the single embedding vector. See
|
||||||
|
/// [`local_backend_is_llamacpp`] for the rationale on consistency.
|
||||||
|
pub async fn embed_one(
|
||||||
|
ollama: &OllamaClient,
|
||||||
|
llamacpp: Option<&LlamaCppClient>,
|
||||||
|
text: &str,
|
||||||
|
) -> anyhow::Result<Vec<f32>> {
|
||||||
|
if local_backend_is_llamacpp() {
|
||||||
|
if let Some(lc) = llamacpp {
|
||||||
|
let mut vecs = <LlamaCppClient as LlmClient>::generate_embeddings(lc, &[text]).await?;
|
||||||
|
return vecs
|
||||||
|
.pop()
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("llama-swap returned no embeddings"));
|
||||||
|
}
|
||||||
|
log::warn!(
|
||||||
|
"LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured; falling back to Ollama embeddings"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ollama.generate_embedding(text).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tests for the `LLM_BACKEND` environment switch.
///
/// The process environment is global and the test harness runs tests on
/// several threads, so every mutation goes through [`with_env`], which
/// serializes access and restores the previous value even when the test body
/// panics.
#[cfg(test)]
mod env_dispatch_tests {
    use super::*;
    use std::sync::{Mutex, PoisonError};

    /// Serializes all environment mutations made through [`with_env`].
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Puts an environment variable back to its prior state when dropped.
    struct EnvRestore<'a> {
        key: &'a str,
        previous: Option<String>,
    }

    impl Drop for EnvRestore<'_> {
        fn drop(&mut self) {
            match self.previous.take() {
                // SAFETY: the guard is dropped while `ENV_LOCK` is still held,
                // so no other `with_env` caller touches the environment
                // concurrently.
                Some(v) => unsafe { std::env::set_var(self.key, v) },
                // SAFETY: as above.
                None => unsafe { std::env::remove_var(self.key) },
            }
        }
    }

    /// Run `f` with `key` set to `val` (or removed when `val` is `None`),
    /// then restore whatever value `key` had before.
    fn with_env<F: FnOnce()>(key: &str, val: Option<&str>, f: F) {
        // A panic in another test poisons the lock; the guarded data is `()`,
        // so recovering the guard is always sound and keeps failures isolated.
        let _serial = ENV_LOCK.lock().unwrap_or_else(PoisonError::into_inner);
        // Declared after the lock so it is dropped (and restores) first.
        let _restore = EnvRestore {
            key,
            previous: std::env::var(key).ok(),
        };
        match val {
            // SAFETY: `ENV_LOCK` is held, so this helper's callers cannot
            // race each other on the environment.
            Some(v) => unsafe { std::env::set_var(key, v) },
            // SAFETY: as above.
            None => unsafe { std::env::remove_var(key) },
        }
        f();
    }

    #[test]
    fn llm_backend_defaults_to_ollama() {
        with_env("LLM_BACKEND", None, || {
            assert!(!local_backend_is_llamacpp());
        });
    }

    #[test]
    fn llm_backend_llamacpp_case_insensitive() {
        with_env("LLM_BACKEND", Some("LlamaCpp"), || {
            assert!(local_backend_is_llamacpp());
        });
        with_env("LLM_BACKEND", Some(" llamacpp "), || {
            assert!(local_backend_is_llamacpp());
        });
    }

    #[test]
    fn llm_backend_unknown_value_is_ollama() {
        with_env("LLM_BACKEND", Some("vllm"), || {
            assert!(!local_backend_is_llamacpp());
        });
    }
}
|
||||||
|
|||||||
@@ -281,6 +281,9 @@ impl SmsApiClient {
|
|||||||
if let Some(cid) = params.contact_id {
|
if let Some(cid) = params.contact_id {
|
||||||
url.push_str(&format!("&contact_id={}", cid));
|
url.push_str(&format!("&contact_id={}", cid));
|
||||||
}
|
}
|
||||||
|
if let Some(ref c) = params.contact {
|
||||||
|
url.push_str(&format!("&contact={}", urlencoding::encode(c)));
|
||||||
|
}
|
||||||
if let Some(off) = params.offset {
|
if let Some(off) = params.offset {
|
||||||
url.push_str(&format!("&offset={}", off));
|
url.push_str(&format!("&offset={}", off));
|
||||||
}
|
}
|
||||||
@@ -413,6 +416,9 @@ pub struct SmsSearchParams<'a> {
|
|||||||
pub mode: &'a str,
|
pub mode: &'a str,
|
||||||
pub limit: usize,
|
pub limit: usize,
|
||||||
pub contact_id: Option<i64>,
|
pub contact_id: Option<i64>,
|
||||||
|
/// Contact name (case-insensitive). Resolved to a numeric ID by the
|
||||||
|
/// SMS-API server when `contact_id` is not set.
|
||||||
|
pub contact: Option<String>,
|
||||||
/// Unix-seconds inclusive lower bound on `date`.
|
/// Unix-seconds inclusive lower bound on `date`.
|
||||||
pub date_from: Option<i64>,
|
pub date_from: Option<i64>,
|
||||||
/// Unix-seconds inclusive upper bound on `date`.
|
/// Unix-seconds inclusive upper bound on `date`.
|
||||||
|
|||||||
@@ -195,6 +195,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let generator = InsightGenerator::new(
|
let generator = InsightGenerator::new(
|
||||||
ollama,
|
ollama,
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
sms_client,
|
sms_client,
|
||||||
apollo_client,
|
apollo_client,
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
|
|||||||
@@ -62,6 +62,15 @@ pub fn large_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
|
|||||||
.join(format!("{}.jpg", hash))
|
.join(format!("{}.jpg", hash))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Hash-keyed xlarge-preview path: `<thumbs_dir>/_xlarge/<hash[..2]>/<hash>.jpg`.
|
||||||
|
pub fn xlarge_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
|
||||||
|
let shard = shard_prefix(hash);
|
||||||
|
thumbs_dir
|
||||||
|
.join("_xlarge")
|
||||||
|
.join(shard)
|
||||||
|
.join(format!("{}.jpg", hash))
|
||||||
|
}
|
||||||
|
|
||||||
/// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
|
/// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
|
||||||
/// The playlist lives at `playlist.m3u8` inside this directory and its
|
/// The playlist lives at `playlist.m3u8` inside this directory and its
|
||||||
/// segments are co-located so HLS relative references Just Work. See
|
/// segments are co-located so HLS relative references Just Work. See
|
||||||
|
|||||||
@@ -194,6 +194,7 @@ pub enum MediaType {
|
|||||||
#[serde(rename_all = "lowercase")]
|
#[serde(rename_all = "lowercase")]
|
||||||
pub enum PhotoSize {
|
pub enum PhotoSize {
|
||||||
Full,
|
Full,
|
||||||
|
XLarge,
|
||||||
Large,
|
Large,
|
||||||
Thumb,
|
Thumb,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -83,12 +83,14 @@ pub async fn get_image(
|
|||||||
if let Some((library, path)) = resolved {
|
if let Some((library, path)) = resolved {
|
||||||
let image_size = req.size.unwrap_or(PhotoSize::Full);
|
let image_size = req.size.unwrap_or(PhotoSize::Full);
|
||||||
|
|
||||||
// `size=large` is only meaningful for stills — there's no useful
|
// `size=large|xlarge` is only meaningful for stills — there's no
|
||||||
// "2048px video preview" tier. Videos fall back to the existing
|
// useful "resized video preview" tier. Videos fall back to the
|
||||||
// thumb pipeline (which already handles gif/static selection).
|
// existing thumb pipeline (which already handles gif/static
|
||||||
// `mut` so the Large branch can downgrade itself to `Full` after a
|
// selection). `mut` so preview branches can downgrade to `Full`
|
||||||
// generation failure (RAW-preview branch below keys off `Full`).
|
// after a generation failure.
|
||||||
let mut image_size = if image_size == PhotoSize::Large && file_types::is_video_file(&path) {
|
let mut image_size = if (image_size == PhotoSize::Large || image_size == PhotoSize::XLarge)
|
||||||
|
&& file_types::is_video_file(&path)
|
||||||
|
{
|
||||||
PhotoSize::Thumb
|
PhotoSize::Thumb
|
||||||
} else {
|
} else {
|
||||||
image_size
|
image_size
|
||||||
@@ -196,6 +198,93 @@ pub async fn get_image(
|
|||||||
image_size = PhotoSize::Full;
|
image_size = PhotoSize::Full;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if image_size == PhotoSize::XLarge {
|
||||||
|
let relative_path = path
|
||||||
|
.strip_prefix(&library.root_path)
|
||||||
|
.expect("Error stripping library root prefix from xlarge preview");
|
||||||
|
let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
|
||||||
|
let thumbs = Path::new(&app_state.thumbnail_path);
|
||||||
|
let xlarge_dir = thumbs.join("_xlarge");
|
||||||
|
|
||||||
|
let hash_xlarge_path: Option<PathBuf> = {
|
||||||
|
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||||
|
match dao.get_exif(&context, &relative_path_str) {
|
||||||
|
Ok(Some(row)) => row
|
||||||
|
.content_hash
|
||||||
|
.as_deref()
|
||||||
|
.map(|h| content_hash::xlarge_preview_path(thumbs, h)),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let scoped_legacy_xlarge_path =
|
||||||
|
content_hash::library_scoped_legacy_path(&xlarge_dir, library.id, relative_path);
|
||||||
|
|
||||||
|
let existing = hash_xlarge_path
|
||||||
|
.as_ref()
|
||||||
|
.filter(|p| p.exists())
|
||||||
|
.cloned()
|
||||||
|
.or_else(|| {
|
||||||
|
if scoped_legacy_xlarge_path.exists() {
|
||||||
|
Some(scoped_legacy_xlarge_path.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Some(found) = existing {
|
||||||
|
if let Ok(file) = NamedFile::open(&found) {
|
||||||
|
span.set_status(Status::Ok);
|
||||||
|
return file
|
||||||
|
.use_etag(true)
|
||||||
|
.use_last_modified(true)
|
||||||
|
.prefer_utf8(true)
|
||||||
|
.into_response(&request);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let dest = hash_xlarge_path
|
||||||
|
.clone()
|
||||||
|
.unwrap_or_else(|| scoped_legacy_xlarge_path.clone());
|
||||||
|
let src = path.clone();
|
||||||
|
let dest_for_block = dest.clone();
|
||||||
|
let generated = web::block(move || {
|
||||||
|
if let Some(parent) = dest_for_block.parent() {
|
||||||
|
std::fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
let tmp = dest_for_block.with_extension("jpg.tmp");
|
||||||
|
crate::thumbnails::generate_xlarge_preview(&src, &tmp)?;
|
||||||
|
std::fs::rename(&tmp, &dest_for_block)?;
|
||||||
|
Ok::<(), std::io::Error>(())
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match generated {
|
||||||
|
Ok(Ok(())) => {
|
||||||
|
if let Ok(file) = NamedFile::open(&dest) {
|
||||||
|
span.set_status(Status::Ok);
|
||||||
|
return file
|
||||||
|
.use_etag(true)
|
||||||
|
.use_last_modified(true)
|
||||||
|
.prefer_utf8(true)
|
||||||
|
.into_response(&request);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
warn!(
|
||||||
|
"XLarge preview generation failed for {:?}: {} — falling back to original",
|
||||||
|
path, e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
"XLarge preview blocking-pool error for {:?}: {} — falling back to original",
|
||||||
|
path, e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
image_size = PhotoSize::Full;
|
||||||
|
}
|
||||||
|
|
||||||
if image_size == PhotoSize::Thumb {
|
if image_size == PhotoSize::Thumb {
|
||||||
let relative_path = path
|
let relative_path = path
|
||||||
.strip_prefix(&library.root_path)
|
.strip_prefix(&library.root_path)
|
||||||
|
|||||||
59
src/state.rs
59
src/state.rs
@@ -2,6 +2,7 @@ use crate::ai::apollo_client::ApolloClient;
|
|||||||
use crate::ai::clip_client::ClipClient;
|
use crate::ai::clip_client::ClipClient;
|
||||||
use crate::ai::face_client::FaceClient;
|
use crate::ai::face_client::FaceClient;
|
||||||
use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
|
use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
use crate::ai::openrouter::OpenRouterClient;
|
use crate::ai::openrouter::OpenRouterClient;
|
||||||
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
||||||
use crate::database::{
|
use crate::database::{
|
||||||
@@ -62,6 +63,16 @@ pub struct AppState {
|
|||||||
/// Curated list of OpenRouter model ids exposed to clients. Sourced from
|
/// Curated list of OpenRouter model ids exposed to clients. Sourced from
|
||||||
/// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset.
|
/// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset.
|
||||||
pub openrouter_allowed_models: Vec<String>,
|
pub openrouter_allowed_models: Vec<String>,
|
||||||
|
/// `None` when `LLAMA_SWAP_URL` is not configured. Consulted only when a
|
||||||
|
/// request explicitly opts into `backend=llamacpp`. Same shape as the
|
||||||
|
/// `openrouter` slot — present here so handlers can route to it without
|
||||||
|
/// threading through the generator.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub llamacpp: Option<Arc<LlamaCppClient>>,
|
||||||
|
/// Curated list of llama-swap model ids exposed to clients. Sourced from
|
||||||
|
/// `LLAMA_SWAP_ALLOWED_MODELS` (comma-separated). Empty when unset; the
|
||||||
|
/// server then falls back to `LLAMA_SWAP_PRIMARY_MODEL`.
|
||||||
|
pub llamacpp_allowed_models: Vec<String>,
|
||||||
pub sms_client: SmsApiClient,
|
pub sms_client: SmsApiClient,
|
||||||
pub insight_generator: InsightGenerator,
|
pub insight_generator: InsightGenerator,
|
||||||
/// Chat continuation service. Hold an Arc so handlers can clone cheaply.
|
/// Chat continuation service. Hold an Arc so handlers can clone cheaply.
|
||||||
@@ -105,6 +116,8 @@ impl AppState {
|
|||||||
ollama: OllamaClient,
|
ollama: OllamaClient,
|
||||||
openrouter: Option<Arc<OpenRouterClient>>,
|
openrouter: Option<Arc<OpenRouterClient>>,
|
||||||
openrouter_allowed_models: Vec<String>,
|
openrouter_allowed_models: Vec<String>,
|
||||||
|
llamacpp: Option<Arc<LlamaCppClient>>,
|
||||||
|
llamacpp_allowed_models: Vec<String>,
|
||||||
sms_client: SmsApiClient,
|
sms_client: SmsApiClient,
|
||||||
insight_generator: InsightGenerator,
|
insight_generator: InsightGenerator,
|
||||||
insight_chat: Arc<InsightChatService>,
|
insight_chat: Arc<InsightChatService>,
|
||||||
@@ -145,6 +158,8 @@ impl AppState {
|
|||||||
ollama,
|
ollama,
|
||||||
openrouter,
|
openrouter,
|
||||||
openrouter_allowed_models,
|
openrouter_allowed_models,
|
||||||
|
llamacpp,
|
||||||
|
llamacpp_allowed_models,
|
||||||
sms_client,
|
sms_client,
|
||||||
insight_generator,
|
insight_generator,
|
||||||
insight_chat,
|
insight_chat,
|
||||||
@@ -186,6 +201,9 @@ impl Default for AppState {
|
|||||||
let openrouter = build_openrouter_from_env();
|
let openrouter = build_openrouter_from_env();
|
||||||
let openrouter_allowed_models = parse_openrouter_allowed_models();
|
let openrouter_allowed_models = parse_openrouter_allowed_models();
|
||||||
|
|
||||||
|
let llamacpp = build_llamacpp_from_env();
|
||||||
|
let llamacpp_allowed_models = parse_llamacpp_allowed_models();
|
||||||
|
|
||||||
let sms_api_url =
|
let sms_api_url =
|
||||||
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
|
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
|
||||||
let sms_api_token = env::var("SMS_API_TOKEN").ok();
|
let sms_api_token = env::var("SMS_API_TOKEN").ok();
|
||||||
@@ -250,6 +268,7 @@ impl Default for AppState {
|
|||||||
let insight_generator = InsightGenerator::new(
|
let insight_generator = InsightGenerator::new(
|
||||||
ollama.clone(),
|
ollama.clone(),
|
||||||
openrouter.clone(),
|
openrouter.clone(),
|
||||||
|
llamacpp.clone(),
|
||||||
sms_client.clone(),
|
sms_client.clone(),
|
||||||
apollo_client.clone(),
|
apollo_client.clone(),
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
@@ -271,8 +290,6 @@ impl Default for AppState {
|
|||||||
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
||||||
let insight_chat = Arc::new(InsightChatService::new(
|
let insight_chat = Arc::new(InsightChatService::new(
|
||||||
Arc::new(insight_generator.clone()),
|
Arc::new(insight_generator.clone()),
|
||||||
ollama.clone(),
|
|
||||||
openrouter.clone(),
|
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
chat_locks,
|
chat_locks,
|
||||||
));
|
));
|
||||||
@@ -294,6 +311,8 @@ impl Default for AppState {
|
|||||||
ollama,
|
ollama,
|
||||||
openrouter,
|
openrouter,
|
||||||
openrouter_allowed_models,
|
openrouter_allowed_models,
|
||||||
|
llamacpp,
|
||||||
|
llamacpp_allowed_models,
|
||||||
sms_client,
|
sms_client,
|
||||||
insight_generator,
|
insight_generator,
|
||||||
insight_chat,
|
insight_chat,
|
||||||
@@ -335,6 +354,37 @@ fn parse_openrouter_allowed_models() -> Vec<String> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build a `LlamaCppClient` from environment variables. Returns `None` when
|
||||||
|
/// `LLAMA_SWAP_URL` is unset. The client is constructed unconditionally
|
||||||
|
/// when the URL is set (so it's available even under `LLM_BACKEND=ollama`
|
||||||
|
/// for ad-hoc tooling), but the agentic / chat paths only route through it
|
||||||
|
/// when `LLM_BACKEND=llamacpp`. Slot ids default to the names the bundled
|
||||||
|
/// `llama-swap/config.yaml` uses — `chat` / `vision` / `embed`.
|
||||||
|
fn build_llamacpp_from_env() -> Option<Arc<LlamaCppClient>> {
|
||||||
|
let base_url = env::var("LLAMA_SWAP_URL").ok()?;
|
||||||
|
let primary_model = env::var("LLAMA_SWAP_PRIMARY_MODEL").ok();
|
||||||
|
let mut client = LlamaCppClient::new(Some(base_url), primary_model);
|
||||||
|
if let Ok(model) = env::var("LLAMA_SWAP_EMBEDDING_MODEL") {
|
||||||
|
client.set_embedding_model(model);
|
||||||
|
}
|
||||||
|
if let Ok(model) = env::var("LLAMA_SWAP_VISION_MODEL") {
|
||||||
|
client.set_vision_model(model);
|
||||||
|
}
|
||||||
|
Some(Arc::new(client))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the llama-swap model allowlist from `LLAMA_SWAP_ALLOWED_MODELS`.
///
/// The variable is a comma-separated list of model/slot ids. Each entry is
/// trimmed of surrounding whitespace and blank entries are discarded, so
/// `" chat, vision ,,embed "` yields `["chat", "vision", "embed"]`. An unset
/// (or unreadable) variable produces an empty vector.
fn parse_llamacpp_allowed_models() -> Vec<String> {
    let raw = env::var("LLAMA_SWAP_ALLOWED_MODELS").unwrap_or_default();

    let mut models = Vec::new();
    for entry in raw.split(',') {
        let id = entry.trim();
        if id.is_empty() {
            // Skip blanks produced by stray or trailing commas.
            continue;
        }
        models.push(id.to_string());
    }
    models
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
impl AppState {
|
impl AppState {
|
||||||
/// Creates an AppState instance for testing with temporary directories
|
/// Creates an AppState instance for testing with temporary directories
|
||||||
@@ -397,6 +447,7 @@ impl AppState {
|
|||||||
let insight_generator = InsightGenerator::new(
|
let insight_generator = InsightGenerator::new(
|
||||||
ollama.clone(),
|
ollama.clone(),
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
sms_client.clone(),
|
sms_client.clone(),
|
||||||
apollo_client.clone(),
|
apollo_client.clone(),
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
@@ -416,8 +467,6 @@ impl AppState {
|
|||||||
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
||||||
let insight_chat = Arc::new(InsightChatService::new(
|
let insight_chat = Arc::new(InsightChatService::new(
|
||||||
Arc::new(insight_generator.clone()),
|
Arc::new(insight_generator.clone()),
|
||||||
ollama.clone(),
|
|
||||||
None,
|
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
chat_locks,
|
chat_locks,
|
||||||
));
|
));
|
||||||
@@ -445,6 +494,8 @@ impl AppState {
|
|||||||
ollama,
|
ollama,
|
||||||
None,
|
None,
|
||||||
Vec::new(),
|
Vec::new(),
|
||||||
|
None,
|
||||||
|
Vec::new(),
|
||||||
sms_client,
|
sms_client,
|
||||||
insight_generator,
|
insight_generator,
|
||||||
insight_chat,
|
insight_chat,
|
||||||
|
|||||||
@@ -36,12 +36,19 @@ use crate::video::actors::{generate_image_thumbnail_ffmpeg, generate_video_thumb
|
|||||||
/// `size=full` and the handler streams the original bytes.
|
/// `size=full` and the handler streams the original bytes.
|
||||||
pub const LARGE_PREVIEW_MAX_DIM: u32 = 2048;
|
pub const LARGE_PREVIEW_MAX_DIM: u32 = 2048;
|
||||||
|
|
||||||
/// JPEG quality for the large preview tier. 85 is the conventional
|
/// JPEG quality for the large and xlarge preview tiers. 85 is the
|
||||||
/// "indistinguishable from source at viewing size" point — well above the
|
/// conventional "indistinguishable from source at viewing size" point —
|
||||||
/// `image` crate's default ~75, but well below quality-90+ territory where
|
/// well above the `image` crate's default ~75, but well below quality-90+
|
||||||
/// file size doubles for no perceptible win.
|
/// territory where file size doubles for no perceptible win.
|
||||||
const LARGE_PREVIEW_JPEG_QUALITY: u8 = 85;
|
const LARGE_PREVIEW_JPEG_QUALITY: u8 = 85;
|
||||||
|
|
||||||
|
/// Maximum long-edge size (px) for the xlarge preview tier. Bridges the
|
||||||
|
/// gap between `large` (2048px, ~16MB decoded) and the original bytes
|
||||||
|
/// (potentially 48+ MP / ~192MB decoded). At 4096px the decoded bitmap is
|
||||||
|
/// ~64MB — enough for 2-3× pinch-zoom on any phone before the viewer
|
||||||
|
/// needs to stream the true original.
|
||||||
|
pub const XLARGE_PREVIEW_MAX_DIM: u32 = 4096;
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
pub static ref IMAGE_GAUGE: IntGauge = IntGauge::new(
|
pub static ref IMAGE_GAUGE: IntGauge = IntGauge::new(
|
||||||
"imageserver_image_total",
|
"imageserver_image_total",
|
||||||
@@ -205,6 +212,86 @@ fn generate_large_preview_ffmpeg(src: &Path, dest: &Path) -> std::io::Result<()>
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate the on-demand xlarge-preview tier (≈4096 long edge JPEG).
|
||||||
|
///
|
||||||
|
/// Same waterfall as [`generate_large_preview`] but targeting
|
||||||
|
/// [`XLARGE_PREVIEW_MAX_DIM`]. Sources whose long edge is already below
|
||||||
|
/// the cap are encoded at native size (no upscale).
|
||||||
|
pub fn generate_xlarge_preview(src: &Path, dest: &Path) -> std::io::Result<()> {
|
||||||
|
let orientation = exif::read_orientation(src).unwrap_or(1);
|
||||||
|
|
||||||
|
if let Some(preview) = exif::extract_embedded_jpeg_preview(src) {
|
||||||
|
let img = image::load_from_memory(&preview).map_err(|e| {
|
||||||
|
std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
format!("decode embedded preview {:?}: {}", src, e),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let img = exif::apply_orientation(img, orientation);
|
||||||
|
return encode_xlarge_jpeg(img, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
if file_types::needs_ffmpeg_thumbnail(src) {
|
||||||
|
return generate_xlarge_preview_ffmpeg(src, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
let img = image::open(src).map_err(|e| {
|
||||||
|
std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{:?}: {}", src, e))
|
||||||
|
})?;
|
||||||
|
let img = exif::apply_orientation(img, orientation);
|
||||||
|
encode_xlarge_jpeg(img, dest)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_xlarge_jpeg(img: image::DynamicImage, dest: &Path) -> std::io::Result<()> {
|
||||||
|
let (w, h) = img.dimensions();
|
||||||
|
let max_dim = w.max(h);
|
||||||
|
let scaled = if max_dim > XLARGE_PREVIEW_MAX_DIM {
|
||||||
|
img.thumbnail(XLARGE_PREVIEW_MAX_DIM, XLARGE_PREVIEW_MAX_DIM)
|
||||||
|
} else {
|
||||||
|
img
|
||||||
|
};
|
||||||
|
let file = std::fs::File::create(dest)
|
||||||
|
.map_err(|e| std::io::Error::other(format!("create {:?}: {}", dest, e)))?;
|
||||||
|
let mut writer = std::io::BufWriter::new(file);
|
||||||
|
let mut encoder = JpegEncoder::new_with_quality(&mut writer, LARGE_PREVIEW_JPEG_QUALITY);
|
||||||
|
encoder
|
||||||
|
.encode_image(&scaled)
|
||||||
|
.map_err(|e| std::io::Error::other(format!("encode {:?}: {}", dest, e)))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_xlarge_preview_ffmpeg(src: &Path, dest: &Path) -> std::io::Result<()> {
|
||||||
|
let vf = format!(
|
||||||
|
"scale='if(gt(iw,ih),min(iw,{cap}),-1)':'if(gt(iw,ih),-1,min(ih,{cap}))'",
|
||||||
|
cap = XLARGE_PREVIEW_MAX_DIM
|
||||||
|
);
|
||||||
|
let output = Command::new("ffmpeg")
|
||||||
|
.arg("-y")
|
||||||
|
.arg("-i")
|
||||||
|
.arg(src)
|
||||||
|
.arg("-vframes")
|
||||||
|
.arg("1")
|
||||||
|
.arg("-vf")
|
||||||
|
.arg(&vf)
|
||||||
|
.arg("-q:v")
|
||||||
|
.arg("5")
|
||||||
|
.arg("-f")
|
||||||
|
.arg("image2")
|
||||||
|
.arg("-c:v")
|
||||||
|
.arg("mjpeg")
|
||||||
|
.arg(dest)
|
||||||
|
.output()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(std::io::Error::other(format!(
|
||||||
|
"ffmpeg failed ({}): {}",
|
||||||
|
output.status,
|
||||||
|
String::from_utf8_lossy(&output.stderr).trim()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
|
pub fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
|
||||||
let tracer = global_tracer();
|
let tracer = global_tracer();
|
||||||
let span = tracer.start("creating thumbnails");
|
let span = tracer.start("creating thumbnails");
|
||||||
|
|||||||
Reference in New Issue
Block a user