2026-04-26 23:01:35 +00:00
33 changed files with 5739 additions and 606 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,15 @@
 /target
 database/target
 *.db
 *.db.bak
 .env
 /tmp
 # Default ignored files
 .idea/shelf/
 .idea/workspace.xml
 .idea/inspectionProfiles/
 .idea/markdown.xml
 # Datasource local storage ignored files
 .idea/dataSources*
 .idea/dataSources.local.xml
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -169,6 +169,20 @@ POST   /image/tags/batch (bulk tag updates)
 // Memories (week-based grouping)
 GET /memories?path=...&recursive=true
 // AI Insights
 POST /insights/generate              (non-agentic single-shot)
 POST /insights/generate/agentic      (tool-calling loop; body: { file_path, backend?, model?, ... })
 GET  /insights?path=...&library=...
 GET  /insights/models                (local Ollama models + capabilities)
 GET  /insights/openrouter/models     (curated OpenRouter allowlist)
 POST /insights/rate                  (thumbs up/down for training data)
 // Insight Chat Continuation
 POST /insights/chat                  (single-turn reply, non-streaming)
 POST /insights/chat/stream           (SSE: text / tool_call / tool_result / truncated / done)
 GET  /insights/chat/history?path=... (rendered transcript with tool invocations)
 POST /insights/chat/rewind           (truncate transcript at a rendered index)
 ```
 **Request Types:**
@@ -256,8 +270,23 @@ OLLAMA_PRIMARY_URL=http://desktop:11434        # Primary Ollama server (e.g., de
 OLLAMA_FALLBACK_URL=http://server:11434        # Fallback Ollama server (optional, always-on)
 OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b       # Model for primary server (default: nemotron-3-nano:30b)
 OLLAMA_FALLBACK_MODEL=llama3.2:3b              # Model for fallback server (optional, uses primary if not set)
 OLLAMA_REQUEST_TIMEOUT_SECONDS=120             # Per-request generation timeout (default 120). Increase for slow CPU-offloaded models.
 SMS_API_URL=http://localhost:8000              # SMS message API endpoint (default: localhost:8000)
 SMS_API_TOKEN=your-api-token                   # SMS API authentication token (optional)
 # OpenRouter (Hybrid Backend) - keeps embeddings + vision local, routes chat to OpenRouter
 OPENROUTER_API_KEY=sk-or-...                   # Required to enable hybrid backend
 OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4   # Used when client doesn't pick a model
 OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
                                                # Curated allowlist exposed to clients via
                                                # GET /insights/openrouter/models. Empty = no picker.
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1     # Override base URL (optional)
 OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small  # Optional, embeddings stay local today
 OPENROUTER_HTTP_REFERER=https://your-site.example    # Optional attribution header
 OPENROUTER_APP_TITLE=ImageApi                  # Optional attribution header
 # Insight Chat Continuation
 AGENTIC_CHAT_MAX_ITERATIONS=6                  # Cap on tool-calling iterations per chat turn (default 6)
 ```
 **AI Insights Fallback Behavior:**
@@ -275,6 +304,67 @@ The `OllamaClient` provides methods to query available models:
 This allows runtime verification of model availability before generating insights.
 **Hybrid Backend (OpenRouter):**
 - Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
 - Local Ollama still describes the image (vision); the description is inlined
  into the chat prompt and the agentic loop runs on OpenRouter.
 - `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
  call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
 - No live capability precheck — the operator-curated allowlist is trusted.
  A bad model id surfaces as a chat-call error.
 - `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
  for client picker UIs.
 **Insight Chat Continuation:**
 After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
 stored in `photo_insights.training_messages` and can be continued via the
 chat endpoints. The `PhotoInsightResponse.has_training_messages` flag tells
 clients whether chat is available for a given insight.
 - `POST /insights/chat` runs one turn of the agentic loop against the replayed
  history. Body: `{ file_path, library?, user_message, model?, backend?, num_ctx?,
  temperature?, top_p?, top_k?, min_p?, max_iterations?, amend? }`.
 - `POST /insights/chat/stream` is the SSE variant — same request body, response
  is `text/event-stream` with events: `iteration_start`, `text` (delta), `tool_call`,
  `tool_result`, `truncated`, `done`, plus a server-emitted `error_message` on
  failure. Preferred by the mobile client for live tool-chip updates.
 - `GET /insights/chat/history?path=...&library=...` returns the rendered
  transcript. Each assistant message carries a `tools: [{name, arguments, result,
  result_truncated?}]` array with the tool invocations that led up to it. Tool
  results over 2000 chars are truncated with `result_truncated: true`.
 - `POST /insights/chat/rewind` truncates the transcript at a given rendered
  index (drops that message + any tool-call scaffolding that preceded it + all
  later turns). Index 0 is protected. Used for "try again from here" flows.
 Backend routing rules (matches agentic-insight generation):
 - Stored `backend` on the insight row is authoritative by default.
 - `request.backend` may override per-turn. `local -> hybrid` is rejected in
  v1 (would require on-the-fly visual-description rewrite); `hybrid -> local`
  replays verbatim since the description is already inlined as text.
 - `request.model` overrides the chat model (an Ollama id in local mode, an
  OpenRouter id in hybrid mode).
 Persistence:
 - Append mode (default): re-serialize the full history and `UPDATE` the same
  row's `training_messages`.
 - Amend mode (`amend: true`): regenerate the title, insert a new insight row
  via `store_insight` (auto-flips prior rows' `is_current=false`). Response
  surfaces the new row's id as `amended_insight_id`.
 Per-`(library_id, file_path)` async mutex (`AppState.insight_chat.chat_locks`)
 serialises concurrent turns on the same insight so the JSON blob doesn't race.
 Context management is a soft bound: if the serialized history exceeds
 `num_ctx - 2048` tokens (cheap 4-byte/token heuristic), the oldest
 assistant-tool_call + tool_result pairs are dropped until under budget. The
 initial user message (with any images) and system prompt are always preserved.
 The `truncated` event / flag is surfaced to the client when a drop occurred.
 Configurable env:
 - `AGENTIC_CHAT_MAX_ITERATIONS` — cap on tool-calling iterations per turn
  (default 6). Per-request `max_iterations` is clamped to this cap.
 ## Dependencies of Note
 - **actix-web**: HTTP framework
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -486,6 +486,28 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 [[package]]
 name = "async-stream"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
 dependencies = [
 "async-stream-impl",
 "futures-core",
 "pin-project-lite",
 ]
 [[package]]
 name = "async-stream-impl"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 ]
 [[package]]
 name = "async-trait"
 version = "0.1.89"
@@ -886,6 +908,12 @@ version = "0.8.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 [[package]]
 name = "crunchy"
 version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
 [[package]]
 name = "crypto-common"
 version = "0.1.6"
@@ -1196,6 +1224,26 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 [[package]]
 name = "fax"
 version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab"
 dependencies = [
 "fax_derive",
 ]
 [[package]]
 name = "fax_derive"
 version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 ]
 [[package]]
 name = "fdeflate"
 version = "0.3.7"
@@ -1479,6 +1527,17 @@ dependencies = [
 "tracing",
 ]
 [[package]]
 name = "half"
 version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
 dependencies = [
 "cfg-if",
 "crunchy",
 "zerocopy",
 ]
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -1821,11 +1880,14 @@ checksum = "1c6a3ce16143778e24df6f95365f12ed105425b22abefd289dd88a64bab59605"
 dependencies = [
 "bytemuck",
 "byteorder-lite",
 "image-webp",
 "moxcms",
 "num-traits",
 "png",
 "ravif",
 "rayon",
 "rgb",
 "tiff",
 "zune-core",
 "zune-jpeg",
 ]
@@ -1843,9 +1905,12 @@ dependencies = [
 "actix-web",
 "actix-web-prom",
 "anyhow",
 "async-stream",
 "async-trait",
 "base64",
 "bcrypt",
 "blake3",
 "bytes",
 "chrono",
 "clap",
 "diesel",
@@ -1877,11 +1942,22 @@ dependencies = [
 "serde_json",
 "tempfile",
 "tokio",
 "tokio-util",
 "urlencoding",
 "walkdir",
 "zerocopy",
 ]
 [[package]]
 name = "image-webp"
 version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3"
 dependencies = [
 "byteorder-lite",
 "quick-error",
 ]
 [[package]]
 name = "imgref"
 version = "1.11.0"
@@ -3124,12 +3200,14 @@ dependencies = [
 "sync_wrapper",
 "tokio",
 "tokio-native-tls",
 "tokio-util",
 "tower",
 "tower-http",
 "tower-service",
 "url",
 "wasm-bindgen",
 "wasm-bindgen-futures",
 "wasm-streams",
 "web-sys",
 ]
@@ -3684,6 +3762,20 @@ dependencies = [
 "syn",
 ]
 [[package]]
 name = "tiff"
 version = "0.10.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af9605de7fee8d9551863fd692cce7637f548dbd9db9180fcc07ccc6d26c336f"
 dependencies = [
 "fax",
 "flate2",
 "half",
 "quick-error",
 "weezl",
 "zune-jpeg",
 ]
 [[package]]
 name = "time"
 version = "0.3.42"
@@ -4218,6 +4310,19 @@ dependencies = [
 "unicode-ident",
 ]
 [[package]]
 name = "wasm-streams"
 version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
 dependencies = [
 "futures-util",
 "js-sys",
 "wasm-bindgen",
 "wasm-bindgen-futures",
 "web-sys",
 ]
 [[package]]
 name = "web-sys"
 version = "0.3.77"
@@ -4228,6 +4333,12 @@ dependencies = [
 "wasm-bindgen",
 ]
 [[package]]
 name = "weezl"
 version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"
 [[package]]
 name = "winapi"
 version = "0.3.9"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,7 +29,7 @@ chrono = "0.4"
 clap = { version = "4.5", features = ["derive"] }
 dotenv = "0.15"
 bcrypt = "0.17.1"
-image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon"] }
+image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon", "webp", "tiff", "avif"] }
 infer = "0.16"
 walkdir = "2.4.0"
 rayon = "1.5"
@@ -49,10 +49,14 @@ opentelemetry-appender-log = "0.31.0"
 tempfile = "3.20.0"
 regex = "1.11.1"
 exif = { package = "kamadak-exif", version = "0.6.1" }
-reqwest = { version = "0.12", features = ["json"] }
+reqwest = { version = "0.12", features = ["json", "stream"] }
 async-stream = "0.3"
 tokio-util = { version = "0.7", features = ["io"] }
 bytes = "1"
 urlencoding = "2.1"
 zerocopy = "0.8"
 ical = "0.11"
 scraper = "0.20"
 base64 = "0.22"
 blake3 = "1.5"
 async-trait = "0.1"
--- a/README.md
+++ b/README.md
@@ -14,14 +14,43 @@ Upon first run it will generate thumbnails for all images and videos at `BASE_PA
 - **RAG-based Context Retrieval** - Semantic search over daily conversation summaries
 - **Automatic Daily Summaries** - LLM-generated summaries of daily conversations with embeddings
 ## External Dependencies
 ### ffmpeg (required)
 `ffmpeg` must be on `PATH`. It is used for:
 - **HLS video streaming** — transcoding/segmenting source videos into `.m3u8` + `.ts` playlists
 - **Video thumbnails** — extracting a frame at the 3-second mark
 - **Video preview clips** — short looping previews for the Video Wall
 - **HEIC / HEIF thumbnails** — decoding Apple's HEIC format (your ffmpeg build must include
  `libheif`; most modern builds do)
 Builds used in development: the `gyan.dev` full build on Windows, and distro `ffmpeg`
 packages on Linux work fine. If HEIC thumbnails silently fail, check
 `ffmpeg -formats | grep heif` to confirm HEIF support.
 ### RAW photo thumbnails (no extra dependency)
 RAW formats (ARW, NEF, CR2, CR3, DNG, RAF, ORF, RW2, PEF, SRW, TIFF) are thumbnailed
 by reading the embedded JPEG preview from the TIFF IFD1 using `kamadak-exif`. No
 external RAW decoder (libraw / dcraw) is required. Files without an embedded preview
 fall back to ffmpeg (works for most NEF files), and anything that still can't be
 decoded is marked with a `<thumb>.unsupported` sentinel in the thumbnail directory
 so we don't retry it every scan. Delete those sentinels to force retries after a
 tooling upgrade.
 ## Environment
 There are a handful of required environment variables to have the API run.
 They should be defined where the binary is located or above it in an `.env` file.
 You must have `ffmpeg` installed (see External Dependencies above); it is used for HLS video streaming, video thumbnails and preview clips, and HEIC/HEIF thumbnails.
 - `DATABASE_URL` is a path or url to a database (currently only SQLite is tested)
 - `BASE_PATH` is the root from which you want to serve images and videos
- `THUMBNAILS` is a path where generated thumbnails should be stored
+- `THUMBNAILS` is a path where generated thumbnails should be stored. Thumbnails
  mirror the source tree under `BASE_PATH` and keep the source's original
  extension (e.g. `foo.arw` or `bar.mp4`), though the file contents are always
  JPEG bytes — browsers content-sniff. Files that can't be thumbnailed by the
  `image` crate, ffmpeg, or an embedded RAW preview get a zero-byte
  `<thumb_path>.unsupported` sentinel in this directory so subsequent scans
  skip them. Delete the `*.unsupported` files to force retries (for example
  after upgrading ffmpeg or adding libheif)
 - `VIDEO_PATH` is a path where HLS playlists and video parts should be stored
 - `GIFS_DIRECTORY` is a path where generated video GIF thumbnails should be stored
 - `BIND_URL` is the url and port to bind to (typically your own IP address)
@@ -50,6 +79,29 @@ The following environment variables configure AI-powered photo insights and dail
 - `OLLAMA_URL` - Used if `OLLAMA_PRIMARY_URL` not set
 - `OLLAMA_MODEL` - Used if `OLLAMA_PRIMARY_MODEL` not set
 #### OpenRouter Configuration (Hybrid Backend)
 The hybrid agentic backend keeps embeddings + vision local (Ollama) while routing
 chat + tool-calling to OpenRouter. Enabled per-request when the client sends
 `backend=hybrid`.
 - `OPENROUTER_API_KEY` - OpenRouter API key. Required to enable the hybrid backend.
 - `OPENROUTER_DEFAULT_MODEL` - Model id used when the client doesn't specify one
  [default: `anthropic/claude-sonnet-4`]
  - Example: `openai/gpt-4o-mini`, `google/gemini-2.5-flash`
 - `OPENROUTER_ALLOWED_MODELS` - Comma-separated curated allowlist exposed to
  clients via `GET /insights/openrouter/models`. The mobile picker shows only
  these. Empty/unset = no picker, server default is used.
  - Example: `openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash`
 - `OPENROUTER_BASE_URL` - Override base URL [default: `https://openrouter.ai/api/v1`]
 - `OPENROUTER_EMBEDDING_MODEL` - Embedding model for OpenRouter
  [default: `openai/text-embedding-3-small`]. Only used if/when embeddings are
  routed through OpenRouter (currently embeddings stay local).
 - `OPENROUTER_HTTP_REFERER` - Optional `HTTP-Referer` for OpenRouter attribution
 - `OPENROUTER_APP_TITLE` - Optional `X-Title` for OpenRouter attribution
 Capability checks are skipped for the curated allowlist — bad model ids surface
 as a 4xx from the chat call. Pick tool-capable models.
 #### SMS API Configuration
 - `SMS_API_URL` - URL to SMS message API [default: `http://localhost:8000`]
  - Used to fetch conversation data for context in insights
@@ -60,6 +112,24 @@ The following environment variables configure AI-powered photo insights and dail
  - Controls how many times the model can invoke tools before being forced to produce a final answer
  - Increase for more thorough context gathering; decrease to limit response time
 #### Insight Chat Continuation
 After an agentic insight is generated, the conversation can be continued. Endpoints:
 - `POST /insights/chat` — single-turn reply (non-streaming)
 - `POST /insights/chat/stream` — SSE variant with live `text` deltas and
  `tool_call` / `tool_result` events. Mobile client uses this.
 - `GET /insights/chat/history?path=...&library=...` — rendered transcript;
  each assistant message carries a `tools: [{name, arguments, result}]` array
 - `POST /insights/chat/rewind` — truncate transcript at a rendered index
  (drops that message + any preceding tool scaffolding + later turns). Used
  for "try again from here" flows. The initial user message is protected.
 Amend mode (`amend: true` in the chat request body) regenerates the insight's
 title and inserts a new row instead of appending to the existing transcript,
 so you can rewrite the saved summary from within chat.
 - `AGENTIC_CHAT_MAX_ITERATIONS` - Cap on tool-calling iterations per chat turn [default: `6`]
  - Per-request `max_iterations` (when sent by the client) is clamped to this cap
 #### Fallback Behavior
 - Primary server is tried first with 5-second connection timeout
 - On failure, automatically falls back to secondary server (if configured)
--- a/migrations/2026-04-20-000000_add_backend_to_insights/down.sql
+++ b/migrations/2026-04-20-000000_add_backend_to_insights/down.sql
@@ -0,0 +1,23 @@
 -- Reverts the matching up.sql by removing the `backend` column from photo_insights.
 -- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table.
 -- NOTE(review): DROP TABLE also discards any indexes or triggers defined on
 -- photo_insights and nothing below recreates them — confirm none exist.
 -- Step 1: copy every column except `backend` into a scratch table.
 CREATE TABLE photo_insights_backup AS
    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
           is_current, training_messages, approved
    FROM photo_insights;
 -- Step 2: drop the current table and recreate it without the `backend` column.
 DROP TABLE photo_insights;
 CREATE TABLE photo_insights (
    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    library_id INTEGER NOT NULL REFERENCES libraries(id),
    rel_path TEXT NOT NULL,
    title TEXT NOT NULL,
    summary TEXT NOT NULL,
    generated_at BIGINT NOT NULL,
    model_version TEXT NOT NULL,
    is_current BOOLEAN NOT NULL DEFAULT TRUE,
    training_messages TEXT,
    approved BOOLEAN
 );
 -- Step 3: restore the rows. The SELECT lists columns in the same order as the
 -- CREATE TABLE above, which the column-list-free INSERT relies on.
 INSERT INTO photo_insights
    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
           is_current, training_messages, approved
    FROM photo_insights_backup;
 -- Step 4: remove the scratch table.
 DROP TABLE photo_insights_backup;
--- a/migrations/2026-04-20-000000_add_backend_to_insights/up.sql
+++ b/migrations/2026-04-20-000000_add_backend_to_insights/up.sql
@@ -0,0 +1 @@
 ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local';
--- a/migrations/2026-04-24-000000_add_fewshot_source_to_insights/down.sql
+++ b/migrations/2026-04-24-000000_add_fewshot_source_to_insights/down.sql
@@ -0,0 +1,24 @@
 -- Reverts the matching up.sql by removing the `fewshot_source_ids` column from
 -- photo_insights (the `backend` column is kept).
 -- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table.
 -- NOTE(review): DROP TABLE also discards any indexes or triggers defined on
 -- photo_insights and nothing below recreates them — confirm none exist.
 -- Step 1: copy every column except `fewshot_source_ids` into a scratch table.
 CREATE TABLE photo_insights_backup AS
    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
           is_current, training_messages, approved, backend
    FROM photo_insights;
 -- Step 2: drop the current table and recreate it without `fewshot_source_ids`.
 DROP TABLE photo_insights;
 CREATE TABLE photo_insights (
    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    library_id INTEGER NOT NULL REFERENCES libraries(id),
    rel_path TEXT NOT NULL,
    title TEXT NOT NULL,
    summary TEXT NOT NULL,
    generated_at BIGINT NOT NULL,
    model_version TEXT NOT NULL,
    is_current BOOLEAN NOT NULL DEFAULT TRUE,
    training_messages TEXT,
    approved BOOLEAN,
    backend TEXT NOT NULL DEFAULT 'local'
 );
 -- Step 3: restore the rows. The SELECT lists columns in the same order as the
 -- CREATE TABLE above, which the column-list-free INSERT relies on.
 INSERT INTO photo_insights
    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
           is_current, training_messages, approved, backend
    FROM photo_insights_backup;
 -- Step 4: remove the scratch table.
 DROP TABLE photo_insights_backup;
--- a/migrations/2026-04-24-000000_add_fewshot_source_to_insights/up.sql
+++ b/migrations/2026-04-24-000000_add_fewshot_source_to_insights/up.sql
@@ -0,0 +1 @@
 ALTER TABLE photo_insights ADD COLUMN fewshot_source_ids TEXT;
--- a/src/ai/daily_summary_job.rs
+++ b/src/ai/daily_summary_job.rs
@@ -6,12 +6,83 @@ use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use tokio::time::sleep;
-use crate::ai::{OllamaClient, SmsApiClient, SmsMessage};
+use crate::ai::{EMBEDDING_MODEL, OllamaClient, SmsApiClient, SmsMessage, user_display_name};
 use crate::database::{DailySummaryDao, InsertDailySummary};
 use crate::otel::global_tracer;
 /// Strip boilerplate prefixes and common phrases from summaries before embedding.
 /// This improves embedding diversity by removing structural similarity.
 /// Maximum number of messages passed to the summarizer for a single day.
 /// Tuned to avoid token overflow on typical chat models; shared between
 /// the production job and the test binary so they can't drift.
 pub const DAILY_SUMMARY_MESSAGE_LIMIT: usize = 300;
 /// System prompt used when generating daily conversation summaries.
 pub const DAILY_SUMMARY_SYSTEM_PROMPT: &str = "You are a conversation summarizer. Create clear, factual summaries with \
     precise subject attribution AND extract distinctive keywords. Focus on \
     specific, unique terms that differentiate this conversation from others.";
 /// Build the prompt for a single day's conversation summary. Shared by the
 /// production job and the test binary so prompt tweaks land in both places.
 /// Returns `(prompt, system_prompt)`.
 pub fn build_daily_summary_prompt(
    contact: &str,
    date: &NaiveDate,
    messages: &[SmsMessage],
 ) -> (String, &'static str) {
    let user_name = user_display_name();
    let messages_text: String = messages
        .iter()
        .take(DAILY_SUMMARY_MESSAGE_LIMIT)
        .map(|m| {
            if m.is_sent {
                format!("{}: {}", user_name, m.body)
            } else {
                format!("{}: {}", m.contact, m.body)
            }
        })
        .collect::<Vec<_>>()
        .join("\n");
    let prompt = format!(
        r#"Summarize this day's conversation between {user_name} and {contact}.
 CRITICAL FORMAT RULES:
 - Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
 - Do NOT repeat the date at the beginning
 - Start DIRECTLY with the content - begin with a person's name or action
 - Write in past tense, as if recording what happened
 NARRATIVE (4-8 sentences):
 - What specific topics, activities, or events were discussed?
 - What places, people, or organizations were mentioned?
 - What plans were made or decisions discussed?
 - Clearly distinguish between what {user_name} did versus what {contact} did
 KEYWORDS (comma-separated):
 5-10 specific keywords that capture this conversation's unique content:
 - Proper nouns (people, places, brands)
 - Specific activities ("drum corps audition" not just "music")
 - Distinctive terms that make this day unique
 Date: {month_day_year} ({weekday})
 Messages:
 {messages_text}
 YOUR RESPONSE (follow this format EXACTLY):
 Summary: [Start directly with content, NO preamble]
 Keywords: [specific, unique terms]"#,
        user_name = user_name,
        contact = contact,
        month_day_year = date.format("%B %d, %Y"),
        weekday = date.format("%A"),
        messages_text = messages_text,
    );
    (prompt, DAILY_SUMMARY_SYSTEM_PROMPT)
 }
 pub fn strip_summary_boilerplate(summary: &str) -> String {
    let mut text = summary.trim().to_string();
@@ -290,65 +361,10 @@ async fn generate_and_store_daily_summary(
    span.set_attribute(KeyValue::new("contact", contact.to_string()));
    span.set_attribute(KeyValue::new("message_count", messages.len() as i64));
-    // Format messages for LLM
+    let (prompt, system_prompt) = build_daily_summary_prompt(contact, date, messages);
    let messages_text: String = messages
        .iter()
        .take(200) // Limit to 200 messages per day to avoid token overflow
        .map(|m| {
            if m.is_sent {
                format!("Me: {}", m.body)
            } else {
                format!("{}: {}", m.contact, m.body)
            }
        })
        .collect::<Vec<_>>()
        .join("\n");
    let weekday = date.format("%A");
    let prompt = format!(
        r#"Summarize this day's conversation between me and {}.
 CRITICAL FORMAT RULES:
 - Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
 - Do NOT repeat the date at the beginning
 - Start DIRECTLY with the content - begin with a person's name or action
 - Write in past tense, as if recording what happened
 NARRATIVE (3-5 sentences):
 - What specific topics, activities, or events were discussed?
 - What places, people, or organizations were mentioned?
 - What plans were made or decisions discussed?
 - Clearly distinguish between what "I" did versus what {} did
 KEYWORDS (comma-separated):
 5-10 specific keywords that capture this conversation's unique content:
 - Proper nouns (people, places, brands)
 - Specific activities ("drum corps audition" not just "music")
 - Distinctive terms that make this day unique
 Date: {} ({})
 Messages:
 {}
 YOUR RESPONSE (follow this format EXACTLY):
 Summary: [Start directly with content, NO preamble]
 Keywords: [specific, unique terms]"#,
        contact,
        contact,
        date.format("%B %d, %Y"),
        weekday,
        messages_text
    );
    // Generate summary with LLM
-    let summary = ollama
+    let summary = ollama.generate(&prompt, Some(system_prompt)).await?;
        .generate(
            &prompt,
            Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
        )
        .await?;
    log::debug!(
        "Generated summary for {}: {}",
@@ -381,8 +397,7 @@ Keywords: [specific, unique terms]"#,
        message_count: messages.len() as i32,
        embedding,
        created_at: Utc::now().timestamp(),
-        // model_version: "nomic-embed-text:v1.5".to_string(),
+        model_version: EMBEDDING_MODEL.to_string(),
        model_version: "mxbai-embed-large:335m".to_string(),
    };
    // Create context from current span for DB operation
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -3,6 +3,8 @@ use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
 use serde::{Deserialize, Serialize};
 use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
 use crate::ai::ollama::ChatMessage;
 use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
 use crate::data::Claims;
 use crate::database::{ExifDao, InsightDao};
@@ -11,6 +13,14 @@ use crate::otel::{extract_context_from_request, global_tracer};
 use crate::state::AppState;
 use crate::utils::normalize_path;
 /// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
 /// ids of approved insights whose `training_messages` should be compressed
 /// into trajectory form and injected into the system prompt. Empty = no
 /// change in behavior. Request-level `fewshot_insight_ids` overrides this
 /// when non-empty. The agentic handler only uses the first two ids.
 // Example of a populated list, kept disabled:
 // const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[2918, 2908];
 const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[];
 #[derive(Debug, Deserialize)]
 pub struct GeneratePhotoInsightRequest {
    pub file_path: String,
@@ -28,6 +38,16 @@ pub struct GeneratePhotoInsightRequest {
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
    /// OpenRouter chat). Only respected by the agentic endpoint.
    #[serde(default)]
    pub backend: Option<String>,
    /// Insight ids whose stored `training_messages` should be compressed
    /// into few-shot trajectories and injected into the system prompt.
    /// Silently truncated to the first 2. When absent/empty, the handler
    /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
    #[serde(default)]
    pub fewshot_insight_ids: Option<Vec<i32>>,
 }
 #[derive(Debug, Deserialize)]
@@ -65,6 +85,10 @@ pub struct PhotoInsightResponse {
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub approved: Option<bool>,
    pub backend: String,
    /// True when the insight was generated agentically and a chat
    /// continuation can be started against it. Drives the mobile chat button.
    pub has_training_messages: bool,
 }
 #[derive(Debug, Serialize)]
@@ -187,6 +211,8 @@ pub async fn get_insight_handler(
                prompt_eval_count: None,
                eval_count: None,
                approved: insight.approved,
                has_training_messages: insight.training_messages.is_some(),
                backend: insight.backend,
            };
            HttpResponse::Ok().json(response)
        }
@@ -254,6 +280,8 @@ pub async fn get_all_insights_handler(
                    prompt_eval_count: None,
                    eval_count: None,
                    approved: insight.approved,
                    has_training_messages: insight.training_messages.is_some(),
                    backend: insight.backend,
                })
                .collect();
@@ -309,6 +337,45 @@ pub async fn generate_agentic_insight_handler(
        max_iterations
    );
    if let Some(ref b) = request.backend {
        span.set_attribute(KeyValue::new("backend", b.clone()));
    }
    // Resolve few-shot ids: request-provided ids take precedence when
    // non-empty; otherwise fall back to the hardcoded defaults.
    let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
        Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
        _ => DEFAULT_FEWSHOT_INSIGHT_IDS
            .iter()
            .take(2)
            .copied()
            .collect(),
    };
    span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));
    let fewshot_examples: Vec<Vec<ChatMessage>> = {
        let otel_context = opentelemetry::Context::new();
        let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
        fewshot_ids
            .iter()
            .filter_map(|id| {
                let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
                let json = insight.training_messages?;
                match serde_json::from_str::<Vec<ChatMessage>>(&json) {
                    Ok(msgs) => Some(msgs),
                    Err(e) => {
                        log::warn!(
                            "Few-shot insight {} has malformed training_messages: {}",
                            id,
                            e
                        );
                        None
                    }
                }
            })
            .collect()
    };
    let result = insight_generator
        .generate_agentic_insight_for_photo(
            &normalized_path,
@@ -320,6 +387,9 @@ pub async fn generate_agentic_insight_handler(
            request.top_k,
            request.min_p,
            max_iterations,
            request.backend.clone(),
            fewshot_examples,
            fewshot_ids,
        )
        .await;
@@ -341,6 +411,8 @@ pub async fn generate_agentic_insight_handler(
                        prompt_eval_count,
                        eval_count,
                        approved: insight.approved,
                        has_training_messages: insight.training_messages.is_some(),
                        backend: insight.backend,
                    };
                    HttpResponse::Ok().json(response)
                }
@@ -432,6 +504,34 @@ pub async fn get_available_models_handler(
    HttpResponse::Ok().json(response)
 }
 /// JSON body returned by `GET /insights/openrouter/models`.
 #[derive(Debug, Serialize)]
 pub struct OpenRouterModelsResponse {
    /// Model ids copied from `AppState::openrouter_allowed_models`.
    pub models: Vec<String>,
    /// `primary_model` of the configured OpenRouter client; `None` when no
    /// client is configured.
    pub default_model: Option<String>,
    /// `true` when `AppState::openrouter` is `Some`.
    pub configured: bool,
 }
 /// GET /insights/openrouter/models - Reports the curated OpenRouter model
 /// ids available to clients for the hybrid backend. The list is taken as-is
 /// from application state (`OPENROUTER_ALLOWED_MODELS`); OpenRouter itself
 /// is never contacted.
 #[get("/insights/openrouter/models")]
 pub async fn get_openrouter_models_handler(
    _claims: Claims,
    app_state: web::Data<crate::state::AppState>,
 ) -> impl Responder {
    // Borrow the optional client once; both derived fields read from it.
    let client = app_state.openrouter.as_ref();
    HttpResponse::Ok().json(OpenRouterModelsResponse {
        models: app_state.openrouter_allowed_models.clone(),
        default_model: client.map(|c| c.primary_model.clone()),
        configured: client.is_some(),
    })
 }
 /// POST /insights/rate - Rate an insight (thumbs up/down for training data)
 #[post("/insights/rate")]
 pub async fn rate_insight_handler(
@@ -517,3 +617,370 @@ pub async fn export_training_data_handler(
        }
    }
 }
 /// JSON request body accepted by `POST /insights/chat` and
 /// `POST /insights/chat/stream`. Apart from `library`, every field is copied
 /// into a `ChatTurnRequest` unchanged by the handlers.
 #[derive(Debug, Deserialize)]
 pub struct ChatTurnHttpRequest {
    /// Path of the photo whose insight conversation is being continued.
    pub file_path: String,
    /// Optional library selector, resolved via
    /// `libraries::resolve_library_param`; the handlers use the primary
    /// library when resolution yields `None`.
    #[serde(default)]
    pub library: Option<String>,
    /// The user's follow-up message for this turn.
    pub user_message: String,
    /// Optional model override, forwarded to the chat service.
    #[serde(default)]
    pub model: Option<String>,
    /// Optional backend selector, forwarded to the chat service.
    #[serde(default)]
    pub backend: Option<String>,
    // The options below are passed through to the chat service untouched.
    // NOTE(review): presumably sampling / context-window overrides — confirm
    // against the service implementation.
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
    /// Optional iteration limit, forwarded to the chat service.
    #[serde(default)]
    pub max_iterations: Option<usize>,
    /// Forwarded as `ChatTurnRequest::amend`; `false` when omitted from the
    /// request body.
    #[serde(default)]
    pub amend: bool,
 }
 /// JSON body returned by `POST /insights/chat` on success. `Option` fields
 /// are left out of the serialized JSON when they are `None`.
 #[derive(Debug, Serialize)]
 pub struct ChatTurnHttpResponse {
    /// Assistant reply text for this turn.
    pub assistant_message: String,
    /// Copied from the chat result's `tool_calls_made`.
    pub tool_calls_made: usize,
    /// Copied from the chat result's `iterations_used`.
    pub iterations_used: usize,
    /// Copied from the chat result's `truncated` flag.
    pub truncated: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eval_count: Option<i32>,
    /// Copied from the chat result's `amended_insight_id`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub amended_insight_id: Option<i32>,
    /// The chat result's `backend_used`.
    pub backend: String,
    /// The chat result's `model_used`.
    pub model: String,
 }
 /// POST /insights/chat — submit a follow-up turn against an existing insight.
 #[post("/insights/chat")]
 pub async fn chat_turn_handler(
    http_request: HttpRequest,
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
 ) -> impl Responder {
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
    span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };
    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };
    match app_state.insight_chat.chat_turn(chat_req).await {
        Ok(result) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(ChatTurnHttpResponse {
                assistant_message: result.assistant_message,
                tool_calls_made: result.tool_calls_made,
                iterations_used: result.iterations_used,
                truncated: result.truncated,
                prompt_eval_count: result.prompt_eval_count,
                eval_count: result.eval_count,
                amended_insight_id: result.amended_insight_id,
                backend: result.backend_used,
                model: result.model_used,
            })
        }
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat turn failed: {}", msg);
            span.set_status(Status::error(msg.clone()));
            // Map well-known errors to client-facing 4xx codes.
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("user_message")
                || msg.contains("unknown backend")
                || msg.contains("switching from local to hybrid")
                || msg.contains("hybrid backend unavailable")
            {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
 }
 /// Query-string parameters for `GET /insights/chat/history`.
 #[derive(Debug, Deserialize)]
 pub struct ChatHistoryQuery {
    /// Photo path; passed to `load_history` by the handler.
    pub path: String,
    /// Optional library selector. The handler resolves it, but the resolved
    /// library is not used for the history lookup.
    #[serde(default)]
    pub library: Option<String>,
 }
 /// JSON body returned by `GET /insights/chat/history`. Every field is copied
 /// from the view returned by `load_history`.
 #[derive(Debug, Serialize)]
 pub struct ChatHistoryHttpResponse {
    /// Rendered transcript messages, in the order the view provides them.
    pub messages: Vec<RenderedHistoryMessage>,
    pub turn_count: usize,
    pub model_version: String,
    pub backend: String,
 }
 /// One message of the rendered chat transcript.
 #[derive(Debug, Serialize)]
 pub struct RenderedHistoryMessage {
    pub role: String,
    pub content: String,
    // NOTE(review): presumably marks the message(s) from the initial insight
    // generation rather than a follow-up turn — confirm against the service.
    pub is_initial: bool,
    /// Tool invocations attached to this message; left out of the JSON when
    /// the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<HistoryToolInvocation>,
 }
 /// A tool call and its result as shown in the rendered transcript.
 #[derive(Debug, Serialize)]
 pub struct HistoryToolInvocation {
    /// Tool name.
    pub name: String,
    /// Arguments of the call as JSON.
    pub arguments: serde_json::Value,
    /// Result text of the call.
    pub result: String,
    /// Serialized only when `true` (`Not::not` skips the field for `false`).
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub result_truncated: bool,
 }
 /// JSON request body for `POST /insights/chat/rewind`.
 #[derive(Debug, Deserialize)]
 pub struct ChatRewindHttpRequest {
    /// Path of the photo whose stored conversation is being truncated.
    pub file_path: String,
    /// Optional library selector; the handler uses the primary library when
    /// resolution yields `None`.
    #[serde(default)]
    pub library: Option<String>,
    /// 0-based index into the rendered transcript. The message at this
    /// index, and everything after it, is discarded. Must be > 0 — the
    /// initial user message is protected.
    pub discard_from_rendered_index: usize,
 }
 /// POST /insights/chat/rewind — truncate the stored conversation so the
 /// rendered message at `discard_from_rendered_index` (and everything after)
 /// is removed. Use when a user wants to retry a turn with a different
 /// prompt without prior replies poisoning context.
 #[post("/insights/chat/rewind")]
 pub async fn chat_rewind_handler(
    _claims: Claims,
    request: web::Json<ChatRewindHttpRequest>,
    app_state: web::Data<AppState>,
 ) -> impl Responder {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };
    match app_state
        .insight_chat
        .rewind_history(
            library.id,
            &request.file_path,
            request.discard_from_rendered_index,
        )
        .await
    {
        Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })),
        Err(e) => {
            let msg = format!("{}", e);
            log::error!("Chat rewind failed: {}", msg);
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("cannot discard the initial") || msg.contains("out of range") {
                HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
 }
 /// GET /insights/chat/history — return the rendered transcript for a photo.
 ///
 /// Responds 400 when the `library` query parameter cannot be resolved, in
 /// line with the other chat endpoints. Lookup failures are mapped by message
 /// text: "no insight found" → 404, "no chat history" → 409, anything else → 500.
 #[get("/insights/chat/history")]
 pub async fn chat_history_handler(
    _claims: Claims,
    query: web::Query<ChatHistoryQuery>,
    app_state: web::Data<AppState>,
 ) -> impl Responder {
    // library param parsed for parity with other insight endpoints, even
    // though load_history currently keys on file_path alone (matches the
    // existing get_insight DAO contract). An unresolvable value is rejected
    // rather than silently replaced by the primary library, so a typo in the
    // parameter gets the same 400 here as on the sibling chat endpoints.
    let _library = match libraries::resolve_library_param(&app_state, query.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };
    match app_state.insight_chat.load_history(&query.path) {
        Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
            messages: view
                .messages
                .into_iter()
                .map(|m| RenderedHistoryMessage {
                    role: m.role,
                    content: m.content,
                    is_initial: m.is_initial,
                    tools: m
                        .tools
                        .into_iter()
                        .map(|t| HistoryToolInvocation {
                            name: t.name,
                            arguments: t.arguments,
                            result: t.result,
                            result_truncated: t.result_truncated,
                        })
                        .collect(),
                })
                .collect(),
            turn_count: view.turn_count,
            model_version: view.model_version,
            backend: view.backend,
        }),
        Err(e) => {
            let msg = e.to_string();
            if msg.contains("no insight found") {
                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
            } else if msg.contains("no chat history") {
                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
            } else {
                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
            }
        }
    }
 }
 /// POST /insights/chat/stream — streaming variant of /insights/chat.
 /// Returns `text/event-stream` with one event per chat stream event.
 #[post("/insights/chat/stream")]
 pub async fn chat_stream_handler(
    _claims: Claims,
    request: web::Json<ChatTurnHttpRequest>,
    app_state: web::Data<AppState>,
 ) -> HttpResponse {
    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(e) => {
            return HttpResponse::BadRequest().json(serde_json::json!({
                "error": format!("invalid library: {}", e)
            }));
        }
    };
    let chat_req = ChatTurnRequest {
        library_id: library.id,
        file_path: request.file_path.clone(),
        user_message: request.user_message.clone(),
        model: request.model.clone(),
        backend: request.backend.clone(),
        num_ctx: request.num_ctx,
        temperature: request.temperature,
        top_p: request.top_p,
        top_k: request.top_k,
        min_p: request.min_p,
        max_iterations: request.max_iterations,
        amend: request.amend,
    };
    let service = app_state.insight_chat.clone();
    let events = service.chat_turn_stream(chat_req);
    // Map ChatStreamEvent → SSE frame bytes.
    let sse_stream = futures::stream::StreamExt::map(events, |ev| {
        let frame = render_sse_frame(&ev);
        Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame))
    });
    HttpResponse::Ok()
        .content_type("text/event-stream")
        .insert_header(("Cache-Control", "no-cache"))
        .insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering
        .streaming(sse_stream)
 }
 /// Render one `ChatStreamEvent` as a Server-Sent Events frame.
 ///
 /// The frame has the form `event: <name>\ndata: <json>\n\n`, where `<name>`
 /// is one of `iteration_start`, `truncated`, `text`, `tool_call`,
 /// `tool_result`, `done` or `error`, and `<json>` is the event payload
 /// serialized with `serde_json::to_string`. If serialization fails the
 /// payload falls back to `{}`.
 fn render_sse_frame(ev: &ChatStreamEvent) -> String {
    let (event_name, payload) = match ev {
        ChatStreamEvent::IterationStart { n, max } => {
            ("iteration_start", serde_json::json!({ "n": n, "max": max }))
        }
        ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})),
        ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })),
        ChatStreamEvent::ToolCall {
            index,
            name,
            arguments,
        } => (
            "tool_call",
            serde_json::json!({ "index": index, "name": name, "arguments": arguments }),
        ),
        ChatStreamEvent::ToolResult {
            index,
            name,
            result,
            result_truncated,
        } => (
            "tool_result",
            serde_json::json!({
                "index": index,
                "name": name,
                "result": result,
                "result_truncated": result_truncated,
            }),
        ),
        ChatStreamEvent::Done {
            tool_calls_made,
            iterations_used,
            truncated,
            prompt_eval_count,
            eval_count,
            amended_insight_id,
            backend_used,
            model_used,
        } => (
            "done",
            // The internal `backend_used` / `model_used` fields are exposed
            // under the shorter `backend` / `model` keys.
            serde_json::json!({
                "tool_calls_made": tool_calls_made,
                "iterations_used": iterations_used,
                "truncated": truncated,
                "prompt_eval_count": prompt_eval_count,
                "eval_count": eval_count,
                "amended_insight_id": amended_insight_id,
                "backend": backend_used,
                "model": model_used,
            }),
        ),
        ChatStreamEvent::Error(msg) => ("error", serde_json::json!({ "message": msg })),
    };
    let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
    format!("event: {}\ndata: {}\n\n", event_name, data)
 }
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
--- a/src/ai/llm_client.rs
+++ b/src/ai/llm_client.rs
@@ -0,0 +1,172 @@
 use anyhow::Result;
 use async_trait::async_trait;
 use futures::stream::BoxStream;
 use serde::{Deserialize, Serialize};
 /// Provider-agnostic surface for LLM backends (Ollama, OpenRouter, …).
 ///
 /// Impls translate these canonical shapes at the wire boundary: tool-call
 /// arguments stay as `serde_json::Value` in memory and are stringified only
 /// when a provider requires it (OpenAI-compatible APIs do), and `images`
 /// stays as base64 strings here and is rewritten into content-parts where
 /// needed.
 // First consumer lands in a later PR (OpenRouter impl + hybrid mode routing).
 #[allow(dead_code)]
 #[async_trait]
 pub trait LlmClient: Send + Sync {
    /// Single-shot text generation. Optional system prompt and optional
    /// base64 images (ignored by providers without vision support).
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String>;
    /// Multi-turn chat with tool definitions. Returns the assistant message
    /// (which may contain tool_calls) plus optional prompt/eval token counts.
    // NOTE(review): tuple order is presumably
    // (message, prompt_eval_count, eval_count) — confirm against impls.
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;
    /// Streaming variant of `chat_with_tools`. The returned stream yields
    /// `TextDelta` items as content is produced, then a single terminal
    /// `Done` carrying the complete assembled message (with tool_calls, if
    /// any) plus token usage counts. Implementations that can't stream may
    /// fall back to calling `chat_with_tools` and emitting the full reply
    /// as one `Done` event.
    async fn chat_with_tools_stream(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>>;
    /// Batch embedding generation. Dimensionality is provider/model specific.
    // NOTE(review): presumably one output vector per input text, in input
    // order — confirm against impls.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
    /// One-shot vision description of an image. Used to convert images into
    /// plain text for the hybrid-mode conversation flow.
    async fn describe_image(&self, image_base64: &str) -> Result<String>;
    /// Enumerate available models with their capabilities.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;
    /// Look up capabilities for a single model.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;
    /// Primary model identifier this client was constructed with.
    fn primary_model(&self) -> &str;
 }
 /// Events emitted by streaming `chat_with_tools_stream`. A stream is a
 /// sequence of zero or more `TextDelta` events followed by exactly one
 /// `Done`. Callers should treat `Done` as terminal — further items (if any
 /// slip through due to upstream misbehavior) are safe to ignore.
 #[derive(Debug, Clone)]
 pub enum LlmStreamEvent {
    /// Incremental content token(s) from the model. Concatenate in order to
    /// reconstruct the assistant's final text.
    TextDelta(String),
    /// Terminal event with the full assembled message (content + any
    /// tool_calls). `message.content` equals the concatenation of every
    /// preceding `TextDelta.0`.
    Done {
        /// The complete assistant message.
        message: ChatMessage,
        /// Prompt-side token usage count, when available.
        prompt_eval_count: Option<i32>,
        /// Generation-side token usage count, when available.
        eval_count: Option<i32>,
    },
 }
 /// Tool definition sent to the model (OpenAI-compatible function schema).
 #[derive(Serialize, Clone, Debug)]
 pub struct Tool {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    /// Name, description and parameters of the callable function.
    pub function: ToolFunction,
 }
 /// Function description carried inside a [`Tool`].
 #[derive(Serialize, Clone, Debug)]
 pub struct ToolFunction {
    /// Function name.
    pub name: String,
    /// Free-text description of the function.
    pub description: String,
    /// Parameter definition as a JSON value.
    // NOTE(review): presumably a JSON Schema object, per the
    // OpenAI-compatible function format — confirm against call sites.
    pub parameters: serde_json::Value,
 }
 impl Tool {
    /// Build a tool definition of type `"function"` from a name, a
    /// description and a JSON `parameters` value.
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        let function = ToolFunction {
            name: String::from(name),
            description: String::from(description),
            parameters,
        };
        Self {
            tool_type: String::from("function"),
            function,
        }
    }
 }
 /// A message in the chat conversation history.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ChatMessage {
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk.
    /// Defaults to an empty string when the field is missing on deserialize.
    #[serde(default)]
    pub content: String,
    /// Tool calls attached to the message; left out of the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models.
    /// Left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
 }
 impl ChatMessage {
    /// Shared constructor: a message with the given role and text, carrying
    /// neither tool calls nor images.
    fn text_only(role: &str, content: impl Into<String>) -> Self {
        Self {
            role: role.to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
    /// A `"system"` message with the given text.
    pub fn system(content: impl Into<String>) -> Self {
        Self::text_only("system", content)
    }
    /// A `"user"` message with the given text.
    pub fn user(content: impl Into<String>) -> Self {
        Self::text_only("user", content)
    }
    /// A `"tool"` message carrying a tool's result text.
    pub fn tool_result(content: impl Into<String>) -> Self {
        Self::text_only("tool", content)
    }
 }
 /// Tool call returned by the model in an assistant message.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCall {
    /// The function the model wants invoked, with its arguments.
    pub function: ToolCallFunction,
    /// Optional call identifier; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
 }
 /// Function name and arguments of a [`ToolCall`].
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCallFunction {
    /// Name of the function being called.
    pub name: String,
    /// Canonical shape: native JSON. Providers that use JSON-encoded-string
    /// arguments (OpenAI-compatible) translate at their wire boundary.
    pub arguments: serde_json::Value,
 }
 /// Capability flags reported for a single model.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ModelCapabilities {
    /// Model name.
    pub name: String,
    // NOTE(review): presumably true when the model accepts image input —
    // confirm against the provider impls.
    pub has_vision: bool,
    // NOTE(review): presumably true when the model supports tool/function
    // calling — confirm against the provider impls.
    pub has_tool_calling: bool,
 }
--- a/src/ai/mod.rs
+++ b/src/ai/mod.rs
@@ -1,17 +1,37 @@
 pub mod daily_summary_job;
 pub mod handlers;
 pub mod insight_chat;
 pub mod insight_generator;
 pub mod llm_client;
 pub mod ollama;
 pub mod openrouter;
 pub mod sms_client;
 // strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
 #[allow(unused_imports)]
-pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
+pub use daily_summary_job::{
    DAILY_SUMMARY_MESSAGE_LIMIT, DAILY_SUMMARY_SYSTEM_PROMPT, build_daily_summary_prompt,
    generate_daily_summaries, strip_summary_boilerplate,
 };
 pub use handlers::{
    chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler,
    delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
    generate_insight_handler, get_all_insights_handler, get_available_models_handler,
-    get_insight_handler, rate_insight_handler,
+    get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
 };
 pub use insight_generator::InsightGenerator;
-pub use ollama::{ModelCapabilities, OllamaClient};
+#[allow(unused_imports)]
 pub use llm_client::{
    ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
 };
 pub use ollama::{EMBEDDING_MODEL, OllamaClient};
 pub use sms_client::{SmsApiClient, SmsMessage};
 /// Display name used for the user in message transcripts and first-person
 /// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models
 /// often confuse `"Me:"` in a transcript with their own role — setting
 /// `USER_NAME=Cameron` (or similar) in the environment eliminates that
 /// ambiguity across daily summaries, insight generation, and chat.
 ///
 /// Surrounding whitespace is trimmed. A value that is unset, not valid
 /// Unicode, empty, or whitespace-only falls back to `"Me"`, so a transcript
 /// never ends up with a blank speaker label.
 pub fn user_display_name() -> String {
    std::env::var("USER_NAME")
        .ok()
        .map(|name| name.trim().to_string())
        .filter(|name| !name.is_empty())
        .unwrap_or_else(|| "Me".to_string())
 }
--- a/src/ai/ollama.rs
+++ b/src/ai/ollama.rs
@@ -1,14 +1,43 @@
 use anyhow::{Context, Result};
 use async_trait::async_trait;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 use crate::ai::llm_client::{LlmClient, LlmStreamEvent};
 use futures::stream::{BoxStream, StreamExt};
 // Re-export shared types so existing `crate::ai::ollama::{...}` imports
 // continue to resolve.
 pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
 #[allow(unused_imports)]
 pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
 // Cache duration: 15 minutes
 const CACHE_DURATION_SECS: u64 = 15 * 60;
 /// Default total request timeout for generation calls, in seconds.
 /// Overridable via `OLLAMA_REQUEST_TIMEOUT_SECONDS` env var for slow
 /// CPU-offloaded models where inference can take several minutes.
 const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
 /// Resolve the generation request timeout in seconds.
 ///
 /// Reads `OLLAMA_REQUEST_TIMEOUT_SECONDS`, ignoring surrounding whitespace
 /// (env files often leave a trailing space or newline). Falls back to
 /// [`DEFAULT_REQUEST_TIMEOUT_SECS`] when the variable is unset, is not a
 /// non-negative integer, or is `0`.
 fn configured_request_timeout_secs() -> u64 {
    std::env::var("OLLAMA_REQUEST_TIMEOUT_SECONDS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        // A zero timeout would make every request fail immediately.
        .filter(|&secs| secs > 0)
        .unwrap_or(DEFAULT_REQUEST_TIMEOUT_SECS)
 }
 /// Embedding model used across the app. Callers that persist a
 /// `model_version` alongside an embedding should read this constant so the
 /// stored label always matches what `generate_embeddings` actually ran.
 pub const EMBEDDING_MODEL: &str = "nomic-embed-text:v1.5";
 // Cached entry with timestamp
 #[derive(Clone)]
 struct CachedEntry<T> {
@@ -50,6 +79,12 @@ pub struct OllamaClient {
    top_p: Option<f32>,
    top_k: Option<i32>,
    min_p: Option<f32>,
    /// Sticky preference shared across clones: when the fallback server
    /// succeeded most recently, try it first on the next call. Avoids
    /// re-probing the primary with a model it doesn't have loaded across
    /// every iteration of the agent loop. `Arc<AtomicBool>` so cloning
    /// `OllamaClient` shares the flag rather than resetting it.
    prefer_fallback: Arc<AtomicBool>,
 }
 impl OllamaClient {
@@ -62,7 +97,7 @@ impl OllamaClient {
        Self {
            client: Client::builder()
                .connect_timeout(Duration::from_secs(5)) // Quick connection timeout
-                .timeout(Duration::from_secs(120)) // Total request timeout for generation
+                .timeout(Duration::from_secs(configured_request_timeout_secs()))
                .build()
                .unwrap_or_else(|_| Client::new()),
            primary_url,
@@ -74,9 +109,44 @@ impl OllamaClient {
            top_p: None,
            top_k: None,
            min_p: None,
            prefer_fallback: Arc::new(AtomicBool::new(false)),
        }
    }
    /// List the servers to try, in order, as `(label, url, model)` tuples.
    ///
    /// The primary server is always included. The fallback is included only
    /// when a fallback URL is configured, and uses the primary model when no
    /// fallback model is set. The fallback comes first while the sticky
    /// `prefer_fallback` flag is set; otherwise the primary comes first.
    fn attempt_order(&self) -> Vec<(&'static str, String, String)> {
        let primary = (
            "primary",
            self.primary_url.clone(),
            self.primary_model.clone(),
        );
        let fallback = self.fallback_url.as_ref().map(|url| {
            let model = self
                .fallback_model
                .clone()
                .unwrap_or_else(|| self.primary_model.clone());
            ("fallback", url.clone(), model)
        });
        match fallback {
            // The flag is only consulted when a fallback server exists.
            Some(fb) if self.prefer_fallback.load(Ordering::Relaxed) => vec![fb, primary],
            Some(fb) => vec![primary, fb],
            None => vec![primary],
        }
    }
    pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
        self.num_ctx = num_ctx;
    }
@@ -311,6 +381,7 @@ impl OllamaClient {
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
        think: Option<bool>,
    ) -> Result<String> {
        let request = OllamaRequest {
            model: model.to_string(),
@@ -319,6 +390,7 @@ impl OllamaClient {
            system: system.map(|s| s.to_string()),
            options: self.build_options(),
            images,
            think,
        };
        let response = self
@@ -339,6 +411,12 @@ impl OllamaClient {
        }
        let result: OllamaResponse = response.json().await?;
        log_chat_metrics(
            result.prompt_eval_count,
            result.prompt_eval_duration,
            result.eval_count,
            result.eval_duration,
        );
        Ok(result.response)
    }
@@ -346,11 +424,31 @@ impl OllamaClient {
        self.generate_with_images(prompt, system, None).await
    }
    /// Variant of `generate` that sets Ollama's top-level `think: false`.
    /// Used by latency-sensitive callers like the rerank pass, where the
    /// task has nothing to reason about and chain-of-thought tokens are
    /// wasted wall time. Server-side no-op on non-reasoning models.
    ///
    /// Sends no images; delegates to `generate_with_options` with
    /// `think = Some(false)`.
    pub async fn generate_no_think(&self, prompt: &str, system: Option<&str>) -> Result<String> {
        self.generate_with_options(prompt, system, None, Some(false))
            .await
    }
    /// Generate text for `prompt`, with an optional system prompt and optional
    /// images. Delegates to `generate_with_options` with `think = None`, i.e.
    /// without setting the `think` option.
    pub async fn generate_with_images(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        self.generate_with_options(prompt, system, images, None)
            .await
    }
    async fn generate_with_options(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
        think: Option<bool>,
    ) -> Result<String> {
        log::debug!("=== Ollama Request ===");
        log::debug!("Primary model: {}", self.primary_model);
@@ -376,6 +474,7 @@ impl OllamaClient {
                prompt,
                system,
                images.clone(),
                think,
            )
            .await;
@@ -399,7 +498,14 @@ impl OllamaClient {
                        fallback_model
                    );
                    match self
-                        .try_generate(fallback_url, fallback_model, prompt, system, images.clone())
+                        .try_generate(
                            fallback_url,
                            fallback_model,
                            prompt,
                            system,
                            images.clone(),
                            think,
                        )
                        .await
                    {
                        Ok(response) => {
@@ -471,6 +577,7 @@ Capture the key moment or theme. Return ONLY the title, nothing else."#,
    ) -> Result<String> {
        let location_str = location.unwrap_or("Unknown");
        let sms_str = sms_summary.unwrap_or("No messages");
        let user_name = crate::ai::user_display_name();
        let prompt = if image_base64.is_some() {
            if let Some(contact_name) = contact {
@@ -482,13 +589,14 @@ Location: {}
 Person/Contact: {}
 Messages: {}
-Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
+Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
                    date.format("%B %d, %Y"),
                    location_str,
                    contact_name,
                    sms_str,
                    contact_name,
-                    contact_name
+                    contact_name,
                    user_name
                )
            } else {
                format!(
@@ -498,10 +606,11 @@ Date: {}
 Location: {}
 Messages: {}
-Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
+Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
                    date.format("%B %d, %Y"),
                    location_str,
-                    sms_str
+                    sms_str,
                    user_name
                )
            }
        } else if let Some(contact_name) = contact {
@@ -513,13 +622,14 @@ Analyze the image and use specific details from both the visual content and the
        Person/Contact: {}
        Messages: {}
-        Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
+        Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
                date.format("%B %d, %Y"),
                location_str,
                contact_name,
                sms_str,
                contact_name,
-                contact_name
+                contact_name,
                user_name
            )
        } else {
            format!(
@@ -529,10 +639,11 @@ Analyze the image and use specific details from both the visual content and the
        Location: {}
        Messages: {}
-        Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
+        Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
                date.format("%B %d, %Y"),
                location_str,
-                sms_str
+                sms_str,
                user_name
            )
        };
@@ -561,68 +672,229 @@ Analyze the image and use specific details from both the visual content and the
    /// Send a chat request with tool definitions to /api/chat.
    /// Returns the assistant's response message (may contain tool_calls or final content).
-    /// Uses primary/fallback URL routing same as other generation methods.
+    /// Tries servers in preference order — most recently successful first —
    /// so a fallback-only model doesn't re-404 against the primary on every
    /// iteration of the agent loop.
    pub async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
-        // Try primary server first
+        let order = self.attempt_order();
        let mut errors: Vec<String> = Vec::new();
        for (label, url, model) in &order {
            log::info!(
-            "Attempting chat_with_tools with primary server: {} (model: {})",
+                "Attempting chat_with_tools with {} server: {} (model: {})",
-            self.primary_url,
+                label,
-            self.primary_model
+                url,
-        );
+                model
        let primary_result = self
            .try_chat_with_tools(&self.primary_url, messages.clone(), tools.clone())
            .await;
        match primary_result {
            Ok(result) => {
                log::info!("Successfully got chat_with_tools response from primary server");
                Ok(result)
            }
            Err(e) => {
                log::warn!("Primary server chat_with_tools failed: {}", e);
                // Try fallback server if available
                if let Some(fallback_url) = &self.fallback_url {
                    let fallback_model =
                        self.fallback_model.as_ref().unwrap_or(&self.primary_model);
                    log::info!(
                        "Attempting chat_with_tools with fallback server: {} (model: {})",
                        fallback_url,
                        fallback_model
            );
            match self
-                        .try_chat_with_tools(fallback_url, messages, tools)
+                .try_chat_with_tools(url, messages.clone(), tools.clone())
                .await
            {
                Ok(result) => {
                    log::info!(
-                                "Successfully got chat_with_tools response from fallback server"
+                        "Successfully got chat_with_tools response from {} server",
                        label
                    );
-                            Ok(result)
+                    self.prefer_fallback
                        .store(*label == "fallback", Ordering::Relaxed);
                    return Ok(result);
                }
-                        Err(fallback_e) => {
+                Err(e) => {
                    log::warn!("{} server chat_with_tools failed: {}", label, e);
                    errors.push(format!("{}: {}", label, e));
                }
            }
        }
        if order.len() <= 1 {
            log::error!("No fallback server configured; chat_with_tools exhausted");
        } else {
            log::error!(
-                                "Fallback server chat_with_tools also failed: {}",
+                "All {} servers failed for chat_with_tools ({})",
-                                fallback_e
+                order.len(),
                errors.join(" / ")
            );
        }
        Err(anyhow::anyhow!(
-                                "Both primary and fallback servers failed. Primary: {}, Fallback: {}",
+            "chat_with_tools failed on all servers: {}",
-                                e,
+            errors.join(" / ")
                                fallback_e
        ))
    }
    /// Streaming variant of `chat_with_tools`. Tries primary, then falls
    /// back if the initial connection fails; once the stream has begun
    /// emitting, mid-stream errors propagate to the caller. Emits
    /// `TextDelta` events as content tokens arrive and a single terminal
    /// `Done` event when the model marks the turn complete (tool_calls, if
    /// any, live on the final message).
    pub async fn chat_with_tools_stream(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
        // Same preference logic as `chat_with_tools`. Only the initial
        // connection is retried across servers — once the stream begins,
        // mid-stream errors propagate to the caller.
        let order = self.attempt_order();
        let mut last_err: Option<anyhow::Error> = None;
        for (label, url, _model) in &order {
            match self
                .try_chat_with_tools_stream(url, messages.clone(), tools.clone())
                .await
            {
                Ok(s) => {
                    self.prefer_fallback
                        .store(*label == "fallback", Ordering::Relaxed);
                    return Ok(s);
                }
                Err(e) => {
                    log::warn!("Streaming chat on {} server failed: {}", label, e);
                    last_err = Some(e);
                }
            }
        }
        Err(last_err.unwrap_or_else(|| anyhow::anyhow!("No Ollama server configured")))
    }
    /// Opens a streaming `/api/chat` request against `base_url` and adapts the
    /// NDJSON response body into a stream of `LlmStreamEvent`s.
    ///
    /// The model sent is `primary_model` when `base_url` equals `primary_url`;
    /// otherwise it is `fallback_model`, or `primary_model` when no fallback
    /// model is set.
    ///
    /// # Errors
    /// Fails up front when the request cannot be sent or the server replies
    /// with a non-success status. A failure while reading the body is yielded
    /// as a single `Err` item, after which the stream ends without a `Done`
    /// event.
    async fn try_chat_with_tools_stream(
        &self,
        base_url: &str,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
        let url = format!("{}/api/chat", base_url);
        let model = if base_url == self.primary_url {
            &self.primary_model
        } else {
-                    log::error!("No fallback server configured");
+            self.fallback_model
-                    Err(e)
+                .as_deref()
                .unwrap_or(&self.primary_model)
        };
        let options = self.build_options();
        let request_body = OllamaChatRequest {
            model,
            messages: &messages,
            stream: true,
            tools,
            options,
        };
        let response = self
            .client
            .post(&url)
            .json(&request_body)
            .send()
            .await
            .with_context(|| format!("Failed to connect to Ollama at {}", url))?;
        if !response.status().is_success() {
            // Capture the status before `text()` consumes the response.
            let status = response.status();
            let body = response.text().await.unwrap_or_default();
            anyhow::bail!(
                "Ollama stream request failed with status {}: {}",
                status,
                body
            );
        }
        // Ollama streams NDJSON: each line is a full `OllamaStreamChunk`.
        // We buffer partial lines across chunks from the byte stream.
        let byte_stream = response.bytes_stream();
        let stream = async_stream::stream! {
            // Raw bytes received so far that have not yet formed a full line.
            let mut buf: Vec<u8> = Vec::new();
            // Concatenation of every content delta; becomes the final message text.
            let mut accumulated = String::new();
            let mut tool_calls: Option<Vec<crate::ai::llm_client::ToolCall>> = None;
            // Default role, overwritten by any chunk that carries a non-empty role.
            let mut role = "assistant".to_string();
            let mut prompt_eval_count: Option<i32> = None;
            let mut eval_count: Option<i32> = None;
            let mut prompt_eval_duration: Option<u64> = None;
            let mut eval_duration: Option<u64> = None;
            let mut done_seen = false;
            let mut byte_stream = byte_stream;
            while let Some(chunk) = byte_stream.next().await {
                let chunk = match chunk {
                    Ok(b) => b,
                    Err(e) => {
                        // Transport failure: surface it and end the stream here.
                        yield Err(anyhow::anyhow!("stream read failed: {}", e));
                        return;
                    }
                };
                buf.extend_from_slice(&chunk);
                // Drain complete lines; hold any trailing partial.
                while let Some(nl) = buf.iter().position(|b| *b == b'\n') {
                    let line = buf.drain(..=nl).collect::<Vec<_>>();
                    let line_str = match std::str::from_utf8(&line) {
                        Ok(s) => s.trim(),
                        // A line that is not valid UTF-8 is dropped without logging.
                        Err(_) => continue,
                    };
                    if line_str.is_empty() {
                        continue;
                    }
                    match serde_json::from_str::<OllamaStreamChunk>(line_str) {
                        Ok(chunk) => {
                            // Accumulate content delta.
                            if !chunk.message.content.is_empty() {
                                accumulated.push_str(&chunk.message.content);
                                yield Ok(LlmStreamEvent::TextDelta(chunk.message.content));
                            }
                            if !chunk.message.role.is_empty() {
                                role = chunk.message.role;
                            }
                            // Ollama only attaches tool_calls on the final chunk.
                            if let Some(tcs) = chunk.message.tool_calls
                                && !tcs.is_empty()
                            {
                                tool_calls = Some(tcs);
                            }
                            if chunk.done {
                                // Usage counters are read from the `done` chunk only.
                                prompt_eval_count = chunk.prompt_eval_count;
                                eval_count = chunk.eval_count;
                                prompt_eval_duration = chunk.prompt_eval_duration;
                                eval_duration = chunk.eval_duration;
                                done_seen = true;
                                break;
                            }
                        }
                        Err(e) => {
                            // Unparseable lines are logged and skipped; the stream continues.
                            log::warn!("malformed Ollama stream line: {} ({})", line_str, e);
                        }
                    }
                }
                // The inner `break` only leaves the line loop; stop reading bytes too.
                if done_seen {
                    break;
                }
            }
            // Emit the terminal Done event with the assembled message.
            // NOTE(review): this is also reached when the byte stream ends
            // without a `done` chunk, so a truncated reply still yields `Done`
            // (with the usage counters left as `None`) — confirm that is intended.
            log_chat_metrics(
                prompt_eval_count,
                prompt_eval_duration,
                eval_count,
                eval_duration,
            );
            let message = ChatMessage {
                role,
                content: accumulated,
                tool_calls,
                images: None,
            };
            yield Ok(LlmStreamEvent::Done {
                message,
                prompt_eval_count,
                eval_count,
            });
        };
        Ok(Box::pin(stream))
    }
    async fn try_chat_with_tools(
@@ -665,8 +937,12 @@ Analyze the image and use specific details from both the visual content and the
        if !response.status().is_success() {
            let status = response.status();
            let body = response.text().await.unwrap_or_default();
-            log::error!(
+            // warn, not error — the outer `chat_with_tools` may recover via
-                "chat_with_tools request body that caused {}: {}",
+            // the fallback server. When both fail, the outer layer emits the
            // actual error log.
            log::warn!(
                "chat_with_tools request to {} got {}: {}",
                base_url,
                status,
                request_json
            );
@@ -682,6 +958,17 @@ Analyze the image and use specific details from both the visual content and the
            .await
            .with_context(|| "Failed to parse Ollama chat response")?;
        // Log performance counters returned by Ollama. Durations are
        // reported in nanoseconds; we render ms + tokens/sec for skim-ability
        // in the server log. Missing fields are left off the line rather
        // than printed as `None`.
        log_chat_metrics(
            chat_response.prompt_eval_count,
            chat_response.prompt_eval_duration,
            chat_response.eval_count,
            chat_response.eval_duration,
        );
        Ok((
            chat_response.message,
            chat_response.prompt_eval_count,
@@ -703,7 +990,7 @@ Analyze the image and use specific details from both the visual content and the
    /// Returns a vector of 768-dimensional vectors
    /// This is much more efficient than calling generate_embedding multiple times
    pub async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
-        let embedding_model = "nomic-embed-text:v1.5";
+        let embedding_model = EMBEDDING_MODEL;
        log::debug!("=== Ollama Batch Embedding Request ===");
        log::debug!("Model: {}", embedding_model);
@@ -818,6 +1105,54 @@ Analyze the image and use specific details from both the visual content and the
    }
 }
 // `LlmClient` implementation for `OllamaClient`. Every method forwards to a
 // method or associated function that `OllamaClient` already provides.
 #[async_trait]
 impl LlmClient for OllamaClient {
    // Forwards to `generate_with_images`.
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        self.generate_with_images(prompt, system, images).await
    }
    // Forwards to the `pub` method of the same name on `OllamaClient`, spelled
    // with an explicit type path.
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
        OllamaClient::chat_with_tools(self, messages, tools).await
    }
    // Forwards to the `pub` method of the same name on `OllamaClient`.
    async fn chat_with_tools_stream(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
        OllamaClient::chat_with_tools_stream(self, messages, tools).await
    }
    // Forwards to the `pub` method of the same name on `OllamaClient`.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        OllamaClient::generate_embeddings(self, texts).await
    }
    // Forwards to `generate_photo_description`.
    async fn describe_image(&self, image_base64: &str) -> Result<String> {
        self.generate_photo_description(image_base64).await
    }
    // Only the primary server URL is passed on; the fallback is not consulted here.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
        Self::list_models_with_capabilities(&self.primary_url).await
    }
    // Only the primary server URL is passed on; the fallback is not consulted here.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
        Self::check_model_capabilities(&self.primary_url, model).await
    }
    // Borrowed view of the configured primary model name.
    fn primary_model(&self) -> &str {
        &self.primary_model
    }
 }
 #[derive(Serialize)]
 struct OllamaRequest {
    model: String,
@@ -829,6 +1164,12 @@ struct OllamaRequest {
    options: Option<OllamaOptions>,
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    /// Ollama's top-level reasoning-mode toggle (~0.4+). `Some(false)`
    /// asks the server to skip thinking on models that expose a toggle
    /// (Qwen3, Ollama-integrated DeepSeek-R1 distills, GPT-OSS, etc).
    /// Ignored by non-reasoning models. None = use the model's default.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<bool>,
 }
 #[derive(Serialize)]
@@ -845,90 +1186,6 @@ struct OllamaOptions {
    min_p: Option<f32>,
 }
 /// Tool definition sent in /api/chat requests (OpenAI-compatible format)
 ///
 /// Serialize-only; build one with `Tool::function`.
 #[derive(Serialize, Clone, Debug)]
 pub struct Tool {
    /// Serialized under the key `type` rather than `tool_type`.
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    /// Name, description and parameters of the callable function.
    pub function: ToolFunction,
 }
 /// The `function` payload nested inside a `Tool` definition.
 #[derive(Serialize, Clone, Debug)]
 pub struct ToolFunction {
    /// Function name.
    pub name: String,
    /// Free-text description of the function.
    pub description: String,
    /// Arbitrary JSON describing the function's parameters.
    /// NOTE(review): presumably a JSON Schema object — confirm against callers.
    pub parameters: serde_json::Value,
 }
 impl Tool {
    /// Builds a tool definition whose `tool_type` is `"function"`.
    ///
    /// `name` and `description` are copied into owned strings; `parameters`
    /// is stored as given.
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        let function = ToolFunction {
            name: name.to_owned(),
            description: description.to_owned(),
            parameters,
        };
        Self {
            tool_type: String::from("function"),
            function,
        }
    }
 }
 /// A message in the chat conversation history
 ///
 /// Used for both serialization and deserialization.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ChatMessage {
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk
    ///
    /// Defaults to an empty string when the field is missing on input.
    #[serde(default)]
    pub content: String,
    /// Tool calls carried by the message; left out of the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models
    ///
    /// Left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
 }
 impl ChatMessage {
    /// Shared constructor: a message with the given role and text, carrying
    /// neither tool calls nor images.
    fn text_only(role: &str, content: impl Into<String>) -> Self {
        Self {
            role: role.to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
    /// Creates a `"system"` message with the given text.
    pub fn system(content: impl Into<String>) -> Self {
        Self::text_only("system", content)
    }
    /// Creates a `"user"` message with the given text.
    pub fn user(content: impl Into<String>) -> Self {
        Self::text_only("user", content)
    }
    /// Creates a `"tool"` message holding the output of a tool invocation.
    pub fn tool_result(content: impl Into<String>) -> Self {
        Self::text_only("tool", content)
    }
 }
 /// Tool call returned by the model in an assistant message
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCall {
    /// Which function to call and with what arguments.
    pub function: ToolCallFunction,
    /// Optional call identifier; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
 }
 /// The function name and arguments carried by a `ToolCall`.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCallFunction {
    /// Name of the function the model wants to invoke.
    pub name: String,
    /// Native JSON object (NOT a JSON-encoded string like OpenAI)
    pub arguments: serde_json::Value,
 }
 #[derive(Serialize)]
 struct OllamaChatRequest<'a> {
    model: &'a str,
@@ -950,13 +1207,102 @@ struct OllamaChatResponse {
    done_reason: String,
    #[serde(default)]
    prompt_eval_count: Option<i32>,
    /// Nanoseconds spent evaluating the prompt (context ingestion).
    #[serde(default)]
    prompt_eval_duration: Option<u64>,
    #[serde(default)]
    eval_count: Option<i32>,
    /// Nanoseconds spent generating the response tokens.
    #[serde(default)]
    eval_duration: Option<u64>,
 }
 /// One chunk in the NDJSON stream from `/api/chat` with `stream: true`.
 /// Early chunks carry content deltas in `message.content`; the final chunk
 /// has `done: true`, optional `tool_calls`, and usage counters.
 ///
 /// Every field is `#[serde(default)]`, so a chunk that omits any of them
 /// still deserializes.
 #[derive(Deserialize, Debug)]
 struct OllamaStreamChunk {
    /// Message fragment for this chunk; an all-default message when absent.
    #[serde(default)]
    message: OllamaStreamMessage,
    /// `false` when absent.
    #[serde(default)]
    done: bool,
    /// Token count for the prompt, when reported.
    #[serde(default)]
    prompt_eval_count: Option<i32>,
    /// Prompt evaluation time, when reported; `log_chat_metrics` treats it as nanoseconds.
    #[serde(default)]
    prompt_eval_duration: Option<u64>,
    /// Token count for the generated reply, when reported.
    #[serde(default)]
    eval_count: Option<i32>,
    /// Generation time, when reported; `log_chat_metrics` treats it as nanoseconds.
    #[serde(default)]
    eval_duration: Option<u64>,
 }
 /// The `message` object inside an `OllamaStreamChunk`. All fields default
 /// when missing, and `Default` is derived so the whole object may be absent.
 #[derive(Deserialize, Debug, Default)]
 struct OllamaStreamMessage {
    /// Message role; empty string when the chunk does not carry one.
    #[serde(default)]
    role: String,
    /// Content delta for this chunk; empty string when absent.
    #[serde(default)]
    content: String,
    /// Tool calls attached to this chunk, if any.
    #[serde(default)]
    tool_calls: Option<Vec<crate::ai::llm_client::ToolCall>>,
 }
 /// Deserialized reply carrying generated text plus optional usage counters.
 /// NOTE(review): presumably the non-chat generate endpoint's response —
 /// the call site is outside this view; confirm.
 #[derive(Deserialize)]
 struct OllamaResponse {
    /// The generated text; the only required field.
    response: String,
    /// Token count for the prompt, when reported.
    #[serde(default)]
    prompt_eval_count: Option<i32>,
    /// Prompt evaluation time, when reported.
    #[serde(default)]
    prompt_eval_duration: Option<u64>,
    /// Token count for the generated reply, when reported.
    #[serde(default)]
    eval_count: Option<i32>,
    /// Generation time, when reported.
    #[serde(default)]
    eval_duration: Option<u64>,
 }
 /// Writes one `info` line summarising the usage counters of a chat call.
 ///
 /// Each side (prompt, generation) appears only when its token count is
 /// present. When its duration (nanoseconds) is present too, the elapsed
 /// milliseconds are appended, and tokens/sec as well when both count and
 /// duration are positive. Nothing is logged when both counts are `None`.
 fn log_chat_metrics(
    prompt_eval_count: Option<i32>,
    prompt_eval_duration_ns: Option<u64>,
    eval_count: Option<i32>,
    eval_duration_ns: Option<u64>,
 ) {
    // Throughput is only meaningful when both inputs are strictly positive.
    fn tokens_per_sec(count: i32, duration_ns: u64) -> Option<f64> {
        (count > 0 && duration_ns > 0)
            .then(|| f64::from(count) * 1_000_000_000.0 / (duration_ns as f64))
    }
    // Renders one side of the report, or `None` when the count is missing.
    fn describe(label: &str, count: Option<i32>, duration_ns: Option<u64>) -> Option<String> {
        let tokens = count?;
        let timing = match duration_ns {
            None => String::new(),
            Some(ns) => {
                let ms = ns as f64 / 1_000_000.0;
                match tokens_per_sec(tokens, ns) {
                    Some(tps) => format!(" ({:.0} ms, {:.1} tok/s)", ms, tps),
                    None => format!(" ({:.0} ms)", ms),
                }
            }
        };
        Some(format!("{}={} tok{}", label, tokens, timing))
    }
    let parts: Vec<String> = [
        describe("prompt", prompt_eval_count, prompt_eval_duration_ns),
        describe("gen", eval_count, eval_duration_ns),
    ]
    .into_iter()
    .flatten()
    .collect();
    if !parts.is_empty() {
        log::info!("Ollama chat metrics — {}", parts.join(", "));
    }
 }
 #[derive(Deserialize)]
@@ -975,13 +1321,6 @@ struct OllamaShowResponse {
    capabilities: Vec<String>,
 }
 /// Capability flags recorded for a single model.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ModelCapabilities {
    /// Model name.
    pub name: String,
    /// NOTE(review): presumably whether the model accepts image input — confirm.
    pub has_vision: bool,
    /// NOTE(review): presumably whether the model supports tool calls — confirm.
    pub has_tool_calling: bool,
 }
 #[derive(Serialize)]
 struct OllamaBatchEmbedRequest {
    model: String,
--- a/src/ai/openrouter.rs
+++ b/src/ai/openrouter.rs
@@ -0,0 +1,998 @@
 // First consumer lands in a later PR (hybrid backend routing). Tests exercise
 // the translation helpers directly.
 #![allow(dead_code)]
 use anyhow::{Context, Result, anyhow, bail};
 use async_trait::async_trait;
 use reqwest::Client;
 use serde::Deserialize;
 use serde_json::{Value, json};
 use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 use crate::ai::llm_client::{
    ChatMessage, LlmClient, LlmStreamEvent, ModelCapabilities, Tool, ToolCall, ToolCallFunction,
 };
 use futures::stream::{BoxStream, StreamExt};
 /// Base URL used by `OpenRouterClient::new` when the caller passes `None`.
 const DEFAULT_BASE_URL: &str = "https://openrouter.ai/api/v1";
 /// Embedding model a new client starts with; see `set_embedding_model`.
 const DEFAULT_EMBEDDING_MODEL: &str = "openai/text-embedding-3-small";
 /// Lifetime of a `CachedEntry`, in seconds (15 minutes).
 const CACHE_DURATION_SECS: u64 = 15 * 60;
 /// A cached value stamped with the instant it was stored.
 #[derive(Clone)]
 struct CachedEntry<T> {
    /// The cached payload.
    data: T,
    /// When the entry was created.
    cached_at: Instant,
 }
 impl<T> CachedEntry<T> {
    /// Wraps `data`, stamping it with the current instant.
    fn new(data: T) -> Self {
        let cached_at = Instant::now();
        Self { data, cached_at }
    }
    /// Reports whether the entry's age, in whole seconds, exceeds
    /// `CACHE_DURATION_SECS`.
    fn is_expired(&self) -> bool {
        let age_secs = self.cached_at.elapsed().as_secs();
        age_secs > CACHE_DURATION_SECS
    }
 }
 // Process-wide, mutex-guarded cache of model capability lists. Each value is
 // wrapped in a `CachedEntry` so its age can be checked.
 // NOTE(review): the meaning of the `String` key is not visible here —
 // presumably the API base URL; confirm at the use site.
 lazy_static::lazy_static! {
    static ref MODEL_CAPABILITIES_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<ModelCapabilities>>>>> =
        Arc::new(Mutex::new(HashMap::new()));
 }
 /// OpenAI-compatible client for OpenRouter (https://openrouter.ai).
 ///
 /// Translates canonical `ChatMessage` / `Tool` shapes to OpenAI wire format:
 /// - Tool-call `arguments` serialized as JSON-encoded strings (vs Ollama's
 ///   native JSON).
 /// - Image content rewritten into content-parts array with `image_url` entries.
 /// - `role=tool` messages attach a `tool_call_id` inferred from the preceding
 ///   assistant turn's tool call.
 #[derive(Clone)]
 pub struct OpenRouterClient {
    /// HTTP client; `new` configures it with connect and overall timeouts.
    client: Client,
    /// API key, sent as a bearer token by `authed`.
    pub api_key: String,
    /// API base URL; `new` falls back to `DEFAULT_BASE_URL`.
    pub base_url: String,
    /// Name of the primary model.
    pub primary_model: String,
    /// Name of the embedding model; starts as `DEFAULT_EMBEDDING_MODEL`.
    pub embedding_model: String,
    /// Optional context-size setting; `None` until `set_num_ctx` is called.
    num_ctx: Option<i32>,
    /// Optional sampling parameters; all `None` until `set_sampling_params`
    /// is called.
    temperature: Option<f32>,
    top_p: Option<f32>,
    top_k: Option<i32>,
    min_p: Option<f32>,
    /// Optional `HTTP-Referer` header OpenRouter uses for attribution.
    pub referer: Option<String>,
    /// Optional `X-Title` header OpenRouter uses for attribution.
    pub app_title: Option<String>,
 }
 impl OpenRouterClient {
    /// Creates a client for `primary_model`, authenticated with `api_key`.
    ///
    /// `base_url` falls back to `DEFAULT_BASE_URL` when `None`. The HTTP
    /// client uses a 10 s connect timeout and a 180 s overall timeout; if the
    /// builder fails, a default `Client` is used instead. The embedding model
    /// starts as `DEFAULT_EMBEDDING_MODEL`, and every optional setting starts
    /// as `None`.
    pub fn new(api_key: String, base_url: Option<String>, primary_model: String) -> Self {
        let client = Client::builder()
            .connect_timeout(Duration::from_secs(10))
            .timeout(Duration::from_secs(180))
            .build()
            .unwrap_or_else(|_| Client::new());
        let base_url = match base_url {
            Some(custom) => custom,
            None => DEFAULT_BASE_URL.to_string(),
        };
        let embedding_model = DEFAULT_EMBEDDING_MODEL.to_string();
        Self {
            client,
            api_key,
            base_url,
            primary_model,
            embedding_model,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
            referer: None,
            app_title: None,
        }
    }
    /// Replaces the embedding model name (initially `DEFAULT_EMBEDDING_MODEL`).
    pub fn set_embedding_model(&mut self, model: String) {
        self.embedding_model = model;
    }
    /// Sets or clears (`None`) the stored `num_ctx` value.
    #[allow(dead_code)]
    pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
        self.num_ctx = num_ctx;
    }
    /// Overwrites all four sampling parameters in one call; passing `None`
    /// for a parameter clears it.
    #[allow(dead_code)]
    pub fn set_sampling_params(
        &mut self,
        temperature: Option<f32>,
        top_p: Option<f32>,
        top_k: Option<i32>,
        min_p: Option<f32>,
    ) {
        (self.temperature, self.top_p, self.top_k, self.min_p) =
            (temperature, top_p, top_k, min_p);
    }
    /// Sets or clears (`None`) the values `authed` sends as the `HTTP-Referer`
    /// and `X-Title` headers.
    pub fn set_attribution(&mut self, referer: Option<String>, app_title: Option<String>) {
        self.referer = referer;
        self.app_title = app_title;
    }
    /// Adds authentication and attribution headers to a request.
    ///
    /// The API key is always attached as a bearer token. `HTTP-Referer` and
    /// `X-Title` are added, in that order, only when the corresponding
    /// field is set.
    fn authed(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
        let attribution = [
            ("HTTP-Referer", self.referer.as_ref()),
            ("X-Title", self.app_title.as_ref()),
        ];
        attribution.into_iter().fold(
            builder.bearer_auth(&self.api_key),
            |request, (header_name, maybe_value)| match maybe_value {
                Some(value) => request.header(header_name, value),
                None => request,
            },
        )
    }
    /// Translate canonical messages to the OpenAI-compatible wire shape.
    ///
    /// Messages are walked in order so each `role=tool` message can be given
    /// a `tool_call_id`: the n-th tool message after an assistant turn gets
    /// the id of that turn's n-th tool call. Calls without an id receive a
    /// synthetic `call_{index}`; a tool message with no matching call falls
    /// back to `call_0`.
    ///
    /// Content is a plain string, or a content-parts array (optional text
    /// part followed by one `image_url` part per image) when the message
    /// carries images.
    ///
    /// An assistant message whose `tool_calls` is `Some` but empty is emitted
    /// without a `tool_calls` key, exactly like one with `None`: an empty
    /// `tool_calls` array can be rejected by OpenAI-compatible endpoints, and
    /// `openai_message_to_chat` produces such a value from `"tool_calls": []`.
    fn messages_to_openai(messages: &[ChatMessage]) -> Vec<Value> {
        let mut out = Vec::with_capacity(messages.len());
        let mut last_tool_call_ids: Vec<String> = Vec::new();
        let mut next_tool_result_idx: usize = 0;
        for msg in messages {
            let mut obj = serde_json::Map::new();
            obj.insert("role".into(), Value::String(msg.role.clone()));
            // Content: string OR content-parts array (when images present).
            match &msg.images {
                Some(images) if !images.is_empty() => {
                    let mut parts: Vec<Value> = Vec::with_capacity(images.len() + 1);
                    if !msg.content.is_empty() {
                        parts.push(json!({"type": "text", "text": msg.content}));
                    }
                    for img in images {
                        let url = image_to_data_url(img);
                        parts.push(json!({
                            "type": "image_url",
                            "image_url": { "url": url }
                        }));
                    }
                    obj.insert("content".into(), Value::Array(parts));
                }
                _ => {
                    obj.insert("content".into(), Value::String(msg.content.clone()));
                }
            }
            // Assistant message with tool_calls: stringify arguments, remember
            // the ids so the subsequent tool messages can reference them.
            // Empty lists are skipped so no `"tool_calls": []` is ever sent.
            if msg.role == "assistant"
                && let Some(tcs) = msg.tool_calls.as_deref()
                && !tcs.is_empty()
            {
                let mut ids: Vec<String> = Vec::with_capacity(tcs.len());
                let converted: Vec<Value> = tcs
                    .iter()
                    .enumerate()
                    .map(|(i, call)| {
                        let id = call.id.clone().unwrap_or_else(|| format!("call_{}", i));
                        let args_str = serde_json::to_string(&call.function.arguments)
                            .unwrap_or_else(|_| "{}".to_string());
                        // Record the id directly instead of re-reading it from
                        // the JSON value built below.
                        ids.push(id.clone());
                        json!({
                            "id": id,
                            "type": "function",
                            "function": {
                                "name": call.function.name,
                                "arguments": args_str,
                            }
                        })
                    })
                    .collect();
                last_tool_call_ids = ids;
                next_tool_result_idx = 0;
                obj.insert("tool_calls".into(), Value::Array(converted));
            }
            // Tool result messages: attach tool_call_id from the last assistant turn.
            if msg.role == "tool" {
                let id = last_tool_call_ids
                    .get(next_tool_result_idx)
                    .cloned()
                    .unwrap_or_else(|| "call_0".to_string());
                obj.insert("tool_call_id".into(), Value::String(id));
                next_tool_result_idx += 1;
            }
            out.push(Value::Object(obj));
        }
        out
    }
    /// Parse an OpenAI-compatible assistant message back into canonical shape.
    fn openai_message_to_chat(msg: &Value) -> Result<ChatMessage> {
        let obj = msg
            .as_object()
            .ok_or_else(|| anyhow!("response message is not an object"))?;
        let role = obj
            .get("role")
            .and_then(|v| v.as_str())
            .unwrap_or("assistant")
            .to_string();
        let content = obj
            .get("content")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let tool_calls = if let Some(tcs) = obj.get("tool_calls").and_then(|v| v.as_array()) {
            let mut parsed = Vec::with_capacity(tcs.len());
            for tc in tcs {
                let id = tc.get("id").and_then(|v| v.as_str()).map(String::from);
                let function = tc
                    .get("function")
                    .ok_or_else(|| anyhow!("tool_call missing function field"))?;
                let name = function
                    .get("name")
                    .and_then(|v| v.as_str())
                    .unwrap_or_default()
                    .to_string();
                let args_value = match function.get("arguments") {
                    // OpenAI-compat: stringified JSON.
                    Some(Value::String(s)) => {
                        serde_json::from_str::<Value>(s).unwrap_or_else(|_| json!({}))
                    }
                    // Some providers emit arguments as an object directly — accept both.
                    Some(v @ Value::Object(_)) => v.clone(),
                    _ => json!({}),
                };
                parsed.push(ToolCall {
                    id,
                    function: ToolCallFunction {
                        name,
                        arguments: args_value,
                    },
                });
            }
            Some(parsed)
        } else {
            None
        };
        Ok(ChatMessage {
            role,
            content,
            tool_calls,
            images: None,
        })
    }
    /// Collect the sampling parameters that are set on this client as
    /// `(key, json value)` pairs, in the order `temperature`, `top_p`,
    /// `top_k`, `min_p`. Unset parameters are omitted.
    fn build_options(&self) -> Vec<(&'static str, Value)> {
        // OpenAI uses max_tokens for the generation bound; num_ctx has no
        // direct counterpart, so it is read and deliberately dropped rather
        // than silently mis-mapped.
        let _ = self.num_ctx;

        [
            ("temperature", self.temperature.map(|t| json!(t))),
            ("top_p", self.top_p.map(|p| json!(p))),
            ("top_k", self.top_k.map(|k| json!(k))),
            ("min_p", self.min_p.map(|m| json!(m))),
        ]
        .into_iter()
        .filter_map(|(key, value)| value.map(|v| (key, v)))
        .collect()
    }
 }
 #[async_trait]
 impl LlmClient for OpenRouterClient {
    /// Single-shot text generation: sends an optional system message followed
    /// by one user message (with `images` attached to it) through
    /// `chat_with_tools` with no tools, and returns the reply's text content.
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        let mut user_turn = ChatMessage::user(prompt);
        user_turn.images = images;

        // System message first (when given), then the user turn.
        let conversation: Vec<ChatMessage> = system
            .map(|text| ChatMessage::system(text))
            .into_iter()
            .chain(std::iter::once(user_turn))
            .collect();

        let (reply, _prompt_tokens, _completion_tokens) =
            self.chat_with_tools(conversation, Vec::new()).await?;
        Ok(reply.content)
    }
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
        let url = format!("{}/chat/completions", self.base_url);
        let mut body = serde_json::Map::new();
        body.insert("model".into(), Value::String(self.primary_model.clone()));
        body.insert(
            "messages".into(),
            Value::Array(Self::messages_to_openai(&messages)),
        );
        body.insert("stream".into(), Value::Bool(false));
        if !tools.is_empty() {
            body.insert(
                "tools".into(),
                serde_json::to_value(&tools).context("serializing tools")?,
            );
        }
        for (k, v) in self.build_options() {
            body.insert(k.into(), v);
        }
        log::info!(
            "OpenRouter chat_with_tools: model={} messages={} tools={}",
            self.primary_model,
            messages.len(),
            tools.len()
        );
        let resp = self
            .authed(self.client.post(&url))
            .json(&Value::Object(body))
            .send()
            .await
            .with_context(|| format!("POST {} failed", url))?;
        if !resp.status().is_success() {
            let status = resp.status();
            let body = resp.text().await.unwrap_or_default();
            bail!("OpenRouter chat request failed: {} — {}", status, body);
        }
        let parsed: Value = resp.json().await.context("parsing chat response")?;
        let choice = parsed
            .get("choices")
            .and_then(|v| v.as_array())
            .and_then(|a| a.first())
            .ok_or_else(|| {
                anyhow!(
                    "response missing choices[0]: {}",
                    extract_openrouter_error_detail(&parsed)
                )
            })?;
        let msg = choice.get("message").ok_or_else(|| {
            anyhow!(
                "choices[0] missing message: {}",
                extract_openrouter_error_detail(&parsed)
            )
        })?;
        let chat_msg = Self::openai_message_to_chat(msg)?;
        let usage = parsed.get("usage");
        let prompt_tokens = usage
            .and_then(|u| u.get("prompt_tokens"))
            .and_then(|v| v.as_i64())
            .map(|n| n as i32);
        let completion_tokens = usage
            .and_then(|u| u.get("completion_tokens"))
            .and_then(|v| v.as_i64())
            .map(|n| n as i32);
        Ok((chat_msg, prompt_tokens, completion_tokens))
    }
    /// Streaming chat completion over server-sent events.
    ///
    /// POSTs to `{base_url}/chat/completions` with `stream: true` and
    /// `stream_options.include_usage: true`, then returns a stream that:
    ///   - yields `LlmStreamEvent::TextDelta` for every non-empty
    ///     `delta.content` fragment,
    ///   - accumulates `delta.tool_calls` fragments per `index`,
    ///   - finishes with exactly one `LlmStreamEvent::Done` carrying the
    ///     assembled message and the token counts from the last `usage`
    ///     object seen (if any).
    ///
    /// If reading the body fails mid-stream, a single `Err` is yielded and
    /// the stream ends without a `Done` event.
    ///
    /// # Errors
    /// The outer `Result` fails when the request cannot be sent, the tools
    /// cannot be serialized, or the HTTP status is not a success.
    async fn chat_with_tools_stream(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
        let url = format!("{}/chat/completions", self.base_url);
        let mut body = serde_json::Map::new();
        body.insert("model".into(), Value::String(self.primary_model.clone()));
        body.insert(
            "messages".into(),
            Value::Array(Self::messages_to_openai(&messages)),
        );
        body.insert("stream".into(), Value::Bool(true));
        // Ask for usage data in the final chunk (OpenAI + OpenRouter
        // both honor this options bag).
        body.insert(
            "stream_options".into(),
            serde_json::json!({ "include_usage": true }),
        );
        if !tools.is_empty() {
            body.insert(
                "tools".into(),
                serde_json::to_value(&tools).context("serializing tools")?,
            );
        }
        for (k, v) in self.build_options() {
            body.insert(k.into(), v);
        }
        let resp = self
            .authed(self.client.post(&url))
            .json(&Value::Object(body))
            .send()
            .await
            .with_context(|| format!("POST {} failed", url))?;
        if !resp.status().is_success() {
            let status = resp.status();
            let body = resp.text().await.unwrap_or_default();
            bail!("OpenRouter stream request failed: {} — {}", status, body);
        }
        // OpenAI-compat SSE stream. Each event is `data: <json>\n\n`, with
        // `data: [DONE]` signalling completion. Tool calls arrive as
        // `delta.tool_calls[i]` chunks that must be concatenated by index.
        let byte_stream = resp.bytes_stream();
        let stream = async_stream::stream! {
            let mut byte_stream = byte_stream;
            // Bytes received so far that do not yet form a complete frame.
            let mut buf: Vec<u8> = Vec::new();
            let mut accumulated_content = String::new();
            // tool call state: index -> (id, name, args_string)
            let mut tool_state: std::collections::BTreeMap<
                usize,
                (Option<String>, Option<String>, String),
            > = std::collections::BTreeMap::new();
            // Overwritten if any delta carries an explicit role.
            let mut role = "assistant".to_string();
            let mut prompt_tokens: Option<i32> = None;
            let mut completion_tokens: Option<i32> = None;
            let mut done_seen = false;
            while let Some(chunk) = byte_stream.next().await {
                let chunk = match chunk {
                    Ok(b) => b,
                    Err(e) => {
                        // Transport failure: report it and end the stream
                        // without emitting `Done`.
                        yield Err(anyhow!("stream read failed: {}", e));
                        return;
                    }
                };
                buf.extend_from_slice(&chunk);
                // SSE frames are delimited by a blank line. Walk the buffer
                // for "\n\n" markers; anything before them is a complete
                // frame (possibly multi-line).
                while let Some(sep) = find_double_newline(&buf) {
                    let frame = buf.drain(..sep + 2).collect::<Vec<_>>();
                    // Frames that are not valid UTF-8 are dropped silently.
                    let frame_str = match std::str::from_utf8(&frame) {
                        Ok(s) => s,
                        Err(_) => continue,
                    };
                    // A frame is one or more lines; the payload is on data:
                    // lines. Ignore comments and other fields.
                    for line in frame_str.lines() {
                        let line = line.trim_end_matches('\r');
                        // Only the exact prefix `data: ` (with the space) is
                        // recognised.
                        let payload = match line.strip_prefix("data: ") {
                            Some(p) => p,
                            None => continue,
                        };
                        if payload == "[DONE]" {
                            done_seen = true;
                            break;
                        }
                        let v: Value = match serde_json::from_str(payload) {
                            Ok(v) => v,
                            Err(e) => {
                                log::warn!(
                                    "malformed OpenRouter SSE frame: {} ({})",
                                    payload,
                                    e
                                );
                                continue;
                            }
                        };
                        // Usage can arrive in a dedicated final frame with
                        // empty choices.
                        if let Some(usage) = v.get("usage") {
                            prompt_tokens = usage
                                .get("prompt_tokens")
                                .and_then(|n| n.as_i64())
                                .map(|n| n as i32);
                            completion_tokens = usage
                                .get("completion_tokens")
                                .and_then(|n| n.as_i64())
                                .map(|n| n as i32);
                        }
                        let Some(choices) = v.get("choices").and_then(|c| c.as_array())
                        else {
                            continue;
                        };
                        // Only the first choice is consumed.
                        let Some(choice) = choices.first() else { continue };
                        let delta = match choice.get("delta") {
                            Some(d) => d,
                            None => continue,
                        };
                        if let Some(r) = delta.get("role").and_then(|v| v.as_str()) {
                            role = r.to_string();
                        }
                        if let Some(content) =
                            delta.get("content").and_then(|v| v.as_str())
                            && !content.is_empty()
                        {
                            accumulated_content.push_str(content);
                            yield Ok(LlmStreamEvent::TextDelta(content.to_string()));
                        }
                        if let Some(tcs) = delta.get("tool_calls").and_then(|v| v.as_array()) {
                            for tc_delta in tcs {
                                // A fragment without an `index` is folded
                                // into slot 0.
                                let idx = tc_delta
                                    .get("index")
                                    .and_then(|n| n.as_u64())
                                    .unwrap_or(0) as usize;
                                let entry = tool_state
                                    .entry(idx)
                                    .or_insert((None, None, String::new()));
                                if let Some(id) =
                                    tc_delta.get("id").and_then(|v| v.as_str())
                                {
                                    entry.0 = Some(id.to_string());
                                }
                                if let Some(func) = tc_delta.get("function") {
                                    if let Some(name) =
                                        func.get("name").and_then(|v| v.as_str())
                                    {
                                        entry.1 = Some(name.to_string());
                                    }
                                    // Argument text arrives in pieces; append
                                    // and parse once the stream is over.
                                    if let Some(args) =
                                        func.get("arguments").and_then(|v| v.as_str())
                                    {
                                        entry.2.push_str(args);
                                    }
                                }
                            }
                        }
                    }
                    if done_seen {
                        break;
                    }
                }
                if done_seen {
                    break;
                }
            }
            // NOTE(review): bytes still in `buf` at this point (a final frame
            // with no trailing blank line) are never parsed.
            // Finalize tool calls: parse accumulated argument strings.
            let tool_calls: Option<Vec<ToolCall>> = if tool_state.is_empty() {
                None
            } else {
                let mut v = Vec::with_capacity(tool_state.len());
                // BTreeMap iteration yields the calls in ascending index order.
                for (_idx, (id, name, args)) in tool_state {
                    // Blank or unparseable argument text becomes `{}`.
                    let arguments: Value = if args.trim().is_empty() {
                        Value::Object(Default::default())
                    } else {
                        serde_json::from_str(&args).unwrap_or_else(|_| {
                            Value::Object(Default::default())
                        })
                    };
                    v.push(ToolCall {
                        id,
                        function: ToolCallFunction {
                            name: name.unwrap_or_default(),
                            arguments,
                        },
                    });
                }
                Some(v)
            };
            let message = ChatMessage {
                role,
                content: accumulated_content,
                tool_calls,
                images: None,
            };
            yield Ok(LlmStreamEvent::Done {
                message,
                prompt_eval_count: prompt_tokens,
                eval_count: completion_tokens,
            });
        };
        Ok(Box::pin(stream))
    }
    /// Embed `texts` with the configured embedding model via
    /// `{base_url}/embeddings`, returning the vectors in the order the
    /// response's `data` array lists them.
    ///
    /// # Errors
    /// Fails when the request cannot be sent, the status is not a success, or
    /// the body does not match the expected `{ data: [{ embedding }] }` shape.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        /// One element of the response's `data` array.
        #[derive(Deserialize)]
        struct EmbeddingRow {
            embedding: Vec<f32>,
        }
        /// Top-level response body.
        #[derive(Deserialize)]
        struct EmbeddingEnvelope {
            data: Vec<EmbeddingRow>,
        }

        let endpoint = format!("{}/embeddings", self.base_url);
        let payload = json!({
            "model": self.embedding_model,
            "input": texts,
        });
        let response = self
            .authed(self.client.post(&endpoint))
            .json(&payload)
            .send()
            .await
            .with_context(|| format!("POST {} failed", endpoint))?;
        let status = response.status();
        if !status.is_success() {
            let detail = response.text().await.unwrap_or_default();
            bail!("OpenRouter embedding request failed: {} — {}", status, detail);
        }

        let envelope: EmbeddingEnvelope =
            response.json().await.context("parsing embed response")?;
        let vectors = envelope.data.into_iter().map(|row| row.embedding).collect();
        Ok(vectors)
    }
    /// Ask the model for a short (1-2 sentence) description of a single
    /// image by delegating to `generate` with fixed system and user prompts.
    async fn describe_image(&self, image_base64: &str) -> Result<String> {
        const SYSTEM_PROMPT: &str =
            "You are a scene description assistant. Be concise and factual.";
        const USER_PROMPT: &str = "Briefly describe what you see in this image in 1-2 sentences. \
                                   Focus on the people, location, and activity.";

        let attachments = vec![image_base64.to_owned()];
        self.generate(USER_PROMPT, Some(SYSTEM_PROMPT), Some(attachments))
            .await
    }
    /// List the models exposed at `{base_url}/models` with their parsed
    /// capabilities.
    ///
    /// Results are cached in `MODEL_CAPABILITIES_CACHE` keyed by `base_url`;
    /// an unexpired entry is returned without touching the network.
    ///
    /// # Errors
    /// Fails when the request cannot be sent, the status is not a success, or
    /// the body has no `data` array.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
        // The guard is a temporary of this statement, so the lock is released
        // before any `.await` below.
        let cached = MODEL_CAPABILITIES_CACHE
            .lock()
            .unwrap()
            .get(&self.base_url)
            .filter(|entry| !entry.is_expired())
            .map(|entry| entry.data.clone());
        if let Some(models) = cached {
            return Ok(models);
        }

        let endpoint = format!("{}/models", self.base_url);
        let response = self
            .authed(self.client.get(&endpoint))
            .send()
            .await
            .with_context(|| format!("GET {} failed", endpoint))?;
        let status = response.status();
        if !status.is_success() {
            let detail = response.text().await.unwrap_or_default();
            bail!("OpenRouter list_models failed: {} — {}", status, detail);
        }

        let parsed: Value = response.json().await.context("parsing models response")?;
        let Some(entries) = parsed.get("data").and_then(|v| v.as_array()) else {
            return Err(anyhow!("models response missing data[]"));
        };
        let caps: Vec<ModelCapabilities> =
            entries.iter().map(parse_model_capabilities).collect();

        MODEL_CAPABILITIES_CACHE
            .lock()
            .unwrap()
            .insert(self.base_url.clone(), CachedEntry::new(caps.clone()));
        Ok(caps)
    }
    /// Look up one model by exact name in the result of `list_models`.
    ///
    /// # Errors
    /// Propagates `list_models` failures, and fails when no listed model is
    /// named `model`.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
        for candidate in self.list_models().await? {
            if candidate.name == model {
                return Ok(candidate);
            }
        }
        Err(anyhow!("model '{}' not found on OpenRouter", model))
    }
    /// Name of the model this client sends chat requests to.
    fn primary_model(&self) -> &str {
        self.primary_model.as_str()
    }
 }
 /// Summarise a response body that lacks the expected `{choices: [...]}`
 /// shape so the resulting error message is actionable.
 ///
 /// OpenRouter sometimes answers 200 OK with
 /// `{"error": {"message": "...", "code": ...}}` when the upstream provider
 /// failed (rate limit, moderation, overload, timeout). When an `error`
 /// object is present its code and the first 240 characters of its message
 /// are reported; otherwise the first 300 characters of the raw JSON are
 /// returned.
 fn extract_openrouter_error_detail(parsed: &Value) -> String {
    let Some(error) = parsed.get("error") else {
        // No structured error: fall back to a truncated dump of the body.
        return parsed.to_string().chars().take(300).collect();
    };

    // `code` may be a string or a number; render strings without quotes.
    let code = match error.get("code") {
        Some(Value::String(text)) => text.clone(),
        Some(other) => other.to_string(),
        None => "?".to_string(),
    };
    let message: String = error
        .get("message")
        .and_then(Value::as_str)
        .unwrap_or("(no message)")
        .chars()
        .take(240)
        .collect();

    format!("error code={} message=\"{}\"", code, message)
 }
 /// Locate the end of the first complete SSE frame in `buf`.
 ///
 /// A frame is terminated by a blank line, written either as `\n\n` or as
 /// `\r\n\r\n`. The returned offset `idx` is chosen so that
 /// `buf[..idx + 2]` is the frame *including its entire terminator*, which is
 /// what callers drain:
 ///   - `\n\n`: `idx` is the position of the first `\n`.
 ///   - `\r\n\r\n`: `idx` is the position of the second `\r`, so the closing
 ///     `\r\n` is consumed as well and no stray `\n` is left at the start of
 ///     the next frame.
 ///
 /// Returns `None` when `buf` does not yet contain a terminator.
 fn find_double_newline(buf: &[u8]) -> Option<usize> {
    for i in 0..buf.len().saturating_sub(1) {
        let rest = &buf[i..];
        if rest.starts_with(b"\n\n") {
            return Some(i);
        }
        // The terminator spans i..=i+3; report i+2 so that `..idx + 2`
        // covers all four bytes.
        if rest.starts_with(b"\r\n\r\n") {
            return Some(i + 2);
        }
    }
    None
 }
 /// Return `img` as a `data:` URL: input that already starts with `data:` is
 /// passed through unchanged, anything else is treated as raw base64 and
 /// wrapped as `data:image/jpeg;base64,<img>`.
 fn image_to_data_url(img: &str) -> String {
    match img.strip_prefix("data:") {
        Some(_) => img.to_owned(),
        None => ["data:image/jpeg;base64,", img].concat(),
    }
 }
 fn parse_model_capabilities(m: &Value) -> ModelCapabilities {
    let name = m
        .get("id")
        .and_then(|v| v.as_str())
        .unwrap_or_default()
        .to_string();
    let has_tool_calling = m
        .get("supported_parameters")
        .and_then(|v| v.as_array())
        .map(|arr| arr.iter().any(|x| x.as_str() == Some("tools")))
        .unwrap_or(false);
    let has_vision = m
        .get("architecture")
        .and_then(|v| v.get("input_modalities"))
        .and_then(|v| v.as_array())
        .map(|arr| arr.iter().any(|x| x.as_str() == Some("image")))
        .unwrap_or(false);
    ModelCapabilities {
        name,
        has_vision,
        has_tool_calling,
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;

    /// Outgoing tool-call arguments must be serialized to a JSON string.
    #[test]
    fn tool_call_arguments_stringified_on_send() {
        let msg = ChatMessage {
            role: "assistant".into(),
            content: String::new(),
            tool_calls: Some(vec![ToolCall {
                id: Some("call_abc".into()),
                function: ToolCallFunction {
                    name: "search_sms".into(),
                    arguments: json!({"query": "hello", "limit": 5}),
                },
            }]),
            images: None,
        };
        let wire = OpenRouterClient::messages_to_openai(&[msg]);
        let tcs = wire[0]
            .get("tool_calls")
            .and_then(|v| v.as_array())
            .expect("tool_calls present");
        let args = tcs[0]
            .get("function")
            .and_then(|f| f.get("arguments"))
            .and_then(|a| a.as_str())
            .expect("arguments stringified");
        let parsed: Value = serde_json::from_str(args).unwrap();
        assert_eq!(parsed["query"], "hello");
        assert_eq!(parsed["limit"], 5);
    }

    /// Incoming stringified arguments are parsed back into a JSON value.
    #[test]
    fn tool_call_arguments_parsed_on_receive() {
        let response_msg = json!({
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "id": "call_xyz",
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": "{\"city\":\"Boston\",\"units\":\"celsius\"}"
                }
            }]
        });
        let parsed = OpenRouterClient::openai_message_to_chat(&response_msg).unwrap();
        let tcs = parsed.tool_calls.unwrap();
        assert_eq!(tcs.len(), 1);
        assert_eq!(tcs[0].function.name, "get_weather");
        assert_eq!(tcs[0].function.arguments["city"], "Boston");
        assert_eq!(tcs[0].function.arguments["units"], "celsius");
        assert_eq!(tcs[0].id.as_deref(), Some("call_xyz"));
    }

    /// Arguments that arrive as a JSON object are accepted as-is.
    #[test]
    fn tool_call_arguments_accept_native_json_on_receive() {
        // Some providers return arguments as an object directly; accept both.
        let response_msg = json!({
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "id": "call_1",
                "type": "function",
                "function": {
                    "name": "foo",
                    "arguments": {"nested": {"k": 1}}
                }
            }]
        });
        let parsed = OpenRouterClient::openai_message_to_chat(&response_msg).unwrap();
        let tc = &parsed.tool_calls.unwrap()[0];
        assert_eq!(tc.function.arguments["nested"]["k"], 1);
    }

    /// A message with images is sent as a list of content parts.
    #[test]
    fn images_become_content_parts() {
        let mut msg = ChatMessage::user("What is in this photo?");
        msg.images = Some(vec!["BASE64DATA".into()]);
        let wire = OpenRouterClient::messages_to_openai(&[msg]);
        let content = wire[0].get("content").and_then(|v| v.as_array()).unwrap();
        assert_eq!(content.len(), 2);
        assert_eq!(content[0]["type"], "text");
        assert_eq!(content[0]["text"], "What is in this photo?");
        assert_eq!(content[1]["type"], "image_url");
        assert_eq!(
            content[1]["image_url"]["url"],
            "data:image/jpeg;base64,BASE64DATA"
        );
    }

    /// Images already given as `data:` URLs are not re-wrapped.
    #[test]
    fn data_url_images_pass_through_unchanged() {
        let mut msg = ChatMessage::user("");
        msg.images = Some(vec!["data:image/png;base64,ABCDEF".into()]);
        let wire = OpenRouterClient::messages_to_openai(&[msg]);
        let content = wire[0].get("content").and_then(|v| v.as_array()).unwrap();
        // No text part when content is empty.
        assert_eq!(content.len(), 1);
        assert_eq!(
            content[0]["image_url"]["url"],
            "data:image/png;base64,ABCDEF"
        );
    }

    /// Without images, `content` stays a plain string.
    #[test]
    fn text_only_message_stays_string() {
        let msg = ChatMessage::user("hello");
        let wire = OpenRouterClient::messages_to_openai(&[msg]);
        assert_eq!(wire[0]["content"], "hello");
        assert!(wire[0]["content"].as_str().is_some());
    }

    /// A tool result picks up the id of the preceding assistant tool call.
    #[test]
    fn tool_result_inherits_tool_call_id_from_prior_assistant() {
        let assistant = ChatMessage {
            role: "assistant".into(),
            content: String::new(),
            tool_calls: Some(vec![ToolCall {
                id: Some("call_42".into()),
                function: ToolCallFunction {
                    name: "lookup".into(),
                    arguments: json!({}),
                },
            }]),
            images: None,
        };
        let tool_result = ChatMessage::tool_result("found it");
        let wire = OpenRouterClient::messages_to_openai(&[assistant, tool_result]);
        assert_eq!(wire[1]["role"], "tool");
        assert_eq!(wire[1]["tool_call_id"], "call_42");
    }

    /// Consecutive tool results are matched to call ids in order.
    #[test]
    fn multiple_tool_results_map_to_sequential_call_ids() {
        let assistant = ChatMessage {
            role: "assistant".into(),
            content: String::new(),
            tool_calls: Some(vec![
                ToolCall {
                    id: Some("call_A".into()),
                    function: ToolCallFunction {
                        name: "a".into(),
                        arguments: json!({}),
                    },
                },
                ToolCall {
                    id: Some("call_B".into()),
                    function: ToolCallFunction {
                        name: "b".into(),
                        arguments: json!({}),
                    },
                },
            ]),
            images: None,
        };
        let r1 = ChatMessage::tool_result("a result");
        let r2 = ChatMessage::tool_result("b result");
        let wire = OpenRouterClient::messages_to_openai(&[assistant, r1, r2]);
        assert_eq!(wire[1]["tool_call_id"], "call_A");
        assert_eq!(wire[2]["tool_call_id"], "call_B");
    }

    /// A tool call without an id is given a synthetic `call_<index>` id.
    #[test]
    fn missing_tool_call_id_gets_synthetic_fallback() {
        let assistant = ChatMessage {
            role: "assistant".into(),
            content: String::new(),
            tool_calls: Some(vec![ToolCall {
                id: None,
                function: ToolCallFunction {
                    name: "noid".into(),
                    arguments: json!({}),
                },
            }]),
            images: None,
        };
        let wire = OpenRouterClient::messages_to_openai(&[assistant]);
        let tcs = wire[0]
            .get("tool_calls")
            .and_then(|v| v.as_array())
            .unwrap();
        assert_eq!(tcs[0]["id"], "call_0");
    }

    /// Tool and vision support are read from the model listing entry.
    #[test]
    fn parse_model_capabilities_extracts_tools_and_vision() {
        let m = json!({
            "id": "anthropic/claude-sonnet-4",
            "supported_parameters": ["temperature", "top_p", "tools", "max_tokens"],
            "architecture": {
                "input_modalities": ["text", "image"]
            }
        });
        let caps = parse_model_capabilities(&m);
        assert_eq!(caps.name, "anthropic/claude-sonnet-4");
        assert!(caps.has_tool_calling);
        assert!(caps.has_vision);
    }

    /// Absent capability fields default to "not supported".
    #[test]
    fn parse_model_capabilities_handles_missing_fields() {
        let m = json!({
            "id": "some/text-only-model"
        });
        let caps = parse_model_capabilities(&m);
        assert_eq!(caps.name, "some/text-only-model");
        assert!(!caps.has_tool_calling);
        assert!(!caps.has_vision);
    }
 }
--- a/src/ai/sms_client.rs
+++ b/src/ai/sms_client.rs
@@ -250,6 +250,45 @@ impl SmsApiClient {
            .collect())
    }
    /// Query the Django side's message search endpoint
    /// (`/api/messages/search/`) and return its hits.
    ///
    /// `mode` picks the ranking strategy:
    ///   - "fts5"     keyword-only, supports phrase / prefix / boolean / NEAR
    ///   - "semantic" embedding similarity
    ///   - "hybrid"   both merged via reciprocal rank fusion (recommended)
    ///
    /// `query` and `mode` are URL-encoded; a bearer token is attached when the
    /// client has one. A non-success status is returned as an error that
    /// includes the status and response body.
    pub async fn search_messages(
        &self,
        query: &str,
        mode: &str,
        limit: usize,
    ) -> Result<Vec<SmsSearchHit>> {
        let endpoint = format!(
            "{}/api/messages/search/?q={}&mode={}&limit={}",
            self.base_url,
            urlencoding::encode(query),
            urlencoding::encode(mode),
            limit
        );

        let request = match &self.token {
            Some(token) => self
                .client
                .get(&endpoint)
                .header("Authorization", format!("Bearer {}", token)),
            None => self.client.get(&endpoint),
        };

        let response = request.send().await?;
        let status = response.status();
        if !status.is_success() {
            let detail = response.text().await.unwrap_or_default();
            return Err(anyhow::anyhow!(
                "SMS search request failed: {} - {}",
                status,
                detail
            ));
        }

        Ok(response.json::<SmsSearchResponse>().await?.results)
    }
    pub async fn summarize_context(
        &self,
        messages: &[SmsMessage],
@@ -260,12 +299,13 @@ impl SmsApiClient {
        }
        // Create prompt for Ollama with sender/receiver distinction
        let user_name = crate::ai::user_display_name();
        let messages_text: String = messages
            .iter()
            .take(60) // Limit to avoid token overflow
            .map(|m| {
                if m.is_sent {
-                    format!("Me: {}", m.body)
+                    format!("{}: {}", user_name, m.body)
                } else {
                    format!("{}: {}", m.contact, m.body)
                }
@@ -314,3 +354,28 @@ struct SmsApiMessage {
    #[serde(rename = "type")]
    type_: i32,
 }
 /// One entry of the `results` array returned by the message search
 /// endpoint (see `SmsSearchResponse`).
 #[derive(Debug, Clone, Deserialize)]
 pub struct SmsSearchHit {
    // Deserialized from the response; the `dead_code` allowance suggests
    // nothing reads it yet.
    #[allow(dead_code)]
    pub message_id: i64,
    /// Contact name as reported by the search endpoint.
    pub contact_name: String,
    // Deserialized from the response; the `dead_code` allowance suggests
    // nothing reads it yet.
    #[allow(dead_code)]
    pub contact_address: String,
    /// Message text.
    pub body: String,
    /// Message timestamp. NOTE(review): presumably a Unix epoch value; the
    /// unit (seconds vs. milliseconds) is not visible here — confirm.
    pub date: i64,
    /// Message direction code: 1 = received, 2 = sent.
    #[serde(rename = "type")]
    pub type_: i32,
    /// Present for semantic / hybrid modes; absent for fts5.
    #[serde(default)]
    pub similarity_score: Option<f32>,
 }
 /// Top-level body of the message search response; only `results` is
 /// handed back to callers.
 #[derive(Deserialize)]
 struct SmsSearchResponse {
    /// The matching messages.
    results: Vec<SmsSearchHit>,
    // Deserialized (defaulting to an empty string when absent) but marked
    // `dead_code`, so presumably not read anywhere yet.
    #[allow(dead_code)]
    #[serde(default)]
    search_method: String,
 }
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -134,6 +134,7 @@ async fn main() -> anyhow::Result<()> {
    let generator = InsightGenerator::new(
        ollama,
        None,
        sms_client,
        insight_dao.clone(),
        exif_dao,
@@ -249,6 +250,9 @@ async fn main() -> anyhow::Result<()> {
                args.top_k,
                args.min_p,
                args.max_iterations,
                None,
                Vec::new(),
                Vec::new(),
            )
            .await
        {
--- a/src/bin/test_daily_summary.rs
+++ b/src/bin/test_daily_summary.rs
@@ -1,7 +1,10 @@
 use anyhow::Result;
 use chrono::NaiveDate;
 use clap::Parser;
-use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
+use image_api::ai::{
    EMBEDDING_MODEL, OllamaClient, SmsApiClient, build_daily_summary_prompt,
    strip_summary_boilerplate, user_display_name,
 };
 use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
 use std::env;
 use std::sync::{Arc, Mutex};
@@ -25,6 +28,26 @@ struct Args {
    #[arg(short, long)]
    model: Option<String>,
    /// Context window size passed as Ollama `num_ctx`. Omit for server default.
    #[arg(long)]
    num_ctx: Option<i32>,
    /// Sampling temperature. Omit for server default.
    #[arg(long)]
    temperature: Option<f32>,
    /// Top-p (nucleus) sampling. Omit for server default.
    #[arg(long)]
    top_p: Option<f32>,
    /// Top-k sampling. Omit for server default.
    #[arg(long)]
    top_k: Option<i32>,
    /// Min-p sampling. Omit for server default.
    #[arg(long)]
    min_p: Option<f32>,
    /// Test mode: Generate but don't save to database (shows output only)
    #[arg(short = 't', long, default_value_t = false)]
    test_mode: bool,
@@ -86,12 +109,28 @@ async fn main() -> Result<()> {
            .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
    });
-    let ollama = OllamaClient::new(
+    let mut ollama = OllamaClient::new(
        ollama_primary_url,
        ollama_fallback_url.clone(),
        model_to_use.clone(),
        Some(model_to_use), // Use same model for fallback
    );
    if let Some(ctx) = args.num_ctx {
        ollama.set_num_ctx(Some(ctx));
    }
    if args.temperature.is_some()
        || args.top_p.is_some()
        || args.top_k.is_some()
        || args.min_p.is_some()
    {
        ollama.set_sampling_params(args.temperature, args.top_p, args.top_k, args.min_p);
    }
    // Surface what's actually configured so comparison runs are auditable.
    println!(
        "num_ctx={:?} temperature={:?} top_p={:?} top_k={:?} min_p={:?}",
        args.num_ctx, args.temperature, args.top_p, args.top_k, args.min_p
    );
    let sms_api_url =
        env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
@@ -160,9 +199,14 @@ async fn main() -> Result<()> {
        println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
        if args.verbose {
            let user_name = user_display_name();
            println!("\nMessage preview:");
            for (i, msg) in messages.iter().take(3).enumerate() {
-                let sender = if msg.is_sent { "Me" } else { &msg.contact };
+                let sender: &str = if msg.is_sent {
                    &user_name
                } else {
                    &msg.contact
                };
                let preview = msg.body.chars().take(60).collect::<String>();
                println!("  {}. {}: {}...", i + 1, sender, preview);
            }
@@ -172,64 +216,11 @@ async fn main() -> Result<()> {
            println!();
        }
-        // Format messages for LLM
+        let (prompt, system_prompt) = build_daily_summary_prompt(&args.contact, date, messages);
        let messages_text: String = messages
            .iter()
            .take(200)
            .map(|m| {
                if m.is_sent {
                    format!("Me: {}", m.body)
                } else {
                    format!("{}: {}", m.contact, m.body)
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let prompt = format!(
            r#"Summarize this day's conversation between me and {}.
 CRITICAL FORMAT RULES:
 - Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
 - Do NOT repeat the date at the beginning
 - Start DIRECTLY with the content - begin with a person's name or action
 - Write in past tense, as if recording what happened
 NARRATIVE (3-5 sentences):
 - What specific topics, activities, or events were discussed?
 - What places, people, or organizations were mentioned?
 - What plans were made or decisions discussed?
 - Clearly distinguish between what "I" did versus what {} did
 KEYWORDS (comma-separated):
 5-10 specific keywords that capture this conversation's unique content:
 - Proper nouns (people, places, brands)
 - Specific activities ("drum corps audition" not just "music")
 - Distinctive terms that make this day unique
 Date: {} ({})
 Messages:
 {}
 YOUR RESPONSE (follow this format EXACTLY):
 Summary: [Start directly with content, NO preamble]
 Keywords: [specific, unique terms]"#,
            args.contact,
            args.contact,
            date.format("%B %d, %Y"),
            weekday,
            messages_text
        );
        println!("Generating summary...");
-        let summary = ollama
+        let summary = ollama.generate(&prompt, Some(system_prompt)).await?;
            .generate(
                &prompt,
                Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
            )
            .await?;
        println!("\n📝 GENERATED SUMMARY:");
        println!("─────────────────────────────────────────");
@@ -256,8 +247,7 @@ Keywords: [specific, unique terms]"#,
                message_count: messages.len() as i32,
                embedding,
                created_at: chrono::Utc::now().timestamp(),
-                // model_version: "nomic-embed-text:v1.5".to_string(),
+                model_version: EMBEDDING_MODEL.to_string(),
                model_version: "mxbai-embed-large:335m".to_string(),
            };
            let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
--- a/src/database/daily_summary_dao.rs
+++ b/src/database/daily_summary_dao.rs
@@ -268,7 +268,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                .into_iter()
                .take(limit)
                .map(|(similarity, summary)| {
-                    log::info!(
+                    log::debug!(
                        "Summary match: similarity={:.3}, date={}, contact={}, summary=\"{}\"",
                        similarity,
                        summary.date,
@@ -388,7 +388,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                .into_iter()
                .take(limit)
                .map(|(combined, similarity, days, summary)| {
-                    log::info!(
+                    log::debug!(
                        "Summary match: combined={:.3} (sim={:.3}, days={}), date={}, contact={}, summary=\"{}\"",
                        combined,
                        similarity,
--- a/src/database/insights_dao.rs
+++ b/src/database/insights_dao.rs
@@ -38,6 +38,16 @@ pub trait InsightDao: Sync + Send {
        file_path: &str,
    ) -> Result<Vec<PhotoInsight>, DbError>;
    /// Fetch a single insight by primary key, regardless of `is_current`.
    /// Used by the few-shot injection flow where the caller picks specific
    /// historical insights (which may have been superseded) as training
    /// exemplars for a fresh generation.
    ///
    /// Returns `Ok(Some(insight))` when a row with `insight_id` exists and
    /// `Ok(None)` when it does not; a `DbError` signals that the lookup
    /// itself failed.
    fn get_insight_by_id(
        &mut self,
        context: &opentelemetry::Context,
        insight_id: i32,
    ) -> Result<Option<PhotoInsight>, DbError>;
    fn delete_insight(
        &mut self,
        context: &opentelemetry::Context,
@@ -60,6 +70,17 @@ pub trait InsightDao: Sync + Send {
        &mut self,
        context: &opentelemetry::Context,
    ) -> Result<Vec<PhotoInsight>, DbError>;
    /// Replace the `training_messages` JSON blob on the current row for
    /// `(library_id, rel_path)`. Used by chat-turn append mode to persist
    /// the extended conversation without inserting a new insight version.
    ///
    /// `training_messages_json` is stored as given; this method does not
    /// parse or validate it.
    ///
    /// Returns `Ok(())` once the update statement has executed. Note that
    /// the SQLite implementation discards the affected-row count, so a call
    /// that matches no current row also returns `Ok(())`.
    fn update_training_messages(
        &mut self,
        context: &opentelemetry::Context,
        library_id: i32,
        file_path: &str,
        training_messages_json: &str,
    ) -> Result<(), DbError>;
 }
 pub struct SqliteInsightDao {
@@ -187,6 +208,25 @@ impl InsightDao for SqliteInsightDao {
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
    fn get_insight_by_id(
        &mut self,
        context: &opentelemetry::Context,
        insight_id: i32,
    ) -> Result<Option<PhotoInsight>, DbError> {
        trace_db_call(context, "query", "get_insight_by_id", |_span| {
            use schema::photo_insights::dsl::*;
            let mut connection = self.connection.lock().expect("Unable to get InsightDao");
            photo_insights
                .find(insight_id)
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
                .map_err(|_| anyhow::anyhow!("Query error"))
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
    fn delete_insight(
        &mut self,
        context: &opentelemetry::Context,
@@ -265,4 +305,30 @@ impl InsightDao for SqliteInsightDao {
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
    fn update_training_messages(
        &mut self,
        context: &opentelemetry::Context,
        lib_id: i32,
        path: &str,
        training_messages_json: &str,
    ) -> Result<(), DbError> {
        trace_db_call(context, "update", "update_training_messages", |_span| {
            use schema::photo_insights::dsl::*;
            let mut connection = self.connection.lock().expect("Unable to get InsightDao");
            diesel::update(
                photo_insights
                    .filter(library_id.eq(lib_id))
                    .filter(rel_path.eq(path))
                    .filter(is_current.eq(true)),
            )
            .set(training_messages.eq(Some(training_messages_json.to_string())))
            .execute(connection.deref_mut())
            .map(|_| ())
            .map_err(|_| anyhow::anyhow!("Update error"))
        })
        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }
 }
--- a/src/database/models.rs
+++ b/src/database/models.rs
@@ -100,6 +100,14 @@ pub struct InsertPhotoInsight {
    pub model_version: String,
    pub is_current: bool,
    pub training_messages: Option<String>,
    /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
    pub backend: String,
    /// JSON array of insight ids whose `training_messages` were compressed
    /// and injected into the system prompt as few-shot exemplars when this
    /// row was generated. `None` means no few-shot was used (pristine
    /// generation). Used downstream to filter out contaminated rows when
    /// assembling an unbiased training / evaluation set.
    pub fewshot_source_ids: Option<String>,
 }
 #[derive(Serialize, Queryable, Clone, Debug)]
@@ -115,6 +123,9 @@ pub struct PhotoInsight {
    pub is_current: bool,
    pub training_messages: Option<String>,
    pub approved: Option<bool>,
    /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
    pub backend: String,
    pub fewshot_source_ids: Option<String>,
 }
 // --- Libraries ---
--- a/src/database/schema.rs
+++ b/src/database/schema.rs
@@ -142,6 +142,8 @@ diesel::table! {
        is_current -> Bool,
        training_messages -> Nullable<Text>,
        approved -> Nullable<Bool>,
        backend -> Text,
        fewshot_source_ids -> Nullable<Text>,
    }
 }
--- a/src/exif.rs
+++ b/src/exif.rs
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::BufReader;
+use std::io::{BufReader, Read, Seek, SeekFrom};
 use std::path::Path;
 use anyhow::{Result, anyhow};
@@ -25,6 +25,60 @@ pub struct ExifData {
    pub date_taken: Option<i64>,
 }
 /// Reports whether `path` has one of the extensions this module treats as a
 /// TIFF container (plain TIFF plus several camera RAW formats), i.e. files
 /// where `JPEGInterchangeFormat` offsets are taken as absolute file offsets.
 ///
 /// The comparison is case-insensitive. Paths without an extension, or with
 /// an extension that is not valid UTF-8, yield `false`.
 fn is_tiff_raw(path: &Path) -> bool {
    const TIFF_CONTAINER_EXTENSIONS: [&str; 11] = [
        "tiff", "tif", "nef", "cr2", "arw", "dng", "raf", "orf", "rw2", "pef", "srw",
    ];
    let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
        return false;
    };
    let lowered = ext.to_lowercase();
    TIFF_CONTAINER_EXTENSIONS.contains(&lowered.as_str())
 }
 /// Returns the bytes of the embedded JPEG thumbnail in a TIFF-based RAW or
 /// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded
 /// by our normal tools (e.g. Sony ARW).
 ///
 /// Returns `None` if:
 /// - the extension is not one accepted by `is_tiff_raw`,
 /// - the file cannot be opened or its EXIF cannot be parsed,
 /// - the thumbnail offset/length tags are missing,
 /// - the declared `offset + length` range does not fit inside the file, or
 /// - the data does not start with the JPEG SOI marker.
 pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> {
    if !is_tiff_raw(path) {
        return None;
    }
    let file = File::open(path).ok()?;
    let file_len = file.metadata().ok()?.len();
    let mut bufreader = BufReader::new(file);
    let exif = Reader::new().read_from_container(&mut bufreader).ok()?;
    let offset = u64::from(
        exif.get_field(Tag::JPEGInterchangeFormat, In::THUMBNAIL)?
            .value
            .get_uint(0)?,
    );
    let length = u64::from(
        exif.get_field(Tag::JPEGInterchangeFormatLength, In::THUMBNAIL)?
            .value
            .get_uint(0)?,
    );
    // Anything shorter than the two-byte SOI marker cannot be a JPEG.
    if length < 2 {
        return None;
    }
    // Both values come straight from the file, so validate them against the
    // real file size *before* allocating: a corrupt or hostile length field
    // must not be able to trigger a multi-gigabyte allocation.
    let end = offset.checked_add(length)?;
    if end > file_len {
        return None;
    }
    // Reuse the handle we already have; the seek below is absolute, so the
    // reader's buffered position is irrelevant.
    let mut file = bufreader.into_inner();
    file.seek(SeekFrom::Start(offset)).ok()?;
    let mut buf = vec![0u8; usize::try_from(length).ok()?];
    file.read_exact(&mut buf).ok()?;
    // JPEG SOI marker sanity check — MakerNote offsets sometimes point at
    // TIFF-wrapped previews or other non-JPEG data.
    if !buf.starts_with(&[0xFF, 0xD8]) {
        return None;
    }
    Some(buf)
 }
 pub fn supports_exif(path: &Path) -> bool {
    if let Some(ext) = path.extension() {
        let ext_lower = ext.to_string_lossy().to_lowercase();
--- a/src/file_types.rs
+++ b/src/file_types.rs
@@ -3,9 +3,22 @@ use walkdir::DirEntry;
 /// Supported image file extensions
 pub const IMAGE_EXTENSIONS: &[&str] = &[
-    "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef",
+    "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef", "arw",
 ];
 /// Lower-case extensions whose thumbnails are produced via ffmpeg because the
 /// `image` crate cannot decode them (embedded preview extraction or frame
 /// decode happens on the ffmpeg side).
 pub const FFMPEG_THUMBNAIL_EXTENSIONS: &[&str] = &["heif", "heic", "nef", "arw"];
 /// Returns true when `path`'s extension (compared case-insensitively) is
 /// listed in `FFMPEG_THUMBNAIL_EXTENSIONS`, i.e. thumbnail generation should
 /// go through ffmpeg instead of the `image` crate.
 ///
 /// Paths with no extension, or a non-UTF-8 extension, return false.
 pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
    path.extension()
        .and_then(|raw| raw.to_str())
        .map(str::to_lowercase)
        .is_some_and(|lowered| FFMPEG_THUMBNAIL_EXTENSIONS.contains(&lowered.as_str()))
 }
 /// Supported video file extensions
 pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];
--- a/src/files.rs
+++ b/src/files.rs
@@ -15,6 +15,7 @@ use crate::database::ExifDao;
 use crate::file_types;
 use crate::geo::{gps_bounding_box, haversine_distance};
 use crate::memories::extract_date_from_filename;
 use crate::utils::earliest_fs_time;
 use crate::{AppState, create_thumbnails};
 use actix_web::web::Data;
 use actix_web::{
@@ -138,8 +139,8 @@ fn in_memory_date_sort(
                    lib_roots.get(&lib_id).and_then(|root| {
                        let full_path = Path::new(root).join(&f.file_name);
                        std::fs::metadata(full_path)
                            .and_then(|md| md.created().or(md.modified()))
                            .ok()
                            .and_then(|md| earliest_fs_time(&md))
                            .map(|system_time| {
                                <SystemTime as Into<DateTime<Utc>>>::into(system_time).timestamp()
                            })
--- a/src/main.rs
+++ b/src/main.rs
@@ -52,7 +52,8 @@ use crate::state::AppState;
 use crate::tags::*;
 use crate::video::actors::{
    GeneratePreviewClipMessage, ProcessMessage, QueueVideosMessage, ScanDirectoryMessage,
-    VideoPlaylistManager, create_playlist, generate_video_thumbnail,
+    VideoPlaylistManager, create_playlist, generate_image_thumbnail_ffmpeg,
    generate_video_thumbnail,
 };
 use log::{debug, error, info, trace, warn};
 use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
@@ -1060,6 +1061,47 @@ async fn delete_favorite(
    }
 }
 /// Builds the sentinel path for a thumbnail: `thumb_path` with the literal
 /// suffix `.unsupported` appended to the whole path (the existing extension
 /// is kept, not replaced).
 ///
 /// The sentinel is written when neither the `image` crate nor ffmpeg can
 /// decode a file; scans that find it skip the file instead of retrying and
 /// re-logging the failure.
 pub fn unsupported_thumbnail_sentinel(thumb_path: &Path) -> PathBuf {
    // Work on the raw OS string so non-UTF-8 paths survive unchanged.
    let mut raw = thumb_path.to_path_buf().into_os_string();
    raw.push(".unsupported");
    raw.into()
 }
 fn generate_image_thumbnail(src: &Path, thumb_path: &Path) -> std::io::Result<()> {
    // RAW formats (ARW/NEF/CR2/etc): try the file's embedded JPEG preview
    // first. Avoids ffmpeg choking on proprietary RAW compression (Sony ARW
    // in particular), and is faster than decoding RAW pixels anyway.
    if let Some(preview) = exif::extract_embedded_jpeg_preview(src) {
        let img = image::load_from_memory(&preview).map_err(|e| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("decode embedded preview {:?}: {}", src, e),
            )
        })?;
        let scaled = img.thumbnail(200, u32::MAX);
        scaled
            .save_with_format(thumb_path, image::ImageFormat::Jpeg)
            .map_err(|e| std::io::Error::other(format!("save {:?}: {}", thumb_path, e)))?;
        return Ok(());
    }
    if file_types::needs_ffmpeg_thumbnail(src) {
        return generate_image_thumbnail_ffmpeg(src, thumb_path);
    }
    let img = image::open(src).map_err(|e| {
        std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{:?}: {}", src, e))
    })?;
    let scaled = img.thumbnail(200, u32::MAX);
    scaled
        .save(thumb_path)
        .map_err(|e| std::io::Error::other(format!("save {:?}: {}", thumb_path, e)))?;
    Ok(())
 }
 fn create_thumbnails(libs: &[libraries::Library]) {
    let tracer = global_tracer();
    let span = tracer.start("creating thumbnails");
@@ -1080,17 +1122,26 @@ fn create_thumbnails(libs: &[libraries::Library]) {
            .into_par_iter()
            .filter_map(|entry| entry.ok())
            .filter(|entry| entry.file_type().is_file())
-            .filter(|entry| {
+            .for_each(|entry| {
-                if is_video(entry) {
+                let src = entry.path();
-                    let relative_path = &entry.path().strip_prefix(&images).unwrap();
+                let Ok(relative_path) = src.strip_prefix(&images) else {
                    return;
                };
                let thumb_path = Path::new(thumbnail_directory).join(relative_path);
                    std::fs::create_dir_all(
                        thumb_path
                            .parent()
                            .unwrap_or_else(|| panic!("Thumbnail {:?} has no parent?", thumb_path)),
                    )
                    .expect("Error creating directory");
                if thumb_path.exists() || unsupported_thumbnail_sentinel(&thumb_path).exists() {
                    return;
                }
                let Some(parent) = thumb_path.parent() else {
                    return;
                };
                if let Err(e) = std::fs::create_dir_all(parent) {
                    error!("Failed to create thumbnail dir {:?}: {}", parent, e);
                    return;
                }
                if is_video(&entry) {
                    let mut video_span = tracer.start_with_context(
                        "generate_video_thumbnail",
                        &opentelemetry::Context::new()
@@ -1103,37 +1154,24 @@ fn create_thumbnails(libs: &[libraries::Library]) {
                    ]);
                    debug!("Generating video thumbnail: {:?}", thumb_path);
-                    generate_video_thumbnail(entry.path(), &thumb_path);
+                    generate_video_thumbnail(src, &thumb_path);
                    video_span.end();
-                    false
+                } else if is_image(&entry) {
-                } else {
+                    match generate_image_thumbnail(src, &thumb_path) {
-                    is_image(entry)
+                        Ok(_) => info!("Saved thumbnail: {:?}", thumb_path),
                        Err(e) => {
                            let sentinel = unsupported_thumbnail_sentinel(&thumb_path);
                            error!(
                                "Unable to thumbnail {:?}: {}. Writing sentinel {:?}",
                                src, e, sentinel
                            );
                            if let Err(se) = std::fs::write(&sentinel, b"") {
                                warn!("Failed to write sentinel {:?}: {}", sentinel, se);
                            }
            })
            .filter(|entry| {
                let path = entry.path();
                let relative_path = &path.strip_prefix(&images).unwrap();
                let thumb_path = Path::new(thumbnail_directory).join(relative_path);
                !thumb_path.exists()
            })
            .map(|entry| (image::open(entry.path()), entry.path().to_path_buf()))
            .filter(|(img, path)| {
                if let Err(e) = img {
                    error!("Unable to open image: {:?}. {}", path, e);
                        }
-                img.is_ok()
+                    }
-            })
+                }
-            .map(|(img, path)| (img.unwrap(), path))
+            });
            .map(|(image, path)| (image.thumbnail(200, u32::MAX), path))
            .map(|(image, path)| {
                let relative_path = &path.strip_prefix(&images).unwrap();
                let thumb_path = Path::new(thumbnail_directory).join(relative_path);
                std::fs::create_dir_all(thumb_path.parent().unwrap())
                    .expect("There was an issue creating directory");
                info!("Saving thumbnail: {:?}", thumb_path);
                image.save(thumb_path).expect("Failure saving thumbnail");
            })
            .for_each(drop);
    }
    debug!("Finished making thumbnails");
@@ -1355,6 +1393,11 @@ fn main() -> std::io::Result<()> {
                .service(ai::delete_insight_handler)
                .service(ai::get_all_insights_handler)
                .service(ai::get_available_models_handler)
                .service(ai::get_openrouter_models_handler)
                .service(ai::chat_turn_handler)
                .service(ai::chat_stream_handler)
                .service(ai::chat_history_handler)
                .service(ai::chat_rewind_handler)
                .service(ai::rate_insight_handler)
                .service(ai::export_training_data_handler)
                .service(libraries::list_libraries)
@@ -1739,7 +1782,8 @@ fn process_new_files(
    // not just photos with parseable EXIF.
    for (file_path, relative_path) in &files {
        let thumb_path = thumbnail_directory.join(relative_path);
-        let needs_thumbnail = !thumb_path.exists();
+        let needs_thumbnail =
            !thumb_path.exists() && !unsupported_thumbnail_sentinel(&thumb_path).exists();
        let needs_row = !existing_exif_paths.contains_key(relative_path);
        if needs_thumbnail || needs_row {
--- a/src/memories.rs
+++ b/src/memories.rs
@@ -19,6 +19,7 @@ use crate::files::is_image_or_video;
 use crate::libraries::Library;
 use crate::otel::{extract_context_from_request, global_tracer};
 use crate::state::AppState;
 use crate::utils::earliest_fs_time;
 // Helper that encapsulates path-exclusion semantics
 #[derive(Debug)]
@@ -336,8 +337,8 @@ fn get_memory_date_with_priority(
        return Some((date, Some(exif_timestamp), modified));
    }
-    // Priority 3: Fall back to metadata
+    // Priority 3: Fall back to metadata (earlier of created/modified — see utils::earliest_fs_time)
-    let system_time = meta.created().ok().or_else(|| meta.modified().ok())?;
+    let system_time = earliest_fs_time(&meta)?;
    let dt_utc: DateTime<Utc> = system_time.into();
    let date_in_timezone = if let Some(tz) = client_timezone {
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,3 +1,5 @@
 use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
 use crate::ai::openrouter::OpenRouterClient;
 use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
 use crate::database::{
    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
@@ -31,8 +33,20 @@ pub struct AppState {
    pub preview_clips_path: String,
    pub excluded_dirs: Vec<String>,
    pub ollama: OllamaClient,
    /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
    /// when a request explicitly opts into `backend=hybrid`. Currently
    /// reached via `insight_generator`; kept here so future handlers
    /// (insight_chat) can route to it without threading it through the
    /// generator.
    #[allow(dead_code)]
    pub openrouter: Option<Arc<OpenRouterClient>>,
    /// Curated list of OpenRouter model ids exposed to clients. Sourced from
    /// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset.
    pub openrouter_allowed_models: Vec<String>,
    pub sms_client: SmsApiClient,
    pub insight_generator: InsightGenerator,
    /// Chat continuation service. Hold an Arc so handlers can clone cheaply.
    pub insight_chat: Arc<InsightChatService>,
 }
 impl AppState {
@@ -61,8 +75,11 @@ impl AppState {
        preview_clips_path: String,
        excluded_dirs: Vec<String>,
        ollama: OllamaClient,
        openrouter: Option<Arc<OpenRouterClient>>,
        openrouter_allowed_models: Vec<String>,
        sms_client: SmsApiClient,
        insight_generator: InsightGenerator,
        insight_chat: Arc<InsightChatService>,
        preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
    ) -> Self {
        assert!(
@@ -92,8 +109,11 @@ impl AppState {
            preview_clips_path,
            excluded_dirs,
            ollama,
            openrouter,
            openrouter_allowed_models,
            sms_client,
            insight_generator,
            insight_chat,
        }
    }
@@ -127,6 +147,9 @@ impl Default for AppState {
            ollama_fallback_model,
        );
        let openrouter = build_openrouter_from_env();
        let openrouter_allowed_models = parse_openrouter_allowed_models();
        let sms_api_url =
            env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
        let sms_api_token = env::var("SMS_API_TOKEN").ok();
@@ -168,6 +191,7 @@ impl Default for AppState {
        // Initialize InsightGenerator with all data sources
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            openrouter.clone(),
            sms_client.clone(),
            insight_dao.clone(),
            exif_dao.clone(),
@@ -180,6 +204,18 @@ impl Default for AppState {
            libraries_vec.clone(),
        );
        // Chat continuation reuses the generator for tool dispatch + image
        // loading. The lock map starts empty and grows lazily per file.
        let chat_locks: ChatLockMap =
            Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
        let insight_chat = Arc::new(InsightChatService::new(
            Arc::new(insight_generator.clone()),
            ollama.clone(),
            openrouter.clone(),
            insight_dao.clone(),
            chat_locks,
        ));
        // Ensure preview clips directory exists
        let preview_clips_path =
            env::var("PREVIEW_CLIPS_DIRECTORY").unwrap_or_else(|_| "preview_clips".to_string());
@@ -195,13 +231,47 @@ impl Default for AppState {
            preview_clips_path,
            Self::parse_excluded_dirs(),
            ollama,
            openrouter,
            openrouter_allowed_models,
            sms_client,
            insight_generator,
            insight_chat,
            preview_dao,
        )
    }
 }
 /// Construct the shared `OpenRouterClient` from environment variables.
 ///
 /// Returns `None` when `OPENROUTER_API_KEY` is unset (the hybrid backend is
 /// then unavailable and requests for it return a clear error).
 ///
 /// Variables read:
 /// - `OPENROUTER_API_KEY` — required.
 /// - `OPENROUTER_BASE_URL` — optional, passed through as-is.
 /// - `OPENROUTER_DEFAULT_MODEL` — defaults to `anthropic/claude-sonnet-4`.
 /// - `OPENROUTER_HTTP_REFERER`, `OPENROUTER_APP_TITLE` — optional attribution.
 /// - `OPENROUTER_EMBEDDING_MODEL` — optional embedding model override.
 fn build_openrouter_from_env() -> Option<Arc<OpenRouterClient>> {
    let Ok(api_key) = env::var("OPENROUTER_API_KEY") else {
        return None;
    };
    let base_url = env::var("OPENROUTER_BASE_URL").ok();
    let default_model = match env::var("OPENROUTER_DEFAULT_MODEL") {
        Ok(model) => model,
        Err(_) => "anthropic/claude-sonnet-4".to_string(),
    };

    let mut client = OpenRouterClient::new(api_key, base_url, default_model);

    let referer = env::var("OPENROUTER_HTTP_REFERER").ok();
    let app_title = env::var("OPENROUTER_APP_TITLE").ok();
    client.set_attribution(referer, app_title);

    if let Some(embedding_model) = env::var("OPENROUTER_EMBEDDING_MODEL").ok() {
        client.set_embedding_model(embedding_model);
    }
    Some(Arc::new(client))
 }
 /// Read `OPENROUTER_ALLOWED_MODELS` and split it on commas into model ids.
 ///
 /// Each entry is trimmed and blank entries are dropped. Returns an empty vec
 /// when the variable is unset, in which case `/insights/openrouter/models`
 /// reports no curated picks and the server falls back to
 /// `OPENROUTER_DEFAULT_MODEL`.
 fn parse_openrouter_allowed_models() -> Vec<String> {
    let raw = env::var("OPENROUTER_ALLOWED_MODELS").unwrap_or_default();
    let mut models = Vec::new();
    for entry in raw.split(',') {
        let id = entry.trim();
        if !id.is_empty() {
            models.push(id.to_string());
        }
    }
    models
 }
 #[cfg(test)]
 impl AppState {
    /// Creates an AppState instance for testing with temporary directories
@@ -255,6 +325,7 @@ impl AppState {
        };
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            None,
            sms_client.clone(),
            insight_dao.clone(),
            exif_dao.clone(),
@@ -267,6 +338,16 @@ impl AppState {
            vec![test_lib],
        );
        let chat_locks: ChatLockMap =
            Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
        let insight_chat = Arc::new(InsightChatService::new(
            Arc::new(insight_generator.clone()),
            ollama.clone(),
            None,
            insight_dao.clone(),
            chat_locks,
        ));
        // Initialize test preview DAO
        let preview_dao: Arc<Mutex<Box<dyn PreviewDao>>> =
            Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new())));
@@ -286,8 +367,11 @@ impl AppState {
            preview_clips_path.to_string_lossy().to_string(),
            Vec::new(), // No excluded directories for test state
            ollama,
            None,
            Vec::new(),
            sms_client,
            insight_generator,
            insight_chat,
            preview_dao,
        )
    }
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,3 +1,5 @@
 use std::time::SystemTime;
 /// Normalize a file path to use forward slashes for cross-platform consistency
 /// This ensures paths stored in the database always use `/` regardless of OS
 ///
@@ -12,6 +14,20 @@ pub fn normalize_path(path: &str) -> String {
    path.replace('\\', "/")
 }
 /// Return whichever of a file's created / modified timestamps is earlier.
 ///
 /// On copied/restored files (e.g., a backup library), `created` is stamped at
 /// copy time while `modified` is preserved from the source — so the earlier
 /// of the two is a better proxy for when the content originated.
 ///
 /// If only one timestamp can be read, that one is returned; if neither can,
 /// the result is `None`.
 pub fn earliest_fs_time(md: &std::fs::Metadata) -> Option<SystemTime> {
    let created = md.created().ok();
    let modified = md.modified().ok();
    match created.zip(modified) {
        // Both available: keep the smaller one.
        Some((c, m)) => Some(if m < c { m } else { c }),
        // At most one available: take it, or `None` if both are missing.
        None => created.or(modified),
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/video/actors.rs
+++ b/src/video/actors.rs
@@ -4,7 +4,6 @@ use crate::libraries::Library;
 use crate::otel::global_tracer;
 use crate::video::ffmpeg::generate_preview_clip;
 use actix::prelude::*;
 use futures::TryFutureExt;
 use log::{debug, error, info, trace, warn};
 use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
@@ -48,6 +47,24 @@ impl Handler<ProcessMessage> for StreamActor {
    }
 }
 /// Build the playlist path for `video_path` inside `playlist_dir`.
 ///
 /// The result is `<playlist_dir>/<video file name>.m3u8`. When the video path
 /// has no final component, or that component is not valid UTF-8, the literal
 /// name `unknown` is used instead.
 pub fn playlist_file_for(playlist_dir: &str, video_path: &Path) -> PathBuf {
    let base_name = match video_path.file_name() {
        Some(os_name) => os_name.to_str().unwrap_or("unknown"),
        None => "unknown",
    };

    // Assemble "<dir>/<name>.m3u8" as a plain string so the separator is
    // always a forward slash, regardless of platform.
    let mut playlist = String::with_capacity(playlist_dir.len() + base_name.len() + 6);
    playlist.push_str(playlist_dir);
    playlist.push('/');
    playlist.push_str(base_name);
    playlist.push_str(".m3u8");
    PathBuf::from(playlist)
 }
 /// Path of the marker file that flags a source video as untranscodable.
 ///
 /// The marker sits beside the playlist that would have been produced and is
 /// written when ffmpeg cannot process the source (for instance a truncated
 /// mp4 without a moov atom). Scans that find it skip the video rather than
 /// re-running ffmpeg on every pass; remove the `.unsupported` file to force
 /// another attempt.
 pub fn playlist_unsupported_sentinel(playlist_file: &Path) -> PathBuf {
    // Append to the raw OS string so the playlist's own extension is kept
    // intact (`foo.m3u8` -> `foo.m3u8.unsupported`).
    let mut sentinel = playlist_file.to_path_buf().into_os_string();
    sentinel.push(".unsupported");
    sentinel.into()
 }
 pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Child> {
    if Path::new(playlist_file).exists() {
        debug!("Playlist already exists: {}", playlist_file);
@@ -66,9 +83,11 @@ pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Ch
        .arg("-hls_time")
        .arg("3")
        .arg("-hls_list_size")
-        .arg("100")
+        .arg("0")
        .arg("-hls_playlist_type")
        .arg("vod")
        .arg("-vf")
-        .arg("scale=1080:-2,setsar=1:1")
+        .arg("scale='min(1080,iw)':-2,setsar=1:1")
        .arg(playlist_file)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
@@ -103,113 +122,169 @@ pub fn generate_video_thumbnail(path: &Path, destination: &Path) {
        .expect("Failure to create video frame");
 }
-/// Check if a video is already encoded with h264 codec
+/// Use ffmpeg to extract a 200px-wide thumbnail from formats the `image` crate
-/// Returns true if the video uses h264, false otherwise or if detection fails
+/// can't decode (RAW: NEF/ARW, HEIC/HEIF). Writes JPEG bytes to `destination`
-async fn is_h264_encoded(video_path: &str) -> bool {
+/// regardless of its extension.
 pub fn generate_image_thumbnail_ffmpeg(path: &Path, destination: &Path) -> std::io::Result<()> {
    // Grab a single frame, scale it to 200px wide (height follows the aspect
    // ratio) and force the image2 muxer with the mjpeg encoder so the bytes
    // written are JPEG whatever extension `destination` carries.
    let finished = Command::new("ffmpeg")
        .args(["-y", "-i"])
        .arg(path)
        .args([
            "-vframes",
            "1",
            "-vf",
            "scale=200:-1",
            "-f",
            "image2",
            "-c:v",
            "mjpeg",
        ])
        .arg(destination)
        .output()?;

    if finished.status.success() {
        return Ok(());
    }

    // Surface ffmpeg's own diagnostics to the caller on a non-zero exit.
    let diagnostics = String::from_utf8_lossy(&finished.stderr);
    Err(std::io::Error::other(format!(
        "ffmpeg failed ({}): {}",
        finished.status,
        diagnostics.trim()
    )))
 }
 /// Video stream metadata needed to pick HLS encode settings. Populated by
 /// a single ffprobe call to avoid spawning multiple subprocesses per video.
 ///
 /// The derived `Default` (`is_h264 = false`, `rotation = 0`) is the value the
 /// probe hands back when ffprobe cannot be run, exits with an error, or
 /// prints something that is not JSON.
 #[derive(Debug, Default)]
 struct VideoStreamMeta {
    /// `true` only when the probed stream's `codec_name` is exactly `"h264"`.
    is_h264: bool,
    /// Rotation in degrees, `0` when none was detected. Taken from the legacy
    /// `rotate` stream tag when that is present and non-zero, otherwise from
    /// the absolute value of the display-matrix side-data rotation
    /// (typically 0/90/180/270).
    rotation: i32,
 }
 /// Probe video stream metadata in one ffprobe call. Returns default (codec
 /// unknown, rotation 0) on any failure — callers fall back to transcoding.
 async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
    let output = tokio::process::Command::new("ffprobe")
        .arg("-v")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-print_format")
        .arg("json")
        .arg("-show_entries")
-        .arg("stream=codec_name")
+        .arg("stream=codec_name:stream_tags=rotate:side_data_list")
        .arg("-of")
        .arg("default=noprint_wrappers=1:nokey=1")
        .arg(video_path)
        .output()
        .await;
-    match output {
+    let Ok(output) = output else {
-        Ok(output) if output.status.success() => {
+        warn!("Failed to run ffprobe for {}", video_path);
-            let codec = String::from_utf8_lossy(&output.stdout);
+        return VideoStreamMeta::default();
-            let codec = codec.trim();
+    };
-            debug!("Detected codec for {}: {}", video_path, codec);
+    if !output.status.success() {
            codec == "h264"
        }
        Ok(output) => {
        warn!(
            "ffprobe failed for {}: {}",
            video_path,
-                String::from_utf8_lossy(&output.stderr)
+            String::from_utf8_lossy(&output.stderr).trim()
        );
-            false
+        return VideoStreamMeta::default();
        }
        Err(e) => {
            warn!("Failed to run ffprobe for {}: {}", video_path, e);
            false
        }
    }
    let Ok(json) = serde_json::from_slice::<serde_json::Value>(&output.stdout) else {
        warn!("ffprobe returned non-JSON for {}", video_path);
        return VideoStreamMeta::default();
    };
    let stream = &json["streams"][0];
    let is_h264 = stream
        .get("codec_name")
        .and_then(|v| v.as_str())
        .map(|s| s == "h264")
        .unwrap_or(false);
    // Prefer legacy `tags.rotate` (older containers); fall back to the
    // display-matrix side data (iPhone and other modern recorders).
    let rotation = stream
        .get("tags")
        .and_then(|t| t.get("rotate"))
        .and_then(|r| r.as_str())
        .and_then(|s| s.parse::<i32>().ok())
        .filter(|r| *r != 0)
        .or_else(|| {
            stream
                .get("side_data_list")
                .and_then(|l| l.as_array())
                .and_then(|arr| {
                    arr.iter()
                        .find_map(|sd| sd.get("rotation").and_then(|r| r.as_f64()))
                })
                .map(|f| f.abs() as i32)
                .filter(|r| *r != 0)
        })
        .unwrap_or(0);
    debug!(
        "Probed {}: codec_h264={}, rotation={}°",
        video_path, is_h264, rotation
    );
    VideoStreamMeta { is_h264, rotation }
 }
-/// Check if a video has rotation metadata
+/// Probe the max keyframe interval (GOP) in the first ~30s of a video.
-/// Returns the rotation angle in degrees (0, 90, 180, 270) or 0 if none detected
+/// Returns `None` on probe failure or if we couldn't see at least two keyframes.
-/// Checks both legacy stream tags and modern display matrix side data
+///
-async fn get_video_rotation(video_path: &str) -> i32 {
+/// Used to decide between stream-copy and transcode: HLS needs segments to
-    // Check legacy rotate stream tag (older videos)
+/// start on keyframes, so if the source GOP exceeds `hls_time`, copying
 /// produces oversized/glitchy segments and we need to re-encode.
 async fn get_max_gop_seconds(video_path: &str) -> Option<f64> {
    let output = tokio::process::Command::new("ffprobe")
        .arg("-v")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-skip_frame")
        .arg("nokey")
        .arg("-show_entries")
-        .arg("stream_tags=rotate")
+        .arg("frame=pts_time")
        .arg("-of")
-        .arg("default=noprint_wrappers=1:nokey=1")
+        .arg("csv=p=0")
        .arg("-read_intervals")
        .arg("%+30")
        .arg(video_path)
        .output()
-        .await;
+        .await
        .ok()?;
-    if let Ok(output) = output
+    if !output.status.success() {
-        && output.status.success()
+        warn!(
-    {
+            "ffprobe GOP check failed for {}: {}",
-        let rotation_str = String::from_utf8_lossy(&output.stdout);
+            video_path,
-        let rotation_str = rotation_str.trim();
+            String::from_utf8_lossy(&output.stderr).trim()
        if !rotation_str.is_empty()
            && let Ok(rotation) = rotation_str.parse::<i32>()
            && rotation != 0
        {
            debug!(
                "Detected rotation {}° from stream tag for {}",
                rotation, video_path
        );
-            return rotation;
+        return None;
        }
    }
-    // Check display matrix side data (modern videos, e.g. iPhone)
+    let times: Vec<f64> = String::from_utf8_lossy(&output.stdout)
-    let output = tokio::process::Command::new("ffprobe")
+        .lines()
-        .arg("-v")
+        .filter_map(|l| l.trim().parse::<f64>().ok())
-        .arg("error")
+        .collect();
        .arg("-select_streams")
        .arg("v:0")
        .arg("-show_entries")
        .arg("side_data=rotation")
        .arg("-of")
        .arg("default=noprint_wrappers=1:nokey=1")
        .arg(video_path)
        .output()
        .await;
-    if let Ok(output) = output
+    if times.len() < 2 {
-        && output.status.success()
+        return None;
-    {
+    }
-        let rotation_str = String::from_utf8_lossy(&output.stdout);
+
-        let rotation_str = rotation_str.trim();
+    let max_gop = times
-        if !rotation_str.is_empty()
+        .windows(2)
-            && let Ok(rotation) = rotation_str.parse::<f64>()
+        .map(|w| w[1] - w[0])
-        {
+        .fold(0.0_f64, f64::max);
            let rotation = rotation.abs() as i32;
            if rotation != 0 {
    debug!(
-                    "Detected rotation {}° from display matrix for {}",
+        "Max GOP in first {} keyframes of {}: {:.2}s",
-                    rotation, video_path
+        times.len(),
        video_path,
        max_gop
    );
-                return rotation;
+    Some(max_gop)
            }
        }
    }
    0
 }
 pub struct VideoPlaylistManager {
@@ -246,15 +321,21 @@ impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
            msg.directory
        );
        let playlist_output_dir = self.playlist_dir.clone();
        let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
        let video_files = WalkDir::new(&msg.directory)
            .into_iter()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_type().is_file())
            .filter(is_video)
            .filter(|e| {
                let playlist = playlist_file_for(&playlist_dir_str, e.path());
                !playlist.exists() && !playlist_unsupported_sentinel(&playlist).exists()
            })
            .collect::<Vec<DirEntry>>();
        let scan_dir_name = msg.directory.clone();
        let playlist_output_dir = self.playlist_dir.clone();
        let playlist_generator = self.playlist_generator.clone();
        Box::pin(async move {
@@ -285,6 +366,9 @@ impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
                            path_as_str
                        );
                    }
                    Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
                        debug!("Playlist already exists for '{:?}', skipping", path);
                    }
                    Err(e) => {
                        warn!("Failed to generate playlist for path '{:?}'. {:?}", path, e);
                    }
@@ -318,14 +402,19 @@ impl Handler<QueueVideosMessage> for VideoPlaylistManager {
        );
        let playlist_output_dir = self.playlist_dir.clone();
        let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
        let playlist_generator = self.playlist_generator.clone();
        for video_path in msg.video_paths {
            let playlist = playlist_file_for(&playlist_dir_str, &video_path);
            if playlist.exists() || playlist_unsupported_sentinel(&playlist).exists() {
                continue;
            }
            let path_str = video_path.to_string_lossy().to_string();
            debug!("Queueing playlist generation for: {}", path_str);
            playlist_generator.do_send(GeneratePlaylistMessage {
-                playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
+                playlist_path: playlist_dir_str.clone(),
                video_path,
            });
        }
@@ -357,8 +446,17 @@ pub struct PlaylistGenerator {
 impl PlaylistGenerator {
    pub(crate) fn new() -> Self {
        // Concurrency is tunable via HLS_CONCURRENCY so operators can dial
        // it to their hardware: 1 on weak Synology boxes to avoid thermal
        // throttling, higher on desktops with spare cores.
        let concurrency = std::env::var("HLS_CONCURRENCY")
            .ok()
            .and_then(|v| v.parse::<usize>().ok())
            .filter(|&n| n > 0)
            .unwrap_or(2);
        info!("PlaylistGenerator: concurrency={}", concurrency);
        PlaylistGenerator {
-            semaphore: Arc::new(Semaphore::new(2)),
+            semaphore: Arc::new(Semaphore::new(concurrency)),
        }
    }
 }
@@ -418,14 +516,42 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
            }
-            // Check if video is already h264 encoded
+            // One ffprobe call for codec + rotation metadata.
-            let is_h264 = is_h264_encoded(&video_file).await;
+            let stream_meta = probe_video_stream_meta(&video_file).await;
-
+            let is_h264 = stream_meta.is_h264;
-            // Check for rotation metadata
+            let rotation = stream_meta.rotation;
            let rotation = get_video_rotation(&video_file).await;
            let has_rotation = rotation != 0;
-            let use_copy = is_h264 && !has_rotation;
+            // Stream-copy is only safe when the source GOP fits inside a
            // single HLS segment. Otherwise ffmpeg has to extend segments
            // past hls_time to land on a keyframe, producing uneven
            // segments and seeking glitches.
            const HLS_SEGMENT_SECONDS: f64 = 3.0;
            let gop_ok = if is_h264 && !has_rotation {
                match get_max_gop_seconds(&video_file).await {
                    Some(g) if g > HLS_SEGMENT_SECONDS => {
                        info!(
                            "Video {} has long GOP ({:.1}s > {}s), transcoding for segment alignment",
                            video_file, g, HLS_SEGMENT_SECONDS
                        );
                        false
                    }
                    Some(_) => true,
                    None => {
                        // Probe failed — be conservative and transcode rather
                        // than risk broken segments from a mystery source.
                        debug!(
                            "GOP probe failed for {}, transcoding to be safe",
                            video_file
                        );
                        false
                    }
                }
            } else {
                false
            };
            let use_copy = is_h264 && !has_rotation && gop_ok;
            if has_rotation {
                info!(
@@ -439,59 +565,182 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
            } else if use_copy {
                info!("Video {} is already h264, using stream copy", video_file);
                span.add_event("Using stream copy (h264 detected)", vec![]);
            } else if is_h264 {
                info!(
                    "Video {} is h264 but needs transcoding for GOP alignment",
                    video_file
                );
                span.add_event("Transcoding for GOP alignment", vec![]);
            } else {
                info!("Video {} needs transcoding to h264", video_file);
                span.add_event("Transcoding to h264", vec![]);
            }
-            tokio::spawn(async move {
+            // Encode to a .tmp playlist and explicit segment names so a failed
            // encode leaves predictable artifacts we can clean up — and so a
            // concurrent scan doesn't see a half-written .m3u8 as "done".
            let playlist_tmp = format!("{}.tmp", playlist_file);
            let video_stem = msg
                .video_path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("video");
            let segment_pattern = format!("{}/{}_%03d.ts", playlist_path, video_stem);
            let mut cmd = tokio::process::Command::new("ffmpeg");
-                cmd.arg("-i").arg(&video_file);
+            cmd.arg("-y").arg("-i").arg(&video_file);
            if use_copy {
                    // Video is already h264, just copy the stream
                    // Note: rotation metadata will be preserved in the stream
                cmd.arg("-c:v").arg("copy");
-                    cmd.arg("-c:a").arg("aac"); // Still need to ensure audio is compatible
+                cmd.arg("-c:a").arg("aac");
            } else {
                let nvenc = crate::video::ffmpeg::is_nvenc_available().await;
                if nvenc {
                    // NVENC: no CRF, use VBR + target CQ. p1 = fastest
                    // preset — prioritizes encoder throughput over bitrate
                    // efficiency. CQ 23 roughly matches libx264 crf 21
                    // visually; NVENC has slightly lower compression
                    // efficiency per quality.
                    cmd.arg("-c:v").arg("h264_nvenc");
                    cmd.arg("-preset").arg("p1");
                    cmd.arg("-rc").arg("vbr");
                    cmd.arg("-cq").arg("23");
                    cmd.arg("-pix_fmt").arg("yuv420p");
                } else {
                    // Need to transcode - autorotate is enabled by default and will apply rotation
                    cmd.arg("-c:v").arg("h264");
                    cmd.arg("-crf").arg("21");
                    cmd.arg("-preset").arg("veryfast");
-                    cmd.arg("-vf").arg("scale=1080:-2,setsar=1:1");
+                }
                cmd.arg("-vf").arg("scale='min(1080,iw)':-2,setsar=1:1");
                cmd.arg("-c:a").arg("aac");
                // Force an IDR frame every hls_time seconds so each HLS
                // segment starts on a keyframe — accurate seeking without
                // players having to decode from a prior segment.
                cmd.arg("-force_key_frames").arg("expr:gte(t,n_forced*3)");
            }
-                // Common HLS settings
+            // -f hls is required because the playlist is written to a .tmp
            // path during encoding — ffmpeg normally infers the muxer from
            // the output extension and doesn't recognize ".m3u8.tmp".
            cmd.arg("-f").arg("hls");
            cmd.arg("-hls_time").arg("3");
-                cmd.arg("-hls_list_size").arg("100");
+            cmd.arg("-hls_list_size").arg("0");
-                cmd.arg(&playlist_file);
+            cmd.arg("-hls_playlist_type").arg("vod");
            // independent_segments advertises that each segment can be
            // decoded without reference to any other — the matching guarantee
            // for the forced keyframes above.
            cmd.arg("-hls_flags").arg("independent_segments");
            cmd.arg("-hls_segment_filename").arg(&segment_pattern);
            cmd.arg(&playlist_tmp);
            cmd.stdout(Stdio::null());
            cmd.stderr(Stdio::piped());
            cmd.kill_on_drop(true);
-                let ffmpeg_result = cmd
+            // Spawn + wait under a timeout so a hung ffmpeg (corrupt source,
-                    .output()
+            // NFS stall, etc.) doesn't permanently hold a semaphore slot.
-                    .inspect_err(|e| error!("Failed to run ffmpeg on child process: {}", e))
+            // Default is generous — a long 4K transcode on CPU can take hours.
-                    .map_err(|e| std::io::Error::other(e.to_string()))
+            let timeout_secs = std::env::var("HLS_TIMEOUT_SECONDS")
-                    .await;
+                .ok()
                .and_then(|v| v.parse::<u64>().ok())
                .unwrap_or(7200);
            let ffmpeg_result = match cmd.spawn() {
                Ok(child) => {
                    match tokio::time::timeout(
                        std::time::Duration::from_secs(timeout_secs),
                        child.wait_with_output(),
                    )
                    .await
                    {
                        Ok(res) => res
                            .inspect_err(|e| {
                                error!("Failed to wait on ffmpeg child process: {}", e)
                            })
                            .map_err(|e| std::io::Error::other(e.to_string())),
                        Err(_) => Err(std::io::Error::other(format!(
                            "ffmpeg exceeded {}s timeout",
                            timeout_secs
                        ))),
                    }
                }
                Err(e) => {
                    error!("Failed to spawn ffmpeg: {}", e);
                    Err(std::io::Error::other(e.to_string()))
                }
            };
                // Hang on to the permit until we're done decoding and then explicitly drop
            drop(permit);
-                if let Ok(ref res) = ffmpeg_result {
+            let success = matches!(&ffmpeg_result, Ok(out) if out.status.success());
-                    debug!("ffmpeg output: {:?}", res);
+
            if success {
                if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_file).await {
                    error!(
                        "ffmpeg succeeded but rename {} -> {} failed: {}",
                        playlist_tmp, playlist_file, e
                    );
                    cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await;
                    span.set_status(Status::error(format!("rename failed: {}", e)));
                    return Err(e);
                }
-
+                debug!("Playlist complete: {}", playlist_file);
                span.set_status(Status::Ok);
                ffmpeg_result
            });
                Ok(())
            } else {
                let detail = match &ffmpeg_result {
                    Ok(out) => format!(
                        "exit {}: {}",
                        out.status,
                        String::from_utf8_lossy(&out.stderr).trim()
                    ),
                    Err(e) => format!("ffmpeg failed: {}", e),
                };
                error!("ffmpeg failed for {}: {}", video_file, detail);
                cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await;
                let sentinel = playlist_unsupported_sentinel(Path::new(&playlist_file));
                if let Err(se) = tokio::fs::write(&sentinel, b"").await {
                    warn!(
                        "Failed to write playlist sentinel {}: {}",
                        sentinel.display(),
                        se
                    );
                } else {
                    info!(
                        "Wrote playlist sentinel {} so future scans skip {}",
                        sentinel.display(),
                        video_file
                    );
                }
                span.set_status(Status::error(detail.clone()));
                Err(std::io::Error::other(detail))
            }
        })
    }
 }
 /// Delete the temp playlist and any segment files that ffmpeg may have written
 /// before failing. Called both on ffmpeg error and on rename failure so a
 /// retry on the next scan starts from a clean slate.
 async fn cleanup_partial_hls(playlist_tmp: &str, playlist_dir: &str, video_stem: &str) {
    let _ = tokio::fs::remove_file(playlist_tmp).await;
    let segment_prefix = format!("{}_", video_stem);
    let Ok(mut entries) = tokio::fs::read_dir(playlist_dir).await else {
        return;
    };
    while let Ok(Some(entry)) = entries.next_entry().await {
        let Some(name) = entry.file_name().to_str().map(str::to_owned) else {
            continue;
        };
        if name.starts_with(&segment_prefix)
            && name.ends_with(".ts")
            && let Err(e) = tokio::fs::remove_file(entry.path()).await
        {
            warn!("Failed to remove partial segment {}: {}", name, e);
        }
    }
 }
 #[derive(Message)]
 #[rtype(result = "()")]
 pub struct GeneratePreviewClipMessage {
--- a/src/video/ffmpeg.rs
+++ b/src/video/ffmpeg.rs
@@ -22,16 +22,16 @@ async fn check_nvenc_available() -> bool {
 }
 /// Returns whether NVENC is available, caching the result after first check.
-async fn is_nvenc_available() -> bool {
+pub async fn is_nvenc_available() -> bool {
    if let Some(&available) = NVENC_AVAILABLE.get() {
        return available;
    }
    let available = check_nvenc_available().await;
    let _ = NVENC_AVAILABLE.set(available);
    if available {
-        info!("CUDA NVENC hardware acceleration detected and enabled for preview clips");
+        info!("CUDA NVENC hardware acceleration detected and enabled");
    } else {
-        info!("NVENC not available, using CPU encoding for preview clips");
+        info!("NVENC not available, using CPU encoding");
    }
    available
 }
		`@@ -0,0 +1 @@`
							`ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local';`
		`@@ -0,0 +1 @@`
							`ALTER TABLE photo_insights ADD COLUMN fewshot_source_ids TEXT;`