From fbd769e475d8a92ddd2a9a79401a7dd52c39a388 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 10 May 2026 13:30:35 -0400 Subject: [PATCH 1/3] personas: composite FK + built-in update guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two persona-infrastructure correctness fixes that go together because the second one (FK with CASCADE) requires the first (preventing the persona row from being mutated out from under its facts). 1. update_persona handler refuses name/systemPrompt edits to built-ins (409). includeAllMemories stays editable — that's a per-user preference, not the persona's identity. Mirrors the existing delete_persona guard. The DAO is intentionally permissive so the guard sits at the HTTP layer; persona_dao test pins that contract. 2. Migration 2026-05-10 adds user_id to entity_facts and a composite FK (user_id, persona_id) -> personas(user_id, persona_id) ON DELETE CASCADE. This closes two issues at once: - Persona orphans: deleting a custom persona used to leave its facts dangling forever, readable only via PersonaFilter::All. CASCADE now wipes them with the persona row. - Multi-user fact leakage: PersonaFilter::Single("default") used to surface every user's default-scoped facts. PersonaFilter is now { user_id, persona_id } and all read paths (get_facts_for_entity, list_facts, get_recent_activity) filter on user_id first. upsert_fact's dedup key extends to user_id so identical claims under shared persona names from different users no longer corroborate-bump each other's confidence. - user_id threads from Claims.sub.parse::<i32>().unwrap_or(1) at the chat / insight handlers through ChatTurnRequest, the streaming agentic loop, execute_tool, and into the leaf tools (tool_store_fact, tool_recall_facts_for_photo). The ".unwrap_or(1)" accommodates Apollo's service token whose sub is non-numeric on legacy mints. 
- Backfill picks the smallest user_id matching each legacy fact's persona_id so the FK holds for already-stored rows. Five new knowledge_dao tests with FK-on connection: persona scoping isolation, All-variant union per-user, dedup not crossing users, CASCADE delete, FK rejection of unknown personas. Plus dao_update_does_not_block_built_ins documenting where the HTTP-layer guard lives. Apollo coordinates separately — the matching changes there add the /api/personas proxy and start sending persona_id on photo-chat turns. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../down.sql | 47 +++ .../up.sql | 82 ++++ src/ai/handlers.rs | 20 +- src/ai/insight_chat.rs | 11 + src/ai/insight_generator.rs | 31 +- src/bin/populate_knowledge.rs | 1 + src/database/knowledge_dao.rs | 374 +++++++++++++++++- src/database/models.rs | 7 + src/database/persona_dao.rs | 37 ++ src/database/schema.rs | 1 + src/knowledge.rs | 17 +- src/personas.rs | 29 ++ 12 files changed, 629 insertions(+), 28 deletions(-) create mode 100644 migrations/2026-05-10-000000_entity_facts_persona_fk/down.sql create mode 100644 migrations/2026-05-10-000000_entity_facts_persona_fk/up.sql diff --git a/migrations/2026-05-10-000000_entity_facts_persona_fk/down.sql b/migrations/2026-05-10-000000_entity_facts_persona_fk/down.sql new file mode 100644 index 0000000..144e641 --- /dev/null +++ b/migrations/2026-05-10-000000_entity_facts_persona_fk/down.sql @@ -0,0 +1,47 @@ +-- Reverse 2026-05-10-000000_entity_facts_persona_fk: drop the +-- composite FK and the user_id column via the same rebuild pattern. 
+ +DROP INDEX IF EXISTS idx_entity_facts_user_persona; +DROP INDEX IF EXISTS idx_entity_facts_persona; +DROP INDEX IF EXISTS idx_entity_facts_source_photo; +DROP INDEX IF EXISTS idx_entity_facts_status; +DROP INDEX IF EXISTS idx_entity_facts_predicate; +DROP INDEX IF EXISTS idx_entity_facts_subject; + +ALTER TABLE entity_facts RENAME TO entity_facts_old; + +CREATE TABLE entity_facts ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + subject_entity_id INTEGER NOT NULL, + predicate TEXT NOT NULL, + object_entity_id INTEGER, + object_value TEXT, + source_photo TEXT, + source_insight_id INTEGER, + confidence REAL NOT NULL DEFAULT 0.6, + status TEXT NOT NULL DEFAULT 'active', + created_at BIGINT NOT NULL, + persona_id TEXT NOT NULL DEFAULT 'default', + CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE, + CONSTRAINT fk_ef_object FOREIGN KEY (object_entity_id) REFERENCES entities(id) ON DELETE SET NULL, + CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL, + CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL) +); + +INSERT INTO entity_facts + (id, subject_entity_id, predicate, object_entity_id, object_value, + source_photo, source_insight_id, confidence, status, created_at, + persona_id) +SELECT + id, subject_entity_id, predicate, object_entity_id, object_value, + source_photo, source_insight_id, confidence, status, created_at, + persona_id +FROM entity_facts_old; + +DROP TABLE entity_facts_old; + +CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id); +CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate); +CREATE INDEX idx_entity_facts_status ON entity_facts(status); +CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo); +CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id); diff --git a/migrations/2026-05-10-000000_entity_facts_persona_fk/up.sql 
b/migrations/2026-05-10-000000_entity_facts_persona_fk/up.sql new file mode 100644 index 0000000..8823ef8 --- /dev/null +++ b/migrations/2026-05-10-000000_entity_facts_persona_fk/up.sql @@ -0,0 +1,82 @@ +-- Add a real foreign key from entity_facts to personas. Until now, +-- entity_facts.persona_id was a free-form string with no integrity +-- guarantee — deleting a persona orphaned its facts, which then sat +-- forever in the readable-only-via-PersonaFilter::All hive-mind view. +-- +-- personas is keyed (user_id, persona_id) so the FK has to be +-- composite. That requires entity_facts to carry user_id too, which +-- has the side benefit of fixing multi-user fact leakage on the read +-- path (without it, two users with the same 'default' persona would +-- see each other's default-scoped facts). +-- +-- SQLite can't ALTER TABLE to add an FK; the table-rebuild dance is +-- the only way. Pattern matches 2026-05-09's down.sql and the older +-- 2026-04-20-000000 migration. + +DROP INDEX IF EXISTS idx_entity_facts_subject; +DROP INDEX IF EXISTS idx_entity_facts_predicate; +DROP INDEX IF EXISTS idx_entity_facts_status; +DROP INDEX IF EXISTS idx_entity_facts_source_photo; +DROP INDEX IF EXISTS idx_entity_facts_persona; + +ALTER TABLE entity_facts RENAME TO entity_facts_old; + +CREATE TABLE entity_facts ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + subject_entity_id INTEGER NOT NULL, + predicate TEXT NOT NULL, + object_entity_id INTEGER, + object_value TEXT, + source_photo TEXT, + source_insight_id INTEGER, + confidence REAL NOT NULL DEFAULT 0.6, + status TEXT NOT NULL DEFAULT 'active', + created_at BIGINT NOT NULL, + persona_id TEXT NOT NULL DEFAULT 'default', + user_id INTEGER NOT NULL DEFAULT 1, + CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE, + CONSTRAINT fk_ef_object FOREIGN KEY (object_entity_id) REFERENCES entities(id) ON DELETE SET NULL, + CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES 
photo_insights(id) ON DELETE SET NULL, + CONSTRAINT fk_ef_persona FOREIGN KEY (user_id, persona_id) REFERENCES personas(user_id, persona_id) ON DELETE CASCADE, + CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL) +); + +-- Backfill: assign each legacy fact to the user that owns the matching +-- persona. Built-ins are seeded per-user with the same persona_id +-- string for everyone, so MIN(user_id) deterministically picks the +-- earliest registered user (typically user 1, the operator). Custom +-- persona_ids exist for at most one user, so MIN is also unique. +-- Falls back to user_id=1 when no matching persona row exists; in that +-- case the FK below would still fail, but legacy rows shouldn't be in +-- that state because 2026-05-09 ADD COLUMN defaulted persona_id to +-- 'default', which is seeded for every user. +INSERT INTO entity_facts + (id, subject_entity_id, predicate, object_entity_id, object_value, + source_photo, source_insight_id, confidence, status, created_at, + persona_id, user_id) +SELECT + old.id, + old.subject_entity_id, + old.predicate, + old.object_entity_id, + old.object_value, + old.source_photo, + old.source_insight_id, + old.confidence, + old.status, + old.created_at, + old.persona_id, + COALESCE( + (SELECT MIN(p.user_id) FROM personas p WHERE p.persona_id = old.persona_id), + 1 + ) +FROM entity_facts_old old; + +DROP TABLE entity_facts_old; + +CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id); +CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate); +CREATE INDEX idx_entity_facts_status ON entity_facts(status); +CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo); +CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id); +CREATE INDEX idx_entity_facts_user_persona ON entity_facts(user_id, persona_id); diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index 4b76a6c..f296989 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -305,11 +305,14 @@ pub 
async fn get_all_insights_handler( #[post("/insights/generate/agentic")] pub async fn generate_agentic_insight_handler( http_request: HttpRequest, - _claims: Claims, + claims: Claims, request: web::Json, insight_generator: web::Data, insight_dao: web::Data>>, ) -> impl Responder { + // Service tokens (sub: "service:apollo") fall through to user_id=1 + // — the operator convention. Mobile/web clients have a numeric sub. + let user_id = claims.sub.parse::().unwrap_or(1); let parent_context = extract_context_from_request(&http_request); let tracer = global_tracer(); let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context); @@ -402,6 +405,7 @@ pub async fn generate_agentic_insight_handler( request.backend.clone(), fewshot_examples, fewshot_ids, + user_id, persona_id, ) .await; @@ -692,7 +696,7 @@ pub struct ChatTurnHttpResponse { #[post("/insights/chat")] pub async fn chat_turn_handler( http_request: HttpRequest, - _claims: Claims, + claims: Claims, request: web::Json, app_state: web::Data, ) -> impl Responder { @@ -711,8 +715,14 @@ pub async fn chat_turn_handler( } }; + // Service-token claims (sub: "service:apollo") fall through to + // user_id=1 — the operator convention. Mobile/web clients have a + // numeric sub. Required for the entity_facts composite FK. + let user_id = claims.sub.parse::().unwrap_or(1); + let chat_req = ChatTurnRequest { library_id: library.id, + user_id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), @@ -914,7 +924,7 @@ pub async fn chat_history_handler( /// Returns `text/event-stream` with one event per chat stream event. #[post("/insights/chat/stream")] pub async fn chat_stream_handler( - _claims: Claims, + claims: Claims, request: web::Json, app_state: web::Data, ) -> HttpResponse { @@ -928,8 +938,12 @@ pub async fn chat_stream_handler( } }; + // Service-token sub falls through to user_id=1 (see chat_turn_handler). 
+ let user_id = claims.sub.parse::().unwrap_or(1); + let chat_req = ChatTurnRequest { library_id: library.id, + user_id, file_path: request.file_path.clone(), user_message: request.user_message.clone(), model: request.model.clone(), diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 1894920..64918f5 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -31,6 +31,12 @@ pub type ChatLockMap = Arc> #[derive(Debug)] pub struct ChatTurnRequest { pub library_id: i32, + /// Author's user_id, extracted from Claims at the handler. Tagged + /// onto every entity_fact row written this turn so the composite FK + /// (user_id, persona_id) → personas holds and so cross-user reads + /// stay isolated. Service token claims that don't parse as i32 + /// fall through to user_id=1 (operator convention). + pub user_id: i32, pub file_path: String, pub user_message: String, /// Override the model id. Local mode: an Ollama model name. Hybrid: @@ -475,6 +481,7 @@ impl InsightChatService { &ollama_client, &image_base64, &normalized, + req.user_id, &active_persona, &loop_cx, ) @@ -843,6 +850,7 @@ impl InsightChatService { tools, &image_base64, &normalized, + req.user_id, &active_persona, max_iterations, &tx, @@ -1031,6 +1039,7 @@ impl InsightChatService { tools, &image_base64, &normalized, + req.user_id, &active_persona, max_iterations, &tx, @@ -1181,6 +1190,7 @@ impl InsightChatService { tools: Vec, image_base64: &Option, normalized: &str, + user_id: i32, active_persona: &str, max_iterations: usize, tx: &tokio::sync::mpsc::Sender, @@ -1260,6 +1270,7 @@ impl InsightChatService { ollama_client, image_base64, normalized, + user_id, active_persona, &cx, ) diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index dfc932a..d380b40 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1536,13 +1536,15 @@ Return ONLY the summary, nothing else."#, /// Dispatch a tool call to the appropriate executor. 
/// - /// `persona_id` identifies the persona this loop is generating for — - /// `store_fact` tags new facts with it, `recall_facts_for_photo` - /// filters reads to it (always Single in the agentic loop, even when - /// the persona has `include_all_memories=true`; the hive-mind toggle - /// is for human browsing of `/knowledge/*`, where mixing voices is - /// the explicit goal — during generation the persona's own voice - /// must stay clean). + /// `(user_id, persona_id)` identifies the author this loop is + /// generating for — `store_fact` tags new facts with both, + /// `recall_facts_for_photo` filters reads to both (always Single + /// in the agentic loop, even when the persona has + /// `include_all_memories=true`; the hive-mind toggle is for human + /// browsing of `/knowledge/*`, where mixing voices is the explicit + /// goal — during generation the persona's own voice must stay + /// clean). The composite (user_id, persona_id) is required for the + /// FK to personas to hold (migration 2026-05-10-000000). 
pub(crate) async fn execute_tool( &self, tool_name: &str, @@ -1550,6 +1552,7 @@ Return ONLY the summary, nothing else."#, ollama: &OllamaClient, image_base64: &Option, file_path: &str, + user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { @@ -1566,12 +1569,12 @@ Return ONLY the summary, nothing else."#, "get_personal_place_at" => self.tool_get_personal_place_at(arguments).await, "recall_entities" => self.tool_recall_entities(arguments, cx).await, "recall_facts_for_photo" => { - self.tool_recall_facts_for_photo(arguments, persona_id, cx) + self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx) .await } "store_entity" => self.tool_store_entity(arguments, ollama, cx).await, "store_fact" => { - self.tool_store_fact(arguments, file_path, persona_id, cx) + self.tool_store_fact(arguments, file_path, user_id, persona_id, cx) .await } "get_current_datetime" => Self::tool_get_current_datetime(), @@ -2406,11 +2409,15 @@ Return ONLY the summary, nothing else."#, async fn tool_recall_facts_for_photo( &self, args: &serde_json::Value, + user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::PersonaFilter; - let persona_filter = PersonaFilter::Single(persona_id.to_string()); + let persona_filter = PersonaFilter::Single { + user_id, + persona_id: persona_id.to_string(), + }; let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'file_path'".to_string(), @@ -2595,6 +2602,7 @@ Return ONLY the summary, nothing else."#, &self, args: &serde_json::Value, file_path: &str, + user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { @@ -2647,6 +2655,7 @@ Return ONLY the summary, nothing else."#, status: "active".to_string(), created_at: chrono::Utc::now().timestamp(), persona_id: persona_id.to_string(), + user_id, }; let mut kdao = self @@ -3196,6 +3205,7 @@ Return ONLY the summary, nothing else."#, 
backend: Option, fewshot_examples: Vec>, fewshot_source_ids: Vec, + user_id: i32, persona_id: String, ) -> Result<(Option, Option)> { let tracer = global_tracer(); @@ -3673,6 +3683,7 @@ Return ONLY the summary, nothing else."#, &ollama_client, &image_base64, &file_path, + user_id, &persona_id, &loop_cx, ) diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index 39b8d0f..a7cdd27 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -335,6 +335,7 @@ async fn main() -> anyhow::Result<()> { None, Vec::new(), Vec::new(), + 1, // operator user_id — populate_knowledge is single-user offline tool "default".to_string(), ) .await diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs index 0339a73..f807f23 100644 --- a/src/database/knowledge_dao.rs +++ b/src/database/knowledge_dao.rs @@ -57,12 +57,25 @@ pub struct FactFilter { /// Persona scoping for fact reads. `Single` filters to one persona's /// view; `All` is the hive-mind read used when a persona has -/// `include_all_memories=true` in the personas table. Entities and -/// photo-links are always shared and don't take a persona filter. +/// `include_all_memories=true` in the personas table. Both variants +/// carry `user_id` because facts are user-isolated — two users with +/// the same 'default' persona must not see each other's facts (this +/// is enforced at the schema level by the composite FK in migration +/// 2026-05-10). Entities and photo-links are always shared and don't +/// take a persona filter. #[derive(Debug, Clone)] pub enum PersonaFilter { - Single(String), - All, + Single { user_id: i32, persona_id: String }, + All { user_id: i32 }, +} + +impl PersonaFilter { + pub fn user_id(&self) -> i32 { + match self { + Self::Single { user_id, .. 
} => *user_id, + Self::All { user_id } => *user_id, + } + } } pub struct EntityPatch { @@ -598,12 +611,14 @@ impl KnowledgeDao for SqliteKnowledgeDao { let mut conn = self.connection.lock().expect("KnowledgeDao lock"); // Look for an identical active fact AUTHORED BY THE SAME - // PERSONA. The same claim from a different persona is a + // (USER, PERSONA). The same claim from a different persona — + // or from a different user with the same persona name — is a // separate fact (each persona's voice/confidence is its own), // not a confidence bump on someone else's row. let mut dup_query = entity_facts .filter(subject_entity_id.eq(fact.subject_entity_id)) .filter(predicate.eq(&fact.predicate)) + .filter(user_id.eq(fact.user_id)) .filter(persona_id.eq(&fact.persona_id)) .filter(status.ne("rejected")) .into_boxed(); @@ -665,8 +680,9 @@ impl KnowledgeDao for SqliteKnowledgeDao { let mut q = entity_facts .filter(subject_entity_id.eq(entity_id)) .filter(status.ne("rejected")) + .filter(user_id.eq(persona.user_id())) .into_boxed(); - if let PersonaFilter::Single(pid) = persona { + if let PersonaFilter::Single { persona_id: pid, .. } = persona { q = q.filter(persona_id.eq(pid.clone())); } q.load::(conn.deref_mut()) @@ -688,6 +704,11 @@ impl KnowledgeDao for SqliteKnowledgeDao { let mut query = entity_facts.into_boxed(); let mut count_query = entity_facts.into_boxed(); + // user_id always applies — facts are user-isolated. + let uid = filter.persona.user_id(); + query = query.filter(user_id.eq(uid)); + count_query = count_query.filter(user_id.eq(uid)); + if let Some(eid) = filter.entity_id { query = query.filter(subject_entity_id.eq(eid)); count_query = count_query.filter(subject_entity_id.eq(eid)); @@ -701,7 +722,7 @@ impl KnowledgeDao for SqliteKnowledgeDao { query = query.filter(predicate.eq(pred)); count_query = count_query.filter(predicate.eq(pred)); } - if let PersonaFilter::Single(ref pid) = filter.persona { + if let PersonaFilter::Single { persona_id: ref pid, .. 
} = filter.persona { query = query.filter(persona_id.eq(pid.clone())); count_query = count_query.filter(persona_id.eq(pid.clone())); } @@ -901,8 +922,9 @@ impl KnowledgeDao for SqliteKnowledgeDao { let mut facts_q = ef::entity_facts .filter(ef::created_at.gt(since)) + .filter(ef::user_id.eq(persona.user_id())) .into_boxed(); - if let PersonaFilter::Single(pid) = persona { + if let PersonaFilter::Single { persona_id: pid, .. } = persona { facts_q = facts_q.filter(ef::persona_id.eq(pid.clone())); } let recent_facts = facts_q @@ -919,3 +941,339 @@ impl KnowledgeDao for SqliteKnowledgeDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } } + +#[cfg(test)] +mod tests { + //! Persona scoping + composite-FK invariants for entity_facts. + //! + //! These tests pin three contracts that are silently regressable: + //! + //! 1. PersonaFilter::Single isolates per (user_id, persona_id). Two + //! users with the same 'default' persona must not see each + //! other's facts (multi-user leakage was a latent bug before + //! migration 2026-05-10 added user_id + composite FK). + //! + //! 2. PersonaFilter::All scopes to a single user but unions across + //! that user's personas. Hive-mind for human browsing of + //! /knowledge/*; never crosses users. + //! + //! 3. Deleting a persona CASCADEs to the user's facts under that + //! persona — and ONLY that user's, ONLY that persona's. Other + //! users sharing the persona_id name keep their facts. + //! + //! FKs aren't enabled by default on Diesel's SQLite connection; + //! `connection_with_fks_on()` flips the pragma so the cascade + //! actually fires in tests (mirroring runtime in production). 
+ + use super::*; + use crate::database::models::{InsertEntity, InsertEntityFact, InsertPersona}; + use crate::database::test::in_memory_db_connection; + use diesel::connection::SimpleConnection; + + fn connection_with_fks_on() -> Arc> { + let mut conn = in_memory_db_connection(); + conn.batch_execute("PRAGMA foreign_keys = ON;") + .expect("enable foreign_keys pragma"); + Arc::new(Mutex::new(conn)) + } + + fn create_user(conn: &Arc>, username: &str) -> i32 { + use crate::database::schema::users::dsl as u; + let mut c = conn.lock().unwrap(); + diesel::insert_into(u::users) + .values((u::username.eq(username), u::password.eq("x"))) + .execute(c.deref_mut()) + .unwrap(); + u::users + .filter(u::username.eq(username)) + .select(u::id) + .first(c.deref_mut()) + .unwrap() + } + + fn create_persona_row(conn: &Arc>, uid: i32, pid: &str) { + use crate::database::schema::personas::dsl as p; + let mut c = conn.lock().unwrap(); + diesel::insert_into(p::personas) + .values(InsertPersona { + user_id: uid, + persona_id: pid, + name: pid, + system_prompt: "test prompt", + is_built_in: false, + include_all_memories: false, + created_at: 0, + updated_at: 0, + }) + .execute(c.deref_mut()) + .unwrap(); + } + + fn make_entity(dao: &mut SqliteKnowledgeDao, name: &str) -> Entity { + let cx = opentelemetry::Context::new(); + dao.upsert_entity( + &cx, + InsertEntity { + name: name.to_string(), + entity_type: "person".to_string(), + description: String::new(), + embedding: None, + confidence: 0.6, + status: "active".to_string(), + created_at: 0, + updated_at: 0, + }, + ) + .unwrap() + } + + fn add_fact( + dao: &mut SqliteKnowledgeDao, + subject: i32, + predicate: &str, + value: &str, + user_id: i32, + persona_id: &str, + ) -> EntityFact { + let cx = opentelemetry::Context::new(); + let (fact, _) = dao + .upsert_fact( + &cx, + InsertEntityFact { + subject_entity_id: subject, + predicate: predicate.to_string(), + object_entity_id: None, + object_value: Some(value.to_string()), + source_photo: 
None, + source_insight_id: None, + confidence: 0.6, + status: "active".to_string(), + created_at: 0, + persona_id: persona_id.to_string(), + user_id, + }, + ) + .unwrap(); + fact + } + + #[test] + fn persona_filter_single_isolates_per_user() { + // Two users, same persona name. Each user's facts under that + // persona must NOT surface to the other user's reads — this is + // the multi-user leakage that motivated adding user_id. + let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + let bob = create_user(&conn, "bob"); + create_persona_row(&conn, alice, "default"); + create_persona_row(&conn, bob, "default"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let entity = make_entity(&mut dao, "Cabin"); + + add_fact(&mut dao, entity.id, "located_in", "Vermont", alice, "default"); + add_fact(&mut dao, entity.id, "color", "red", bob, "default"); + + let alice_view = dao + .get_facts_for_entity( + &cx, + entity.id, + &PersonaFilter::Single { + user_id: alice, + persona_id: "default".to_string(), + }, + ) + .unwrap(); + assert_eq!(alice_view.len(), 1); + assert_eq!(alice_view[0].predicate, "located_in"); + + let bob_view = dao + .get_facts_for_entity( + &cx, + entity.id, + &PersonaFilter::Single { + user_id: bob, + persona_id: "default".to_string(), + }, + ) + .unwrap(); + assert_eq!(bob_view.len(), 1); + assert_eq!(bob_view[0].predicate, "color"); + } + + #[test] + fn persona_filter_all_unions_across_personas_one_user() { + // include_all_memories=true → All variant: see this user's + // facts across all their personas. Must NOT include other + // users' facts even when they share a persona name. 
+ let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + let bob = create_user(&conn, "bob"); + create_persona_row(&conn, alice, "default"); + create_persona_row(&conn, alice, "journal"); + create_persona_row(&conn, bob, "default"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let entity = make_entity(&mut dao, "Cabin"); + + add_fact(&mut dao, entity.id, "p1", "v1", alice, "default"); + add_fact(&mut dao, entity.id, "p2", "v2", alice, "journal"); + add_fact(&mut dao, entity.id, "p3", "v3", bob, "default"); + + let alice_all = dao + .get_facts_for_entity(&cx, entity.id, &PersonaFilter::All { user_id: alice }) + .unwrap(); + let predicates: Vec<&str> = alice_all.iter().map(|f| f.predicate.as_str()).collect(); + assert_eq!(predicates.len(), 2); + assert!(predicates.contains(&"p1")); + assert!(predicates.contains(&"p2")); + assert!( + !predicates.contains(&"p3"), + "All variant must not leak across users" + ); + } + + #[test] + fn upsert_fact_dedup_does_not_cross_users() { + // Two users insert the SAME claim (same subject + predicate + + // object_value) under the same persona name. Pre-fix, the + // dedup key was (subject, predicate, persona_id) and bob's + // insert would corroborate alice's row instead of creating a + // new one. Post-fix the key includes user_id, so each user + // gets their own row at confidence=0.6. 
+ let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + let bob = create_user(&conn, "bob"); + create_persona_row(&conn, alice, "default"); + create_persona_row(&conn, bob, "default"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let entity = make_entity(&mut dao, "Cabin"); + + let alice_fact = add_fact(&mut dao, entity.id, "color", "red", alice, "default"); + let bob_fact = add_fact(&mut dao, entity.id, "color", "red", bob, "default"); + + assert_ne!(alice_fact.id, bob_fact.id, "must be separate rows"); + assert_eq!(alice_fact.confidence, 0.6); + assert_eq!( + bob_fact.confidence, 0.6, + "bob's row should not have been corroboration-bumped against alice's" + ); + } + + #[test] + fn deleting_persona_cascades_only_that_users_facts() { + // Composite FK + CASCADE: deleting alice's 'journal' persona + // wipes alice's journal facts but leaves alice's default + // facts AND bob's journal-named facts untouched. + let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + let bob = create_user(&conn, "bob"); + create_persona_row(&conn, alice, "default"); + create_persona_row(&conn, alice, "journal"); + create_persona_row(&conn, bob, "journal"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let entity = make_entity(&mut dao, "Cabin"); + + add_fact(&mut dao, entity.id, "p_alice_default", "x", alice, "default"); + add_fact(&mut dao, entity.id, "p_alice_journal", "y", alice, "journal"); + add_fact(&mut dao, entity.id, "p_bob_journal", "z", bob, "journal"); + + // Delete alice's journal persona — CASCADE should remove only + // alice's journal facts. + { + use crate::database::schema::personas::dsl as p; + let mut c = conn.lock().unwrap(); + diesel::delete( + p::personas + .filter(p::user_id.eq(alice)) + .filter(p::persona_id.eq("journal")), + ) + .execute(c.deref_mut()) + .unwrap(); + } + + // alice/default survives. 
+ let alice_default = dao + .get_facts_for_entity( + &cx, + entity.id, + &PersonaFilter::Single { + user_id: alice, + persona_id: "default".to_string(), + }, + ) + .unwrap(); + assert_eq!(alice_default.len(), 1); + assert_eq!(alice_default[0].predicate, "p_alice_default"); + + // alice/journal is gone. + let alice_journal = dao + .get_facts_for_entity( + &cx, + entity.id, + &PersonaFilter::Single { + user_id: alice, + persona_id: "journal".to_string(), + }, + ) + .unwrap(); + assert!( + alice_journal.is_empty(), + "CASCADE should have removed alice's journal facts" + ); + + // bob/journal — same persona name, different user — untouched. + let bob_journal = dao + .get_facts_for_entity( + &cx, + entity.id, + &PersonaFilter::Single { + user_id: bob, + persona_id: "journal".to_string(), + }, + ) + .unwrap(); + assert_eq!(bob_journal.len(), 1); + assert_eq!(bob_journal[0].predicate, "p_bob_journal"); + } + + #[test] + fn fact_insert_with_unknown_persona_is_rejected() { + // FK enforcement: inserting a fact whose (user_id, persona_id) + // pair has no matching personas row should fail. Protects + // against typo'd persona ids silently leaking into the table. + let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + // Note: NO persona row inserted for alice + 'ghost'. 
+ + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let entity = make_entity(&mut dao, "Cabin"); + + let result = dao.upsert_fact( + &cx, + InsertEntityFact { + subject_entity_id: entity.id, + predicate: "color".to_string(), + object_entity_id: None, + object_value: Some("red".to_string()), + source_photo: None, + source_insight_id: None, + confidence: 0.6, + status: "active".to_string(), + created_at: 0, + persona_id: "ghost".to_string(), + user_id: alice, + }, + ); + assert!( + result.is_err(), + "FK should reject fact whose persona doesn't exist" + ); + } +} diff --git a/src/database/models.rs b/src/database/models.rs index c3583d2..479b631 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -243,6 +243,12 @@ pub struct InsertEntityFact { /// real-world referents. Defaults to `'default'` for legacy rows /// (see migration 2026-05-09-000000). pub persona_id: String, + /// Author's user_id. Required for the composite FK to + /// `personas(user_id, persona_id)` (migration 2026-05-10-000000) and + /// for cross-user fact isolation: two users with the same 'default' + /// persona must not see each other's facts. Always paired with + /// `persona_id` — they're a unit. + pub user_id: i32, } #[derive(Serialize, Queryable, Clone, Debug)] @@ -258,6 +264,7 @@ pub struct EntityFact { pub status: String, pub created_at: i64, pub persona_id: String, + pub user_id: i32, } #[derive(Insertable)] diff --git a/src/database/persona_dao.rs b/src/database/persona_dao.rs index 658a7f7..8ea404d 100644 --- a/src/database/persona_dao.rs +++ b/src/database/persona_dao.rs @@ -359,6 +359,43 @@ mod tests { assert_eq!(dao.list_personas(&cx, uid).unwrap().len(), 2); } + #[test] + fn dao_update_does_not_block_built_ins() { + // Documenting contract: the DAO is intentionally permissive — + // `update_persona` will apply name/system_prompt edits to ANY + // row, including built-ins. 
The guard against editing built-in + // identity (name + systemPrompt) lives in the HTTP handler + // (src/personas.rs::update_persona). If you find yourself + // wanting to add the guard here too, prefer that — defence in + // depth — but keep this test passing so anyone who removes + // the handler guard gets a failing call site, not silent data + // corruption. + let cx = opentelemetry::Context::new(); + let (mut dao, uid) = dao_with_user("eve"); + + dao.create_persona(&cx, uid, "default", "Default", "old", true, false) + .unwrap(); + let updated = dao + .update_persona( + &cx, + uid, + "default", + PersonaPatch { + name: Some("Renamed".into()), + system_prompt: Some("new prompt".into()), + include_all_memories: None, + }, + ) + .unwrap() + .unwrap(); + assert_eq!(updated.name, "Renamed"); + assert_eq!(updated.system_prompt, "new prompt"); + assert!( + updated.is_built_in, + "is_built_in flag should be unchanged by patch" + ); + } + #[test] fn update_toggles_include_all_memories() { let cx = opentelemetry::Context::new(); diff --git a/src/database/schema.rs b/src/database/schema.rs index 77378bb..d93d583 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -58,6 +58,7 @@ diesel::table! { status -> Text, created_at -> BigInt, persona_id -> Text, + user_id -> Integer, } } diff --git a/src/knowledge.rs b/src/knowledge.rs index 193c4a4..30ceabd 100644 --- a/src/knowledge.rs +++ b/src/knowledge.rs @@ -14,8 +14,10 @@ use crate::personas::PersonaDaoData; /// Resolve the `X-Persona-Id` header into a `PersonaFilter`. Missing /// header → `'default'`. If the persona has `include_all_memories=true`, /// returns `PersonaFilter::All` so reads see the full hive-mind pool. -/// On lookup failure (e.g. malformed JWT) returns `Single("default")` — -/// safer than `All` because it preserves the historical baseline view. 
+/// On JWT-parse failure (sub is not a numeric user_id) the resolver +/// falls through to user_id=1 — the operator convention for service +/// tokens — preserving the historical baseline view. Same fallback +/// applies on any persona-lookup error. fn resolve_persona_filter( req: &HttpRequest, claims: &Claims, @@ -28,15 +30,16 @@ fn resolve_persona_filter( .map(|s| s.to_string()) .unwrap_or_else(|| "default".to_string()); - let Ok(uid) = claims.sub.parse::<i32>() else { - return PersonaFilter::Single(pid); - }; + let uid = claims.sub.parse::<i32>().unwrap_or(1); let cx = opentelemetry::Context::current(); let mut dao = persona_dao.lock().expect("Unable to lock PersonaDao"); match dao.get_persona(&cx, uid, &pid) { - Ok(Some(p)) if p.include_all_memories => PersonaFilter::All, - _ => PersonaFilter::Single(pid), + Ok(Some(p)) if p.include_all_memories => PersonaFilter::All { user_id: uid }, + _ => PersonaFilter::Single { + user_id: uid, + persona_id: pid, + }, } } diff --git a/src/personas.rs b/src/personas.rs index c187a95..4b1fe53 100644 --- a/src/personas.rs +++ b/src/personas.rs @@ -216,6 +216,35 @@ async fn update_persona( let cx = opentelemetry::Context::current(); let mut dao = dao.lock().expect("Unable to lock PersonaDao"); + // Built-in personas are owned by the migration; the canonical voice + // text lives in source. A client renaming or rewriting the prompt + // here would diverge from what new users get seeded with and hide + // the operator's actual customization (their own custom persona) + // from the picker. `include_all_memories` stays editable on + // built-ins — that's a per-user preference, not the persona's + // identity. Mirrors the same guard delete_persona enforces below.
+ match dao.get_persona(&cx, uid, &pid) { + Ok(Some(p)) if p.is_built_in => { + let editing_identity = + body.name.is_some() || body.system_prompt.is_some(); + if editing_identity { + return HttpResponse::Conflict().json(serde_json::json!({ + "error": "Cannot edit name or systemPrompt of a built-in persona" + })); + } + } + Ok(None) => { + return HttpResponse::NotFound() + .json(serde_json::json!({"error": "Persona not found"})); + } + Err(e) => { + log::error!("update_persona lookup error: {:?}", e); + return HttpResponse::InternalServerError() + .json(serde_json::json!({"error": "Database error"})); + } + Ok(Some(_)) => {} + } + let patch = PersonaPatch { name: body.name.clone(), system_prompt: body.system_prompt.clone(), -- 2.49.1 From b9d9ba0320a914b4b6654f411ebb37d1261c9ff7 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 10 May 2026 13:48:13 -0400 Subject: [PATCH 2/3] chat: route search_messages({date}) to get_sms_messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the LLM calls search_messages with { date, limit } and no query, it's making the predictable mistake of conflating the two "messages"-shaped tools. The previous behaviour returned an error that pointed it at get_sms_messages — correct, but burning a turn on the misroute. Long photo-chat threads where the user asks "what was happening that weekend?" hit this on small models roughly half the time. Now the date-string-without-query case transparently dispatches to get_sms_messages with the same args (date / limit / days_radius / contact name all pass through unchanged) and prepends a short "(Note: routed to get_sms_messages — prefer it directly next time)" to the result. The model sees real data on its first try while still learning the right tool for next time. 
Cases that don't have a get_sms_messages equivalent (numeric contact_id, or start_ts / end_ts windows) keep the original error so the model knows to either supply a query or restructure its call. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/insight_generator.rs | 42 ++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index d380b40..1e39f9a 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -1558,7 +1558,7 @@ Return ONLY the summary, nothing else."#, ) -> String { let result = match tool_name { "search_rag" => self.tool_search_rag(arguments, ollama, cx).await, - "search_messages" => self.tool_search_messages(arguments).await, + "search_messages" => self.tool_search_messages(arguments, cx).await, "get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await, "get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await, "get_location_history" => self.tool_get_location_history(arguments, cx).await, @@ -1807,15 +1807,43 @@ Return ONLY the summary, nothing else."#, /// Tool: search_messages — keyword / semantic / hybrid search over all /// SMS message bodies via the Django FTS5 + embeddings pipeline. Now /// supports optional `contact_id`, `start_ts`, `end_ts` filters. - async fn tool_search_messages(&self, args: &serde_json::Value) -> String { + async fn tool_search_messages(&self, args: &serde_json::Value, cx: &opentelemetry::Context) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) if !q.trim().is_empty() => q.trim(), _ => { - let has_date = args.get("date").is_some() - || args.get("start_ts").is_some() - || args.get("end_ts").is_some(); - let has_contact = args.get("contact").is_some() || args.get("contact_id").is_some(); - if has_date || has_contact { + // Common LLM mistake: calling search_messages with + // { date, ... } as if it were date-browsing. 
The two + // tools share the "messages" word, and search_messages + // sounds like the natural verb. Instead of returning + // an error and burning a turn, transparently route to + // get_sms_messages when there's a `date` (and a + // contact-name string, optional). The LLM gets real + // data on its first try; the result is logged with a + // routing note so a human reading the trace can see + // what happened. + let has_date_str = args + .get("date") + .and_then(|v| v.as_str()) + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + let has_numeric_contact = args.get("contact_id").is_some(); + let has_ts_window = + args.get("start_ts").is_some() || args.get("end_ts").is_some(); + if has_date_str && !has_numeric_contact && !has_ts_window { + log::info!( + "search_messages with `date` and no `query` — routing to get_sms_messages" + ); + let routed = self.tool_get_sms_messages(args, cx).await; + return format!( + "(Note: routed to get_sms_messages — search_messages requires a \ + `query`; date-only browsing belongs on get_sms_messages. Prefer \ + get_sms_messages directly next time.)\n\n{}", + routed + ); + } + let has_contact_name = + args.get("contact").and_then(|v| v.as_str()).is_some(); + if has_ts_window || has_numeric_contact || has_contact_name { return "Error: search_messages needs a 'query' (keywords/phrase). \ To fetch messages around a date or from a contact without keywords, \ call get_sms_messages with { date, contact? } instead." 
-- 2.49.1 From 08a5f46be184340247d70a7aaa4a31c684c5ed38 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 10 May 2026 14:03:41 -0400 Subject: [PATCH 3/3] chat: scope insight lookup by library_id to fix regen-shadow bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a photo exists in more than one library and the user regenerates its insight from library A's chat, the regenerate streams cleanly, store_insight flips library A's old row to is_current=false, and inserts a new is_current=true row tagged (library A, rel_path). On the next history fetch the user sees their old transcript — the regenerate appears to vanish. The cause: get_insight(file_path) filters on rel_path + is_current only, so library B's untouched is_current=true row for the same rel_path satisfies the query and gets returned by SQLite's .first() ahead of A's new row. Because get_insight is also what chat_turn_stream uses to decide bootstrap vs. continuation, the next chat turn after the shadow hit also routes against the wrong insight, so update_training_messages corrupts library B's transcript with library A's chat. Fix: add get_current_insight_for_library(library_id, file_path) filtered on (library_id, rel_path, is_current=true) and route the chat surface (load_history, chat_turn{,_stream}, rewind_history) through it. load_history falls back to the cross-library get_insight when the scoped lookup misses — preserves the "scalar data merges across libraries" intent for the case where the active library has no insight but another does. The path-only get_insight stays for callers that don't have library context (populate_knowledge, the photo-grid metadata fetch). chat_history_handler stops dropping the parsed library on the floor and threads it through. Single-library deploys see no behaviour change. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/handlers.rs | 16 ++++++++++----- src/ai/insight_chat.rs | 40 ++++++++++++++++++++++++++---------- src/database/insights_dao.rs | 38 ++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 16 deletions(-) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index f296989..f6189ba 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -874,15 +874,21 @@ pub async fn chat_history_handler( query: web::Query, app_state: web::Data, ) -> impl Responder { - // library param parsed for parity with other insight endpoints, even - // though load_history currently keys on file_path alone (matches the - // existing get_insight DAO contract). - let _library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + // library_id scopes the lookup so a regenerate on this library + // isn't shadowed by an untouched is_current=true row in another + // library for the same rel_path. load_history falls back to the + // cross-library lookup when the scoped one misses, so a photo + // with no insight in this library but one in another still + // surfaces (the "show this photo's primary insight" merge case). + let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); - match app_state.insight_chat.load_history(&query.path) { + match app_state + .insight_chat + .load_history(library.id, &query.path) + { Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse { messages: view .messages diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 64918f5..5a0e3f9 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -118,14 +118,28 @@ impl InsightChatService { /// scaffolding (system message, tool turns, tool-dispatch-only assistant /// messages) and drops base64 images from user turns to keep payloads /// small. The first remaining user message is flagged `is_initial`. 
- pub fn load_history(&self, file_path: &str) -> Result { + /// + /// `library_id` scopes the lookup to one library — without it, a + /// regenerate on lib1 can be shadowed on the next refresh by an + /// untouched `is_current=true` row in lib2 for the same rel_path. + /// Falls back to the cross-library `get_insight` only when the + /// scoped lookup misses, preserving the cross-library "show this + /// photo's primary insight" merge for the case where the active + /// library has no insight but another library does. + pub fn load_history(&self, library_id: i32, file_path: &str) -> Result { let normalized = normalize_path(file_path); let cx = opentelemetry::Context::new(); let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); - let insight = dao - .get_insight(&cx, &normalized) + let insight = match dao + .get_current_insight_for_library(&cx, library_id, &normalized) .map_err(|e| anyhow!("failed to load insight: {:?}", e))? - .ok_or_else(|| anyhow!("no insight found for path"))?; + { + Some(i) => i, + None => dao + .get_insight(&cx, &normalized) + .map_err(|e| anyhow!("failed to load insight: {:?}", e))? + .ok_or_else(|| anyhow!("no insight found for path"))?, + }; let raw = insight .training_messages @@ -267,7 +281,7 @@ impl InsightChatService { let insight = { let cx = opentelemetry::Context::new(); let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); - dao.get_insight(&cx, &normalized) + dao.get_current_insight_for_library(&cx, req.library_id, &normalized) .map_err(|e| anyhow!("failed to load insight: {:?}", e))? .ok_or_else(|| anyhow!("no insight found for path"))? }; @@ -627,7 +641,7 @@ impl InsightChatService { let insight = { let cx = opentelemetry::Context::new(); let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); - dao.get_insight(&cx, &normalized) + dao.get_current_insight_for_library(&cx, library_id, &normalized) .map_err(|e| anyhow!("failed to load insight: {:?}", e))? 
.ok_or_else(|| anyhow!("no insight found for path"))? }; @@ -726,14 +740,18 @@ impl InsightChatService { }; let _guard = entry_lock.lock().await; - // Look up existing insight (None when missing). The branch below - // decides bootstrap vs. continuation: `regenerate=true` forces - // bootstrap regardless of the row's presence; missing rows always - // bootstrap. + // Look up existing insight scoped to this turn's library_id. + // Path-only lookup would let an `is_current=true` row in + // another library route us into the continuation path against + // a transcript we'd then update_training_messages on — corrupting + // the other library's curated insight. Library-scoped lookup + // means a fresh chat on a photo that has no insight in this + // library bootstraps cleanly, even when another library has + // an insight for the same rel_path. let existing_insight = { let cx = opentelemetry::Context::new(); let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); - dao.get_insight(&cx, &normalized) + dao.get_current_insight_for_library(&cx, req.library_id, &normalized) .map_err(|e| anyhow!("failed to load insight: {:?}", e))? }; diff --git a/src/database/insights_dao.rs b/src/database/insights_dao.rs index 28de5f4..8c7551c 100644 --- a/src/database/insights_dao.rs +++ b/src/database/insights_dao.rs @@ -21,6 +21,22 @@ pub trait InsightDao: Sync + Send { file_path: &str, ) -> Result<Option<PhotoInsight>, DbError>; + /// Library-scoped variant of `get_insight`. The default `get_insight` + /// finds any `is_current=true` row matching `file_path` across + /// libraries — fine for the photo-grid metadata fetch (cross-library + /// merge), wrong for the chat path: a regenerate on lib1 flips lib1's + /// row to `is_current=false` and inserts a new lib1 row, but + /// lib2's untouched `is_current=true` row for the same rel_path + /// would still satisfy the path-only query and shadow the regen on + /// the next history fetch.
Always pass a library_id when you have + /// one (chat / insight write paths always do). + fn get_current_insight_for_library( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + file_path: &str, + ) -> Result<Option<PhotoInsight>, DbError>; + /// Return the most recent current insight whose rel_path is one of /// `paths`. Used for content-hash sharing: the caller expands a /// single file into all rel_paths with the same content_hash, then @@ -182,6 +198,28 @@ impl InsightDao for SqliteInsightDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn get_current_insight_for_library( + &mut self, + context: &opentelemetry::Context, + lib_id: i32, + path: &str, + ) -> Result<Option<PhotoInsight>, DbError> { + trace_db_call(context, "query", "get_current_insight_for_library", |_span| { + use schema::photo_insights::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get InsightDao"); + + photo_insights + .filter(library_id.eq(lib_id)) + .filter(rel_path.eq(path)) + .filter(is_current.eq(true)) + .first::<PhotoInsight>(connection.deref_mut()) + .optional() + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn get_insight_for_paths( &mut self, context: &opentelemetry::Context, -- 2.49.1