ImageApi/src/database/knowledge_dao.rs
Cameron Cordes 85f3716379 knowledge: fact supersession + photo-date valid_from
Two Phase-2 follow-ups in one commit since they're coupled at the
write path:

* Agent populates valid_from from the source photo's date_taken
  when calling store_fact. Loose semantics — date_taken is *evidence
  at that date*, not strictly when the fact started being true — but
  gives the curator a calendar anchor and pairs with supersession to
  close intervals cleanly. valid_until stays NULL (a single photo
  can't tell us when something stopped). Honours the existing
  upsert_fact dedup (corroborated facts keep their first-recorded
  valid_from).

* Supersession: new column entity_facts.superseded_by INTEGER
  (migration 2026-05-10-000200), new status value 'superseded',
  new DAO method supersede_fact, new HTTP endpoint
  POST /knowledge/facts/{id}/supersede.

  Superseding marks the old fact as replaced by the new one
  atomically: it flips status to 'superseded', sets superseded_by,
  and stamps
  valid_until from the new fact's valid_from (when not already
  set). delete_fact clears dangling supersession pointers in the
  same transaction so the column never points at a missing row —
  no FK because SQLite can't ALTER ADD with REFERENCES, but the
  DAO maintains the invariant.

Pairs with conflict detection from the previous slice: once the
old fact's valid_until is closed, its interval no longer overlaps
the new fact's, so they stop flagging — the supersede action
resolves the conflict.
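
A minimal sketch of the coupled write path at the DAO level (handler
and agent plumbing elided; variable names are illustrative):

    // store the replacement first — its valid_from comes from the
    // source photo's date_taken — then retire the old fact
    let (new_fact, _created) = dao.upsert_fact(&cx, insert_new)?;
    let retired = dao.supersede_fact(&cx, old_fact_id, new_fact.id)?;
    // retired: status = 'superseded', superseded_by = Some(new_fact.id),
    // valid_until = new_fact.valid_from unless it was already set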

Two tests pin the contract: supersede stamps valid_until from
new.valid_from while respecting an existing valid_until, and
deleting the supersedeR clears the dangling pointer while leaving
the old fact's 'superseded' status in place for history.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 19:47:06 -04:00


#![allow(dead_code)]
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{
Entity, EntityFact, EntityPhotoLink, InsertEntity, InsertEntityFact, InsertEntityPhotoLink,
};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
// ---------------------------------------------------------------------------
// Entity type normalisation
// ---------------------------------------------------------------------------
/// Canonicalise a model-supplied entity_type to a consistent lowercase form.
/// Weak models frequently vary capitalisation ("Person" vs "person") or use
/// synonym types ("location" vs "place"). Normalising here prevents duplicate
/// entities that differ only by type spelling.
pub(crate) fn normalize_entity_type(raw: &str) -> String {
match raw.to_lowercase().as_str() {
"person" | "people" | "human" | "individual" | "contact" => "person",
"place" | "location" | "venue" | "site" | "area" | "landmark" => "place",
"event" | "occasion" | "activity" | "celebration" => "event",
"thing" | "object" | "item" | "product" => "thing",
other => other,
}
.to_string()
}
// ---------------------------------------------------------------------------
// Filter / patch types
// ---------------------------------------------------------------------------
pub struct EntityFilter {
pub entity_type: Option<String>,
/// "active" | "reviewed" | "rejected" | "all"
pub status: Option<String>,
/// LIKE match on name and description
pub search: Option<String>,
pub limit: i64,
pub offset: i64,
}
/// Sort key for the curation list. Name = alphabetical clustering
/// (good for spotting near-duplicates like Sara / Sarah / Sarah J.).
/// FactCount = surface heavily-used entities first, demote 0-fact
/// noise to the bottom. UpdatedDesc = legacy "newest activity first".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntitySort {
UpdatedDesc,
NameAsc,
FactCountDesc,
}
pub struct FactFilter {
pub entity_id: Option<i32>,
/// "active" | "reviewed" | "rejected" | "all"
pub status: Option<String>,
pub predicate: Option<String>,
pub persona: PersonaFilter,
pub limit: i64,
pub offset: i64,
}
/// Persona scoping for fact reads. `Single` filters to one persona's
/// view; `All` is the hive-mind read used when a persona has
/// `include_all_memories=true` in the personas table. Both variants
/// carry `user_id` because facts are user-isolated — two users with
/// the same 'default' persona must not see each other's facts (this
/// is enforced at the schema level by the composite FK in migration
/// 2026-05-10). Entities and photo-links are always shared and don't
/// take a persona filter.
#[derive(Debug, Clone)]
pub enum PersonaFilter {
Single { user_id: i32, persona_id: String },
All { user_id: i32 },
}
impl PersonaFilter {
pub fn user_id(&self) -> i32 {
match self {
Self::Single { user_id, .. } => *user_id,
Self::All { user_id } => *user_id,
}
}
}
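// Illustrative construction at a read call site. The branch on
// include_all_memories mirrors the doc comment above; the surrounding
// handler names are hypothetical, not part of this file.
//
//     let scope = if include_all_memories {
//         PersonaFilter::All { user_id }
//     } else {
//         PersonaFilter::Single { user_id, persona_id: persona_id.clone() }
//     };
//     let facts = dao.get_facts_for_entity(&cx, entity_id, &scope)?;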
pub struct EntityPatch {
pub name: Option<String>,
pub description: Option<String>,
pub status: Option<String>,
pub confidence: Option<f32>,
}
pub struct FactPatch {
pub predicate: Option<String>,
pub object_value: Option<String>,
pub status: Option<String>,
pub confidence: Option<f32>,
/// Real-world valid-time bounds. Outer Some = "patch this column";
/// inner Some(val) = set to that unix-seconds value; inner None =
/// clear back to NULL ("unbounded"). The double-Option lets the
/// HTTP layer distinguish "field omitted" (leave alone) from
/// "field sent as null" (clear) — needed for these specifically
/// because there's no sentinel string-empty equivalent like the
/// other fields have.
pub valid_from: Option<Option<i64>>,
pub valid_until: Option<Option<i64>>,
}
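// Illustrative: the three shapes the double-Option encodes for valid_until
// (same for valid_from); `..` stands in for the untouched fields.
//
//     FactPatch { valid_until: None, .. }             // omitted -> leave as-is
//     FactPatch { valid_until: Some(Some(ts)), .. }   // set     -> unix seconds ts
//     FactPatch { valid_until: Some(None), .. }       // null    -> clear to NULL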
pub struct RecentActivity {
pub entities: Vec<Entity>,
pub facts: Vec<EntityFact>,
}
// ---------------------------------------------------------------------------
// Trait
// ---------------------------------------------------------------------------
pub trait KnowledgeDao: Sync + Send {
// --- Entity ---
fn upsert_entity(
&mut self,
cx: &opentelemetry::Context,
entity: InsertEntity,
) -> Result<Entity, DbError>;
fn get_entity_by_id(
&mut self,
cx: &opentelemetry::Context,
id: i32,
) -> Result<Option<Entity>, DbError>;
fn get_entity_by_name(
&mut self,
cx: &opentelemetry::Context,
name: &str,
entity_type: Option<&str>,
) -> Result<Vec<Entity>, DbError>;
fn get_entities_with_embeddings(
&mut self,
cx: &opentelemetry::Context,
entity_type: Option<&str>,
) -> Result<Vec<Entity>, DbError>;
fn list_entities(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
) -> Result<(Vec<Entity>, i64), DbError>;
/// List entities alongside a persona-scoped fact count for each.
/// Powers the curation surface — sorting by fact count surfaces
/// the heavily-used entities and demotes 0-fact noise. Counting
/// is restricted to non-rejected facts under the active persona
/// scope so a switch in the persona picker re-orders the list.
fn list_entities_with_fact_counts(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
sort: EntitySort,
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
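// Illustrative call from the curation surface (the filter values are
// examples, not defaults defined here):
//
//     let (rows, total) = dao.list_entities_with_fact_counts(
//         &cx,
//         EntityFilter { entity_type: None, status: Some("active".into()),
//                        search: None, limit: 50, offset: 0 },
//         EntitySort::FactCountDesc,
//         &PersonaFilter::All { user_id },
//     )?;
//     // rows: Vec<(Entity, i64)> — each entity with its persona-scoped fact count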
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,
id: i32,
status: &str,
) -> Result<(), DbError>;
fn update_entity(
&mut self,
cx: &opentelemetry::Context,
id: i32,
patch: EntityPatch,
) -> Result<Option<Entity>, DbError>;
fn delete_entity(&mut self, cx: &opentelemetry::Context, id: i32) -> Result<(), DbError>;
fn merge_entities(
&mut self,
cx: &opentelemetry::Context,
source_id: i32,
target_id: i32,
) -> Result<(i64, i64), DbError>;
// --- Facts ---
fn upsert_fact(
&mut self,
cx: &opentelemetry::Context,
fact: InsertEntityFact,
) -> Result<(EntityFact, bool), DbError>;
fn get_facts_for_entity(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
persona: &PersonaFilter,
) -> Result<Vec<EntityFact>, DbError>;
fn list_facts(
&mut self,
cx: &opentelemetry::Context,
filter: FactFilter,
) -> Result<(Vec<EntityFact>, i64), DbError>;
fn update_fact(
&mut self,
cx: &opentelemetry::Context,
id: i32,
patch: FactPatch,
) -> Result<Option<EntityFact>, DbError>;
fn update_facts_insight_id(
&mut self,
cx: &opentelemetry::Context,
source_photo: &str,
insight_id: i32,
) -> Result<(), DbError>;
fn delete_fact(&mut self, cx: &opentelemetry::Context, id: i32) -> Result<(), DbError>;
/// Mark an old fact as superseded by a new one. Atomically:
/// - reads the new fact's valid_from
/// - sets old.superseded_by = new_id
/// - sets old.status = 'superseded'
/// - stamps old.valid_until = new.valid_from (if not already
/// set; otherwise leaves it)
///
/// Returns the updated old fact, or `None` if either id is missing
/// (letting the handler 404 cleanly). Errors if old_id == new_id.
fn supersede_fact(
&mut self,
cx: &opentelemetry::Context,
old_id: i32,
new_id: i32,
) -> Result<Option<EntityFact>, DbError>;
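// Illustrative outcome with hypothetical ids, old #12 (valid_until NULL)
// and new #34 (valid_from Some(t)):
//
//     let updated = dao.supersede_fact(&cx, 12, 34)?;   // -> Some(updated old fact)
//     // updated.status        == "superseded"
//     // updated.superseded_by == Some(34)
//     // updated.valid_until   == Some(t)   (kept as-is if it was already set)
//     // A missing old or new id yields Ok(None); old_id == new_id is an error.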
// --- Photo links ---
fn upsert_photo_link(
&mut self,
cx: &opentelemetry::Context,
link: InsertEntityPhotoLink,
) -> Result<(), DbError>;
fn delete_photo_links_for_file(
&mut self,
cx: &opentelemetry::Context,
file_path: &str,
) -> Result<(), DbError>;
fn get_links_for_photo(
&mut self,
cx: &opentelemetry::Context,
file_path: &str,
) -> Result<Vec<EntityPhotoLink>, DbError>;
fn get_links_for_entity(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
) -> Result<Vec<EntityPhotoLink>, DbError>;
// --- Audit ---
fn get_recent_activity(
&mut self,
cx: &opentelemetry::Context,
since: i64,
limit: i64,
persona: &PersonaFilter,
) -> Result<RecentActivity, DbError>;
}
// ---------------------------------------------------------------------------
// SQLite implementation
// ---------------------------------------------------------------------------
pub struct SqliteKnowledgeDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteKnowledgeDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteKnowledgeDao {
pub fn new() -> Self {
SqliteKnowledgeDao {
connection: Arc::new(Mutex::new(connect())),
}
}
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
SqliteKnowledgeDao { connection: conn }
}
fn serialize_embedding(vec: &[f32]) -> Vec<u8> {
vec.iter().flat_map(|f| f.to_le_bytes()).collect()
}
fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
Ok(bytes
.chunks_exact(4)
.map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
.collect())
}
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() || a.is_empty() {
return 0.0;
}
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let mag_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let mag_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if mag_a == 0.0 || mag_b == 0.0 {
0.0
} else {
dot / (mag_a * mag_b)
}
}
}
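// Illustrative round trip of the helpers above (little-endian f32 packing):
//
//     let v = vec![0.1_f32, 0.2, 0.3];
//     let bytes = SqliteKnowledgeDao::serialize_embedding(&v);       // 12 bytes
//     let back = SqliteKnowledgeDao::deserialize_embedding(&bytes)?; // == v
//     assert!((SqliteKnowledgeDao::cosine_similarity(&v, &back) - 1.0).abs() < 1e-6);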
/// Cosine-similarity threshold above which a new entity collapses into an
/// existing same-type entity at upsert time. The agent's pre-flight name
/// search uses FTS5 prefix tokens, which misses near-dupes like
/// "Sarah" / "Sara" / "Sarah J." that share a description-rich embedding.
/// Override via `ENTITY_DEDUP_COSINE_THRESHOLD` env var when tuning.
const ENTITY_DEDUP_COSINE_THRESHOLD_DEFAULT: f32 = 0.92;
fn entity_dedup_cosine_threshold() -> f32 {
std::env::var("ENTITY_DEDUP_COSINE_THRESHOLD")
.ok()
.and_then(|v| v.parse::<f32>().ok())
.unwrap_or(ENTITY_DEDUP_COSINE_THRESHOLD_DEFAULT)
}
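// Illustrative override while tuning (a value that fails to parse falls
// back to the 0.92 default):
//
//     ENTITY_DEDUP_COSINE_THRESHOLD=0.88 cargo run   # or however the service is launched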
impl KnowledgeDao for SqliteKnowledgeDao {
// -----------------------------------------------------------------------
// Entity operations
// -----------------------------------------------------------------------
fn upsert_entity(
&mut self,
cx: &opentelemetry::Context,
entity: InsertEntity,
) -> Result<Entity, DbError> {
trace_db_call(cx, "insert", "upsert_entity", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Normalise type before lookup and insert so that model variations
// ("Person" / "person", "location" / "place") collapse to one row.
let entity = InsertEntity {
entity_type: normalize_entity_type(&entity.entity_type),
..entity
};
// Case-insensitive lookup by name + entity_type.
// Use lower() on both sides so existing dirty rows ("Person") still match.
let name_lower = entity.name.to_lowercase();
let type_lower = entity.entity_type.to_lowercase();
let mut existing: Option<Entity> = entities
.filter(diesel::dsl::sql::<diesel::sql_types::Bool>(&format!(
"lower(name) = '{}' AND lower(entity_type) = '{}'",
name_lower.replace('\'', "''"),
type_lower.replace('\'', "''")
)))
.first::<Entity>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
// Fuzzy-match fallback: if no exact name match and the incoming
// entity carries an embedding, compare against same-type entities'
// embeddings and collapse if any are above the cosine threshold.
if existing.is_none()
&& let Some(new_emb_bytes) = entity.embedding.as_ref()
&& let Ok(new_vec) = Self::deserialize_embedding(new_emb_bytes)
&& !new_vec.is_empty()
{
let threshold = entity_dedup_cosine_threshold();
let candidates: Vec<Entity> = entities
.filter(embedding.is_not_null())
.filter(entity_type.eq(&entity.entity_type))
.filter(status.ne("rejected"))
.load::<Entity>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
let mut best: Option<(Entity, f32)> = None;
for cand in candidates {
let Some(cand_bytes) = cand.embedding.as_ref() else {
continue;
};
let Ok(cand_vec) = Self::deserialize_embedding(cand_bytes) else {
continue;
};
let sim = Self::cosine_similarity(&new_vec, &cand_vec);
if sim >= threshold && best.as_ref().is_none_or(|(_, s)| sim > *s) {
best = Some((cand, sim));
}
}
if let Some((cand, sim)) = best {
log::info!(
"entity dedup: collapsing new '{}' ({}) into existing '{}' (id={}, cos={:.3})",
entity.name,
entity.entity_type,
cand.name,
cand.id,
sim
);
existing = Some(cand);
}
}
if let Some(existing_entity) = existing {
// Update description, embedding, updated_at
diesel::update(entities.filter(id.eq(existing_entity.id)))
.set((
description.eq(&entity.description),
embedding.eq(&entity.embedding),
updated_at.eq(entity.updated_at),
))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
entities
.filter(id.eq(existing_entity.id))
.first::<Entity>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
} else {
diesel::insert_into(entities)
.values(&entity)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {}", e))?;
entities
.order(id.desc())
.first::<Entity>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
}
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn get_entity_by_id(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
) -> Result<Option<Entity>, DbError> {
trace_db_call(cx, "query", "get_entity_by_id", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
entities
.filter(id.eq(entity_id))
.first::<Entity>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_entity_by_name(
&mut self,
cx: &opentelemetry::Context,
entity_name: &str,
entity_type_filter: Option<&str>,
) -> Result<Vec<Entity>, DbError> {
trace_db_call(cx, "query", "get_entity_by_name", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let name_lower = entity_name.to_lowercase().replace('\'', "''");
let mut sql = format!("lower(name) = '{}'", name_lower);
if let Some(et) = entity_type_filter {
sql.push_str(&format!(" AND entity_type = '{}'", et.replace('\'', "''")));
}
sql.push_str(" AND status != 'rejected'");
entities
.filter(diesel::dsl::sql::<diesel::sql_types::Bool>(&sql))
.load::<Entity>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_entities_with_embeddings(
&mut self,
cx: &opentelemetry::Context,
entity_type_filter: Option<&str>,
) -> Result<Vec<Entity>, DbError> {
trace_db_call(cx, "query", "get_entities_with_embeddings", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut query = entities
.filter(embedding.is_not_null())
.filter(status.ne("rejected"))
.into_boxed();
if let Some(et) = entity_type_filter {
query = query.filter(entity_type.eq(et));
}
query
.load::<Entity>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn list_entities(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
) -> Result<(Vec<Entity>, i64), DbError> {
trace_db_call(cx, "query", "list_entities", |_span| {
use diesel::dsl::count_star;
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut query = entities.into_boxed();
if let Some(ref et) = filter.entity_type {
query = query.filter(entity_type.eq(et));
}
let status_val = filter.status.as_deref().unwrap_or("active");
if status_val != "all" {
query = query.filter(status.eq(status_val));
}
if let Some(ref search_term) = filter.search {
let pattern = format!("%{}%", search_term);
query = query.filter(name.like(pattern.clone()).or(description.like(pattern)));
}
// Count with same filters applied (build separately since boxed query is consumed)
let mut count_query = entities.into_boxed();
if let Some(ref et) = filter.entity_type {
count_query = count_query.filter(entity_type.eq(et));
}
let status_val2 = filter.status.as_deref().unwrap_or("active");
if status_val2 != "all" {
count_query = count_query.filter(status.eq(status_val2));
}
if let Some(ref search_term) = filter.search {
let pattern = format!("%{}%", search_term);
count_query =
count_query.filter(name.like(pattern.clone()).or(description.like(pattern)));
}
let total: i64 = count_query
.select(count_star())
.first(conn.deref_mut())
.unwrap_or(0);
let results = query
.order(updated_at.desc())
.limit(filter.limit)
.offset(filter.offset)
.load::<Entity>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
Ok((results, total))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn list_entities_with_fact_counts(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
sort: EntitySort,
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError> {
trace_db_call(cx, "query", "list_entities_with_fact_counts", |_span| {
use diesel::sql_query;
use diesel::sql_types::{BigInt, Integer, Text};
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Build WHERE fragments. Inline-safe values are bound; status
// / sort keywords are validated against fixed sets.
let mut where_parts: Vec<String> = Vec::new();
let mut bind_types: Vec<&'static str> = Vec::new();
let mut bind_strs: Vec<String> = Vec::new();
if filter.entity_type.is_some() {
where_parts.push("e.entity_type = ?".to_string());
bind_types.push("text");
bind_strs.push(filter.entity_type.clone().unwrap());
}
let status_val = filter.status.as_deref().unwrap_or("active");
if status_val != "all" {
where_parts.push("e.status = ?".to_string());
bind_types.push("text");
bind_strs.push(status_val.to_string());
}
if let Some(ref s) = filter.search {
where_parts.push("(e.name LIKE ? OR e.description LIKE ?)".to_string());
bind_types.push("text");
bind_types.push("text");
let pat = format!("%{}%", s);
bind_strs.push(pat.clone());
bind_strs.push(pat);
}
let where_clause = if where_parts.is_empty() {
String::new()
} else {
format!("WHERE {}", where_parts.join(" AND "))
};
// Persona-scoped fact-count subquery. Single = filter on
// (user_id, persona_id); All = union across the user's
// personas (mirror PersonaFilter::All read semantics).
let fact_count_join = match persona {
PersonaFilter::Single { user_id: _, persona_id: _ } => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
WHERE user_id = ? AND persona_id = ? AND status != 'rejected' \
GROUP BY subject_entity_id\
) fc ON fc.subject_entity_id = e.id"
}
PersonaFilter::All { user_id: _ } => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
WHERE user_id = ? AND status != 'rejected' \
GROUP BY subject_entity_id\
) fc ON fc.subject_entity_id = e.id"
}
};
let order_by = match sort {
EntitySort::UpdatedDesc => "e.updated_at DESC",
EntitySort::NameAsc => "lower(e.name) ASC",
EntitySort::FactCountDesc => {
"COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC"
}
};
let select_sql = format!(
"SELECT e.id, e.name, e.entity_type, e.description, e.embedding, \
e.confidence, e.status, e.created_at, e.updated_at, \
COALESCE(fc.fact_count, 0) AS fact_count \
FROM entities e \
{fact_count_join} \
{where_clause} \
ORDER BY {order_by} \
LIMIT ? OFFSET ?"
);
let count_sql = format!(
"SELECT COUNT(*) AS total FROM entities e {where_clause}"
);
// ── Total count ─────────────────────────────────────────
#[derive(diesel::QueryableByName)]
struct TotalRow {
#[diesel(sql_type = BigInt)]
total: i64,
}
let mut count_q = sql_query(count_sql).into_boxed();
for s in &bind_strs {
count_q = count_q.bind::<Text, _>(s.clone());
}
let total: i64 = count_q
.get_result::<TotalRow>(conn.deref_mut())
.map(|r| r.total)
.unwrap_or(0);
// ── Page query ──────────────────────────────────────────
#[derive(diesel::QueryableByName)]
struct EntityWithCountRow {
#[diesel(sql_type = Integer)]
id: i32,
#[diesel(sql_type = Text)]
name: String,
#[diesel(sql_type = Text)]
entity_type: String,
#[diesel(sql_type = Text)]
description: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
embedding: Option<Vec<u8>>,
#[diesel(sql_type = diesel::sql_types::Float)]
confidence: f32,
#[diesel(sql_type = Text)]
status: String,
#[diesel(sql_type = BigInt)]
created_at: i64,
#[diesel(sql_type = BigInt)]
updated_at: i64,
#[diesel(sql_type = BigInt)]
fact_count: i64,
}
let mut q = sql_query(select_sql).into_boxed();
// Persona binds first (they're earlier in the SQL — inside
// the subquery LEFT JOIN).
match persona {
PersonaFilter::Single { user_id, persona_id } => {
q = q
.bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone());
}
PersonaFilter::All { user_id } => {
q = q.bind::<Integer, _>(*user_id);
}
}
// Then WHERE binds in order.
for s in &bind_strs {
q = q.bind::<Text, _>(s.clone());
}
// Then LIMIT / OFFSET.
q = q
.bind::<BigInt, _>(filter.limit)
.bind::<BigInt, _>(filter.offset);
let rows: Vec<EntityWithCountRow> = q
.load(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
let pairs: Vec<(Entity, i64)> = rows
.into_iter()
.map(|r| {
(
Entity {
id: r.id,
name: r.name,
entity_type: r.entity_type,
description: r.description,
embedding: r.embedding,
confidence: r.confidence,
status: r.status,
created_at: r.created_at,
updated_at: r.updated_at,
},
r.fact_count,
)
})
.collect();
// Sink the unused `bind_types`; it's kept only as documentation of the
// bind order assembled above.
let _ = bind_types;
Ok((pairs, total))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
new_status: &str,
) -> Result<(), DbError> {
trace_db_call(cx, "update", "update_entity_status", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
diesel::update(entities.filter(id.eq(entity_id)))
.set(status.eq(new_status))
.execute(conn.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn update_entity(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
patch: EntityPatch,
) -> Result<Option<Entity>, DbError> {
trace_db_call(cx, "update", "update_entity", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let now = chrono::Utc::now().timestamp();
if let Some(ref new_name) = patch.name {
diesel::update(entities.filter(id.eq(entity_id)))
.set((name.eq(new_name), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update name error: {}", e))?;
}
if let Some(ref new_desc) = patch.description {
diesel::update(entities.filter(id.eq(entity_id)))
.set((description.eq(new_desc), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update description error: {}", e))?;
}
if let Some(ref new_status) = patch.status {
diesel::update(entities.filter(id.eq(entity_id)))
.set((status.eq(new_status), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update status error: {}", e))?;
}
if let Some(new_confidence) = patch.confidence {
diesel::update(entities.filter(id.eq(entity_id)))
.set((confidence.eq(new_confidence), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update confidence error: {}", e))?;
}
entities
.filter(id.eq(entity_id))
.first::<Entity>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn delete_entity(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
) -> Result<(), DbError> {
trace_db_call(cx, "delete", "delete_entity", |_span| {
use schema::entities::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// entity_facts has a CHECK constraint requiring
// `object_entity_id IS NOT NULL OR object_value IS NOT NULL`.
// The FK on object_entity_id is ON DELETE SET NULL — but
// facts that pointed at the deleted entity *only* via the
// entity reference (the common case for relational facts
// like "Alice is_friend_of Bob") have no object_value, so
// SET NULL would leave them with both NULLs and the CHECK
// aborts the whole DELETE. Pre-delete those facts in a
// transaction so the CASCADE / SET NULL chain on what
// remains can fire cleanly.
//
// Long-term fix is to change the FK to ON DELETE CASCADE
// via a table-rebuild migration, but the DAO-side workaround
// is sufficient and less invasive.
conn.transaction::<(), diesel::result::Error, _>(|conn| {
use schema::entity_facts::dsl as ef;
diesel::delete(
ef::entity_facts
.filter(ef::object_entity_id.eq(entity_id))
.filter(ef::object_value.is_null()),
)
.execute(conn)?;
diesel::delete(entities.filter(id.eq(entity_id))).execute(conn)?;
Ok(())
})
.map_err(|e| anyhow::anyhow!("Delete error: {}", e))
})
.map_err(|e| {
// Surface the actual diesel error string before collapsing
// to the opaque DbErrorKind::QueryError.
log::warn!("delete_entity({}) failed: {}", entity_id, e);
DbError::new(DbErrorKind::QueryError)
})
}
fn merge_entities(
&mut self,
cx: &opentelemetry::Context,
source_id: i32,
target_id: i32,
) -> Result<(i64, i64), DbError> {
trace_db_call(cx, "update", "merge_entities", |_span| {
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
conn.transaction::<(i64, i64), diesel::result::Error, _>(|conn| {
use schema::entity_facts::dsl as ef;
// 1. Re-point facts where source is subject
let facts_updated =
diesel::update(ef::entity_facts.filter(ef::subject_entity_id.eq(source_id)))
.set(ef::subject_entity_id.eq(target_id))
.execute(conn)? as i64;
// 2. Re-point facts where source is object
diesel::update(ef::entity_facts.filter(ef::object_entity_id.eq(source_id)))
.set(ef::object_entity_id.eq(Some(target_id)))
.execute(conn)?;
// 3. Copy photo links to target (INSERT OR IGNORE to skip duplicates)
let links_updated = diesel::sql_query(
"INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) \
SELECT ?, library_id, rel_path, role FROM entity_photo_links WHERE entity_id = ?",
)
.bind::<diesel::sql_types::Integer, _>(target_id)
.bind::<diesel::sql_types::Integer, _>(source_id)
.execute(conn)? as i64;
// 4. Delete source entity (FK CASCADE removes remaining facts/links)
diesel::delete(
schema::entities::dsl::entities.filter(schema::entities::dsl::id.eq(source_id)),
)
.execute(conn)?;
Ok((facts_updated, links_updated))
})
.map_err(|e| anyhow::anyhow!("Merge transaction error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
// -----------------------------------------------------------------------
// Fact operations
// -----------------------------------------------------------------------
fn upsert_fact(
&mut self,
cx: &opentelemetry::Context,
fact: InsertEntityFact,
) -> Result<(EntityFact, bool), DbError> {
trace_db_call(cx, "insert", "upsert_fact", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Look for an identical active fact AUTHORED BY THE SAME
// (USER, PERSONA). The same claim from a different persona —
// or from a different user with the same persona name — is a
// separate fact (each persona's voice/confidence is its own),
// not a confidence bump on someone else's row.
let mut dup_query = entity_facts
.filter(subject_entity_id.eq(fact.subject_entity_id))
.filter(predicate.eq(&fact.predicate))
.filter(user_id.eq(fact.user_id))
.filter(persona_id.eq(&fact.persona_id))
.filter(status.ne("rejected"))
.into_boxed();
match &fact.object_entity_id {
Some(oid) => dup_query = dup_query.filter(object_entity_id.eq(oid)),
None => dup_query = dup_query.filter(object_entity_id.is_null()),
}
match &fact.object_value {
Some(ov) => dup_query = dup_query.filter(object_value.eq(ov)),
None => dup_query = dup_query.filter(object_value.is_null()),
}
let existing: Option<EntityFact> = dup_query
.first::<EntityFact>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
if let Some(existing_fact) = existing {
// Corroborate: bump confidence by 0.1 capped at 0.95
let new_confidence = (existing_fact.confidence + 0.1).min(0.95);
diesel::update(entity_facts.filter(id.eq(existing_fact.id)))
.set(confidence.eq(new_confidence))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update confidence error: {}", e))?;
let updated = entity_facts
.filter(id.eq(existing_fact.id))
.first::<EntityFact>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
Ok((updated, false)) // false = corroborated, not newly created
} else {
diesel::insert_into(entity_facts)
.values(&fact)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {}", e))?;
let inserted = entity_facts
.order(id.desc())
.first::<EntityFact>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
Ok((inserted, true)) // true = newly created
}
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn get_facts_for_entity(
&mut self,
cx: &opentelemetry::Context,
entity_id: i32,
persona: &PersonaFilter,
) -> Result<Vec<EntityFact>, DbError> {
trace_db_call(cx, "query", "get_facts_for_entity", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut q = entity_facts
.filter(subject_entity_id.eq(entity_id))
.filter(status.ne("rejected"))
.filter(user_id.eq(persona.user_id()))
.into_boxed();
if let PersonaFilter::Single { persona_id: pid, .. } = persona {
q = q.filter(persona_id.eq(pid.clone()));
}
q.load::<EntityFact>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn list_facts(
&mut self,
cx: &opentelemetry::Context,
filter: FactFilter,
) -> Result<(Vec<EntityFact>, i64), DbError> {
trace_db_call(cx, "query", "list_facts", |_span| {
use diesel::dsl::count_star;
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut query = entity_facts.into_boxed();
let mut count_query = entity_facts.into_boxed();
// user_id always applies — facts are user-isolated.
let uid = filter.persona.user_id();
query = query.filter(user_id.eq(uid));
count_query = count_query.filter(user_id.eq(uid));
if let Some(eid) = filter.entity_id {
query = query.filter(subject_entity_id.eq(eid));
count_query = count_query.filter(subject_entity_id.eq(eid));
}
let status_val = filter.status.as_deref().unwrap_or("active");
if status_val != "all" {
query = query.filter(status.eq(status_val));
count_query = count_query.filter(status.eq(status_val));
}
if let Some(ref pred) = filter.predicate {
query = query.filter(predicate.eq(pred));
count_query = count_query.filter(predicate.eq(pred));
}
if let PersonaFilter::Single { persona_id: ref pid, .. } = filter.persona {
query = query.filter(persona_id.eq(pid.clone()));
count_query = count_query.filter(persona_id.eq(pid.clone()));
}
let total: i64 = count_query
.select(count_star())
.first(conn.deref_mut())
.unwrap_or(0);
let results = query
.order(created_at.desc())
.limit(filter.limit)
.offset(filter.offset)
.load::<EntityFact>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
Ok((results, total))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_fact(
&mut self,
cx: &opentelemetry::Context,
fact_id: i32,
patch: FactPatch,
) -> Result<Option<EntityFact>, DbError> {
trace_db_call(cx, "update", "update_fact", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
if let Some(ref new_predicate) = patch.predicate {
diesel::update(entity_facts.filter(id.eq(fact_id)))
.set(predicate.eq(new_predicate))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
}
if let Some(ref new_value) = patch.object_value {
diesel::update(entity_facts.filter(id.eq(fact_id)))
.set(object_value.eq(new_value))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
}
if let Some(ref new_status) = patch.status {
diesel::update(entity_facts.filter(id.eq(fact_id)))
.set(status.eq(new_status))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
}
if let Some(new_confidence) = patch.confidence {
diesel::update(entity_facts.filter(id.eq(fact_id)))
.set(confidence.eq(new_confidence))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
}
if let Some(new_from) = patch.valid_from {
diesel::update(entity_facts.filter(id.eq(fact_id)))
.set(valid_from.eq(new_from))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
}
if let Some(new_until) = patch.valid_until {
diesel::update(entity_facts.filter(id.eq(fact_id)))
.set(valid_until.eq(new_until))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
}
entity_facts
.filter(id.eq(fact_id))
.first::<EntityFact>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn update_facts_insight_id(
&mut self,
cx: &opentelemetry::Context,
photo_path: &str,
insight_id: i32,
) -> Result<(), DbError> {
trace_db_call(cx, "update", "update_facts_insight_id", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
diesel::update(
entity_facts
.filter(source_photo.eq(photo_path))
.filter(source_insight_id.is_null()),
)
.set(source_insight_id.eq(insight_id))
.execute(conn.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn delete_fact(&mut self, cx: &opentelemetry::Context, fact_id: i32) -> Result<(), DbError> {
trace_db_call(cx, "delete", "delete_fact", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Clear dangling supersession pointers from any fact this
// one had retired — there's no FK on superseded_by (SQLite
// can't ALTER ADD with REFERENCES) so we do it manually.
// Sibling rows lose the pointer but stay 'superseded' —
// the user's historical correction survives the cleanup.
conn.transaction::<(), diesel::result::Error, _>(|conn| {
diesel::update(entity_facts.filter(superseded_by.eq(fact_id)))
.set(superseded_by.eq::<Option<i32>>(None))
.execute(conn)?;
diesel::delete(entity_facts.filter(id.eq(fact_id))).execute(conn)?;
Ok(())
})
.map_err(|e| anyhow::anyhow!("Delete error: {}", e))
})
.map_err(|e| {
log::warn!("delete_fact({}) failed: {}", fact_id, e);
DbError::new(DbErrorKind::QueryError)
})
}
fn supersede_fact(
&mut self,
cx: &opentelemetry::Context,
old_id: i32,
new_id: i32,
) -> Result<Option<EntityFact>, DbError> {
trace_db_call(cx, "update", "supersede_fact", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
if old_id == new_id {
return Err(anyhow::anyhow!(
"supersede_fact: old_id and new_id must differ"
));
}
conn.transaction::<Option<EntityFact>, diesel::result::Error, _>(
|conn| {
// Pull the new fact's valid_from so we can close
// the old fact's interval at the same point.
let new_fact: Option<EntityFact> = entity_facts
.filter(id.eq(new_id))
.first::<EntityFact>(conn)
.optional()?;
let Some(new_fact) = new_fact else {
return Ok(None);
};
// Verify the old fact exists before touching it —
// returning None lets the handler 404 cleanly.
let old_fact: Option<EntityFact> = entity_facts
.filter(id.eq(old_id))
.first::<EntityFact>(conn)
.optional()?;
if old_fact.is_none() {
return Ok(None);
}
// Only stamp valid_until if the user hasn't
// already set it — respecting hand-curated bounds.
let target_valid_until = old_fact
.as_ref()
.and_then(|f| f.valid_until)
.or(new_fact.valid_from);
diesel::update(entity_facts.filter(id.eq(old_id)))
.set((
status.eq("superseded"),
superseded_by.eq(Some(new_id)),
valid_until.eq(target_valid_until),
))
.execute(conn)?;
entity_facts
.filter(id.eq(old_id))
.first::<EntityFact>(conn)
.optional()
},
)
.map_err(|e| anyhow::anyhow!("Supersede error: {}", e))
})
.map_err(|e| {
log::warn!(
"supersede_fact(old={}, new={}) failed: {}",
old_id,
new_id,
e
);
DbError::new(DbErrorKind::UpdateError)
})
}
// -----------------------------------------------------------------------
// Photo link operations
// -----------------------------------------------------------------------
fn upsert_photo_link(
&mut self,
cx: &opentelemetry::Context,
link: InsertEntityPhotoLink,
) -> Result<(), DbError> {
trace_db_call(cx, "insert", "upsert_photo_link", |_span| {
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// INSERT OR IGNORE respects the UNIQUE(entity_id, library_id, rel_path, role) constraint
diesel::sql_query(
"INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) VALUES (?, ?, ?, ?)"
)
.bind::<diesel::sql_types::Integer, _>(link.entity_id)
.bind::<diesel::sql_types::Integer, _>(link.library_id)
.bind::<diesel::sql_types::Text, _>(&link.file_path)
.bind::<diesel::sql_types::Text, _>(&link.role)
.execute(conn.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Insert error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn delete_photo_links_for_file(
&mut self,
cx: &opentelemetry::Context,
file_path_val: &str,
) -> Result<(), DbError> {
trace_db_call(cx, "delete", "delete_photo_links_for_file", |_span| {
use schema::entity_photo_links::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
diesel::delete(entity_photo_links.filter(rel_path.eq(file_path_val)))
.execute(conn.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Delete error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_links_for_photo(
&mut self,
cx: &opentelemetry::Context,
file_path_val: &str,
) -> Result<Vec<EntityPhotoLink>, DbError> {
trace_db_call(cx, "query", "get_links_for_photo", |_span| {
use schema::entity_photo_links::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
entity_photo_links
.filter(rel_path.eq(file_path_val))
.load::<EntityPhotoLink>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_links_for_entity(
&mut self,
cx: &opentelemetry::Context,
entity_id_val: i32,
) -> Result<Vec<EntityPhotoLink>, DbError> {
trace_db_call(cx, "query", "get_links_for_entity", |_span| {
use schema::entity_photo_links::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
entity_photo_links
.filter(entity_id.eq(entity_id_val))
.load::<EntityPhotoLink>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
// -----------------------------------------------------------------------
// Audit
// -----------------------------------------------------------------------
fn get_recent_activity(
&mut self,
cx: &opentelemetry::Context,
since: i64,
limit: i64,
persona: &PersonaFilter,
) -> Result<RecentActivity, DbError> {
trace_db_call(cx, "query", "get_recent_activity", |_span| {
use schema::entities::dsl as e;
use schema::entity_facts::dsl as ef;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Entities are shared — recency is global.
let recent_entities = e::entities
.filter(e::created_at.gt(since))
.order(e::created_at.desc())
.limit(limit)
.load::<Entity>(conn.deref_mut())
.map_err(|err| anyhow::anyhow!("Query error: {}", err))?;
let mut facts_q = ef::entity_facts
.filter(ef::created_at.gt(since))
.filter(ef::user_id.eq(persona.user_id()))
.into_boxed();
if let PersonaFilter::Single { persona_id: pid, .. } = persona {
facts_q = facts_q.filter(ef::persona_id.eq(pid.clone()));
}
let recent_facts = facts_q
.order(ef::created_at.desc())
.limit(limit)
.load::<EntityFact>(conn.deref_mut())
.map_err(|err| anyhow::anyhow!("Query error: {}", err))?;
Ok(RecentActivity {
entities: recent_entities,
facts: recent_facts,
})
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
#[cfg(test)]
mod tests {
//! Persona scoping + composite-FK invariants for entity_facts.
//!
//! These tests pin three contracts that are silently regressable:
//!
//! 1. PersonaFilter::Single isolates per (user_id, persona_id). Two
//! users with the same 'default' persona must not see each
//! other's facts (multi-user leakage was a latent bug before
//! migration 2026-05-10 added user_id + composite FK).
//!
//! 2. PersonaFilter::All scopes to a single user but unions across
//! that user's personas. Hive-mind for human browsing of
//! /knowledge/*; never crosses users.
//!
//! 3. Deleting a persona CASCADEs to the user's facts under that
//! persona — and ONLY that user's, ONLY that persona's. Other
//! users sharing the persona_id name keep their facts.
//!
//! FKs aren't enabled by default on Diesel's SQLite connection;
//! `connection_with_fks_on()` flips the pragma so the cascade
//! actually fires in tests (mirroring runtime in production).
use super::*;
use crate::database::models::{InsertEntity, InsertEntityFact, InsertPersona};
use crate::database::test::in_memory_db_connection;
use diesel::connection::SimpleConnection;
fn connection_with_fks_on() -> Arc<Mutex<SqliteConnection>> {
let mut conn = in_memory_db_connection();
conn.batch_execute("PRAGMA foreign_keys = ON;")
.expect("enable foreign_keys pragma");
Arc::new(Mutex::new(conn))
}
fn create_user(conn: &Arc<Mutex<SqliteConnection>>, username: &str) -> i32 {
use crate::database::schema::users::dsl as u;
let mut c = conn.lock().unwrap();
diesel::insert_into(u::users)
.values((u::username.eq(username), u::password.eq("x")))
.execute(c.deref_mut())
.unwrap();
u::users
.filter(u::username.eq(username))
.select(u::id)
.first(c.deref_mut())
.unwrap()
}
fn create_persona_row(conn: &Arc<Mutex<SqliteConnection>>, uid: i32, pid: &str) {
use crate::database::schema::personas::dsl as p;
let mut c = conn.lock().unwrap();
diesel::insert_into(p::personas)
.values(InsertPersona {
user_id: uid,
persona_id: pid,
name: pid,
system_prompt: "test prompt",
is_built_in: false,
include_all_memories: false,
created_at: 0,
updated_at: 0,
})
.execute(c.deref_mut())
.unwrap();
}
fn make_entity(dao: &mut SqliteKnowledgeDao, name: &str) -> Entity {
let cx = opentelemetry::Context::new();
dao.upsert_entity(
&cx,
InsertEntity {
name: name.to_string(),
entity_type: "person".to_string(),
description: String::new(),
embedding: None,
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
updated_at: 0,
},
)
.unwrap()
}
fn add_fact(
dao: &mut SqliteKnowledgeDao,
subject: i32,
predicate: &str,
value: &str,
user_id: i32,
persona_id: &str,
) -> EntityFact {
let cx = opentelemetry::Context::new();
let (fact, _) = dao
.upsert_fact(
&cx,
InsertEntityFact {
subject_entity_id: subject,
predicate: predicate.to_string(),
object_entity_id: None,
object_value: Some(value.to_string()),
source_photo: None,
source_insight_id: None,
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
persona_id: persona_id.to_string(),
user_id,
valid_from: None,
valid_until: None,
superseded_by: None,
},
)
.unwrap();
fact
}
#[test]
fn persona_filter_single_isolates_per_user() {
// Two users, same persona name. Each user's facts under that
// persona must NOT surface to the other user's reads — this is
// the multi-user leakage that motivated adding user_id.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
let bob = create_user(&conn, "bob");
create_persona_row(&conn, alice, "default");
create_persona_row(&conn, bob, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "located_in", "Vermont", alice, "default");
add_fact(&mut dao, entity.id, "color", "red", bob, "default");
let alice_view = dao
.get_facts_for_entity(
&cx,
entity.id,
&PersonaFilter::Single {
user_id: alice,
persona_id: "default".to_string(),
},
)
.unwrap();
assert_eq!(alice_view.len(), 1);
assert_eq!(alice_view[0].predicate, "located_in");
let bob_view = dao
.get_facts_for_entity(
&cx,
entity.id,
&PersonaFilter::Single {
user_id: bob,
persona_id: "default".to_string(),
},
)
.unwrap();
assert_eq!(bob_view.len(), 1);
assert_eq!(bob_view[0].predicate, "color");
}
#[test]
fn persona_filter_all_unions_across_personas_one_user() {
// include_all_memories=true → All variant: see this user's
// facts across all their personas. Must NOT include other
// users' facts even when they share a persona name.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
let bob = create_user(&conn, "bob");
create_persona_row(&conn, alice, "default");
create_persona_row(&conn, alice, "journal");
create_persona_row(&conn, bob, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "p1", "v1", alice, "default");
add_fact(&mut dao, entity.id, "p2", "v2", alice, "journal");
add_fact(&mut dao, entity.id, "p3", "v3", bob, "default");
let alice_all = dao
.get_facts_for_entity(&cx, entity.id, &PersonaFilter::All { user_id: alice })
.unwrap();
let predicates: Vec<&str> = alice_all.iter().map(|f| f.predicate.as_str()).collect();
assert_eq!(predicates.len(), 2);
assert!(predicates.contains(&"p1"));
assert!(predicates.contains(&"p2"));
assert!(
!predicates.contains(&"p3"),
"All variant must not leak across users"
);
}
#[test]
fn upsert_fact_dedup_does_not_cross_users() {
// Two users insert the SAME claim (same subject + predicate +
// object_value) under the same persona name. Pre-fix, the
// dedup key was (subject, predicate, persona_id) and bob's
// insert would corroborate alice's row instead of creating a
// new one. Post-fix the key includes user_id, so each user
// gets their own row at confidence=0.6.
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
let bob = create_user(&conn, "bob");
create_persona_row(&conn, alice, "default");
create_persona_row(&conn, bob, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
let alice_fact = add_fact(&mut dao, entity.id, "color", "red", alice, "default");
let bob_fact = add_fact(&mut dao, entity.id, "color", "red", bob, "default");
assert_ne!(alice_fact.id, bob_fact.id, "must be separate rows");
assert_eq!(alice_fact.confidence, 0.6);
assert_eq!(
bob_fact.confidence, 0.6,
"bob's row should not have been corroboration-bumped against alice's"
);
}
#[test]
fn deleting_persona_cascades_only_that_users_facts() {
// Composite FK + CASCADE: deleting alice's 'journal' persona
// wipes alice's journal facts but leaves alice's default
// facts AND bob's journal-named facts untouched.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
let bob = create_user(&conn, "bob");
create_persona_row(&conn, alice, "default");
create_persona_row(&conn, alice, "journal");
create_persona_row(&conn, bob, "journal");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "p_alice_default", "x", alice, "default");
add_fact(&mut dao, entity.id, "p_alice_journal", "y", alice, "journal");
add_fact(&mut dao, entity.id, "p_bob_journal", "z", bob, "journal");
// Delete alice's journal persona — CASCADE should remove only
// alice's journal facts.
{
use crate::database::schema::personas::dsl as p;
let mut c = conn.lock().unwrap();
diesel::delete(
p::personas
.filter(p::user_id.eq(alice))
.filter(p::persona_id.eq("journal")),
)
.execute(c.deref_mut())
.unwrap();
}
// alice/default survives.
let alice_default = dao
.get_facts_for_entity(
&cx,
entity.id,
&PersonaFilter::Single {
user_id: alice,
persona_id: "default".to_string(),
},
)
.unwrap();
assert_eq!(alice_default.len(), 1);
assert_eq!(alice_default[0].predicate, "p_alice_default");
// alice/journal is gone.
let alice_journal = dao
.get_facts_for_entity(
&cx,
entity.id,
&PersonaFilter::Single {
user_id: alice,
persona_id: "journal".to_string(),
},
)
.unwrap();
assert!(
alice_journal.is_empty(),
"CASCADE should have removed alice's journal facts"
);
// bob/journal — same persona name, different user — untouched.
let bob_journal = dao
.get_facts_for_entity(
&cx,
entity.id,
&PersonaFilter::Single {
user_id: bob,
persona_id: "journal".to_string(),
},
)
.unwrap();
assert_eq!(bob_journal.len(), 1);
assert_eq!(bob_journal[0].predicate, "p_bob_journal");
}
#[test]
fn fact_insert_with_unknown_persona_is_rejected() {
// FK enforcement: inserting a fact whose (user_id, persona_id)
// pair has no matching personas row should fail. Protects
// against typo'd persona ids silently leaking into the table.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
// Note: NO persona row inserted for alice + 'ghost'.
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
let result = dao.upsert_fact(
&cx,
InsertEntityFact {
subject_entity_id: entity.id,
predicate: "color".to_string(),
object_entity_id: None,
object_value: Some("red".to_string()),
source_photo: None,
source_insight_id: None,
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
persona_id: "ghost".to_string(),
user_id: alice,
valid_from: None,
valid_until: None,
superseded_by: None,
},
);
assert!(
result.is_err(),
"FK should reject fact whose persona doesn't exist"
);
}
#[test]
fn supersede_fact_links_and_stamps_valid_until() {
// Supersession: marking an old fact as replaced by a new one
// flips its status to 'superseded', points superseded_by at
// the new fact, and stamps valid_until from the new fact's
// valid_from (when not already set). Pre-existing valid_until
// on the old fact is respected.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
create_persona_row(&conn, alice, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let cameron = make_entity(&mut dao, "Cameron");
let old = add_fact(
&mut dao,
cameron.id,
"is_in_relationship_with",
"X",
alice,
"default",
);
// The new fact carries a valid_from we expect to be stamped
// onto the old fact's valid_until.
let new = add_fact(
&mut dao,
cameron.id,
"is_in_relationship_with",
"Y",
alice,
"default",
);
dao.update_fact(
&cx,
new.id,
FactPatch {
predicate: None,
object_value: None,
status: None,
confidence: None,
valid_from: Some(Some(1640995200)), // 2022-01-01
valid_until: None,
},
)
.unwrap();
let updated = dao
.supersede_fact(&cx, old.id, new.id)
.unwrap()
.expect("supersede returned None");
assert_eq!(updated.status, "superseded");
assert_eq!(updated.superseded_by, Some(new.id));
assert_eq!(updated.valid_until, Some(1640995200));
}
#[test]
fn delete_fact_clears_dangling_supersession_pointers() {
// Deleting the newer fact (the supersedeR) leaves the older
// fact's superseded_by dangling — the DAO clears it back to
// NULL in the same transaction so the column never points at
// a missing row. The old fact's status stays 'superseded'
// because the historical correction is still meaningful.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
create_persona_row(&conn, alice, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let cameron = make_entity(&mut dao, "Cameron");
let old = add_fact(&mut dao, cameron.id, "lives_in", "NYC", alice, "default");
let new = add_fact(&mut dao, cameron.id, "lives_in", "SF", alice, "default");
dao.supersede_fact(&cx, old.id, new.id).unwrap().unwrap();
dao.delete_fact(&cx, new.id).unwrap();
let rehydrated = dao
.list_facts(
&cx,
FactFilter {
entity_id: Some(cameron.id),
// "all" — the old fact is 'superseded' now, so the
// default 'active' scope would skip it.
status: Some("all".to_string()),
predicate: None,
persona: PersonaFilter::Single {
user_id: alice,
persona_id: "default".to_string(),
},
limit: 10,
offset: 0,
},
)
.unwrap()
.0;
let old_row = rehydrated.iter().find(|f| f.id == old.id).unwrap();
assert_eq!(
old_row.superseded_by, None,
"dangling supersession pointer should be cleared"
);
assert_eq!(
old_row.status, "superseded",
"historical status should survive the supersederr delete"
);
}
#[test]
fn update_fact_can_set_and_clear_valid_time() {
// FactPatch.valid_from / valid_until are Option<Option<i64>>
// so PATCH can distinguish "leave alone" (None) from "set to
// value" (Some(Some(n))) and "clear back to NULL" (Some(None)).
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
create_persona_row(&conn, alice, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let cameron = make_entity(&mut dao, "Cameron");
let fact = add_fact(
&mut dao,
cameron.id,
"is_in_relationship_with",
"Alex",
alice,
"default",
);
assert_eq!(fact.valid_from, None);
assert_eq!(fact.valid_until, None);
// Set both bounds.
let updated = dao
.update_fact(
&cx,
fact.id,
FactPatch {
predicate: None,
object_value: None,
status: None,
confidence: None,
valid_from: Some(Some(1577836800)), // 2020-01-01
valid_until: Some(Some(1640995200)), // 2022-01-01
},
)
.unwrap()
.unwrap();
assert_eq!(updated.valid_from, Some(1577836800));
assert_eq!(updated.valid_until, Some(1640995200));
// Leave alone: omit both — values persist.
let still = dao
.update_fact(
&cx,
fact.id,
FactPatch {
predicate: None,
object_value: None,
status: None,
confidence: None,
valid_from: None,
valid_until: None,
},
)
.unwrap()
.unwrap();
assert_eq!(still.valid_from, Some(1577836800));
assert_eq!(still.valid_until, Some(1640995200));
// Clear valid_until back to NULL (relationship ongoing again).
let cleared = dao
.update_fact(
&cx,
fact.id,
FactPatch {
predicate: None,
object_value: None,
status: None,
confidence: None,
valid_from: None,
valid_until: Some(None),
},
)
.unwrap()
.unwrap();
assert_eq!(cleared.valid_from, Some(1577836800));
assert_eq!(cleared.valid_until, None);
}
#[test]
fn delete_entity_clears_relational_facts_that_would_violate_check() {
// entity_facts has a CHECK that at least one of object_entity_id /
// object_value is non-null. The FK on object_entity_id is
// ON DELETE SET NULL, which would leave purely-relational facts
// (subject + predicate + object_entity_id, no object_value)
// with both nulls and abort the delete. The DAO pre-deletes
// those rows in a transaction so the parent delete can succeed.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let alice = create_user(&conn, "alice");
create_persona_row(&conn, alice, "default");
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let bob = make_entity(&mut dao, "Bob");
let carol = make_entity(&mut dao, "Carol");
// A relational fact where Carol is the object — exactly the
// shape the CHECK + SET NULL combination would otherwise break.
let (rel_fact, _) = dao
.upsert_fact(
&cx,
InsertEntityFact {
subject_entity_id: bob.id,
predicate: "is_friend_of".to_string(),
object_entity_id: Some(carol.id),
object_value: None,
source_photo: None,
source_insight_id: None,
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
persona_id: "default".to_string(),
user_id: alice,
valid_from: None,
valid_until: None,
superseded_by: None,
},
)
.unwrap();
// A typed fact where Bob is the subject — should survive.
add_fact(&mut dao, bob.id, "has_age", "30", alice, "default");
// Delete Carol — should succeed (relational fact pre-deleted).
dao.delete_entity(&cx, carol.id).unwrap();
assert!(
dao.get_entity_by_id(&cx, carol.id).unwrap().is_none(),
"Carol should be deleted"
);
// The relational fact about Carol should be gone (pre-deleted by
// the DAO's transaction, not SET NULL'd).
let bob_facts = dao
.get_facts_for_entity(
&cx,
bob.id,
&PersonaFilter::Single {
user_id: alice,
persona_id: "default".to_string(),
},
)
.unwrap();
assert!(
!bob_facts.iter().any(|f| f.id == rel_fact.id),
"relational fact pointing at Carol should be removed"
);
// The typed fact survives.
assert!(
bob_facts.iter().any(|f| f.predicate == "has_age"),
"typed fact about Bob should survive Carol's deletion"
);
}
#[test]
fn upsert_entity_collapses_near_duplicate_by_embedding() {
// The agent's pre-flight check uses FTS5 prefix tokens, which
// miss "Sarah" / "Sara" / "Sarah J." pairs. The DAO upsert is
// the safety net: if no exact (name, type) match but the new
// entity's embedding sits above the cosine threshold against an
// existing same-type entity, we collapse instead of inserting.
let cx = opentelemetry::Context::new();
let conn = connection_with_fks_on();
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let mut emb_a = vec![0.0_f32; 64];
emb_a[0] = 1.0;
emb_a[1] = 0.5;
let mut emb_b_near = emb_a.clone();
emb_b_near[2] = 0.05; // nudge — cosine still well above 0.92
// Seed an existing entity with the embedding.
let seeded = dao
.upsert_entity(
&cx,
InsertEntity {
name: "Sarah".to_string(),
entity_type: "person".to_string(),
description: "tagged friend".to_string(),
embedding: Some(SqliteKnowledgeDao::serialize_embedding(&emb_a)),
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
updated_at: 0,
},
)
.unwrap();
// A "different name" with a near-identical embedding should
// collapse onto the existing row, not create a new entity.
let collapsed = dao
.upsert_entity(
&cx,
InsertEntity {
name: "Sara".to_string(),
entity_type: "person".to_string(),
description: "tagged friend".to_string(),
embedding: Some(SqliteKnowledgeDao::serialize_embedding(&emb_b_near)),
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
updated_at: 0,
},
)
.unwrap();
assert_eq!(
collapsed.id, seeded.id,
"near-duplicate by cosine should reuse the existing entity id"
);
// And a clearly-different embedding under a different name should
// still create a new row.
let mut emb_unrelated = vec![0.0_f32; 64];
emb_unrelated[10] = 1.0;
let distinct = dao
.upsert_entity(
&cx,
InsertEntity {
name: "Bob".to_string(),
entity_type: "person".to_string(),
description: String::new(),
embedding: Some(SqliteKnowledgeDao::serialize_embedding(&emb_unrelated)),
confidence: 0.6,
status: "active".to_string(),
created_at: 0,
updated_at: 0,
},
)
.unwrap();
assert_ne!(
distinct.id, seeded.id,
"unrelated embedding should not collapse"
);
}
}