Files
ImageApi/src/database/models.rs
Cameron Cordes fbd769e475 personas: composite FK + built-in update guard
Two persona-infrastructure correctness fixes that go together because
the second one (FK with CASCADE) requires the first (preventing the
persona row from being mutated out from under its facts).

1. update_persona handler refuses name/systemPrompt edits to built-ins
   (409). includeAllMemories stays editable — that's a per-user
   preference, not the persona's identity. Mirrors the existing
   delete_persona guard. The DAO is intentionally permissive so the
   guard sits at the HTTP layer; persona_dao test pins that contract.

2. Migration 2026-05-10 adds user_id to entity_facts and a composite
   FK (user_id, persona_id) -> personas(user_id, persona_id) ON DELETE
   CASCADE. This closes two issues at once (the first two bullets
   below; the last two describe the supporting plumbing and backfill):

   - Persona orphans: deleting a custom persona used to leave its
     facts dangling forever, readable only via PersonaFilter::All.
     CASCADE now wipes them with the persona row.

   - Multi-user fact leakage: PersonaFilter::Single("default") used
     to surface every user's default-scoped facts. PersonaFilter is
     now { user_id, persona_id } and all read paths
     (get_facts_for_entity, list_facts, get_recent_activity) filter
     on user_id first. upsert_fact's dedup key extends to user_id so
     identical claims under shared persona names from different
     users no longer corroborate-bump each other's confidence.

   - user_id threads from Claims.sub.parse::<i32>().unwrap_or(1) at
     the chat / insight handlers through ChatTurnRequest, the
     streaming agentic loop, execute_tool, and into the leaf tools
     (tool_store_fact, tool_recall_facts_for_photo). The ".unwrap_or(1)"
     accommodates Apollo's service token whose sub is non-numeric on
     legacy mints.

   - Backfill picks the smallest user_id matching each legacy fact's
     persona_id so the FK holds for already-stored rows.

Five new knowledge_dao tests with FK-on connection: persona scoping
isolation, All-variant union per-user, dedup not crossing users,
CASCADE delete, FK rejection of unknown personas. Plus
dao_update_does_not_block_built_ins documenting where the
HTTP-layer guard lives.

Apollo coordinates separately — the matching changes there add the
/api/personas proxy and start sending persona_id on photo-chat turns.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 13:30:35 -04:00

342 lines
11 KiB
Rust

use crate::database::schema::{
entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, personas,
photo_insights, users, video_preview_clips,
};
use serde::Serialize;
/// New-row payload for the `users` table (Diesel `Insertable`).
///
/// Both strings are borrowed from the caller for the lifetime `'a`, so
/// building this struct does not allocate.
#[derive(Insertable)]
#[diesel(table_name = users)]
pub struct InsertUser<'a> {
pub username: &'a str,
// NOTE(review): presumably the already-hashed password rather than
// plaintext — confirm against the code that builds this struct.
pub password: &'a str,
}
/// Loaded user record; the read-side counterpart of `InsertUser`, with
/// the row `id` added.
///
/// Diesel's `Queryable` fills fields by position, so the field order
/// here must match the column order of the query that loads it.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct User {
pub id: i32,
pub username: String,
/// Left out of serialized output (`skip_serializing`), so it is not
/// written when a `User` is serialized.
#[serde(skip_serializing)]
pub password: String,
}
/// New-row payload for the `favorites` table (Diesel `Insertable`).
#[derive(Insertable)]
#[diesel(table_name = favorites)]
pub struct InsertFavorite<'a> {
/// Id of the user the favorite belongs to. Held by reference; switching
/// to a plain `i32` would change this struct's public shape.
pub userid: &'a i32,
/// Written to the `rel_path` column (field and column names differ).
#[diesel(column_name = rel_path)]
pub path: &'a str,
}
/// Loaded favorite record; the read-side counterpart of `InsertFavorite`,
/// with the row `id` added.
///
/// `Queryable` fills fields by position, so keep the field order in step
/// with the column order of the loading query.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Favorite {
pub id: i32,
pub userid: i32,
/// Named `rel_path` on the schema side, per the `column_name` attribute.
#[diesel(column_name = rel_path)]
pub path: String,
}
/// New-row payload for the `image_exif` table (Diesel `Insertable`).
///
/// Only `library_id`, `file_path`, `created_time` and `last_modified` are
/// mandatory; every other field is an `Option` and may be inserted as NULL.
/// The duplicate-tracking and manual-date-override columns present on
/// `ImageExif` are not part of this struct, so an insert through it does
/// not set them.
#[derive(Insertable)]
#[diesel(table_name = image_exif)]
pub struct InsertImageExif {
pub library_id: i32,
/// Written to the `rel_path` column (field and column names differ).
#[diesel(column_name = rel_path)]
pub file_path: String,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
// NOTE(review): the unit of the three timestamp fields below is not
// stated in this file (presumably Unix seconds) — confirm before relying
// on it.
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
pub content_hash: Option<String>,
pub size_bytes: Option<i64>,
/// 64-bit pHash (DCT) packed as i64. NULL for videos and decode failures.
pub phash_64: Option<i64>,
/// 64-bit dHash (gradient). NULL for videos and decode failures.
pub dhash_64: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`:
/// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
/// `date_taken` is NULL (no source resolved it). The per-tick backfill
/// drain re-resolves rows whose source is `"fs_time"` once exiftool
/// has had a chance to run.
pub date_taken_source: Option<String>,
}
// Field order matches the post-migration column order in `image_exif`.
/// Loaded `image_exif` record: everything in `InsertImageExif` plus the
/// row `id`, the duplicate-tracking columns and the manual-date-override
/// snapshot columns.
///
/// `Queryable` fills fields by position, so a new column must be added
/// here at the same position it occupies in the loading query.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct ImageExif {
pub id: i32,
pub library_id: i32,
/// Named `rel_path` on the schema side, per the `column_name` attribute.
#[diesel(column_name = rel_path)]
pub file_path: String,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
pub content_hash: Option<String>,
pub size_bytes: Option<i64>,
// Perceptual hashes; see the matching fields on `InsertImageExif` for
// when they are NULL.
pub phash_64: Option<i64>,
pub dhash_64: Option<i64>,
/// When non-null, this row is a soft-marked duplicate of the file
/// whose `content_hash` matches this value. The default `/photos`
/// listing filters such rows out.
pub duplicate_of_hash: Option<String>,
/// Unix seconds at which the resolve was committed.
pub duplicate_decided_at: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`.
/// Plus `"manual"` when the operator has set it via POST /image/exif/date.
pub date_taken_source: Option<String>,
/// Snapshot of the prior `date_taken` taken on first manual override.
/// NULL when no override is active. POST /image/exif/date/clear restores
/// `date_taken` from this column and nulls it back out.
pub original_date_taken: Option<i64>,
/// Snapshot of the prior `date_taken_source` taken on first manual
/// override. NULL when no override is active.
pub original_date_taken_source: Option<String>,
}
/// New-row payload for the `photo_insights` table (Diesel `Insertable`).
///
/// The `approved` column that appears on `PhotoInsight` is not part of
/// this struct, so an insert through it does not set that column.
#[derive(Insertable)]
#[diesel(table_name = photo_insights)]
pub struct InsertPhotoInsight {
pub library_id: i32,
/// Written to the `rel_path` column (field and column names differ).
#[diesel(column_name = rel_path)]
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
// NOTE(review): presumably marks the active insight for a photo when
// several generations exist — confirm against the DAO.
pub is_current: bool,
// Stored as an optional string; the serialization format is not visible
// in this file.
pub training_messages: Option<String>,
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String,
/// JSON array of insight ids whose `training_messages` were compressed
/// and injected into the system prompt as few-shot exemplars when this
/// row was generated. `None` means no few-shot was used (pristine
/// generation). Used downstream to filter out contaminated rows when
/// assembling an unbiased training / evaluation set.
pub fewshot_source_ids: Option<String>,
/// Bytes-keyed identity. When present, this insight is considered
/// to belong to the content rather than the path — see CLAUDE.md
/// "Multi-library data model". The DAO populates this from
/// `image_exif.content_hash` at insert time when known; rows
/// inserted before the hash is available stay null and the
/// reconciliation pass backfills them.
pub content_hash: Option<String>,
}
/// Loaded `photo_insights` record: the fields of `InsertPhotoInsight`
/// plus the row `id` and the `approved` flag.
///
/// `Queryable` fills fields by position; note that `approved` sits
/// between `training_messages` and `backend`, so the loading query must
/// yield its columns in that same order.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct PhotoInsight {
pub id: i32,
pub library_id: i32,
/// Named `rel_path` on the schema side, per the `column_name` attribute.
#[diesel(column_name = rel_path)]
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
pub is_current: bool,
pub training_messages: Option<String>,
/// Tri-state: `None` is distinct from `Some(false)`.
/// NOTE(review): presumably `None` means "not yet reviewed" — confirm.
pub approved: Option<bool>,
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String,
// See `InsertPhotoInsight` for the meaning of the two fields below.
pub fewshot_source_ids: Option<String>,
pub content_hash: Option<String>,
}
// --- Libraries ---
/// Loaded library record; the read-side counterpart of `InsertLibrary`,
/// with the row `id` added.
///
/// This is the raw database shape: `excluded_dirs` is still the
/// comma-separated string here and is parsed elsewhere (see the field
/// note). `Queryable` fills fields by position, so keep the field order
/// in step with the column order of the loading query.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct LibraryRow {
pub id: i32,
pub name: String,
pub root_path: String,
pub created_at: i64,
/// Operator kill switch. `false` = the watcher skips this library
/// entirely (no probe, no ingest, no maintenance) and orphan-GC
/// treats it as out-of-scope for the all-online consensus rule.
/// Toggle via SQL today — there is intentionally no HTTP endpoint
/// for library mutation (see CLAUDE.md "Multi-library data model").
pub enabled: bool,
/// Per-library excluded paths/patterns, stored comma-separated
/// (same shape as the global `EXCLUDED_DIRS` env var). NULL = no
/// extra excludes for this library; the global env var still
/// applies. The runtime `Library` struct parses this into a
/// `Vec<String>` and the walker applies the union of (global,
/// library) excludes when scanning. Use case: mount a parent
/// directory while another library covers a child subtree.
pub excluded_dirs: Option<String>,
}
/// New-row payload for the `libraries` table (Diesel `Insertable`).
///
/// String fields are borrowed from the caller for the lifetime `'a`.
#[derive(Insertable)]
#[diesel(table_name = libraries)]
pub struct InsertLibrary<'a> {
pub name: &'a str,
pub root_path: &'a str,
pub created_at: i64,
/// Inserted explicitly; this struct carries no default for it.
pub enabled: bool,
/// `None` inserts NULL. The read-side type is `LibraryRow`, whose
/// `excluded_dirs` field documents the comma-separated format.
pub excluded_dirs: Option<&'a str>,
}
// --- Knowledge memory models ---
/// New-row payload for the `entities` table (Diesel `Insertable`).
///
/// All fields are owned values, so the struct has no lifetime parameter.
#[derive(Insertable)]
#[diesel(table_name = entities)]
pub struct InsertEntity {
pub name: String,
// Free-form strings at this layer; the set of valid `entity_type` and
// `status` values is not defined in this file.
pub entity_type: String,
pub description: String,
// Raw bytes; the vector encoding (element type, dimension) is not
// visible here. `None` inserts NULL.
pub embedding: Option<Vec<u8>>,
// NOTE(review): presumably in the range 0.0..=1.0 — confirm.
pub confidence: f32,
pub status: String,
pub created_at: i64,
pub updated_at: i64,
}
/// Loaded entity record; the read-side counterpart of `InsertEntity`,
/// with the row `id` added.
///
/// `Queryable` fills fields by position, so keep the field order in step
/// with the column order of the loading query. Because the struct derives
/// `Serialize` with no field skipped, `embedding` is included when an
/// `Entity` is serialized.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Entity {
pub id: i32,
pub name: String,
pub entity_type: String,
pub description: String,
// Raw bytes; the encoding is not visible in this file.
pub embedding: Option<Vec<u8>>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
pub updated_at: i64,
}
/// New-row payload for the `entity_facts` table (Diesel `Insertable`).
///
/// A fact is a `(subject, predicate, object)` statement about an entity.
/// The object side has two optional slots: `object_entity_id` and
/// `object_value`.
/// NOTE(review): presumably exactly one of the two is set per fact; this
/// struct does not enforce that — confirm against the DAO.
#[derive(Insertable)]
#[diesel(table_name = entity_facts)]
pub struct InsertEntityFact {
pub subject_entity_id: i32,
pub predicate: String,
pub object_entity_id: Option<i32>,
pub object_value: Option<String>,
// Optional provenance: the photo and/or insight the fact came from.
pub source_photo: Option<String>,
pub source_insight_id: Option<i32>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
/// Which persona authored this fact. Shared entities, persona-tagged
/// facts: each persona accumulates its own voice over the same
/// real-world referents. Defaults to `'default'` for legacy rows
/// (see migration 2026-05-09-000000).
pub persona_id: String,
/// Author's user_id. Required for the composite FK to
/// `personas(user_id, persona_id)` (migration 2026-05-10-000000) and
/// for cross-user fact isolation: two users with the same 'default'
/// persona must not see each other's facts. Always paired with
/// `persona_id` — they're a unit.
pub user_id: i32,
}
/// Loaded fact record; the read-side counterpart of `InsertEntityFact`,
/// with the row `id` added.
///
/// `Queryable` fills fields by position: `persona_id` and `user_id` come
/// last here, so the loading query must yield them in that order after
/// `created_at`.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct EntityFact {
pub id: i32,
pub subject_entity_id: i32,
pub predicate: String,
pub object_entity_id: Option<i32>,
pub object_value: Option<String>,
pub source_photo: Option<String>,
pub source_insight_id: Option<i32>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
// Authoring persona and its owning user; `InsertEntityFact` documents
// why the two are treated as a unit.
pub persona_id: String,
pub user_id: i32,
}
/// New-row payload for the `entity_photo_links` table (Diesel
/// `Insertable`): ties one entity to one photo, identified by
/// `library_id` and the path in `file_path`.
#[derive(Insertable)]
#[diesel(table_name = entity_photo_links)]
pub struct InsertEntityPhotoLink {
pub entity_id: i32,
pub library_id: i32,
/// Written to the `rel_path` column (field and column names differ).
#[diesel(column_name = rel_path)]
pub file_path: String,
// Free-form string at this layer; the set of valid roles is not defined
// in this file.
pub role: String,
}
/// Loaded entity-to-photo link; the read-side counterpart of
/// `InsertEntityPhotoLink`, with the row `id` added.
///
/// `Queryable` fills fields by position, so keep the field order in step
/// with the column order of the loading query.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct EntityPhotoLink {
pub id: i32,
pub entity_id: i32,
pub library_id: i32,
/// Named `rel_path` on the schema side, per the `column_name` attribute.
#[diesel(column_name = rel_path)]
pub file_path: String,
pub role: String,
}
// --- Personas ---
/// New-row payload for the `personas` table (Diesel `Insertable`).
///
/// String fields are borrowed from the caller for the lifetime `'a`.
/// The pair `(user_id, persona_id)` is the target of the composite
/// foreign key described on `InsertEntityFact::user_id`.
#[derive(Insertable)]
#[diesel(table_name = personas)]
pub struct InsertPersona<'a> {
pub user_id: i32,
/// String identifier of the persona; distinct from the integer row `id`
/// that appears on the loaded `Persona`.
pub persona_id: &'a str,
pub name: &'a str,
pub system_prompt: &'a str,
pub is_built_in: bool,
pub include_all_memories: bool,
pub created_at: i64,
pub updated_at: i64,
}
/// Loaded persona record; the read-side counterpart of `InsertPersona`,
/// with the integer row `id` added.
///
/// `Queryable` fills fields by position, so keep the field order in step
/// with the column order of the loading query.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Persona {
/// Integer row id; not the same thing as the string `persona_id` below.
pub id: i32,
pub user_id: i32,
pub persona_id: String,
pub name: String,
pub system_prompt: String,
// NOTE(review): nothing in this struct prevents edits to a built-in
// persona; any such guard must live in the calling layer — confirm where.
pub is_built_in: bool,
pub include_all_memories: bool,
pub created_at: i64,
pub updated_at: i64,
}
/// New-row payload for the `video_preview_clips` table (Diesel
/// `Insertable`).
///
/// Carries only the identifying fields, a `status` and the two
/// timestamps. The `duration_seconds`, `file_size_bytes` and
/// `error_message` columns present on `VideoPreviewClip` are not set by an
/// insert through this struct.
#[derive(Insertable)]
#[diesel(table_name = video_preview_clips)]
pub struct InsertVideoPreviewClip {
pub library_id: i32,
/// Written to the `rel_path` column (field and column names differ).
#[diesel(column_name = rel_path)]
pub file_path: String,
// Free-form string at this layer; the set of valid statuses is not
// defined in this file.
pub status: String,
// Timestamps are strings in this table, unlike the `i64` timestamps used
// by the other models in this file. The text format is not visible here.
pub created_at: String,
pub updated_at: String,
}
/// Loaded preview-clip record: the fields of `InsertVideoPreviewClip`
/// plus the row `id` and three optional result columns.
///
/// `Queryable` fills fields by position; the three optional columns sit
/// between `status` and `created_at`, so the loading query must yield its
/// columns in that same order.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct VideoPreviewClip {
pub id: i32,
pub library_id: i32,
/// Named `rel_path` on the schema side, per the `column_name` attribute.
#[diesel(column_name = rel_path)]
pub file_path: String,
pub status: String,
// NOTE(review): the three optional fields below are presumably filled in
// once clip generation finishes or fails — confirm against the DAO.
pub duration_seconds: Option<f32>,
// `i32` caps the representable size at about 2 GiB.
pub file_size_bytes: Option<i32>,
pub error_message: Option<String>,
// String timestamps; the text format is not visible in this file.
pub created_at: String,
pub updated_at: String,
}