Files
ImageApi/src/database/models.rs
Cameron Cordes 01f5ad7527 knowledge: valid-time on facts + interval-aware conflict detection
Adds bitemporal support to entity_facts. Existing `created_at` is
transaction time (when we recorded the fact); the new
`valid_from` / `valid_until` BIGINT columns are valid time (when the
fact is/was true in the real world). NULL on either side = unbounded
on that side, both NULL = "always-true / unknown" — matches the
default state of every legacy row, no backfill needed.

The split matters for time-bounded predicates like
is_in_relationship_with / lives_in / works_at: recording the fact
once doesn't mean the relationship is still ongoing. Same predicate
across different windows ("lives_in NYC 2018-2020", "lives_in SF
2020-present") is no longer a conflict — the interval-aware check
in get_entity only flags pairs whose windows overlap. Facts with no
valid-time data still flag against everything (worst case for legacy
rows — user adds dates to suppress).

API surface:
- POST /knowledge/facts accepts optional valid_from / valid_until.
- PATCH /knowledge/facts/{id} accepts both with tri-state semantics:
  field omitted = leave alone, JSON null = clear to NULL, number =
  set. Implemented via a small serde helper around Option<Option>.
- GET /knowledge/entities/{id} surfaces both fields per fact and
  uses them in conflict detection.

Agent path (insight_generator) writes NULL/NULL for now — deriving
valid_from from the source photo's date_taken is slated for a
follow-up agent tool alongside Phase 2's supersession.

Test pins set + clear semantics via update_fact: setting both
bounds, leaving them alone on a subsequent patch, then clearing
valid_until back to NULL.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 19:25:55 -04:00

353 lines
11 KiB
Rust

use crate::database::schema::{
entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, personas,
photo_insights, users, video_preview_clips,
};
use serde::Serialize;
#[derive(Insertable)]
#[diesel(table_name = users)]
pub struct InsertUser<'a> {
pub username: &'a str,
pub password: &'a str,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct User {
pub id: i32,
pub username: String,
#[serde(skip_serializing)]
pub password: String,
}
#[derive(Insertable)]
#[diesel(table_name = favorites)]
pub struct InsertFavorite<'a> {
pub userid: &'a i32,
#[diesel(column_name = rel_path)]
pub path: &'a str,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Favorite {
pub id: i32,
pub userid: i32,
#[diesel(column_name = rel_path)]
pub path: String,
}
#[derive(Insertable)]
#[diesel(table_name = image_exif)]
pub struct InsertImageExif {
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
pub content_hash: Option<String>,
pub size_bytes: Option<i64>,
/// 64-bit pHash (DCT) packed as i64. NULL for videos and decode failures.
pub phash_64: Option<i64>,
/// 64-bit dHash (gradient). NULL for videos and decode failures.
pub dhash_64: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`:
/// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
/// `date_taken` is NULL (no source resolved it). The per-tick backfill
/// drain re-resolves rows whose source is `"fs_time"` once exiftool
/// has had a chance to run.
pub date_taken_source: Option<String>,
}
// Field order matches the post-migration column order in `image_exif`.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct ImageExif {
pub id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
pub content_hash: Option<String>,
pub size_bytes: Option<i64>,
pub phash_64: Option<i64>,
pub dhash_64: Option<i64>,
/// When non-null, this row is a soft-marked duplicate of the file
/// whose `content_hash` matches this value. The default `/photos`
/// listing filters such rows out.
pub duplicate_of_hash: Option<String>,
/// Unix seconds at which the resolve was committed.
pub duplicate_decided_at: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`.
/// Plus `"manual"` when the operator has set it via POST /image/exif/date.
pub date_taken_source: Option<String>,
/// Snapshot of the prior `date_taken` taken on first manual override.
/// NULL when no override is active. POST /image/exif/date/clear restores
/// `date_taken` from this column and nulls it back out.
pub original_date_taken: Option<i64>,
/// Snapshot of the prior `date_taken_source` taken on first manual
/// override. NULL when no override is active.
pub original_date_taken_source: Option<String>,
}
#[derive(Insertable)]
#[diesel(table_name = photo_insights)]
pub struct InsertPhotoInsight {
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
pub is_current: bool,
pub training_messages: Option<String>,
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String,
/// JSON array of insight ids whose `training_messages` were compressed
/// and injected into the system prompt as few-shot exemplars when this
/// row was generated. `None` means no few-shot was used (pristine
/// generation). Used downstream to filter out contaminated rows when
/// assembling an unbiased training / evaluation set.
pub fewshot_source_ids: Option<String>,
/// Bytes-keyed identity. When present, this insight is considered
/// to belong to the content rather than the path — see CLAUDE.md
/// "Multi-library data model". The DAO populates this from
/// `image_exif.content_hash` at insert time when known; rows
/// inserted before the hash is available stay null and the
/// reconciliation pass backfills them.
pub content_hash: Option<String>,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct PhotoInsight {
pub id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
pub is_current: bool,
pub training_messages: Option<String>,
pub approved: Option<bool>,
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String,
pub fewshot_source_ids: Option<String>,
pub content_hash: Option<String>,
}
// --- Libraries ---
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct LibraryRow {
pub id: i32,
pub name: String,
pub root_path: String,
pub created_at: i64,
/// Operator kill switch. `false` = the watcher skips this library
/// entirely (no probe, no ingest, no maintenance) and orphan-GC
/// treats it as out-of-scope for the all-online consensus rule.
/// Toggle via SQL today — there is intentionally no HTTP endpoint
/// for library mutation (see CLAUDE.md "Multi-library data model").
pub enabled: bool,
/// Per-library excluded paths/patterns, stored comma-separated
/// (same shape as the global `EXCLUDED_DIRS` env var). NULL = no
/// extra excludes for this library; the global env var still
/// applies. The runtime `Library` struct parses this into a
/// `Vec<String>` and the walker applies the union of (global,
/// library) excludes when scanning. Use case: mount a parent
/// directory while another library covers a child subtree.
pub excluded_dirs: Option<String>,
}
#[derive(Insertable)]
#[diesel(table_name = libraries)]
pub struct InsertLibrary<'a> {
pub name: &'a str,
pub root_path: &'a str,
pub created_at: i64,
pub enabled: bool,
pub excluded_dirs: Option<&'a str>,
}
// --- Knowledge memory models ---
#[derive(Insertable)]
#[diesel(table_name = entities)]
pub struct InsertEntity {
pub name: String,
pub entity_type: String,
pub description: String,
pub embedding: Option<Vec<u8>>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
pub updated_at: i64,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Entity {
pub id: i32,
pub name: String,
pub entity_type: String,
pub description: String,
pub embedding: Option<Vec<u8>>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
pub updated_at: i64,
}
#[derive(Insertable)]
#[diesel(table_name = entity_facts)]
pub struct InsertEntityFact {
pub subject_entity_id: i32,
pub predicate: String,
pub object_entity_id: Option<i32>,
pub object_value: Option<String>,
pub source_photo: Option<String>,
pub source_insight_id: Option<i32>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
/// Which persona authored this fact. Shared entities, persona-tagged
/// facts: each persona accumulates its own voice over the same
/// real-world referents. Defaults to `'default'` for legacy rows
/// (see migration 2026-05-09-000000).
pub persona_id: String,
/// Author's user_id. Required for the composite FK to
/// `personas(user_id, persona_id)` (migration 2026-05-10-000000) and
/// for cross-user fact isolation: two users with the same 'default'
/// persona must not see each other's facts. Always paired with
/// `persona_id` — they're a unit.
pub user_id: i32,
/// Real-world period the fact is/was true (unix seconds). NULL on
/// either side = unbounded — `valid_from IS NULL` reads as
/// "always-true-back-to-the-beginning", `valid_until IS NULL` as
/// "still-true-now-or-unknown". Distinguishes valid time from
/// transaction time (`created_at` is when we recorded the fact,
/// not when it was true in the world). See migration
/// 2026-05-10-000100.
pub valid_from: Option<i64>,
pub valid_until: Option<i64>,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct EntityFact {
pub id: i32,
pub subject_entity_id: i32,
pub predicate: String,
pub object_entity_id: Option<i32>,
pub object_value: Option<String>,
pub source_photo: Option<String>,
pub source_insight_id: Option<i32>,
pub confidence: f32,
pub status: String,
pub created_at: i64,
pub persona_id: String,
pub user_id: i32,
pub valid_from: Option<i64>,
pub valid_until: Option<i64>,
}
#[derive(Insertable)]
#[diesel(table_name = entity_photo_links)]
pub struct InsertEntityPhotoLink {
pub entity_id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub role: String,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct EntityPhotoLink {
pub id: i32,
pub entity_id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub role: String,
}
// --- Personas ---
#[derive(Insertable)]
#[diesel(table_name = personas)]
pub struct InsertPersona<'a> {
pub user_id: i32,
pub persona_id: &'a str,
pub name: &'a str,
pub system_prompt: &'a str,
pub is_built_in: bool,
pub include_all_memories: bool,
pub created_at: i64,
pub updated_at: i64,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Persona {
pub id: i32,
pub user_id: i32,
pub persona_id: String,
pub name: String,
pub system_prompt: String,
pub is_built_in: bool,
pub include_all_memories: bool,
pub created_at: i64,
pub updated_at: i64,
}
#[derive(Insertable)]
#[diesel(table_name = video_preview_clips)]
pub struct InsertVideoPreviewClip {
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub status: String,
pub created_at: String,
pub updated_at: String,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct VideoPreviewClip {
pub id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String,
pub status: String,
pub duration_seconds: Option<f32>,
pub file_size_bytes: Option<i32>,
pub error_message: Option<String>,
pub created_at: String,
pub updated_at: String,
}