-- Local face recognition tables.
--
-- `persons` are visual identities (the "who" of a face). The optional
-- `entity_id` bridges to the existing knowledge graph `entities` table —
-- when set, this person is the visual side of an LLM-extracted entity.
-- Don't auto-create entities from persons; the entity table represents
-- LLM-extracted knowledge with its own confidence semantics, and silently
-- filling it from face detections muddies the provenance.
--
-- `face_detections` carries one row per detected face on a content_hash,
-- plus marker rows with `status='no_faces'` or `status='failed'` so the
-- file watcher knows not to re-scan a hash. Keying on `content_hash`
-- (cross-library dedup) rather than `(library_id, rel_path)` means the
-- same JPEG in two libraries is scanned once. The denormalized `rel_path`
-- carries the most-recently-seen path — useful for cluster-thumb URL
-- generation; canonical path lookup goes through image_exif.

CREATE TABLE persons (
    id               INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name             TEXT    NOT NULL,
    -- Backfilled when the first face binds. No foreign key is declared for
    -- this column in this file (presumably it holds a face_detections.id —
    -- TODO confirm), so the database does not protect it from dangling.
    cover_face_id    INTEGER,
    entity_id        INTEGER,                    -- optional bridge to entities(id)
    created_from_tag BOOLEAN NOT NULL DEFAULT 0,
    notes            TEXT,
    created_at       BIGINT  NOT NULL,
    updated_at       BIGINT  NOT NULL,
    -- Deleting the linked entity keeps the person and just clears the bridge.
    CONSTRAINT fk_persons_entity FOREIGN KEY (entity_id)
        REFERENCES entities(id) ON DELETE SET NULL,
    -- Names are unique ignoring case: 'Ann' and 'ann' collide.
    UNIQUE(name COLLATE NOCASE)
);

CREATE INDEX idx_persons_entity ON persons(entity_id);

CREATE TABLE face_detections (
    id            INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    library_id    INTEGER NOT NULL,
    content_hash  TEXT    NOT NULL,              -- canonical key (cross-library dedup)
    rel_path      TEXT    NOT NULL,              -- denormalized; most recently seen
    bbox_x        REAL,                          -- normalized 0..1; NULL on marker rows
    bbox_y        REAL,
    bbox_w        REAL,
    bbox_h        REAL,
    embedding     BLOB,                          -- 512×f32 = 2048 bytes; NULL on marker rows
    confidence    REAL,                          -- detector score
    source        TEXT    NOT NULL,              -- 'auto' | 'manual'
    person_id     INTEGER,
    status        TEXT    NOT NULL DEFAULT 'detected', -- 'detected' | 'no_faces' | 'failed'
    model_version TEXT    NOT NULL,              -- e.g. 'buffalo_l'; embedding lineage
    created_at    BIGINT  NOT NULL,
    -- No ON DELETE action is given, so SQLite's default (NO ACTION) applies:
    -- with foreign-key enforcement on, a library that still has rows here
    -- cannot be deleted.
    CONSTRAINT fk_fd_library FOREIGN KEY (library_id)
        REFERENCES libraries(id),
    -- Deleting a person un-assigns its faces rather than deleting them.
    CONSTRAINT fk_fd_person FOREIGN KEY (person_id)
        REFERENCES persons(id) ON DELETE SET NULL,
    -- Detected rows carry geometry + embedding; marker rows ('no_faces',
    -- 'failed') carry neither. CHECK enforces the invariant so manual
    -- inserts can't slip through with half a row. All four bbox columns
    -- are tested together: checking bbox_x alone would still admit a
    -- detected row with, say, a NULL bbox_h. Because both branches pin
    -- `status`, this also restricts `status` to the three known values.
    -- NOTE(review): if this migration has already been applied anywhere,
    -- the stricter CHECK only reaches new databases — confirm whether a
    -- follow-up migration (table rebuild) is needed for existing ones.
    CONSTRAINT chk_marker CHECK (
        (status = 'detected'
            AND bbox_x IS NOT NULL
            AND bbox_y IS NOT NULL
            AND bbox_w IS NOT NULL
            AND bbox_h IS NOT NULL
            AND embedding IS NOT NULL)
        OR
        (status IN ('no_faces','failed')
            AND bbox_x IS NULL
            AND bbox_y IS NULL
            AND bbox_w IS NULL
            AND bbox_h IS NULL
            AND embedding IS NULL)
    )
);

CREATE INDEX idx_face_detections_hash ON face_detections(content_hash);
CREATE INDEX idx_face_detections_lib_path ON face_detections(library_id, rel_path);
CREATE INDEX idx_face_detections_person ON face_detections(person_id);
CREATE INDEX idx_face_detections_status ON face_detections(status);

-- One marker row per (content_hash, status='no_faces') so the file watcher
-- doesn't double-mark when a hash is seen on multiple full-scan passes.
-- The partial index covers only 'no_faces'; 'detected' rows (one per face)
-- and 'failed' markers may repeat for the same content_hash.
CREATE UNIQUE INDEX idx_face_detections_no_faces_unique
    ON face_detections(content_hash)
    WHERE status = 'no_faces';