004 Multi-library Support #54

Merged
cameron merged 19 commits from 004-multi-library into master 2026-04-21 01:55:23 +00:00
34 changed files with 3210 additions and 819 deletions

Cargo.lock (generated)
View File

@@ -474,6 +474,12 @@ dependencies = [
  "syn",
 ]

+[[package]]
+name = "arrayref"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
+
 [[package]]
 name = "arrayvec"
 version = "0.7.6"
@@ -572,6 +578,20 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2"

+[[package]]
+name = "blake3"
+version = "1.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+ "cpufeatures 0.3.0",
+]
+
 [[package]]
 name = "block-buffer"
 version = "0.10.4"
@@ -766,6 +786,12 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"

+[[package]]
+name = "constant_time_eq"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
+
 [[package]]
 name = "convert_case"
 version = "0.4.0"
@@ -808,6 +834,15 @@ dependencies = [
  "libc",
 ]

+[[package]]
+name = "cpufeatures"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "crc32fast"
 version = "1.5.0"
@@ -1797,7 +1832,7 @@ dependencies = [
 [[package]]
 name = "image-api"
-version = "0.5.2"
+version = "1.0.0"
 dependencies = [
  "actix",
  "actix-cors",
@@ -1810,6 +1845,7 @@ dependencies = [
  "anyhow",
  "base64",
  "bcrypt",
+ "blake3",
  "chrono",
  "clap",
  "diesel",
@@ -3365,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
 dependencies = [
  "cfg-if",
- "cpufeatures",
+ "cpufeatures 0.2.17",
  "digest",
 ]

View File

@@ -1,6 +1,6 @@
 [package]
 name = "image-api"
-version = "0.5.2"
+version = "1.0.0"
 authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
 edition = "2024"
@@ -55,3 +55,4 @@ zerocopy = "0.8"
 ical = "0.11"
 scraper = "0.20"
 base64 = "0.22"
+blake3 = "1.5"

View File

@@ -0,0 +1,155 @@
-- Revert multi-library support.
-- Drops library_id/content_hash/size_bytes, renames rel_path back to each
-- table's original column name, and drops the libraries table. Rows
-- originally from non-primary libraries (id > 1) would be orphaned, so the
-- rollback keeps only rows with library_id = 1.
PRAGMA foreign_keys=OFF;
-- tagged_photo: rel_path → photo_name.
DROP INDEX IF EXISTS idx_tagged_photo_relpath_tag;
DROP INDEX IF EXISTS idx_tagged_photo_rel_path;
ALTER TABLE tagged_photo RENAME COLUMN rel_path TO photo_name;
CREATE INDEX IF NOT EXISTS idx_tagged_photo_photo_name ON tagged_photo(photo_name);
CREATE INDEX IF NOT EXISTS idx_tagged_photo_count ON tagged_photo(photo_name, tag_id);
-- favorites: rel_path → path.
DROP INDEX IF EXISTS idx_favorites_unique;
DROP INDEX IF EXISTS idx_favorites_rel_path;
ALTER TABLE favorites RENAME COLUMN rel_path TO path;
CREATE INDEX IF NOT EXISTS idx_favorites_path ON favorites(path);
CREATE UNIQUE INDEX IF NOT EXISTS idx_favorites_unique ON favorites(userid, path);
-- video_preview_clips: drop library_id, rel_path → file_path.
CREATE TABLE video_preview_clips_old (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
file_path TEXT NOT NULL UNIQUE,
status TEXT NOT NULL DEFAULT 'pending',
duration_seconds REAL,
file_size_bytes INTEGER,
error_message TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
INSERT INTO video_preview_clips_old (
id, file_path, status, duration_seconds, file_size_bytes,
error_message, created_at, updated_at
)
SELECT
id, rel_path, status, duration_seconds, file_size_bytes,
error_message, created_at, updated_at
FROM video_preview_clips
WHERE library_id = 1;
DROP TABLE video_preview_clips;
ALTER TABLE video_preview_clips_old RENAME TO video_preview_clips;
CREATE INDEX idx_preview_clips_file_path ON video_preview_clips(file_path);
CREATE INDEX idx_preview_clips_status ON video_preview_clips(status);
-- entity_photo_links: drop library_id, rel_path → file_path.
CREATE TABLE entity_photo_links_old (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
entity_id INTEGER NOT NULL,
file_path TEXT NOT NULL,
role TEXT NOT NULL,
CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE,
UNIQUE(entity_id, file_path, role)
);
INSERT INTO entity_photo_links_old (id, entity_id, file_path, role)
SELECT id, entity_id, rel_path, role
FROM entity_photo_links
WHERE library_id = 1;
DROP TABLE entity_photo_links;
ALTER TABLE entity_photo_links_old RENAME TO entity_photo_links;
CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id);
CREATE INDEX idx_entity_photo_links_photo ON entity_photo_links(file_path);
-- photo_insights: drop library_id, rel_path → file_path.
CREATE TABLE photo_insights_old (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
file_path TEXT NOT NULL,
title TEXT NOT NULL,
summary TEXT NOT NULL,
generated_at BIGINT NOT NULL,
model_version TEXT NOT NULL,
is_current BOOLEAN NOT NULL DEFAULT 0,
training_messages TEXT,
approved BOOLEAN
);
INSERT INTO photo_insights_old (
id, file_path, title, summary, generated_at, model_version, is_current,
training_messages, approved
)
SELECT
id, rel_path, title, summary, generated_at, model_version, is_current,
training_messages, approved
FROM photo_insights
WHERE library_id = 1;
DROP TABLE photo_insights;
ALTER TABLE photo_insights_old RENAME TO photo_insights;
CREATE INDEX idx_photo_insights_file_path ON photo_insights(file_path);
CREATE INDEX idx_photo_insights_current ON photo_insights(file_path, is_current);
-- image_exif: drop library_id/content_hash/size_bytes, rel_path → file_path.
CREATE TABLE image_exif_old (
id INTEGER PRIMARY KEY NOT NULL,
file_path TEXT NOT NULL UNIQUE,
camera_make TEXT,
camera_model TEXT,
lens_model TEXT,
width INTEGER,
height INTEGER,
orientation INTEGER,
gps_latitude REAL,
gps_longitude REAL,
gps_altitude REAL,
focal_length REAL,
aperture REAL,
shutter_speed TEXT,
iso INTEGER,
date_taken BIGINT,
created_time BIGINT NOT NULL,
last_modified BIGINT NOT NULL
);
INSERT INTO image_exif_old (
id, file_path,
camera_make, camera_model, lens_model,
width, height, orientation,
gps_latitude, gps_longitude, gps_altitude,
focal_length, aperture, shutter_speed, iso, date_taken,
created_time, last_modified
)
SELECT
id, rel_path,
camera_make, camera_model, lens_model,
width, height, orientation,
gps_latitude, gps_longitude, gps_altitude,
focal_length, aperture, shutter_speed, iso, date_taken,
created_time, last_modified
FROM image_exif
WHERE library_id = 1;
DROP TABLE image_exif;
ALTER TABLE image_exif_old RENAME TO image_exif;
CREATE INDEX idx_image_exif_file_path ON image_exif(file_path);
CREATE INDEX idx_image_exif_camera ON image_exif(camera_make, camera_model);
CREATE INDEX idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude);
CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken);
CREATE INDEX idx_image_exif_date_path ON image_exif(date_taken DESC, file_path);
-- Finally, drop the libraries registry.
DROP TABLE libraries;
PRAGMA foreign_keys=ON;
ANALYZE;

View File

@@ -0,0 +1,216 @@
-- Multi-library support.
-- Adds `libraries` registry table and a `library_id` column on per-instance
-- metadata tables. Renames `file_path` / `photo_name` to `rel_path` for
-- semantic clarity (values already stored relative to BASE_PATH).
-- Adds `content_hash` + `size_bytes` to `image_exif` to support
-- content-based dedup of thumbnails and HLS output across libraries.
--
-- SQLite cannot alter column constraints in place, so per-instance tables
-- are recreated following the idiom established in
-- 2026-04-02-000000_photo_insights_history/up.sql. Existing row `id`s are
-- preserved so foreign keys (entity_facts.source_insight_id, etc.) remain
-- valid after migration.
PRAGMA foreign_keys=OFF;
-- ---------------------------------------------------------------------------
-- 1. Libraries registry.
-- Seeded with a placeholder for the primary library; AppState patches
-- `root_path` from the BASE_PATH env var on first boot. Subsequent
-- prod-to-dev DB syncs update this row via a single SQL UPDATE.
-- ---------------------------------------------------------------------------
CREATE TABLE libraries (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TEXT NOT NULL UNIQUE,
root_path TEXT NOT NULL,
created_at BIGINT NOT NULL
);
INSERT INTO libraries (id, name, root_path, created_at)
VALUES (1, 'main', 'BASE_PATH_PLACEHOLDER', strftime('%s','now'));
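-- An illustrative sync statement (hypothetical path; not executed by this
-- migration):
--   UPDATE libraries SET root_path = '/mnt/photos' WHERE id = 1;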
-- ---------------------------------------------------------------------------
-- 2. image_exif: + library_id, file_path → rel_path, + content_hash/size_bytes.
-- ---------------------------------------------------------------------------
CREATE TABLE image_exif_new (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
library_id INTEGER NOT NULL REFERENCES libraries(id),
rel_path TEXT NOT NULL,
-- Camera information
camera_make TEXT,
camera_model TEXT,
lens_model TEXT,
-- Image properties
width INTEGER,
height INTEGER,
orientation INTEGER,
-- GPS
gps_latitude REAL,
gps_longitude REAL,
gps_altitude REAL,
-- Capture settings
focal_length REAL,
aperture REAL,
shutter_speed TEXT,
iso INTEGER,
date_taken BIGINT,
-- Housekeeping
created_time BIGINT NOT NULL,
last_modified BIGINT NOT NULL,
-- Content identity (backfilled by the `backfill_hashes` binary and by the watcher for new files)
content_hash TEXT,
size_bytes BIGINT,
UNIQUE(library_id, rel_path)
);
INSERT INTO image_exif_new (
id, library_id, rel_path,
camera_make, camera_model, lens_model,
width, height, orientation,
gps_latitude, gps_longitude, gps_altitude,
focal_length, aperture, shutter_speed, iso, date_taken,
created_time, last_modified
)
SELECT
id, 1, file_path,
camera_make, camera_model, lens_model,
width, height, orientation,
gps_latitude, gps_longitude, gps_altitude,
focal_length, aperture, shutter_speed, iso, date_taken,
created_time, last_modified
FROM image_exif;
DROP TABLE image_exif;
ALTER TABLE image_exif_new RENAME TO image_exif;
CREATE INDEX idx_image_exif_rel_path ON image_exif(rel_path);
CREATE INDEX idx_image_exif_camera ON image_exif(camera_make, camera_model);
CREATE INDEX idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude);
CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken);
CREATE INDEX idx_image_exif_date_path ON image_exif(date_taken DESC, rel_path);
CREATE INDEX idx_image_exif_lib_date ON image_exif(library_id, date_taken);
CREATE INDEX idx_image_exif_content_hash ON image_exif(content_hash);
-- ---------------------------------------------------------------------------
-- 3. photo_insights: + library_id, file_path → rel_path.
-- Preserve `id` so entity_facts.source_insight_id FKs remain valid.
-- ---------------------------------------------------------------------------
CREATE TABLE photo_insights_new (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
library_id INTEGER NOT NULL REFERENCES libraries(id),
rel_path TEXT NOT NULL,
title TEXT NOT NULL,
summary TEXT NOT NULL,
generated_at BIGINT NOT NULL,
model_version TEXT NOT NULL,
is_current BOOLEAN NOT NULL DEFAULT 0,
training_messages TEXT,
approved BOOLEAN
);
INSERT INTO photo_insights_new (
id, library_id, rel_path, title, summary, generated_at, model_version,
is_current, training_messages, approved
)
SELECT
id, 1, file_path, title, summary, generated_at, model_version,
is_current, training_messages, approved
FROM photo_insights;
DROP TABLE photo_insights;
ALTER TABLE photo_insights_new RENAME TO photo_insights;
CREATE INDEX idx_photo_insights_rel_path ON photo_insights(rel_path);
CREATE INDEX idx_photo_insights_current ON photo_insights(library_id, rel_path, is_current);
-- ---------------------------------------------------------------------------
-- 4. entity_photo_links: + library_id, file_path → rel_path.
-- Preserves entity FK; UNIQUE now includes library_id to allow the same
-- rel_path to link entities in multiple libraries independently.
-- ---------------------------------------------------------------------------
CREATE TABLE entity_photo_links_new (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
entity_id INTEGER NOT NULL,
library_id INTEGER NOT NULL REFERENCES libraries(id),
rel_path TEXT NOT NULL,
role TEXT NOT NULL,
CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE,
UNIQUE(entity_id, library_id, rel_path, role)
);
INSERT INTO entity_photo_links_new (id, entity_id, library_id, rel_path, role)
SELECT id, entity_id, 1, file_path, role FROM entity_photo_links;
DROP TABLE entity_photo_links;
ALTER TABLE entity_photo_links_new RENAME TO entity_photo_links;
CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id);
CREATE INDEX idx_entity_photo_links_photo ON entity_photo_links(library_id, rel_path);
-- ---------------------------------------------------------------------------
-- 5. video_preview_clips: + library_id, file_path → rel_path.
-- ---------------------------------------------------------------------------
CREATE TABLE video_preview_clips_new (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
library_id INTEGER NOT NULL REFERENCES libraries(id),
rel_path TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
duration_seconds REAL,
file_size_bytes INTEGER,
error_message TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
UNIQUE(library_id, rel_path)
);
INSERT INTO video_preview_clips_new (
id, library_id, rel_path, status, duration_seconds, file_size_bytes,
error_message, created_at, updated_at
)
SELECT
id, 1, file_path, status, duration_seconds, file_size_bytes,
error_message, created_at, updated_at
FROM video_preview_clips;
DROP TABLE video_preview_clips;
ALTER TABLE video_preview_clips_new RENAME TO video_preview_clips;
CREATE INDEX idx_preview_clips_rel_path ON video_preview_clips(rel_path);
CREATE INDEX idx_preview_clips_status ON video_preview_clips(status);
-- ---------------------------------------------------------------------------
-- 6. favorites: path → rel_path. Library-agnostic (cross-library sharing).
-- ---------------------------------------------------------------------------
ALTER TABLE favorites RENAME COLUMN path TO rel_path;
DROP INDEX IF EXISTS idx_favorites_path;
DROP INDEX IF EXISTS idx_favorites_unique;
CREATE INDEX idx_favorites_rel_path ON favorites(rel_path);
CREATE UNIQUE INDEX idx_favorites_unique ON favorites(userid, rel_path);
-- ---------------------------------------------------------------------------
-- 7. tagged_photo: photo_name → rel_path. Library-agnostic.
-- Dedup first so the (rel_path, tag_id) unique index can be created safely.
-- ---------------------------------------------------------------------------
ALTER TABLE tagged_photo RENAME COLUMN photo_name TO rel_path;
DELETE FROM tagged_photo
WHERE id NOT IN (
SELECT MIN(id) FROM tagged_photo GROUP BY rel_path, tag_id
);
DROP INDEX IF EXISTS idx_tagged_photo_photo_name;
DROP INDEX IF EXISTS idx_tagged_photo_count;
CREATE INDEX idx_tagged_photo_rel_path ON tagged_photo(rel_path);
CREATE UNIQUE INDEX idx_tagged_photo_relpath_tag ON tagged_photo(rel_path, tag_id);
PRAGMA foreign_keys=ON;
ANALYZE;

View File

@@ -0,0 +1,4 @@
-- No-op: there's no sensible way to recover which rows originally used
-- backslashes, and there's no reason to want backslashes back. The
-- deleted duplicates are also gone.
SELECT 1;

View File

@@ -0,0 +1,85 @@
-- Normalize `rel_path` columns to forward slashes. Windows ingest
-- historically produced a mix of `\` and `/`, which broke lookups and
-- caused spurious UNIQUE-constraint violations on re-registration.
--
-- SQLite enforces UNIQUE per-row during UPDATE, so we have to drop
-- losing duplicates BEFORE normalizing. For each table that has a
-- UNIQUE on rel_path, we delete rows whose normalized form already
-- exists in canonical (forward-slash) form — keeping the existing
-- forward-slash row as the survivor. Then a flat UPDATE finishes the
-- job for remaining backslash rows.
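-- A worked example with hypothetical rows: given 'Sarah\img.jpeg' and
-- 'Sarah/img.jpeg' in the same library, the backslash row is the loser and
-- is deleted; a lone 'Trip\photo.jpg' row survives and is rewritten to
-- 'Trip/photo.jpg' by the UPDATE that follows.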
-- image_exif: UNIQUE(library_id, rel_path)
DELETE FROM image_exif
WHERE rel_path LIKE '%\%'
AND EXISTS (
SELECT 1 FROM image_exif AS other
WHERE other.library_id = image_exif.library_id
AND other.rel_path = REPLACE(image_exif.rel_path, '\', '/')
AND other.id != image_exif.id
);
UPDATE image_exif
SET rel_path = REPLACE(rel_path, '\', '/')
WHERE rel_path LIKE '%\%';
-- favorites: UNIQUE(userid, rel_path)
DELETE FROM favorites
WHERE rel_path LIKE '%\%'
AND EXISTS (
SELECT 1 FROM favorites AS other
WHERE other.userid = favorites.userid
AND other.rel_path = REPLACE(favorites.rel_path, '\', '/')
AND other.id != favorites.id
);
UPDATE favorites
SET rel_path = REPLACE(rel_path, '\', '/')
WHERE rel_path LIKE '%\%';
-- tagged_photo: UNIQUE(rel_path, tag_id)
DELETE FROM tagged_photo
WHERE rel_path LIKE '%\%'
AND EXISTS (
SELECT 1 FROM tagged_photo AS other
WHERE other.tag_id = tagged_photo.tag_id
AND other.rel_path = REPLACE(tagged_photo.rel_path, '\', '/')
AND other.id != tagged_photo.id
);
UPDATE tagged_photo
SET rel_path = REPLACE(rel_path, '\', '/')
WHERE rel_path LIKE '%\%';
-- entity_photo_links: UNIQUE(entity_id, library_id, rel_path, role)
DELETE FROM entity_photo_links
WHERE rel_path LIKE '%\%'
AND EXISTS (
SELECT 1 FROM entity_photo_links AS other
WHERE other.entity_id = entity_photo_links.entity_id
AND other.library_id = entity_photo_links.library_id
AND other.role = entity_photo_links.role
AND other.rel_path = REPLACE(entity_photo_links.rel_path, '\', '/')
AND other.id != entity_photo_links.id
);
UPDATE entity_photo_links
SET rel_path = REPLACE(rel_path, '\', '/')
WHERE rel_path LIKE '%\%';
-- video_preview_clips: UNIQUE(library_id, rel_path)
DELETE FROM video_preview_clips
WHERE rel_path LIKE '%\%'
AND EXISTS (
SELECT 1 FROM video_preview_clips AS other
WHERE other.library_id = video_preview_clips.library_id
AND other.rel_path = REPLACE(video_preview_clips.rel_path, '\', '/')
AND other.id != video_preview_clips.id
);
UPDATE video_preview_clips
SET rel_path = REPLACE(rel_path, '\', '/')
WHERE rel_path LIKE '%\%';
-- photo_insights has no UNIQUE on rel_path (history table), so a plain
-- normalize is safe.
UPDATE photo_insights
SET rel_path = REPLACE(rel_path, '\', '/')
WHERE rel_path LIKE '%\%';
ANALYZE;

View File

@@ -5,8 +5,10 @@ use serde::{Deserialize, Serialize};
 use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
 use crate::data::Claims;
-use crate::database::InsightDao;
+use crate::database::{ExifDao, InsightDao};
+use crate::libraries;
 use crate::otel::{extract_context_from_request, global_tracer};
+use crate::state::AppState;
 use crate::utils::normalize_path;

 #[derive(Debug, Deserialize)]
@@ -31,6 +33,10 @@ pub struct GeneratePhotoInsightRequest {
 #[derive(Debug, Deserialize)]
 pub struct GetPhotoInsightQuery {
     pub path: String,
+    /// Library context for this lookup. Used to pick the right content
+    /// hash when the same rel_path exists under multiple roots.
+    #[serde(default)]
+    pub library: Option<String>,
 }

 #[derive(Debug, Deserialize)]
@@ -146,15 +152,30 @@ pub async fn generate_insight_handler(
 pub async fn get_insight_handler(
     _claims: Claims,
     query: web::Query<GetPhotoInsightQuery>,
+    app_state: web::Data<AppState>,
     insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
+    exif_dao: web::Data<std::sync::Mutex<Box<dyn ExifDao>>>,
 ) -> impl Responder {
     let normalized_path = normalize_path(&query.path);
     log::debug!("Fetching insight for {}", normalized_path);
     let otel_context = opentelemetry::Context::new();

+    // Expand to rel_paths sharing content so an insight generated under
+    // library 1 still shows when the same photo is viewed from library 2.
+    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+        .ok()
+        .flatten()
+        .unwrap_or_else(|| app_state.primary_library());
+    let sibling_paths = {
+        let mut exif = exif_dao.lock().expect("Unable to lock ExifDao");
+        exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path)
+            .unwrap_or_else(|_| vec![normalized_path.clone()])
+    };
     let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
-    match dao.get_insight(&otel_context, &normalized_path) {
+    match dao.get_insight_for_paths(&otel_context, &sibling_paths) {
         Ok(Some(insight)) => {
             let response = PhotoInsightResponse {
                 id: insight.id,
@@ -482,7 +503,10 @@ pub async fn export_training_data_handler(
             HttpResponse::Ok()
                 .content_type("application/jsonl")
-                .insert_header(("Content-Disposition", "attachment; filename=\"training_data.jsonl\""))
+                .insert_header((
+                    "Content-Disposition",
+                    "attachment; filename=\"training_data.jsonl\"",
+                ))
                 .body(jsonl)
         }
         Err(e) => {

View File

@@ -16,6 +16,7 @@ use crate::database::{
     CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
     SearchHistoryDao,
 };
+use crate::libraries::Library;
 use crate::memories::extract_date_from_filename;
 use crate::otel::global_tracer;
 use crate::tags::TagDao;
@@ -52,7 +53,7 @@ pub struct InsightGenerator {
     // Knowledge memory
     knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
-    base_path: String,
+    libraries: Vec<Library>,
 }

 impl InsightGenerator {
@@ -67,7 +68,7 @@ impl InsightGenerator {
         search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
         tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
         knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
-        base_path: String,
+        libraries: Vec<Library>,
     ) -> Self {
         Self {
             ollama,
@@ -80,10 +81,25 @@ impl InsightGenerator {
             search_dao,
             tag_dao,
             knowledge_dao,
-            base_path,
+            libraries,
         }
     }

+    /// Resolve `rel_path` against the configured libraries, returning the
+    /// first root under which the file exists. Insights may be generated
+    /// for any library — the generator itself doesn't know which — so we
+    /// probe each root rather than trust a single `base_path`.
+    fn resolve_full_path(&self, rel_path: &str) -> Option<std::path::PathBuf> {
+        use std::path::Path;
+        for lib in &self.libraries {
+            let candidate = Path::new(&lib.root_path).join(rel_path);
+            if candidate.exists() {
+                return Some(candidate);
+            }
+        }
+        None
+    }
+
     /// Extract contact name from file path
     /// e.g., "Sarah/img.jpeg" -> Some("Sarah")
     /// e.g., "img.jpeg" -> None
@@ -108,9 +124,13 @@ impl InsightGenerator {
     /// Resizes to max 1024px on longest edge to reduce context usage
     fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
         use image::imageops::FilterType;
-        use std::path::Path;

-        let full_path = Path::new(&self.base_path).join(file_path);
+        let full_path = self.resolve_full_path(file_path).ok_or_else(|| {
+            anyhow::anyhow!(
+                "File '{}' not found under any configured library",
+                file_path
+            )
+        })?;

         log::debug!("Loading image for vision model: {:?}", full_path);
@@ -420,7 +440,11 @@ impl InsightGenerator {
             .iter()
             .map(|e| {
                 let date = DateTime::from_timestamp(e.start_time, 0)
-                    .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                    .map(|dt| {
+                        dt.with_timezone(&Local)
+                            .format("%Y-%m-%d %H:%M")
+                            .to_string()
+                    })
                     .unwrap_or_else(|| "unknown".to_string());

                 let attendees = e
@@ -725,8 +749,7 @@ impl InsightGenerator {
         extract_date_from_filename(&file_path)
             .map(|dt| dt.timestamp())
             .or_else(|| {
-                // Combine base_path with file_path to get full path
-                let full_path = std::path::Path::new(&self.base_path).join(&file_path);
+                let full_path = self.resolve_full_path(&file_path)?;
                 File::open(&full_path)
                     .and_then(|f| f.metadata())
                     .and_then(|m| m.created().or(m.modified()))
@@ -1187,6 +1210,7 @@ impl InsightGenerator {
         // 11. Store in database
         let insight = InsertPhotoInsight {
+            library_id: crate::libraries::PRIMARY_LIBRARY_ID,
             file_path: file_path.to_string(),
             title,
             summary,
@@ -1334,7 +1358,11 @@ Return ONLY the summary, nothing else."#,
             .map(|m| {
                 let sender = if m.is_sent { "Me" } else { &m.contact };
                 let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0)
-                    .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                    .map(|dt| {
+                        dt.with_timezone(&Local)
+                            .format("%Y-%m-%d %H:%M")
+                            .to_string()
+                    })
                     .unwrap_or_else(|| "unknown time".to_string());
                 format!("[{}] {}: {}", timestamp, sender, m.body)
             })
@@ -1429,16 +1457,22 @@ Return ONLY the summary, nothing else."#,
             .get("contact")
             .and_then(|v| v.as_str())
             .map(|s| s.to_string());
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(10)
+            .clamp(1, 25) as usize;

         log::info!(
-            "tool_search_rag: query='{}', date={}, contact={:?}",
+            "tool_search_rag: query='{}', date={}, contact={:?}, limit={}",
             query,
             date,
-            contact
+            contact,
+            limit
         );

         match self
-            .find_relevant_messages_rag(date, None, contact.as_deref(), None, 5, Some(&query))
+            .find_relevant_messages_rag(date, None, contact.as_deref(), None, limit, Some(&query))
             .await
         {
             Ok(results) if !results.is_empty() => results.join("\n\n"),
@@ -1465,6 +1499,11 @@ Return ONLY the summary, nothing else."#,
             .get("days_radius")
             .and_then(|v| v.as_i64())
             .unwrap_or(4);
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(60)
+            .clamp(1, 150) as usize;

         let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
             Ok(d) => d,
@@ -1473,10 +1512,11 @@ Return ONLY the summary, nothing else."#,
         let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp();

         log::info!(
-            "tool_get_sms_messages: date={}, contact={:?}, days_radius={}",
+            "tool_get_sms_messages: date={}, contact={:?}, days_radius={}, limit={}",
             date,
             contact,
-            days_radius
+            days_radius,
+            limit
         );

         match self
@@ -1487,11 +1527,15 @@ Return ONLY the summary, nothing else."#,
             Ok(messages) if !messages.is_empty() => {
                 let formatted: Vec<String> = messages
                     .iter()
-                    .take(30)
+                    .take(limit)
                     .map(|m| {
                         let sender = if m.is_sent { "Me" } else { &m.contact };
                         let ts = DateTime::from_timestamp(m.timestamp, 0)
-                            .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                            .map(|dt| {
+                                dt.with_timezone(&Local)
+                                    .format("%Y-%m-%d %H:%M")
+                                    .to_string()
+                            })
                             .unwrap_or_else(|| "unknown".to_string());
                         format!("[{}] {}: {}", ts, sender, m.body)
                     })
@@ -1524,6 +1568,11 @@ Return ONLY the summary, nothing else."#,
             .get("days_radius")
             .and_then(|v| v.as_i64())
             .unwrap_or(7);
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(20)
+            .clamp(1, 50) as usize;

         let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
             Ok(d) => d,
@@ -1532,9 +1581,10 @@ Return ONLY the summary, nothing else."#,
         let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp();

         log::info!(
-            "tool_get_calendar_events: date={}, days_radius={}",
+            "tool_get_calendar_events: date={}, days_radius={}, limit={}",
             date,
-            days_radius
+            days_radius,
+            limit
         );
@@ -1542,7 +1592,7 @@ Return ONLY the summary, nothing else."#,
         let events = {
             let mut dao = self
                 .calendar_dao
                 .lock()
                 .expect("Unable to lock CalendarEventDao");
-            dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, 10)
+            dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, limit)
                 .ok()
         };
@@ -1552,7 +1602,11 @@ Return ONLY the summary, nothing else."#,
             .iter()
             .map(|e| {
                 let dt = DateTime::from_timestamp(e.start_time, 0)
-                    .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                    .map(|dt| {
+                        dt.with_timezone(&Local)
+                            .format("%Y-%m-%d %H:%M")
+                            .to_string()
+                    })
                     .unwrap_or_else(|| "unknown".to_string());
                 let loc = e
                     .location
@@ -1624,7 +1678,11 @@ Return ONLY the summary, nothing else."#,
             .take(20)
             .map(|loc| {
                 let dt = DateTime::from_timestamp(loc.timestamp, 0)
-                    .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
+                    .map(|dt| {
+                        dt.with_timezone(&Local)
+                            .format("%Y-%m-%d %H:%M")
+                            .to_string()
+                    })
                     .unwrap_or_else(|| "unknown".to_string());
                 let activity = loc
                     .activity
@@ -1733,7 +1791,11 @@ Return ONLY the summary, nothing else."#,
             .get("entity_type")
             .and_then(|v| v.as_str())
             .map(|s| s.to_string());
-        let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10);
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(20)
+            .clamp(1, 50);

         log::info!(
             "tool_recall_entities: name={:?}, type={:?}, limit={}",
// For each linked entity, fetch its facts // For each linked entity, fetch its facts
for entity_id in entity_ids { for entity_id in entity_ids {
if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id) { if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id)
if let Some(e) = entity { && let Some(e) = entity
{
let role = links let role = links
.iter() .iter()
.find(|l| l.entity_id == entity_id) .find(|l| l.entity_id == entity_id)
@@ -1836,7 +1899,6 @@ Return ONLY the summary, nothing else."#,
} }
} }
} }
}
if output_lines.is_empty() { if output_lines.is_empty() {
"No active knowledge facts found for this photo.".to_string() "No active knowledge facts found for this photo.".to_string()
@@ -1882,14 +1944,10 @@ Return ONLY the summary, nothing else."#,
// those already). Results are appended to the tool response so the // those already). Results are appended to the tool response so the
// model can choose to use an existing entity's ID instead. // model can choose to use an existing entity's ID instead.
let similar_entities: Vec<String> = { let similar_entities: Vec<String> = {
use crate::database::{EntityFilter, KnowledgeDao}; use crate::database::EntityFilter;
use crate::database::knowledge_dao::normalize_entity_type; use crate::database::knowledge_dao::normalize_entity_type;
let normalised_type = normalize_entity_type(&entity_type); let normalised_type = normalize_entity_type(&entity_type);
let first_token = name let first_token = name.split_whitespace().next().unwrap_or(&name).to_string();
.split_whitespace()
.next()
.unwrap_or(&name)
.to_string();
let filter = EntityFilter { let filter = EntityFilter {
entity_type: None, // search all types, filter client-side to avoid case issues entity_type: None, // search all types, filter client-side to avoid case issues
status: Some("active".to_string()), status: Some("active".to_string()),
@@ -1897,7 +1955,10 @@ Return ONLY the summary, nothing else."#,
limit: 10, limit: 10,
offset: 0, offset: 0,
}; };
let mut kdao = self.knowledge_dao.lock().expect("Unable to lock KnowledgeDao"); let mut kdao = self
.knowledge_dao
.lock()
.expect("Unable to lock KnowledgeDao");
kdao.list_entities(cx, filter) kdao.list_entities(cx, filter)
.unwrap_or_default() .unwrap_or_default()
.0 .0
@@ -2031,6 +2092,7 @@ Return ONLY the summary, nothing else."#,
         // Upsert a photo link so this entity is associated with this photo
         let link = InsertEntityPhotoLink {
             entity_id: subject_entity_id,
+            library_id: crate::libraries::PRIMARY_LIBRARY_ID,
             file_path: file_path.to_string(),
             role: photo_role,
         };
@@ -2086,6 +2148,10 @@ Return ONLY the summary, nothing else."#,
                 "contact": {
                     "type": "string",
                     "description": "Optional contact name to filter results"
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Maximum number of results to return (default: 10, max: 25)"
                 }
             }
         }),
@@ -2108,6 +2174,10 @@ Return ONLY the summary, nothing else."#,
                 "days_radius": {
                     "type": "integer",
                     "description": "Number of days before and after the date to search (default: 4)"
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Maximum number of messages to return (default: 60, max: 150)"
                 }
             }
         }),
@@ -2126,6 +2196,10 @@ Return ONLY the summary, nothing else."#,
                 "days_radius": {
                     "type": "integer",
                     "description": "Number of days before and after the date to search (default: 7)"
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Maximum number of events to return (default: 20, max: 50)"
                 }
             }
         }),
@@ -2201,7 +2275,7 @@ Return ONLY the summary, nothing else."#,
             },
             "limit": {
                 "type": "integer",
-                "description": "Maximum number of results to return (default: 10)"
+                "description": "Maximum number of results to return (default: 20, max: 50)"
             }
         }
     }),
@@ -2453,7 +2527,7 @@ Return ONLY the summary, nothing else."#,
         extract_date_from_filename(&file_path)
             .map(|dt| dt.timestamp())
             .or_else(|| {
-                let full_path = std::path::Path::new(&self.base_path).join(&file_path);
+                let full_path = self.resolve_full_path(&file_path)?;
                 File::open(&full_path)
                     .and_then(|f| f.metadata())
                     .and_then(|m| m.created().or(m.modified()))
@@ -2704,8 +2778,7 @@ Return ONLY the summary, nothing else."#,
         messages.push(ChatMessage::user(
             "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.",
         ));
-        let (final_response, prompt_tokens, eval_tokens) =
-            ollama_client
+        let (final_response, prompt_tokens, eval_tokens) = ollama_client
             .chat_with_tools(messages.clone(), vec![])
             .await?;
         last_prompt_eval_count = prompt_tokens;
@@ -2742,6 +2815,7 @@ Return ONLY the summary, nothing else."#,
         // 15. Store insight (returns the persisted row including its new id)
         let insight = InsertPhotoInsight {
+            library_id: crate::libraries::PRIMARY_LIBRARY_ID,
             file_path: file_path.to_string(),
             title,
             summary: final_content,

View File

@@ -120,6 +120,7 @@ impl OllamaClient {
     /// Replace the HTTP client with one using a custom request timeout.
     /// Useful for slow models where the default 120s may be insufficient.
+    #[allow(dead_code)]
     pub fn with_request_timeout(mut self, secs: u64) -> Self {
         self.client = Client::builder()
             .connect_timeout(Duration::from_secs(5))
@@ -174,6 +175,7 @@ impl OllamaClient {
     }

     /// Clear the model list cache for a specific URL or all URLs
+    #[allow(dead_code)]
     pub fn clear_model_cache(url: Option<&str>) {
         let mut cache = MODEL_LIST_CACHE.lock().unwrap();
         if let Some(url) = url {
@@ -186,6 +188,7 @@ impl OllamaClient {
     }

     /// Clear the model capabilities cache for a specific URL or all URLs
+    #[allow(dead_code)]
     pub fn clear_capabilities_cache(url: Option<&str>) {
         let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
         if let Some(url) = url {
@@ -992,7 +995,6 @@ struct OllamaEmbedResponse {
 #[cfg(test)]
 mod tests {
-    use super::*;

     #[test]
     fn generate_photo_description_prompt_is_concise() {

src/bin/backfill_hashes.rs (new file)
View File

@@ -0,0 +1,181 @@
//! Backfill `image_exif.content_hash` + `size_bytes` for rows that were
//! ingested before hash computation was wired into the watcher.
//!
//! The watcher computes hashes for new files as they're ingested, so this
//! binary is a one-shot tool for the historical backlog. Safe to re-run;
//! only rows with NULL content_hash are processed.
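//!
//! An illustrative invocation (flag names follow the clap derive below):
//!
//!   cargo run --release --bin backfill_hashes -- --dry-run --batch-size 1000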
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use clap::Parser;
use rayon::prelude::*;
use image_api::content_hash;
use image_api::database::{ExifDao, SqliteExifDao, connect};
use image_api::libraries::{self, Library};
#[derive(Parser, Debug)]
#[command(name = "backfill_hashes")]
#[command(about = "Compute content_hash for image_exif rows missing one")]
struct Args {
/// Max rows to hash per batch. The process loops until no rows remain.
#[arg(long, default_value_t = 500)]
batch_size: i64,
/// Rayon parallelism override. 0 uses the default thread pool size.
#[arg(long, default_value_t = 0)]
parallelism: usize,
/// Dry-run: log what would be hashed without writing to the DB.
#[arg(long)]
dry_run: bool,
}
fn main() -> anyhow::Result<()> {
env_logger::init();
dotenv::dotenv().ok();
let args = Args::parse();
if args.parallelism > 0 {
rayon::ThreadPoolBuilder::new()
.num_threads(args.parallelism)
.build_global()
.expect("Unable to configure rayon thread pool");
}
// Resolve libraries (patch placeholder if still unset) so we can map
// library_id back to a root_path on disk.
let base_path = dotenv::var("BASE_PATH").ok();
let mut seed_conn = connect();
if let Some(base) = base_path.as_deref() {
libraries::seed_or_patch_from_env(&mut seed_conn, base);
}
let libs = libraries::load_all(&mut seed_conn);
drop(seed_conn);
if libs.is_empty() {
anyhow::bail!("No libraries configured; cannot backfill hashes");
}
let libs_by_id: std::collections::HashMap<i32, Library> =
libs.into_iter().map(|lib| (lib.id, lib)).collect();
println!(
"Configured libraries: {}",
libs_by_id
.values()
.map(|l| format!("{} -> {}", l.name, l.root_path))
.collect::<Vec<_>>()
.join(", ")
);
let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let ctx = opentelemetry::Context::new();
let mut total_hashed = 0u64;
let mut total_missing = 0u64;
let mut total_errors = 0u64;
let start = Instant::now();
loop {
let rows = {
let mut guard = dao.lock().expect("Unable to lock ExifDao");
guard
.get_rows_missing_hash(&ctx, args.batch_size)
.map_err(|e| anyhow::anyhow!("DB error: {:?}", e))?
};
if rows.is_empty() {
break;
}
println!("Processing batch of {} rows", rows.len());
// Compute hashes in parallel (I/O-bound; rayon helps on local disks,
// throttled by network on SMB mounts — use --parallelism to tune).
let results: Vec<(i32, String, Option<content_hash::FileIdentity>)> = rows
.into_par_iter()
.map(|(library_id, rel_path)| {
let abs = libs_by_id
.get(&library_id)
.map(|lib| Path::new(&lib.root_path).join(&rel_path));
match abs {
Some(abs_path) if abs_path.exists() => match content_hash::compute(&abs_path) {
Ok(id) => (library_id, rel_path, Some(id)),
Err(e) => {
eprintln!("hash error for {}: {:?}", abs_path.display(), e);
(library_id, rel_path, None)
}
},
Some(_) => (library_id, rel_path, None), // file missing on disk
None => {
eprintln!("Row refers to unknown library_id {}", library_id);
(library_id, rel_path, None)
}
}
})
.collect();
// Persist sequentially — SQLite writes serialize anyway.
if !args.dry_run {
let mut guard = dao.lock().expect("Unable to lock ExifDao");
for (library_id, rel_path, ident) in &results {
match ident {
Some(id) => {
match guard.backfill_content_hash(
&ctx,
*library_id,
rel_path,
&id.content_hash,
id.size_bytes,
) {
Ok(_) => total_hashed += 1,
Err(e) => {
eprintln!("persist error for {}: {:?}", rel_path, e);
total_errors += 1;
}
}
}
None => {
total_missing += 1;
}
}
}
} else {
for (_, rel_path, ident) in &results {
match ident {
Some(id) => {
println!(
"[dry-run] {} -> {} ({} bytes)",
rel_path, id.content_hash, id.size_bytes
);
total_hashed += 1;
}
None => {
total_missing += 1;
}
}
}
println!(
"[dry-run] processed one batch of {}. Stopping — a real run would continue \
until no NULL content_hash rows remain.",
results.len()
);
break;
}
let elapsed = start.elapsed().as_secs_f64().max(0.001);
let rate = total_hashed as f64 / elapsed;
println!(
" hashed={} missing={} errors={} ({:.1} files/sec)",
total_hashed, total_missing, total_errors, rate
);
}
println!();
println!(
"Done. hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
total_hashed,
total_missing,
total_errors,
start.elapsed().as_secs_f64()
);
Ok(())
}

View File

@@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
     let context = opentelemetry::Context::new();
     let relative_path = match path.strip_prefix(&base) {
-        Ok(p) => p.to_str().unwrap().to_string(),
+        Ok(p) => p.to_str().unwrap().replace('\\', "/"),
         Err(_) => {
             eprintln!(
                 "Error: Could not create relative path for {}",
@@ -94,6 +94,7 @@ fn main() -> anyhow::Result<()> {
         Ok(exif_data) => {
             let timestamp = Utc::now().timestamp();
             let insert_exif = InsertImageExif {
+                library_id: image_api::libraries::PRIMARY_LIBRARY_ID,
                 file_path: relative_path.clone(),
                 camera_make: exif_data.camera_make,
                 camera_model: exif_data.camera_model,
@@ -114,6 +115,8 @@ fn main() -> anyhow::Result<()> {
                     .map(|e| e.created_time)
                     .unwrap_or(timestamp),
                 last_modified: timestamp,
+                content_hash: None,
+                size_bytes: None,
             };

             // Store or update in database

View File

@@ -11,6 +11,7 @@ use image_api::database::{
     SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
 };
 use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
+use image_api::libraries::{self, Library};
 use image_api::tags::{SqliteTagDao, TagDao};

 #[derive(Parser, Debug)]
@@ -125,6 +126,12 @@ async fn main() -> anyhow::Result<()> {
     let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
         Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));

+    let populate_lib = Library {
+        id: libraries::PRIMARY_LIBRARY_ID,
+        name: "main".to_string(),
+        root_path: base_path.clone(),
+    };
+
     let generator = InsightGenerator::new(
         ollama,
         sms_client,
@@ -136,7 +143,7 @@ async fn main() -> anyhow::Result<()> {
         search_dao,
         tag_dao,
         knowledge_dao,
-        base_path.clone(),
+        vec![populate_lib],
     );

     println!("Knowledge Base Population");

src/content_hash.rs (new file)
View File

@@ -0,0 +1,108 @@
//! Content-based file identity used to dedup derivative outputs
//! (thumbnails, HLS segments) across libraries.
//!
//! Hashes are computed with blake3 streaming so that network-mounted
//! libraries don't need to load whole files into memory. The result is
//! a 64-character hex string; we shard derivative directories on the
//! first two characters to keep any single directory's fanout bounded.
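//!
//! Layout sketch for a hypothetical hash starting with "ab":
//!   <thumbs_dir>/ab/<hash>.jpg          (see `thumbnail_path`)
//!   <video_dir>/ab/<hash>/playlist.m3u8 (see `hls_dir`)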
use std::fs::File;
use std::io::{self, Read};
use std::path::{Path, PathBuf};
/// Size of the read buffer used when streaming a file through blake3.
/// 1 MiB trades a bit of RSS for fewer syscalls on slow network mounts.
const HASH_BUFFER_SIZE: usize = 1024 * 1024;
/// Hash identity of a file, together with its byte length.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FileIdentity {
pub content_hash: String,
pub size_bytes: i64,
}
/// Stream a file through blake3 and return the hex-encoded digest + size.
pub fn compute(path: &Path) -> io::Result<FileIdentity> {
let mut file = File::open(path)?;
let size_bytes = file.metadata()?.len() as i64;
let mut hasher = blake3::Hasher::new();
let mut buf = vec![0u8; HASH_BUFFER_SIZE];
loop {
let n = file.read(&mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(FileIdentity {
content_hash: hasher.finalize().to_hex().to_string(),
size_bytes,
})
}
/// Hash-keyed thumbnail path: `<thumbs_dir>/<hash[..2]>/<hash>.jpg`.
/// Generation and serving both consult this first; the legacy mirrored
/// path acts as a fallback for pre-backfill rows.
pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
let shard = shard_prefix(hash);
thumbs_dir.join(shard).join(format!("{}.jpg", hash))
}
/// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
/// The playlist lives at `playlist.m3u8` inside this directory and its
/// segments are co-located so HLS relative references Just Work.
#[allow(dead_code)]
pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
let shard = shard_prefix(hash);
video_dir.join(shard).join(hash)
}
fn shard_prefix(hash: &str) -> &str {
let end = hash
.char_indices()
.nth(2)
.map(|(i, _)| i)
.unwrap_or(hash.len());
&hash[..end]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn identical_content_yields_identical_hash() {
let dir = tempfile::tempdir().unwrap();
let a = dir.path().join("a.bin");
let b = dir.path().join("b.bin");
std::fs::write(&a, b"hello world").unwrap();
std::fs::write(&b, b"hello world").unwrap();
let ha = compute(&a).unwrap();
let hb = compute(&b).unwrap();
assert_eq!(ha, hb);
assert_eq!(ha.size_bytes, 11);
}
#[test]
fn different_content_yields_different_hash() {
let dir = tempfile::tempdir().unwrap();
let a = dir.path().join("a.bin");
let b = dir.path().join("b.bin");
std::fs::write(&a, b"aaa").unwrap();
std::fs::write(&b, b"bbb").unwrap();
assert_ne!(compute(&a).unwrap(), compute(&b).unwrap());
}
#[test]
fn derivative_paths_shard_by_first_two_hex() {
let thumbs = Path::new("/tmp/thumbs");
let p = thumbnail_path(thumbs, "abcdef0123");
assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));
let video = Path::new("/tmp/video");
let d = hls_dir(video, "1234deadbeef");
assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
}
}

View File

@@ -102,6 +102,12 @@ pub struct PhotosResponse {
     pub photos: Vec<String>,
     pub dirs: Vec<String>,
+    /// Library id for each entry in `photos`, same length and ordering.
+    /// Parallel array rather than an object per row to keep the payload
+    /// small and backwards-compatible with older clients.
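+    /// Illustrative shape: `photos = ["a.jpg", "b.jpg"]` paired with
+    /// `photo_libraries = [1, 2]` marks `a.jpg` as library 1 and
+    /// `b.jpg` as library 2.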
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub photo_libraries: Vec<i32>,

     // Pagination metadata (only present when limit is set)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub total_count: Option<i64>,
@@ -155,6 +161,10 @@ pub struct FilesRequest {
     // Pagination parameters (optional - backward compatible)
     pub limit: Option<i64>,
     pub offset: Option<i64>,
+    /// Optional library filter. Accepts a library id (e.g. "1") or name
+    /// (e.g. "main"). When omitted, results span all libraries.
+    pub library: Option<String>,
 }

 #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
@@ -187,7 +197,12 @@ pub struct ThumbnailRequest {
     #[allow(dead_code)] // Part of API contract, may be used in future
     pub(crate) format: Option<ThumbnailFormat>,
     #[serde(default)]
+    #[allow(dead_code)] // Part of API contract, may be used in future
     pub(crate) shape: Option<ThumbnailShape>,
+    /// Optional library filter. Accepts a library id (e.g. "1") or name
+    /// (e.g. "main"). When omitted, defaults to the primary library.
+    #[serde(default)]
+    pub(crate) library: Option<String>,
 }

 #[derive(Debug, Deserialize, PartialEq)]
@@ -231,6 +246,8 @@ pub struct MetadataResponse {
     pub size: u64,
     pub exif: Option<ExifMetadata>,
     pub filename_date: Option<i64>, // Date extracted from filename
+    pub library_id: Option<i32>,
+    pub library_name: Option<String>,
 }

 impl From<fs::Metadata> for MetadataResponse {
@@ -247,6 +264,8 @@ impl From<fs::Metadata> for MetadataResponse {
             size: metadata.len(),
             exif: None,
             filename_date: None, // Will be set in endpoint handler
+            library_id: None,
+            library_name: None,
         }
     }
 }
@@ -422,11 +441,8 @@ mod tests {
         );

         match err.unwrap_err().into_kind() {
-            ErrorKind::ExpiredSignature => assert!(true),
-            kind => {
-                println!("Unexpected error: {:?}", kind);
-                assert!(false)
-            }
+            ErrorKind::ExpiredSignature => {}
+            kind => panic!("Unexpected error: {:?}", kind),
         }
     }

@@ -435,11 +451,8 @@ mod tests {
         let err = Claims::from_str("uni-֍ՓՓՓՓՓՓՓՓՓՓՓՓՓՓՓ");

         match err.unwrap_err().into_kind() {
-            ErrorKind::InvalidToken => assert!(true),
-            kind => {
-                println!("Unexpected error: {:?}", kind);
-                assert!(false)
-            }
+            ErrorKind::InvalidToken => {}
+            kind => panic!("Unexpected error: {:?}", kind),
         }
     }

View File

@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;

View File

@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use chrono::NaiveDate;
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;

View File

@@ -21,6 +21,17 @@ pub trait InsightDao: Sync + Send {
         file_path: &str,
     ) -> Result<Option<PhotoInsight>, DbError>;

+    /// Return the most recent current insight whose rel_path is one of
+    /// `paths`. Used for content-hash sharing: the caller expands a
+    /// single file into all rel_paths with the same content_hash, then
+    /// asks here for any existing insight attached to any of them.
+    fn get_insight_for_paths(
+        &mut self,
+        context: &opentelemetry::Context,
+        paths: &[String],
+    ) -> Result<Option<PhotoInsight>, DbError>;
+
+    #[allow(dead_code)]
     fn get_insight_history(
         &mut self,
         context: &opentelemetry::Context,
@@ -69,6 +80,7 @@ impl SqliteInsightDao {
     }

     #[cfg(test)]
+    #[allow(dead_code)]
     pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
         SqliteInsightDao { connection: conn }
     }
@@ -86,7 +98,11 @@ impl InsightDao for SqliteInsightDao {
         let mut connection = self.connection.lock().expect("Unable to get InsightDao");

         // Mark all existing insights for this file as no longer current
-        diesel::update(photo_insights.filter(file_path.eq(&insight.file_path)))
+        diesel::update(
+            photo_insights
+                .filter(library_id.eq(insight.library_id))
+                .filter(rel_path.eq(&insight.file_path)),
+        )
         .set(is_current.eq(false))
         .execute(connection.deref_mut())
         .map_err(|_| anyhow::anyhow!("Update is_current error"))?;
@@ -99,7 +115,8 @@ impl InsightDao for SqliteInsightDao {
         // Retrieve the inserted record (is_current = true)
         photo_insights
-            .filter(file_path.eq(&insight.file_path))
+            .filter(library_id.eq(insight.library_id))
+            .filter(rel_path.eq(&insight.file_path))
             .filter(is_current.eq(true))
             .first::<PhotoInsight>(connection.deref_mut())
             .map_err(|_| anyhow::anyhow!("Query error"))
@@ -118,7 +135,7 @@ impl InsightDao for SqliteInsightDao {
let mut connection = self.connection.lock().expect("Unable to get InsightDao"); let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights photo_insights
.filter(file_path.eq(path)) .filter(rel_path.eq(path))
.filter(is_current.eq(true)) .filter(is_current.eq(true))
.first::<PhotoInsight>(connection.deref_mut()) .first::<PhotoInsight>(connection.deref_mut())
.optional() .optional()
@@ -127,6 +144,30 @@ impl InsightDao for SqliteInsightDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|_| DbError::new(DbErrorKind::QueryError))
} }
fn get_insight_for_paths(
&mut self,
context: &opentelemetry::Context,
paths: &[String],
) -> Result<Option<PhotoInsight>, DbError> {
if paths.is_empty() {
return Ok(None);
}
trace_db_call(context, "query", "get_insight_for_paths", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.filter(rel_path.eq_any(paths))
.filter(is_current.eq(true))
.order(generated_at.desc())
.first::<PhotoInsight>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
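To make the sharing mechanics concrete, here is a minimal sketch of the read path, assuming a caller that holds both DAOs; the function name and wiring are illustrative, not part of this diff:

fn insight_for_file(
    cx: &opentelemetry::Context,
    exif_dao: &mut dyn ExifDao,
    insight_dao: &mut dyn InsightDao,
    library_id: i32,
    rel_path: &str,
) -> Result<Option<PhotoInsight>, DbError> {
    // Expands to every rel_path with the same content_hash; falls back to
    // [rel_path] when the file has not been hashed yet.
    let paths = exif_dao.get_rel_paths_sharing_content(cx, library_id, rel_path)?;
    insight_dao.get_insight_for_paths(cx, &paths)
}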
fn get_insight_history( fn get_insight_history(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
@@ -138,7 +179,7 @@ impl InsightDao for SqliteInsightDao {
let mut connection = self.connection.lock().expect("Unable to get InsightDao"); let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights photo_insights
.filter(file_path.eq(path)) .filter(rel_path.eq(path))
.order(generated_at.desc()) .order(generated_at.desc())
.load::<PhotoInsight>(connection.deref_mut()) .load::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|_| anyhow::anyhow!("Query error"))
@@ -156,7 +197,7 @@ impl InsightDao for SqliteInsightDao {
let mut connection = self.connection.lock().expect("Unable to get InsightDao"); let mut connection = self.connection.lock().expect("Unable to get InsightDao");
diesel::delete(photo_insights.filter(file_path.eq(path))) diesel::delete(photo_insights.filter(rel_path.eq(path)))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map(|_| ()) .map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error")) .map_err(|_| anyhow::anyhow!("Delete error"))
@@ -195,7 +236,7 @@ impl InsightDao for SqliteInsightDao {
diesel::update( diesel::update(
photo_insights photo_insights
.filter(file_path.eq(path)) .filter(rel_path.eq(path))
.filter(is_current.eq(true)), .filter(is_current.eq(true)),
) )
.set(approved.eq(Some(is_approved))) .set(approved.eq(Some(is_approved)))

View File

@@ -1,3 +1,5 @@
#![allow(dead_code)]
use diesel::prelude::*; use diesel::prelude::*;
use diesel::sqlite::SqliteConnection; use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut; use std::ops::DerefMut;
@@ -230,7 +232,7 @@ impl SqliteKnowledgeDao {
} }
fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>, DbError> { fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if bytes.len() % 4 != 0 { if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError)); return Err(DbError::new(DbErrorKind::QueryError));
} }
Ok(bytes Ok(bytes
@@ -535,7 +537,6 @@ impl KnowledgeDao for SqliteKnowledgeDao {
conn.transaction::<(i64, i64), diesel::result::Error, _>(|conn| { conn.transaction::<(i64, i64), diesel::result::Error, _>(|conn| {
use schema::entity_facts::dsl as ef; use schema::entity_facts::dsl as ef;
use schema::entity_photo_links::dsl as epl;
// 1. Re-point facts where source is subject // 1. Re-point facts where source is subject
let facts_updated = let facts_updated =
@@ -550,8 +551,8 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// 3. Copy photo links to target (INSERT OR IGNORE to skip duplicates) // 3. Copy photo links to target (INSERT OR IGNORE to skip duplicates)
let links_updated = diesel::sql_query( let links_updated = diesel::sql_query(
"INSERT OR IGNORE INTO entity_photo_links (entity_id, file_path, role) \ "INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) \
SELECT ?, file_path, role FROM entity_photo_links WHERE entity_id = ?", SELECT ?, library_id, rel_path, role FROM entity_photo_links WHERE entity_id = ?",
) )
.bind::<diesel::sql_types::Integer, _>(target_id) .bind::<diesel::sql_types::Integer, _>(target_id)
.bind::<diesel::sql_types::Integer, _>(source_id) .bind::<diesel::sql_types::Integer, _>(source_id)
@@ -781,11 +782,12 @@ impl KnowledgeDao for SqliteKnowledgeDao {
) -> Result<(), DbError> { ) -> Result<(), DbError> {
trace_db_call(cx, "insert", "upsert_photo_link", |_span| { trace_db_call(cx, "insert", "upsert_photo_link", |_span| {
let mut conn = self.connection.lock().expect("KnowledgeDao lock"); let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// INSERT OR IGNORE respects the UNIQUE(entity_id, file_path, role) constraint // INSERT OR IGNORE respects the UNIQUE(entity_id, library_id, rel_path, role) constraint
diesel::sql_query( diesel::sql_query(
"INSERT OR IGNORE INTO entity_photo_links (entity_id, file_path, role) VALUES (?, ?, ?)" "INSERT OR IGNORE INTO entity_photo_links (entity_id, library_id, rel_path, role) VALUES (?, ?, ?, ?)"
) )
.bind::<diesel::sql_types::Integer, _>(link.entity_id) .bind::<diesel::sql_types::Integer, _>(link.entity_id)
.bind::<diesel::sql_types::Integer, _>(link.library_id)
.bind::<diesel::sql_types::Text, _>(&link.file_path) .bind::<diesel::sql_types::Text, _>(&link.file_path)
.bind::<diesel::sql_types::Text, _>(&link.role) .bind::<diesel::sql_types::Text, _>(&link.role)
.execute(conn.deref_mut()) .execute(conn.deref_mut())
@@ -803,7 +805,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
trace_db_call(cx, "delete", "delete_photo_links_for_file", |_span| { trace_db_call(cx, "delete", "delete_photo_links_for_file", |_span| {
use schema::entity_photo_links::dsl::*; use schema::entity_photo_links::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock"); let mut conn = self.connection.lock().expect("KnowledgeDao lock");
diesel::delete(entity_photo_links.filter(file_path.eq(file_path_val))) diesel::delete(entity_photo_links.filter(rel_path.eq(file_path_val)))
.execute(conn.deref_mut()) .execute(conn.deref_mut())
.map(|_| ()) .map(|_| ())
.map_err(|e| anyhow::anyhow!("Delete error: {}", e)) .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
@@ -820,7 +822,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
use schema::entity_photo_links::dsl::*; use schema::entity_photo_links::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock"); let mut conn = self.connection.lock().expect("KnowledgeDao lock");
entity_photo_links entity_photo_links
.filter(file_path.eq(file_path_val)) .filter(rel_path.eq(file_path_val))
.load::<EntityPhotoLink>(conn.deref_mut()) .load::<EntityPhotoLink>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e)) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })

View File

@@ -1,3 +1,5 @@
#![allow(dead_code)]
use diesel::prelude::*; use diesel::prelude::*;
use diesel::sqlite::SqliteConnection; use diesel::sqlite::SqliteConnection;
use serde::Serialize; use serde::Serialize;

View File

@@ -155,7 +155,9 @@ pub trait FavoriteDao: Sync + Send {
fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result<usize, DbError>; fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result<usize, DbError>;
fn remove_favorite(&mut self, user_id: i32, favorite_path: String); fn remove_favorite(&mut self, user_id: i32, favorite_path: String);
fn get_favorites(&mut self, user_id: i32) -> Result<Vec<Favorite>, DbError>; fn get_favorites(&mut self, user_id: i32) -> Result<Vec<Favorite>, DbError>;
#[allow(dead_code)]
fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>; fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>;
#[allow(dead_code)]
fn get_all_paths(&mut self) -> Result<Vec<String>, DbError>; fn get_all_paths(&mut self) -> Result<Vec<String>, DbError>;
} }
@@ -184,7 +186,7 @@ impl FavoriteDao for SqliteFavoriteDao {
let mut connection = self.connection.lock().expect("Unable to get FavoriteDao"); let mut connection = self.connection.lock().expect("Unable to get FavoriteDao");
if favorites if favorites
.filter(userid.eq(user_id).and(path.eq(&favorite_path))) .filter(userid.eq(user_id).and(rel_path.eq(&favorite_path)))
.first::<Favorite>(connection.deref_mut()) .first::<Favorite>(connection.deref_mut())
.is_err() .is_err()
{ {
@@ -204,7 +206,7 @@ impl FavoriteDao for SqliteFavoriteDao {
use schema::favorites::dsl::*; use schema::favorites::dsl::*;
diesel::delete(favorites) diesel::delete(favorites)
.filter(userid.eq(user_id).and(path.eq(favorite_path))) .filter(userid.eq(user_id).and(rel_path.eq(favorite_path)))
.execute(self.connection.lock().unwrap().deref_mut()) .execute(self.connection.lock().unwrap().deref_mut())
.unwrap(); .unwrap();
} }
@@ -221,8 +223,8 @@ impl FavoriteDao for SqliteFavoriteDao {
fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> { fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> {
use schema::favorites::dsl::*; use schema::favorites::dsl::*;
diesel::update(favorites.filter(path.eq(old_path))) diesel::update(favorites.filter(rel_path.eq(old_path)))
.set(path.eq(new_path)) .set(rel_path.eq(new_path))
.execute(self.connection.lock().unwrap().deref_mut()) .execute(self.connection.lock().unwrap().deref_mut())
.map_err(|_| DbError::new(DbErrorKind::UpdateError))?; .map_err(|_| DbError::new(DbErrorKind::UpdateError))?;
Ok(()) Ok(())
@@ -232,13 +234,14 @@ impl FavoriteDao for SqliteFavoriteDao {
use schema::favorites::dsl::*; use schema::favorites::dsl::*;
favorites favorites
.select(path) .select(rel_path)
.distinct() .distinct()
.load(self.connection.lock().unwrap().deref_mut()) .load(self.connection.lock().unwrap().deref_mut())
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|_| DbError::new(DbErrorKind::QueryError))
} }
} }
#[allow(dead_code)]
pub trait ExifDao: Sync + Send { pub trait ExifDao: Sync + Send {
fn store_exif( fn store_exif(
&mut self, &mut self,
@@ -263,6 +266,7 @@ pub trait ExifDao: Sync + Send {
fn get_all_with_date_taken( fn get_all_with_date_taken(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
library_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError>; ) -> Result<Vec<(String, i64)>, DbError>;
/// Batch load EXIF data for multiple file paths (single query) /// Batch load EXIF data for multiple file paths (single query)
@@ -306,12 +310,103 @@ pub trait ExifDao: Sync + Send {
/// Get all photos with GPS coordinates /// Get all photos with GPS coordinates
/// Returns Vec<(file_path, latitude, longitude, date_taken)> /// Returns Vec<(file_path, latitude, longitude, date_taken)>
#[allow(clippy::type_complexity)]
fn get_all_with_gps( fn get_all_with_gps(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
base_path: &str, base_path: &str,
recursive: bool, recursive: bool,
) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError>; ) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError>;
/// Return rows that still lack a `content_hash`, oldest first. Used by
/// the `backfill_hashes` binary to batch through the historical
/// backlog. Returns `(library_id, rel_path)` tuples so the caller can
/// resolve each file on disk.
fn get_rows_missing_hash(
&mut self,
context: &opentelemetry::Context,
limit: i64,
) -> Result<Vec<(i32, String)>, DbError>;
/// Persist the computed blake3 hash + file size for an existing row.
fn backfill_content_hash(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
rel_path: &str,
hash: &str,
size_bytes: i64,
) -> Result<(), DbError>;
/// Return the first EXIF row with the given content hash (any library).
/// Used by thumbnail/HLS generation to detect pre-existing derivatives
/// from another library before regenerating.
fn find_by_content_hash(
&mut self,
context: &opentelemetry::Context,
hash: &str,
) -> Result<Option<ImageExif>, DbError>;
/// Given a file instance `(library_id, rel_path)`, return every distinct
/// rel_path in `image_exif` whose `content_hash` matches this file's.
/// Used by tag and insight read-paths so annotations follow content
/// rather than path, even when the same file is indexed under
/// different library roots. Falls back to `[rel_path]` when the file
/// hasn't been hashed yet.
fn get_rel_paths_sharing_content(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
rel_path: &str,
) -> Result<Vec<String>, DbError>;
/// All rel_paths known to live in a given library. Used by search to
/// scope tag-based (path-keyed) hits to a single library after joining
/// through the library-agnostic tag tables.
fn get_rel_paths_for_library(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
) -> Result<Vec<String>, DbError>;
/// Look up a content_hash for a rel_path in *any* library. Useful when
/// the caller has a library-agnostic rel_path (e.g. from tagged_photo)
/// and wants to find content-equivalent siblings without knowing the
/// file's original library.
fn find_content_hash_anywhere(
&mut self,
context: &opentelemetry::Context,
rel_path: &str,
) -> Result<Option<String>, DbError>;
/// Given a content_hash, return all rel_paths carrying that hash.
fn get_rel_paths_by_hash(
&mut self,
context: &opentelemetry::Context,
hash: &str,
) -> Result<Vec<String>, DbError>;
/// List `(library_id, rel_path)` pairs for the given libraries, optionally
/// restricted to rows whose rel_path starts with `path_prefix`. When
/// `library_ids` is empty, rows from every library are returned. Used by
/// `/photos` recursive listing to skip the filesystem walk; the watcher's
/// reconciliation pass keeps image_exif consistent with what is on disk.
fn list_rel_paths_for_libraries(
&mut self,
context: &opentelemetry::Context,
library_ids: &[i32],
path_prefix: Option<&str>,
) -> Result<Vec<(i32, String)>, DbError>;
/// Delete a single image_exif row scoped to `(library_id, rel_path)`.
/// Distinct from `delete_exif`, which matches on rel_path alone and
/// would clobber same-named files across libraries.
fn delete_exif_by_library(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
rel_path: &str,
) -> Result<(), DbError>;
} }
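As a rough sketch of how the `backfill_hashes` binary (not shown in this diff) would drive the two backfill methods, with batch size and error handling assumed:

fn backfill_batch(
    cx: &opentelemetry::Context,
    dao: &mut dyn ExifDao,
    libs: &[crate::libraries::Library],
) -> Result<usize, DbError> {
    let mut done = 0;
    // Oldest-first batch of rows that still lack a content_hash.
    for (lib_id, rel) in dao.get_rows_missing_hash(cx, 500)? {
        let Some(lib) = libs.iter().find(|l| l.id == lib_id) else { continue };
        // Skip rows whose file has vanished; the watcher reconciles those.
        if let Ok(bytes) = std::fs::read(lib.resolve(&rel)) {
            let hash = blake3::hash(&bytes).to_string(); // lowercase hex via Display
            dao.backfill_content_hash(cx, lib_id, &rel, &hash, bytes.len() as i64)?;
            done += 1;
        }
    }
    Ok(done) // callers loop until an empty batch comes back
}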
pub struct SqliteExifDao { pub struct SqliteExifDao {
@@ -330,6 +425,13 @@ impl SqliteExifDao {
connection: Arc::new(Mutex::new(connect())), connection: Arc::new(Mutex::new(connect())),
} }
} }
#[cfg(test)]
pub fn from_connection(conn: SqliteConnection) -> Self {
SqliteExifDao {
connection: Arc::new(Mutex::new(conn)),
}
}
} }
impl ExifDao for SqliteExifDao { impl ExifDao for SqliteExifDao {
@@ -346,12 +448,21 @@ impl ExifDao for SqliteExifDao {
diesel::insert_into(image_exif) diesel::insert_into(image_exif)
.values(&exif_data) .values(&exif_data)
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Insert error"))?; .map_err(|e| {
log::warn!(
"image_exif insert failed (lib={}, rel_path={:?}): {}",
exif_data.library_id,
exif_data.file_path,
e
);
anyhow::anyhow!("Insert error: {}", e)
})?;
image_exif image_exif
.filter(file_path.eq(&exif_data.file_path)) .filter(library_id.eq(exif_data.library_id))
.filter(rel_path.eq(&exif_data.file_path))
.first::<ImageExif>(connection.deref_mut()) .first::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Post-insert lookup failed: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|_| DbError::new(DbErrorKind::InsertError))
} }
@@ -372,7 +483,7 @@ impl ExifDao for SqliteExifDao {
let windows_path = path.replace('/', "\\"); let windows_path = path.replace('/', "\\");
match image_exif match image_exif
.filter(file_path.eq(&normalized).or(file_path.eq(&windows_path))) .filter(rel_path.eq(&normalized).or(rel_path.eq(&windows_path)))
.first::<ImageExif>(connection.deref_mut()) .first::<ImageExif>(connection.deref_mut())
{ {
Ok(exif) => Ok(Some(exif)), Ok(exif) => Ok(Some(exif)),
@@ -393,7 +504,11 @@ impl ExifDao for SqliteExifDao {
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::update(image_exif.filter(file_path.eq(&exif_data.file_path))) diesel::update(
image_exif
.filter(library_id.eq(exif_data.library_id))
.filter(rel_path.eq(&exif_data.file_path)),
)
.set(( .set((
camera_make.eq(&exif_data.camera_make), camera_make.eq(&exif_data.camera_make),
camera_model.eq(&exif_data.camera_model), camera_model.eq(&exif_data.camera_model),
@@ -415,7 +530,8 @@ impl ExifDao for SqliteExifDao {
.map_err(|_| anyhow::anyhow!("Update error"))?; .map_err(|_| anyhow::anyhow!("Update error"))?;
image_exif image_exif
.filter(file_path.eq(&exif_data.file_path)) .filter(library_id.eq(exif_data.library_id))
.filter(rel_path.eq(&exif_data.file_path))
.first::<ImageExif>(connection.deref_mut()) .first::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|_| anyhow::anyhow!("Query error"))
}) })
@@ -426,7 +542,7 @@ impl ExifDao for SqliteExifDao {
trace_db_call(context, "delete", "delete_exif", |_span| { trace_db_call(context, "delete", "delete_exif", |_span| {
use schema::image_exif::dsl::*; use schema::image_exif::dsl::*;
diesel::delete(image_exif.filter(file_path.eq(path))) diesel::delete(image_exif.filter(rel_path.eq(path)))
.execute(self.connection.lock().unwrap().deref_mut()) .execute(self.connection.lock().unwrap().deref_mut())
.map(|_| ()) .map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error")) .map_err(|_| anyhow::anyhow!("Delete error"))
@@ -437,15 +553,24 @@ impl ExifDao for SqliteExifDao {
fn get_all_with_date_taken( fn get_all_with_date_taken(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
lib_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError> { ) -> Result<Vec<(String, i64)>, DbError> {
trace_db_call(context, "query", "get_all_with_date_taken", |_span| { trace_db_call(context, "query", "get_all_with_date_taken", |_span| {
use schema::image_exif::dsl::*; use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif let query = image_exif
.select((file_path, date_taken)) .select((rel_path, date_taken))
.filter(date_taken.is_not_null()) .filter(date_taken.is_not_null())
.into_boxed();
let query = match lib_id {
Some(filter_id) => query.filter(library_id.eq(filter_id)),
None => query,
};
query
.load::<(String, Option<i64>)>(connection.deref_mut()) .load::<(String, Option<i64>)>(connection.deref_mut())
.map(|records| { .map(|records| {
records records
@@ -473,7 +598,7 @@ impl ExifDao for SqliteExifDao {
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif image_exif
.filter(file_path.eq_any(file_paths)) .filter(rel_path.eq_any(file_paths))
.load::<ImageExif>(connection.deref_mut()) .load::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|_| anyhow::anyhow!("Query error"))
}) })
@@ -572,8 +697,8 @@ impl ExifDao for SqliteExifDao {
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::update(image_exif.filter(file_path.eq(old_path))) diesel::update(image_exif.filter(rel_path.eq(old_path)))
.set(file_path.eq(new_path)) .set(rel_path.eq(new_path))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Update error"))?; .map_err(|_| anyhow::anyhow!("Update error"))?;
Ok(()) Ok(())
@@ -591,14 +716,13 @@ impl ExifDao for SqliteExifDao {
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif image_exif
.select(file_path) .select(rel_path)
.load(connection.deref_mut()) .load(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|_| anyhow::anyhow!("Query error"))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|_| DbError::new(DbErrorKind::QueryError))
} }
fn get_all_with_gps( fn get_all_with_gps(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
@@ -627,7 +751,7 @@ impl ExifDao for SqliteExifDao {
// Otherwise filter by path prefix // Otherwise filter by path prefix
if !base_path.is_empty() && base_path != "/" { if !base_path.is_empty() && base_path != "/" {
// Match base path as prefix (with wildcard) // Match base path as prefix (with wildcard)
query = query.filter(file_path.like(format!("{}%", base_path))); query = query.filter(rel_path.like(format!("{}%", base_path)));
span.set_attribute(KeyValue::new("path_filter_applied", true)); span.set_attribute(KeyValue::new("path_filter_applied", true));
} else { } else {
@@ -666,4 +790,311 @@ impl ExifDao for SqliteExifDao {
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|_| DbError::new(DbErrorKind::QueryError))
} }
fn get_rows_missing_hash(
&mut self,
context: &opentelemetry::Context,
limit: i64,
) -> Result<Vec<(i32, String)>, DbError> {
trace_db_call(context, "query", "get_rows_missing_hash", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(content_hash.is_null())
.select((library_id, rel_path))
.order(id.asc())
.limit(limit)
.load::<(i32, String)>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn backfill_content_hash(
&mut self,
context: &opentelemetry::Context,
library_id_val: i32,
rel_path_val: &str,
hash: &str,
size_val: i64,
) -> Result<(), DbError> {
trace_db_call(context, "update", "backfill_content_hash", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::update(
image_exif
.filter(library_id.eq(library_id_val))
.filter(rel_path.eq(rel_path_val)),
)
.set((content_hash.eq(hash), size_bytes.eq(size_val)))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|_| anyhow::anyhow!("Update error"))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn find_by_content_hash(
&mut self,
context: &opentelemetry::Context,
hash: &str,
) -> Result<Option<ImageExif>, DbError> {
trace_db_call(context, "query", "find_by_content_hash", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(content_hash.eq(hash))
.first::<ImageExif>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
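A sketch of the pre-generation check this enables, assuming hash-keyed derivative paths; `content_hash::thumbnail_path` is the same helper the `/image` handler calls below, while the generation step itself is hypothetical:

fn ensure_thumbnail(
    cx: &opentelemetry::Context,
    dao: &mut dyn ExifDao,
    thumbs_root: &std::path::Path,
    hash: &str,
) -> Result<std::path::PathBuf, DbError> {
    let thumb = crate::content_hash::thumbnail_path(thumbs_root, hash);
    // A row carrying this hash in any library means the shared derivative
    // was (or is being) generated once for this content.
    if !thumb.exists() && dao.find_by_content_hash(cx, hash)?.is_none() {
        // generate_thumbnail(&thumb); // hypothetical generator call
    }
    Ok(thumb)
}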
fn get_rel_paths_sharing_content(
&mut self,
context: &opentelemetry::Context,
library_id_val: i32,
rel_path_val: &str,
) -> Result<Vec<String>, DbError> {
trace_db_call(context, "query", "get_rel_paths_sharing_content", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
// Look up this file's content_hash. Missing row or NULL hash
// means we can't expand the match set; return the given
// rel_path so callers fall through to direct-match behavior.
let hash: Option<String> = image_exif
.filter(library_id.eq(library_id_val))
.filter(rel_path.eq(rel_path_val))
.select(content_hash)
.first::<Option<String>>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))?
.flatten();
let paths = match hash {
Some(h) => image_exif
.filter(content_hash.eq(h))
.select(rel_path)
.distinct()
.load::<String>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))?,
None => vec![rel_path_val.to_string()],
};
Ok(paths)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_rel_paths_for_library(
&mut self,
context: &opentelemetry::Context,
library_id_val: i32,
) -> Result<Vec<String>, DbError> {
trace_db_call(context, "query", "get_rel_paths_for_library", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(library_id.eq(library_id_val))
.select(rel_path)
.load::<String>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_content_hash_anywhere(
&mut self,
context: &opentelemetry::Context,
rel_path_val: &str,
) -> Result<Option<String>, DbError> {
trace_db_call(context, "query", "find_content_hash_anywhere", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(rel_path.eq(rel_path_val))
.filter(content_hash.is_not_null())
.select(content_hash)
.first::<Option<String>>(connection.deref_mut())
.optional()
.map(|opt| opt.flatten())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_rel_paths_by_hash(
&mut self,
context: &opentelemetry::Context,
hash: &str,
) -> Result<Vec<String>, DbError> {
trace_db_call(context, "query", "get_rel_paths_by_hash", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(content_hash.eq(hash))
.select(rel_path)
.distinct()
.load::<String>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
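These two lookups chain for the tag read path described above; a sketch of the glue, with `exif_dao` and `tagged_rel_path` as illustrative caller-side names:

let siblings = match exif_dao.find_content_hash_anywhere(&cx, &tagged_rel_path)? {
    Some(h) => exif_dao.get_rel_paths_by_hash(&cx, &h)?,
    // Unhashed file: fall through to direct-match behavior.
    None => vec![tagged_rel_path.clone()],
};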
fn list_rel_paths_for_libraries(
&mut self,
context: &opentelemetry::Context,
library_ids: &[i32],
path_prefix: Option<&str>,
) -> Result<Vec<(i32, String)>, DbError> {
trace_db_call(context, "query", "list_rel_paths_for_libraries", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
let mut query = image_exif.select((library_id, rel_path)).into_boxed();
if !library_ids.is_empty() {
query = query.filter(library_id.eq_any(library_ids.to_vec()));
}
if let Some(prefix) = path_prefix.map(str::trim).filter(|s| !s.is_empty()) {
// Trailing slash normalization so "2024" matches "2024/..."
// without also matching "2024-archive/...".
let prefix = prefix.trim_end_matches('/');
let pattern = format!("{}/%", prefix.replace('%', "\\%").replace('_', "\\_"));
query = query.filter(rel_path.like(pattern).escape('\\'));
}
query
.load::<(i32, String)>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
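Sketch of the `/photos` recursive-listing call this supports, assuming the handler has already resolved the optional `library=` selector into `library: Option<&Library>` (an empty id slice means union across all libraries; the prefix is illustrative):

let scope: Vec<i32> = library.map(|l| vec![l.id]).unwrap_or_default();
let rows = exif_dao.list_rel_paths_for_libraries(&cx, &scope, Some("2024/trips"))?;
// rows: Vec<(library_id, rel_path)>, e.g. (1, "2024/trips/beach.jpg")

The `%`/`_` escaping above matters because rel_paths may legitimately contain underscores: without the ESCAPE clause, an `_` in the prefix would act as a single-character LIKE wildcard, so "my_dir" would also match "my-dir".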
fn delete_exif_by_library(
&mut self,
context: &opentelemetry::Context,
library_id_val: i32,
rel_path_val: &str,
) -> Result<(), DbError> {
trace_db_call(context, "delete", "delete_exif_by_library", |_span| {
use schema::image_exif::dsl::*;
diesel::delete(
image_exif
.filter(library_id.eq(library_id_val))
.filter(rel_path.eq(rel_path_val)),
)
.execute(self.connection.lock().unwrap().deref_mut())
.map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
#[cfg(test)]
mod exif_dao_tests {
use super::*;
use crate::database::models::InsertLibrary;
use crate::database::test::in_memory_db_connection;
fn ctx() -> opentelemetry::Context {
opentelemetry::Context::new()
}
fn insert_row(dao: &mut SqliteExifDao, lib_id: i32, rel: &str, date: Option<i64>) {
dao.store_exif(
&ctx(),
InsertImageExif {
library_id: lib_id,
file_path: rel.to_string(),
camera_make: None,
camera_model: None,
lens_model: None,
width: None,
height: None,
orientation: None,
gps_latitude: None,
gps_longitude: None,
gps_altitude: None,
focal_length: None,
aperture: None,
shutter_speed: None,
iso: None,
date_taken: date,
created_time: 0,
last_modified: 0,
content_hash: None,
size_bytes: None,
},
)
.expect("insert exif row");
}
fn setup_two_libraries() -> SqliteExifDao {
let mut conn = in_memory_db_connection();
// Migration seeds library id=1 with a placeholder root; add id=2.
diesel::insert_into(schema::libraries::table)
.values(InsertLibrary {
name: "archive",
root_path: "/tmp/archive",
created_at: 0,
})
.execute(&mut conn)
.expect("seed second library");
SqliteExifDao::from_connection(conn)
}
#[test]
fn get_all_with_date_taken_union_returns_all_libraries() {
let mut dao = setup_two_libraries();
insert_row(&mut dao, 1, "main/a.jpg", Some(100));
insert_row(&mut dao, 2, "archive/b.jpg", Some(200));
// Row without a date must be excluded even in union mode.
insert_row(&mut dao, 2, "archive/c.jpg", None);
let mut rows = dao.get_all_with_date_taken(&ctx(), None).unwrap();
rows.sort_by_key(|(_, ts)| *ts);
assert_eq!(
rows,
vec![
("main/a.jpg".to_string(), 100),
("archive/b.jpg".to_string(), 200),
]
);
}
#[test]
fn get_all_with_date_taken_scopes_by_library_id() {
let mut dao = setup_two_libraries();
insert_row(&mut dao, 1, "main/a.jpg", Some(100));
insert_row(&mut dao, 2, "archive/b.jpg", Some(200));
insert_row(&mut dao, 2, "archive/c.jpg", Some(300));
let lib2 = dao.get_all_with_date_taken(&ctx(), Some(2)).unwrap();
let mut paths: Vec<String> = lib2.into_iter().map(|(p, _)| p).collect();
paths.sort();
assert_eq!(paths, vec!["archive/b.jpg", "archive/c.jpg"]);
let lib1 = dao.get_all_with_date_taken(&ctx(), Some(1)).unwrap();
assert_eq!(lib1, vec![("main/a.jpg".to_string(), 100)]);
}
} }

View File

@@ -1,6 +1,6 @@
use crate::database::schema::{ use crate::database::schema::{
entities, entity_facts, entity_photo_links, favorites, image_exif, photo_insights, users, entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, photo_insights,
video_preview_clips, users, video_preview_clips,
}; };
use serde::Serialize; use serde::Serialize;
@@ -23,6 +23,7 @@ pub struct User {
#[diesel(table_name = favorites)] #[diesel(table_name = favorites)]
pub struct InsertFavorite<'a> { pub struct InsertFavorite<'a> {
pub userid: &'a i32, pub userid: &'a i32,
#[diesel(column_name = rel_path)]
pub path: &'a str, pub path: &'a str,
} }
@@ -30,12 +31,15 @@ pub struct InsertFavorite<'a> {
pub struct Favorite { pub struct Favorite {
pub id: i32, pub id: i32,
pub userid: i32, pub userid: i32,
#[diesel(column_name = rel_path)]
pub path: String, pub path: String,
} }
#[derive(Insertable)] #[derive(Insertable)]
#[diesel(table_name = image_exif)] #[diesel(table_name = image_exif)]
pub struct InsertImageExif { pub struct InsertImageExif {
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub camera_make: Option<String>, pub camera_make: Option<String>,
pub camera_model: Option<String>, pub camera_model: Option<String>,
@@ -53,11 +57,16 @@ pub struct InsertImageExif {
pub date_taken: Option<i64>, pub date_taken: Option<i64>,
pub created_time: i64, pub created_time: i64,
pub last_modified: i64, pub last_modified: i64,
pub content_hash: Option<String>,
pub size_bytes: Option<i64>,
} }
// Field order matches the post-migration column order in `image_exif`.
#[derive(Serialize, Queryable, Clone, Debug)] #[derive(Serialize, Queryable, Clone, Debug)]
pub struct ImageExif { pub struct ImageExif {
pub id: i32, pub id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub camera_make: Option<String>, pub camera_make: Option<String>,
pub camera_model: Option<String>, pub camera_model: Option<String>,
@@ -75,11 +84,15 @@ pub struct ImageExif {
pub date_taken: Option<i64>, pub date_taken: Option<i64>,
pub created_time: i64, pub created_time: i64,
pub last_modified: i64, pub last_modified: i64,
pub content_hash: Option<String>,
pub size_bytes: Option<i64>,
} }
#[derive(Insertable)] #[derive(Insertable)]
#[diesel(table_name = photo_insights)] #[diesel(table_name = photo_insights)]
pub struct InsertPhotoInsight { pub struct InsertPhotoInsight {
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub title: String, pub title: String,
pub summary: String, pub summary: String,
@@ -92,6 +105,8 @@ pub struct InsertPhotoInsight {
#[derive(Serialize, Queryable, Clone, Debug)] #[derive(Serialize, Queryable, Clone, Debug)]
pub struct PhotoInsight { pub struct PhotoInsight {
pub id: i32, pub id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub title: String, pub title: String,
pub summary: String, pub summary: String,
@@ -102,6 +117,24 @@ pub struct PhotoInsight {
pub approved: Option<bool>, pub approved: Option<bool>,
} }
// --- Libraries ---
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct LibraryRow {
pub id: i32,
pub name: String,
pub root_path: String,
pub created_at: i64,
}
#[derive(Insertable)]
#[diesel(table_name = libraries)]
pub struct InsertLibrary<'a> {
pub name: &'a str,
pub root_path: &'a str,
pub created_at: i64,
}
// --- Knowledge memory models --- // --- Knowledge memory models ---
#[derive(Insertable)] #[derive(Insertable)]
@@ -162,6 +195,8 @@ pub struct EntityFact {
#[diesel(table_name = entity_photo_links)] #[diesel(table_name = entity_photo_links)]
pub struct InsertEntityPhotoLink { pub struct InsertEntityPhotoLink {
pub entity_id: i32, pub entity_id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub role: String, pub role: String,
} }
@@ -170,6 +205,8 @@ pub struct InsertEntityPhotoLink {
pub struct EntityPhotoLink { pub struct EntityPhotoLink {
pub id: i32, pub id: i32,
pub entity_id: i32, pub entity_id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub role: String, pub role: String,
} }
@@ -177,6 +214,8 @@ pub struct EntityPhotoLink {
#[derive(Insertable)] #[derive(Insertable)]
#[diesel(table_name = video_preview_clips)] #[diesel(table_name = video_preview_clips)]
pub struct InsertVideoPreviewClip { pub struct InsertVideoPreviewClip {
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub status: String, pub status: String,
pub created_at: String, pub created_at: String,
@@ -186,6 +225,8 @@ pub struct InsertVideoPreviewClip {
#[derive(Serialize, Queryable, Clone, Debug)] #[derive(Serialize, Queryable, Clone, Debug)]
pub struct VideoPreviewClip { pub struct VideoPreviewClip {
pub id: i32, pub id: i32,
pub library_id: i32,
#[diesel(column_name = rel_path)]
pub file_path: String, pub file_path: String,
pub status: String, pub status: String,
pub duration_seconds: Option<f32>, pub duration_seconds: Option<f32>,

View File

@@ -1,3 +1,5 @@
#![allow(dead_code)]
use diesel::prelude::*; use diesel::prelude::*;
use diesel::sqlite::SqliteConnection; use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut; use std::ops::DerefMut;
@@ -84,6 +86,7 @@ impl PreviewDao for SqlitePreviewDao {
diesel::insert_or_ignore_into(video_preview_clips) diesel::insert_or_ignore_into(video_preview_clips)
.values(InsertVideoPreviewClip { .values(InsertVideoPreviewClip {
library_id: 1,
file_path: file_path_val.to_string(), file_path: file_path_val.to_string(),
status: status_val.to_string(), status: status_val.to_string(),
created_at: now.clone(), created_at: now.clone(),
@@ -111,7 +114,7 @@ impl PreviewDao for SqlitePreviewDao {
let mut connection = self.connection.lock().expect("Unable to get PreviewDao"); let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
let now = chrono::Utc::now().to_rfc3339(); let now = chrono::Utc::now().to_rfc3339();
diesel::update(video_preview_clips.filter(file_path.eq(file_path_val))) diesel::update(video_preview_clips.filter(rel_path.eq(file_path_val)))
.set(( .set((
status.eq(status_val), status.eq(status_val),
duration_seconds.eq(duration), duration_seconds.eq(duration),
@@ -137,7 +140,7 @@ impl PreviewDao for SqlitePreviewDao {
let mut connection = self.connection.lock().expect("Unable to get PreviewDao"); let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
match video_preview_clips match video_preview_clips
.filter(file_path.eq(file_path_val)) .filter(rel_path.eq(file_path_val))
.first::<VideoPreviewClip>(connection.deref_mut()) .first::<VideoPreviewClip>(connection.deref_mut())
{ {
Ok(clip) => Ok(Some(clip)), Ok(clip) => Ok(Some(clip)),
@@ -163,7 +166,7 @@ impl PreviewDao for SqlitePreviewDao {
let mut connection = self.connection.lock().expect("Unable to get PreviewDao"); let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
video_preview_clips video_preview_clips
.filter(file_path.eq_any(file_paths)) .filter(rel_path.eq_any(file_paths))
.load::<VideoPreviewClip>(connection.deref_mut()) .load::<VideoPreviewClip>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e)) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })

View File

@@ -64,7 +64,8 @@ diesel::table! {
entity_photo_links (id) { entity_photo_links (id) {
id -> Integer, id -> Integer,
entity_id -> Integer, entity_id -> Integer,
file_path -> Text, library_id -> Integer,
rel_path -> Text,
role -> Text, role -> Text,
} }
} }
@@ -73,14 +74,15 @@ diesel::table! {
favorites (id) { favorites (id) {
id -> Integer, id -> Integer,
userid -> Integer, userid -> Integer,
path -> Text, rel_path -> Text,
} }
} }
diesel::table! { diesel::table! {
image_exif (id) { image_exif (id) {
id -> Integer, id -> Integer,
file_path -> Text, library_id -> Integer,
rel_path -> Text,
camera_make -> Nullable<Text>, camera_make -> Nullable<Text>,
camera_model -> Nullable<Text>, camera_model -> Nullable<Text>,
lens_model -> Nullable<Text>, lens_model -> Nullable<Text>,
@@ -97,18 +99,17 @@ diesel::table! {
date_taken -> Nullable<BigInt>, date_taken -> Nullable<BigInt>,
created_time -> BigInt, created_time -> BigInt,
last_modified -> BigInt, last_modified -> BigInt,
content_hash -> Nullable<Text>,
size_bytes -> Nullable<BigInt>,
} }
} }
diesel::table! { diesel::table! {
knowledge_embeddings (id) { libraries (id) {
id -> Integer, id -> Integer,
keyword -> Text, name -> Text,
description -> Text, root_path -> Text,
category -> Nullable<Text>,
embedding -> Binary,
created_at -> BigInt, created_at -> BigInt,
model_version -> Text,
} }
} }
@@ -129,23 +130,11 @@ diesel::table! {
} }
} }
diesel::table! {
message_embeddings (id) {
id -> Integer,
contact -> Text,
body -> Text,
timestamp -> BigInt,
is_sent -> Bool,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! { diesel::table! {
photo_insights (id) { photo_insights (id) {
id -> Integer, id -> Integer,
file_path -> Text, library_id -> Integer,
rel_path -> Text,
title -> Text, title -> Text,
summary -> Text, summary -> Text,
generated_at -> BigInt, generated_at -> BigInt,
@@ -171,7 +160,7 @@ diesel::table! {
diesel::table! { diesel::table! {
tagged_photo (id) { tagged_photo (id) {
id -> Integer, id -> Integer,
photo_name -> Text, rel_path -> Text,
tag_id -> Integer, tag_id -> Integer,
created_time -> BigInt, created_time -> BigInt,
} }
@@ -196,7 +185,8 @@ diesel::table! {
diesel::table! { diesel::table! {
video_preview_clips (id) { video_preview_clips (id) {
id -> Integer, id -> Integer,
file_path -> Text, library_id -> Integer,
rel_path -> Text,
status -> Text, status -> Text,
duration_seconds -> Nullable<Float>, duration_seconds -> Nullable<Float>,
file_size_bytes -> Nullable<Integer>, file_size_bytes -> Nullable<Integer>,
@@ -208,7 +198,11 @@ diesel::table! {
diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> entities (entity_id));
diesel::joinable!(entity_photo_links -> libraries (library_id));
diesel::joinable!(image_exif -> libraries (library_id));
diesel::joinable!(photo_insights -> libraries (library_id));
diesel::joinable!(tagged_photo -> tags (tag_id)); diesel::joinable!(tagged_photo -> tags (tag_id));
diesel::joinable!(video_preview_clips -> libraries (library_id));
diesel::allow_tables_to_appear_in_same_query!( diesel::allow_tables_to_appear_in_same_query!(
calendar_events, calendar_events,
@@ -218,9 +212,8 @@ diesel::allow_tables_to_appear_in_same_query!(
entity_photo_links, entity_photo_links,
favorites, favorites,
image_exif, image_exif,
knowledge_embeddings, libraries,
location_history, location_history,
message_embeddings,
photo_insights, photo_insights,
search_history, search_history,
tagged_photo, tagged_photo,

View File

@@ -1,3 +1,5 @@
#![allow(dead_code)]
use diesel::prelude::*; use diesel::prelude::*;
use diesel::sqlite::SqliteConnection; use diesel::sqlite::SqliteConnection;
use serde::Serialize; use serde::Serialize;

File diff suppressed because it is too large

View File

@@ -1,9 +1,12 @@
#![allow(clippy::too_many_arguments)]
#[macro_use] #[macro_use]
extern crate diesel; extern crate diesel;
pub mod ai; pub mod ai;
pub mod auth; pub mod auth;
pub mod cleanup; pub mod cleanup;
pub mod content_hash;
pub mod data; pub mod data;
pub mod database; pub mod database;
pub mod error; pub mod error;
@@ -11,6 +14,7 @@ pub mod exif;
pub mod file_types; pub mod file_types;
pub mod files; pub mod files;
pub mod geo; pub mod geo;
pub mod libraries;
pub mod memories; pub mod memories;
pub mod otel; pub mod otel;
pub mod parsers; pub mod parsers;
@@ -32,7 +36,7 @@ pub use state::AppState;
use std::path::Path; use std::path::Path;
use walkdir::DirEntry; use walkdir::DirEntry;
pub fn create_thumbnails() { pub fn create_thumbnails(_libs: &[libraries::Library]) {
// Stub - implemented in main.rs // Stub - implemented in main.rs
} }

282
src/libraries.rs Normal file
View File

@@ -0,0 +1,282 @@
use actix_web::{HttpResponse, Responder, get, web::Data};
use chrono::Utc;
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use log::{info, warn};
use std::path::{Path, PathBuf};
use crate::data::Claims;
use crate::database::models::{InsertLibrary, LibraryRow};
use crate::database::schema::libraries;
use crate::state::AppState;
/// Id of the primary library row seeded by the multi-library migration.
/// Used as the default `library_id` during the Phase 2 transitional shim,
/// before handlers/callers are library-aware.
pub const PRIMARY_LIBRARY_ID: i32 = 1;
/// Placeholder value written into `libraries.root_path` by the migration.
/// Replaced on startup with the live `BASE_PATH` env var.
pub const ROOT_PATH_PLACEHOLDER: &str = "BASE_PATH_PLACEHOLDER";
/// A media library mount point: its numeric id, logical name, and absolute
/// root on disk. `rel_path` values stored in the DB are relative to this root.
#[derive(Clone, Debug, serde::Serialize)]
pub struct Library {
pub id: i32,
pub name: String,
pub root_path: String,
}
impl Library {
/// Resolve a library-relative path into an absolute `PathBuf` under the
/// library root. Does not validate traversal — use `is_valid_full_path`
/// for untrusted input.
#[allow(dead_code)]
pub fn resolve(&self, rel_path: &str) -> PathBuf {
Path::new(&self.root_path).join(rel_path)
}
/// Inverse of `resolve`: given an absolute path under this library's
/// root, return the root-relative portion. Returns `None` if the path
/// is not under the library.
#[allow(dead_code)]
pub fn strip_root(&self, abs_path: &Path) -> Option<String> {
abs_path
.strip_prefix(&self.root_path)
.ok()
.map(|p| p.to_string_lossy().replace('\\', "/"))
}
}
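`resolve` and `strip_root` are intended as inverses for paths under the root; a quick property sketch:

let lib = Library { id: 1, name: "main".into(), root_path: "/tmp/media".into() };
assert_eq!(
    lib.strip_root(&lib.resolve("2024/photo.jpg")).as_deref(),
    Some("2024/photo.jpg")
);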
impl From<LibraryRow> for Library {
fn from(row: LibraryRow) -> Self {
Library {
id: row.id,
name: row.name,
root_path: row.root_path,
}
}
}
/// Load all library rows from the database into `Library` values.
pub fn load_all(conn: &mut SqliteConnection) -> Vec<Library> {
libraries::table
.order(libraries::id.asc())
.load::<LibraryRow>(conn)
.unwrap_or_else(|e| {
warn!("Failed to load libraries table: {:?}", e);
Vec::new()
})
.into_iter()
.map(Library::from)
.collect()
}
/// Ensure at least one library exists and that the seeded placeholder row is
/// patched with the live `BASE_PATH`. Safe to call on every startup; it only
/// writes when the placeholder is still present.
pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) {
// Check whether the primary row still carries the placeholder from the
// migration. If so, replace it with the live BASE_PATH.
let placeholder_count: i64 = libraries::table
.filter(libraries::root_path.eq(ROOT_PATH_PLACEHOLDER))
.count()
.get_result(conn)
.unwrap_or(0);
if placeholder_count > 0 {
diesel::update(libraries::table.filter(libraries::root_path.eq(ROOT_PATH_PLACEHOLDER)))
.set(libraries::root_path.eq(base_path))
.execute(conn)
.map(|rows| {
info!(
"Patched {} library row(s) with BASE_PATH='{}'",
rows, base_path
);
})
.unwrap_or_else(|e| warn!("Failed to patch library root_path: {:?}", e));
return;
}
// If no rows exist at all (e.g. table created outside the seeded migration),
// insert a primary library pointing at BASE_PATH.
let total: i64 = libraries::table.count().get_result(conn).unwrap_or(0);
if total == 0 {
let now = Utc::now().timestamp();
let result = diesel::insert_into(libraries::table)
.values(InsertLibrary {
name: "main",
root_path: base_path,
created_at: now,
})
.execute(conn);
match result {
Ok(_) => info!(
"Seeded primary library 'main' with BASE_PATH='{}'",
base_path
),
Err(e) => warn!("Failed to seed primary library: {:?}", e),
}
}
}
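A minimal sketch of the intended boot order (the actual wiring lives in main.rs outside the hunks shown here; `connect()` is the crate's existing connection helper, and reading BASE_PATH via `env::var` is an assumption):

let base_path = std::env::var("BASE_PATH").expect("BASE_PATH must be set");
let mut conn = connect();
seed_or_patch_from_env(&mut conn, &base_path);
let libs = load_all(&mut conn); // stored on AppState.libraries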
/// Resolve a library request parameter (accepts numeric id as string or name)
/// against the configured libraries. Returns `Ok(None)` when the param is
/// absent, meaning "span all libraries". Returns `Err` when a value is
/// provided but does not match any library.
pub fn resolve_library_param<'a>(
state: &'a AppState,
param: Option<&str>,
) -> Result<Option<&'a Library>, String> {
let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else {
return Ok(None);
};
if let Ok(id) = raw.parse::<i32>() {
return state
.library_by_id(id)
.map(Some)
.ok_or_else(|| format!("unknown library id: {}", id));
}
state
.library_by_name(raw)
.map(Some)
.ok_or_else(|| format!("unknown library name: {}", raw))
}
#[derive(serde::Serialize)]
pub struct LibrariesResponse {
pub libraries: Vec<Library>,
}
#[get("/libraries")]
pub async fn list_libraries(_claims: Claims, app_state: Data<AppState>) -> impl Responder {
HttpResponse::Ok().json(LibrariesResponse {
libraries: app_state.libraries.clone(),
})
}
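For reference, the handler serializes the configured libraries verbatim; with only the patched primary row present, the body would look roughly like {"libraries":[{"id":1,"name":"main","root_path":"/srv/media"}]} (values illustrative; created_at is not exposed because `Library` omits it).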
#[cfg(test)]
mod tests {
use super::*;
use crate::database::test::in_memory_db_connection;
#[test]
fn seed_patches_placeholder() {
let mut conn = in_memory_db_connection();
// Migration seeds one row with the placeholder.
seed_or_patch_from_env(&mut conn, "/tmp/media");
let libs = load_all(&mut conn);
assert_eq!(libs.len(), 1);
assert_eq!(libs[0].id, 1);
assert_eq!(libs[0].name, "main");
assert_eq!(libs[0].root_path, "/tmp/media");
}
#[test]
fn seed_is_idempotent() {
let mut conn = in_memory_db_connection();
seed_or_patch_from_env(&mut conn, "/tmp/media");
seed_or_patch_from_env(&mut conn, "/tmp/other");
// Second call should not overwrite an already-patched row.
let libs = load_all(&mut conn);
assert_eq!(libs.len(), 1);
assert_eq!(libs[0].root_path, "/tmp/media");
}
#[test]
fn library_strip_root() {
let lib = Library {
id: 1,
name: "main".into(),
root_path: "/tmp/media".into(),
};
let rel = lib.strip_root(Path::new("/tmp/media/2024/photo.jpg"));
assert_eq!(rel.as_deref(), Some("2024/photo.jpg"));
let outside = lib.strip_root(Path::new("/etc/passwd"));
assert!(outside.is_none());
}
#[test]
fn library_resolve_joins_under_root() {
let lib = Library {
id: 1,
name: "main".into(),
root_path: "/tmp/media".into(),
};
let abs = lib.resolve("2024/photo.jpg");
assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg"));
}
fn state_with_libraries(libs: Vec<Library>) -> AppState {
let mut state = AppState::test_state();
state.libraries = libs;
state
}
fn sample_libraries() -> Vec<Library> {
vec![
Library {
id: 1,
name: "main".into(),
root_path: "/tmp/main".into(),
},
Library {
id: 7,
name: "archive".into(),
root_path: "/tmp/archive".into(),
},
]
}
#[actix_rt::test]
async fn resolve_library_param_absent_is_union() {
let state = state_with_libraries(sample_libraries());
assert!(matches!(resolve_library_param(&state, None), Ok(None)));
}
#[actix_rt::test]
async fn resolve_library_param_empty_or_whitespace_is_union() {
let state = state_with_libraries(sample_libraries());
assert!(matches!(resolve_library_param(&state, Some("")), Ok(None)));
assert!(matches!(
resolve_library_param(&state, Some(" ")),
Ok(None)
));
}
#[actix_rt::test]
async fn resolve_library_param_numeric_id_matches() {
let state = state_with_libraries(sample_libraries());
let lib = resolve_library_param(&state, Some("7"))
.expect("valid id")
.expect("some library");
assert_eq!(lib.id, 7);
assert_eq!(lib.name, "archive");
}
#[actix_rt::test]
async fn resolve_library_param_name_matches() {
let state = state_with_libraries(sample_libraries());
let lib = resolve_library_param(&state, Some("main"))
.expect("valid name")
.expect("some library");
assert_eq!(lib.id, 1);
}
#[actix_rt::test]
async fn resolve_library_param_unknown_id_errs() {
let state = state_with_libraries(sample_libraries());
let err = resolve_library_param(&state, Some("999")).unwrap_err();
assert!(err.contains("unknown library id"));
}
#[actix_rt::test]
async fn resolve_library_param_unknown_name_errs() {
let state = state_with_libraries(sample_libraries());
let err = resolve_library_param(&state, Some("missing")).unwrap_err();
assert!(err.contains("unknown library name"));
}
}

View File

@@ -1,3 +1,5 @@
#![allow(clippy::too_many_arguments)]
#[macro_use] #[macro_use]
extern crate diesel; extern crate diesel;
extern crate rayon; extern crate rayon;
@@ -12,7 +14,10 @@ use prometheus::{self, IntGauge};
use std::error::Error; use std::error::Error;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::{Duration, SystemTime}; use std::time::{Duration, SystemTime};
use std::{collections::HashMap, io::prelude::*}; use std::{
collections::{HashMap, HashSet},
io::prelude::*,
};
use std::{env, fs::File}; use std::{env, fs::File};
use std::{ use std::{
io::ErrorKind, io::ErrorKind,
@@ -55,6 +60,7 @@ use opentelemetry::{KeyValue, global};
mod ai; mod ai;
mod auth; mod auth;
mod content_hash;
mod data; mod data;
mod database; mod database;
mod error; mod error;
@@ -62,6 +68,7 @@ mod exif;
mod file_types; mod file_types;
mod files; mod files;
mod geo; mod geo;
mod libraries;
mod state; mod state;
mod tags; mod tags;
mod utils; mod utils;
@@ -95,27 +102,86 @@ async fn get_image(
request: HttpRequest, request: HttpRequest,
req: web::Query<ThumbnailRequest>, req: web::Query<ThumbnailRequest>,
app_state: Data<AppState>, app_state: Data<AppState>,
exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
) -> impl Responder { ) -> impl Responder {
let tracer = global_tracer(); let tracer = global_tracer();
let context = extract_context_from_request(&request); let context = extract_context_from_request(&request);
let mut span = tracer.start_with_context("get_image", &context); let mut span = tracer.start_with_context("get_image", &context);
if let Some(path) = is_valid_full_path(&app_state.base_path, &req.path, false) { // Resolve library from query param; default to primary so clients that
// don't yet send `library=` continue to work.
let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
Ok(Some(lib)) => lib,
Ok(None) => app_state.primary_library(),
Err(msg) => {
span.set_status(Status::error(msg.clone()));
return HttpResponse::BadRequest().body(msg);
}
};
// Union-mode search returns flat rel_paths with no library attribution,
// so clients may request a file under the wrong library. Try the
// resolved library first; if the file isn't there, fall back to any
// other library holding that rel_path on disk.
let resolved = is_valid_full_path(&library.root_path, &req.path, false)
.filter(|p| p.exists())
.map(|p| (library, p))
.or_else(|| {
app_state.libraries.iter().find_map(|lib| {
if lib.id == library.id {
return None;
}
is_valid_full_path(&lib.root_path, &req.path, false)
.filter(|p| p.exists())
.map(|p| (lib, p))
})
});
if let Some((library, path)) = resolved {
let image_size = req.size.unwrap_or(PhotoSize::Full); let image_size = req.size.unwrap_or(PhotoSize::Full);
if image_size == PhotoSize::Thumb { if image_size == PhotoSize::Thumb {
let relative_path = path let relative_path = path
.strip_prefix(&app_state.base_path) .strip_prefix(&library.root_path)
.expect("Error stripping base path prefix from thumbnail"); .expect("Error stripping library root prefix from thumbnail");
let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
let thumbs = &app_state.thumbnail_path; let thumbs = &app_state.thumbnail_path;
let mut thumb_path = Path::new(&thumbs).join(relative_path); let legacy_thumb_path = Path::new(&thumbs).join(relative_path);
// If it's a video and GIF format is requested, try to serve GIF thumbnail // Gif thumbnails are a separate lookup (video GIF previews).
// Dual-lookup for GIF is out of scope; preserve existing flow.
if req.format == Some(ThumbnailFormat::Gif) && is_video_file(&path) { if req.format == Some(ThumbnailFormat::Gif) && is_video_file(&path) {
thumb_path = Path::new(&app_state.gif_path).join(relative_path); let mut gif_path = Path::new(&app_state.gif_path).join(relative_path);
thumb_path.set_extension("gif"); gif_path.set_extension("gif");
trace!("Gif thumbnail path: {:?}", gif_path);
if let Ok(file) = NamedFile::open(&gif_path) {
span.set_status(Status::Ok);
return file
.use_etag(true)
.use_last_modified(true)
.prefer_utf8(true)
.into_response(&request);
} }
}
// Resolve the hash-keyed thumbnail (if the row already has a
// content_hash) and fall back to the legacy mirrored path.
let hash_thumb_path: Option<PathBuf> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
match dao.get_exif(&context, &relative_path_str) {
Ok(Some(row)) => row
.content_hash
.as_deref()
.map(|h| content_hash::thumbnail_path(Path::new(thumbs), h)),
_ => None,
}
};
let thumb_path = hash_thumb_path
.as_ref()
.filter(|p| p.exists())
.cloned()
.unwrap_or_else(|| legacy_thumb_path.clone());
// Handle circular thumbnail request // Handle circular thumbnail request
if req.shape == Some(ThumbnailShape::Circle) { if req.shape == Some(ThumbnailShape::Circle) {
@@ -140,8 +206,6 @@ async fn get_image(
trace!("Thumbnail path: {:?}", thumb_path); trace!("Thumbnail path: {:?}", thumb_path);
if let Ok(file) = NamedFile::open(&thumb_path) { if let Ok(file) = NamedFile::open(&thumb_path) {
span.set_status(Status::Ok); span.set_status(Status::Ok);
// The NamedFile will automatically set the correct content-type
// Enable ETag and set cache headers for thumbnails (1 day cache)
return file return file
.use_etag(true) .use_etag(true)
.use_last_modified(true) .use_last_modified(true)
@@ -163,9 +227,9 @@ async fn get_image(
span.set_status(Status::error("Not found")); span.set_status(Status::error("Not found"));
HttpResponse::NotFound().finish() HttpResponse::NotFound().finish()
} else { } else {
span.set_status(Status::error("Bad photos request")); span.set_status(Status::error("Not found"));
error!("Bad photos request: {}", req.path); error!("Path does not exist in any library: {}", req.path);
HttpResponse::BadRequest().finish() HttpResponse::NotFound().finish()
} }
} }
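The resolve-then-fall-back pattern above reappears in `get_file_metadata` below; its shared shape, written as a hypothetical helper that is not part of this diff:

fn resolve_in_any_library<'a>(
    state: &'a AppState,
    preferred: &'a libraries::Library,
    rel: &str,
) -> Option<(&'a libraries::Library, std::path::PathBuf)> {
    // Preferred library first, then every other configured root.
    std::iter::once(preferred)
        .chain(state.libraries.iter().filter(|l| l.id != preferred.id))
        .find_map(|lib| {
            is_valid_full_path(&lib.root_path, &rel.to_string(), false)
                .filter(|p| p.exists())
                .map(|p| (lib, p))
        })
}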
@@ -250,15 +314,38 @@ async fn get_file_metadata(
let span_context = let span_context =
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
let full_path = is_valid_full_path(&app_state.base_path, &path.path, false); let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
.ok()
.flatten()
.unwrap_or_else(|| app_state.primary_library());
match full_path // Fall back to other libraries if the file isn't under the resolved one,
// matching the `/image` handler so union-mode search results resolve.
let resolved = is_valid_full_path(&library.root_path, &path.path, false)
.filter(|p| p.exists())
.map(|p| (library, p))
.or_else(|| {
app_state.libraries.iter().find_map(|lib| {
if lib.id == library.id {
return None;
}
is_valid_full_path(&lib.root_path, &path.path, false)
.filter(|p| p.exists())
.map(|p| (lib, p))
})
});
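Reviewer note: the same try-the-resolved-library-then-scan-the-rest pattern recurs in `generate_video` further down. A hypothetical consolidation the two call sites could share; `resolve_in_any_library` is not in this PR, and the `is_valid_full_path` signature is inferred from its uses in this diff:

use std::path::PathBuf;

// Hypothetical helper, not part of the PR: preferred library first,
// then every other configured library whose root contains the file.
fn resolve_in_any_library<'a>(
    app_state: &'a AppState,
    preferred: &'a libraries::Library,
    rel_path: &str,
) -> Option<(&'a libraries::Library, PathBuf)> {
    std::iter::once(preferred)
        .chain(app_state.libraries.iter().filter(|l| l.id != preferred.id))
        .find_map(|lib| {
            is_valid_full_path(&lib.root_path, rel_path, false)
                .filter(|p| p.exists())
                .map(|p| (lib, p))
        })
}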
match resolved
.ok_or_else(|| ErrorKind::InvalidData.into()) .ok_or_else(|| ErrorKind::InvalidData.into())
.and_then(File::open) .and_then(|(lib, full_path)| {
File::open(&full_path)
.and_then(|file| file.metadata()) .and_then(|file| file.metadata())
{ .map(|metadata| (lib, metadata))
Ok(metadata) => { }) {
Ok((resolved_library, metadata)) => {
let mut response: MetadataResponse = metadata.into(); let mut response: MetadataResponse = metadata.into();
response.library_id = Some(resolved_library.id);
response.library_name = Some(resolved_library.name.clone());
// Extract date from filename if possible // Extract date from filename if possible
response.filename_date = response.filename_date =
@@ -289,10 +376,16 @@ async fn get_file_metadata(
} }
} }
#[derive(serde::Deserialize)]
struct UploadQuery {
library: Option<String>,
}
#[post("/image")] #[post("/image")]
async fn upload_image( async fn upload_image(
_: Claims, _: Claims,
request: HttpRequest, request: HttpRequest,
query: web::Query<UploadQuery>,
mut payload: mp::Multipart, mut payload: mp::Multipart,
app_state: Data<AppState>, app_state: Data<AppState>,
exif_dao: Data<Mutex<Box<dyn ExifDao>>>, exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
@@ -303,6 +396,18 @@ async fn upload_image(
let span_context = let span_context =
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
// Resolve the optional library selector. Absent → primary library
// (backwards-compatible with clients that don't yet send `library=`).
let target_library =
match libraries::resolve_library_param(&app_state, query.library.as_deref()) {
Ok(Some(lib)) => lib,
Ok(None) => app_state.primary_library(),
Err(msg) => {
span.set_status(Status::error(msg.clone()));
return HttpResponse::BadRequest().body(msg);
}
};
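`resolve_library_param` itself is outside this hunk; elsewhere the PR documents its contract: accept a library id (e.g. "1") or name (e.g. "main"), treat an absent value as "no filter", and surface unknown values as an error the caller maps to a 400. A sketch consistent with that contract, built on the `library_by_id`/`library_by_name` helpers added to `AppState` in this PR (the real body may differ):

// Sketch matching the documented contract, not the PR's actual body.
pub fn resolve_library_param<'a>(
    app_state: &'a AppState,
    param: Option<&str>,
) -> Result<Option<&'a Library>, String> {
    let Some(raw) = param else { return Ok(None) };
    raw.parse::<i32>()
        .ok()
        .and_then(|id| app_state.library_by_id(id))
        .or_else(|| app_state.library_by_name(raw))
        .map(Some)
        .ok_or_else(|| format!("Unknown library selector: {raw}"))
}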
let mut file_content: BytesMut = BytesMut::new(); let mut file_content: BytesMut = BytesMut::new();
let mut file_name: Option<String> = None; let mut file_name: Option<String> = None;
let mut file_path: Option<String> = None; let mut file_path: Option<String> = None;
@@ -332,7 +437,7 @@ async fn upload_image(
} }
} }
let path = file_path.unwrap_or_else(|| app_state.base_path.clone()); let path = file_path.unwrap_or_else(|| target_library.root_path.clone());
if !file_content.is_empty() { if !file_content.is_empty() {
if file_name.is_none() { if file_name.is_none() {
span.set_status(Status::error("No filename provided")); span.set_status(Status::error("No filename provided"));
@@ -340,7 +445,7 @@ async fn upload_image(
} }
let full_path = PathBuf::from(&path).join(file_name.unwrap()); let full_path = PathBuf::from(&path).join(file_name.unwrap());
if let Some(full_path) = is_valid_full_path( if let Some(full_path) = is_valid_full_path(
&app_state.base_path, &target_library.root_path,
&full_path.to_str().unwrap().to_string(), &full_path.to_str().unwrap().to_string(),
true, true,
) { ) {
@@ -381,16 +486,29 @@ async fn upload_image(
// Extract and store EXIF data if file supports it // Extract and store EXIF data if file supports it
if exif::supports_exif(&uploaded_path) { if exif::supports_exif(&uploaded_path) {
let relative_path = uploaded_path let relative_path = uploaded_path
.strip_prefix(&app_state.base_path) .strip_prefix(&target_library.root_path)
.expect("Error stripping base path prefix") .expect("Error stripping library root prefix")
.to_str() .to_str()
.unwrap() .unwrap()
.to_string(); .replace('\\', "/");
match exif::extract_exif_from_path(&uploaded_path) { match exif::extract_exif_from_path(&uploaded_path) {
Ok(exif_data) => { Ok(exif_data) => {
let timestamp = Utc::now().timestamp(); let timestamp = Utc::now().timestamp();
let (content_hash, size_bytes) = match content_hash::compute(&uploaded_path)
{
Ok(id) => (Some(id.content_hash), Some(id.size_bytes)),
Err(e) => {
warn!(
"Failed to hash uploaded {}: {:?}",
uploaded_path.display(),
e
);
(None, None)
}
};
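`content_hash::compute` is referenced here and again in the watcher below, but its body lives elsewhere. A sketch of the shape the call sites imply: a streaming digest plus a byte count, returned as one struct whose fields the callers destructure. BLAKE3 and the struct name are assumptions:

use std::fs::File;
use std::io::{self, Read};
use std::path::Path;

pub struct FileContentId {
    pub content_hash: String,
    pub size_bytes: i64,
}

// Sketch only; the hash algorithm (BLAKE3) and type name are assumed.
pub fn compute(path: &Path) -> io::Result<FileContentId> {
    let mut file = File::open(path)?;
    let mut hasher = blake3::Hasher::new();
    let mut buf = [0u8; 64 * 1024];
    let mut size_bytes = 0i64;
    loop {
        let read = file.read(&mut buf)?;
        if read == 0 {
            break;
        }
        size_bytes += read as i64;
        hasher.update(&buf[..read]);
    }
    Ok(FileContentId {
        content_hash: hasher.finalize().to_hex().to_string(),
        size_bytes,
    })
}

Hashing the bytes rather than trusting path or mtime is what lets two libraries holding the same file share derivatives (thumbnails, tags) later in this PR.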
let insert_exif = InsertImageExif { let insert_exif = InsertImageExif {
library_id: target_library.id,
file_path: relative_path.clone(), file_path: relative_path.clone(),
camera_make: exif_data.camera_make, camera_make: exif_data.camera_make,
camera_model: exif_data.camera_model, camera_model: exif_data.camera_model,
@@ -408,6 +526,8 @@ async fn upload_image(
date_taken: exif_data.date_taken, date_taken: exif_data.date_taken,
created_time: timestamp, created_time: timestamp,
last_modified: timestamp, last_modified: timestamp,
content_hash,
size_bytes,
}; };
if let Ok(mut dao) = exif_dao.lock() { if let Ok(mut dao) = exif_dao.lock() {
@@ -460,7 +580,28 @@ async fn generate_video(
if let Some(name) = filename.file_name() { if let Some(name) = filename.file_name() {
let filename = name.to_str().expect("Filename should convert to string"); let filename = name.to_str().expect("Filename should convert to string");
let playlist = format!("{}/{}.m3u8", app_state.video_path, filename); let playlist = format!("{}/{}.m3u8", app_state.video_path, filename);
if let Some(path) = is_valid_full_path(&app_state.base_path, &body.path, false) {
let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
.ok()
.flatten()
.unwrap_or_else(|| app_state.primary_library());
// Try the resolved library first, then fall back to any other library
// that actually contains the file — handles union-mode requests where
// the mobile client passes no library but the file lives in a
// non-primary library.
let resolved = is_valid_full_path(&library.root_path, &body.path, false)
.filter(|p| p.exists())
.or_else(|| {
app_state.libraries.iter().find_map(|lib| {
if lib.id == library.id {
return None;
}
is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists())
})
});
if let Some(path) = resolved {
if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await { if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await {
span.add_event( span.add_event(
"playlist_created".to_string(), "playlist_created".to_string(),
@@ -832,9 +973,12 @@ async fn favorites(
.collect::<Vec<String>>(); .collect::<Vec<String>>();
span.set_status(Status::Ok); span.set_status(Status::Ok);
// Favorites are library-agnostic (shared by rel_path), so we
// intentionally leave photo_libraries empty to signal "no badge".
HttpResponse::Ok().json(PhotosResponse { HttpResponse::Ok().json(PhotosResponse {
photos: favorites, photos: favorites,
dirs: Vec::new(), dirs: Vec::new(),
photo_libraries: Vec::new(),
total_count: None, total_count: None,
has_more: None, has_more: None,
next_offset: None, next_offset: None,
@@ -916,14 +1060,19 @@ async fn delete_favorite(
} }
} }
fn create_thumbnails() { fn create_thumbnails(libs: &[libraries::Library]) {
let tracer = global_tracer(); let tracer = global_tracer();
let span = tracer.start("creating thumbnails"); let span = tracer.start("creating thumbnails");
let thumbs = &dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined"); let thumbs = &dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined");
let thumbnail_directory: &Path = Path::new(thumbs); let thumbnail_directory: &Path = Path::new(thumbs);
let images = PathBuf::from(dotenv::var("BASE_PATH").unwrap()); for lib in libs {
info!(
"Scanning thumbnails for library '{}' at {}",
lib.name, lib.root_path
);
let images = PathBuf::from(&lib.root_path);
WalkDir::new(&images) WalkDir::new(&images)
.into_iter() .into_iter()
@@ -950,6 +1099,7 @@ fn create_thumbnails() {
video_span.set_attributes(vec![ video_span.set_attributes(vec![
KeyValue::new("type", "video"), KeyValue::new("type", "video"),
KeyValue::new("file-name", thumb_path.display().to_string()), KeyValue::new("file-name", thumb_path.display().to_string()),
KeyValue::new("library", lib.name.clone()),
]); ]);
debug!("Generating video thumbnail: {:?}", thumb_path); debug!("Generating video thumbnail: {:?}", thumb_path);
@@ -984,10 +1134,13 @@ fn create_thumbnails() {
image.save(thumb_path).expect("Failure saving thumbnail"); image.save(thumb_path).expect("Failure saving thumbnail");
}) })
.for_each(drop); .for_each(drop);
}
debug!("Finished making thumbnails"); debug!("Finished making thumbnails");
update_media_counts(&images); for lib in libs {
update_media_counts(Path::new(&lib.root_path));
}
} }
fn update_media_counts(media_dir: &Path) { fn update_media_counts(media_dir: &Path) {
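`update_media_counts` is now invoked once per library root, but its body is not part of this diff. For orientation only, a sketch of the sweep such a function could perform; the real implementation updates a metric not shown here, and the log line below is a stand-in:

// Orientation sketch only; not the PR's implementation.
fn update_media_counts_sketch(media_dir: &Path) {
    let media_files = WalkDir::new(media_dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| is_image(entry) || is_video(entry))
        .count();
    info!("{} media files under {}", media_files, media_dir.display());
}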
@@ -1035,11 +1188,22 @@ fn main() -> std::io::Result<()> {
otel::init_tracing(); otel::init_tracing();
} }
create_thumbnails(); // AppState construction loads (and seeds if needed) the libraries
// generate_video_gifs().await; // table; we use that list to drive the initial thumbnail sweep.
let app_data = Data::new(AppState::default()); let app_data = Data::new(AppState::default());
// Kick thumbnail generation onto a background thread so the HTTP
// server can accept traffic while large libraries are backfilling.
// Existing thumbs are re-used (exists() check inside the walk),
// so missed files are filled in over successive scans.
{
let libs = app_data.libraries.clone();
std::thread::spawn(move || {
create_thumbnails(&libs);
});
}
// generate_video_gifs().await;
let labels = HashMap::new(); let labels = HashMap::new();
let prometheus = PrometheusMetricsBuilder::new("api") let prometheus = PrometheusMetricsBuilder::new("api")
.const_labels(labels) .const_labels(labels)
@@ -1056,14 +1220,20 @@ fn main() -> std::io::Result<()> {
.unwrap(); .unwrap();
let app_state = app_data.clone(); let app_state = app_data.clone();
for lib in &app_state.libraries {
app_state.playlist_manager.do_send(ScanDirectoryMessage { app_state.playlist_manager.do_send(ScanDirectoryMessage {
directory: app_state.base_path.clone(), directory: lib.root_path.clone(),
}); });
}
// Start file watcher with playlist manager and preview generator // Start file watcher with playlist manager and preview generator
let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone(); let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone();
let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone(); let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone();
watch_files(playlist_mgr_for_watcher, preview_gen_for_watcher); watch_files(
app_state.libraries.clone(),
playlist_mgr_for_watcher,
preview_gen_for_watcher,
);
// Start orphaned playlist cleanup job // Start orphaned playlist cleanup job
cleanup_orphaned_playlists(); cleanup_orphaned_playlists();
@@ -1187,6 +1357,7 @@ fn main() -> std::io::Result<()> {
.service(ai::get_available_models_handler) .service(ai::get_available_models_handler)
.service(ai::rate_insight_handler) .service(ai::rate_insight_handler)
.service(ai::export_training_data_handler) .service(ai::export_training_data_handler)
.service(libraries::list_libraries)
.add_feature(add_tag_services::<_, SqliteTagDao>) .add_feature(add_tag_services::<_, SqliteTagDao>)
.add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>) .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>)
.app_data(app_data.clone()) .app_data(app_data.clone())
@@ -1371,13 +1542,11 @@ fn cleanup_orphaned_playlists() {
} }
fn watch_files( fn watch_files(
libs: Vec<libraries::Library>,
playlist_manager: Addr<VideoPlaylistManager>, playlist_manager: Addr<VideoPlaylistManager>,
preview_generator: Addr<video::actors::PreviewClipGenerator>, preview_generator: Addr<video::actors::PreviewClipGenerator>,
) { ) {
std::thread::spawn(move || { std::thread::spawn(move || {
let base_str = dotenv::var("BASE_PATH").unwrap();
let base_path = PathBuf::from(&base_str);
// Get polling intervals from environment variables // Get polling intervals from environment variables
// Quick scan: Check recently modified files (default: 60 seconds) // Quick scan: Check recently modified files (default: 60 seconds)
let quick_interval_secs = dotenv::var("WATCH_QUICK_INTERVAL_SECONDS") let quick_interval_secs = dotenv::var("WATCH_QUICK_INTERVAL_SECONDS")
@@ -1394,7 +1563,12 @@ fn watch_files(
info!("Starting optimized file watcher"); info!("Starting optimized file watcher");
info!(" Quick scan interval: {} seconds", quick_interval_secs); info!(" Quick scan interval: {} seconds", quick_interval_secs);
info!(" Full scan interval: {} seconds", full_interval_secs); info!(" Full scan interval: {} seconds", full_interval_secs);
info!(" Watching directory: {}", base_str); for lib in &libs {
info!(
" Watching library '{}' (id={}) at {}",
lib.name, lib.id, lib.root_path
);
}
// Create DAOs for tracking processed files // Create DAOs for tracking processed files
let exif_dao = Arc::new(Mutex::new( let exif_dao = Arc::new(Mutex::new(
@@ -1418,28 +1592,31 @@ fn watch_files(
let is_full_scan = since_last_full.as_secs() >= full_interval_secs; let is_full_scan = since_last_full.as_secs() >= full_interval_secs;
for lib in &libs {
if is_full_scan { if is_full_scan {
info!("Running full scan (scan #{})", scan_count); info!(
"Running full scan for library '{}' (scan #{})",
lib.name, scan_count
);
process_new_files( process_new_files(
&base_path, lib,
Arc::clone(&exif_dao), Arc::clone(&exif_dao),
Arc::clone(&preview_dao), Arc::clone(&preview_dao),
None, None,
playlist_manager.clone(), playlist_manager.clone(),
preview_generator.clone(), preview_generator.clone(),
); );
last_full_scan = now;
} else { } else {
debug!( debug!(
"Running quick scan (checking files modified in last {} seconds)", "Running quick scan for library '{}' (checking files modified in last {} seconds)",
lib.name,
quick_interval_secs + 10 quick_interval_secs + 10
); );
// Check files modified since last quick scan, plus 10 second buffer
let check_since = last_quick_scan let check_since = last_quick_scan
.checked_sub(Duration::from_secs(10)) .checked_sub(Duration::from_secs(10))
.unwrap_or(last_quick_scan); .unwrap_or(last_quick_scan);
process_new_files( process_new_files(
&base_path, lib,
Arc::clone(&exif_dao), Arc::clone(&exif_dao),
Arc::clone(&preview_dao), Arc::clone(&preview_dao),
Some(check_since), Some(check_since),
@@ -1448,11 +1625,15 @@ fn watch_files(
); );
} }
// Update media counts per library (metric aggregates across all)
update_media_counts(Path::new(&lib.root_path));
}
if is_full_scan {
last_full_scan = now;
}
last_quick_scan = now; last_quick_scan = now;
scan_count += 1; scan_count += 1;
// Update media counts
update_media_counts(&base_path);
} }
}); });
} }
@@ -1481,7 +1662,7 @@ fn playlist_needs_generation(video_path: &Path, playlist_path: &Path) -> bool {
} }
fn process_new_files( fn process_new_files(
base_path: &Path, library: &libraries::Library,
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>, exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
modified_since: Option<SystemTime>, modified_since: Option<SystemTime>,
@@ -1491,6 +1672,7 @@ fn process_new_files(
let context = opentelemetry::Context::new(); let context = opentelemetry::Context::new();
let thumbs = dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined"); let thumbs = dotenv::var("THUMBNAILS").expect("THUMBNAILS not defined");
let thumbnail_directory = Path::new(&thumbs); let thumbnail_directory = Path::new(&thumbs);
let base_path = Path::new(&library.root_path);
// Collect all image and video files, optionally filtered by modification time // Collect all image and video files, optionally filtered by modification time
let files: Vec<(PathBuf, String)> = WalkDir::new(base_path) let files: Vec<(PathBuf, String)> = WalkDir::new(base_path)
@@ -1513,11 +1695,13 @@ fn process_new_files(
.filter(|entry| is_image(entry) || is_video(entry)) .filter(|entry| is_image(entry) || is_video(entry))
.filter_map(|entry| { .filter_map(|entry| {
let file_path = entry.path().to_path_buf(); let file_path = entry.path().to_path_buf();
// Canonical rel_path is forward-slash regardless of OS so DB
// comparisons against the batch EXIF lookup line up.
let relative_path = file_path let relative_path = file_path
.strip_prefix(base_path) .strip_prefix(base_path)
.ok()? .ok()?
.to_str()? .to_str()?
.to_string(); .replace('\\', "/");
Some((file_path, relative_path)) Some((file_path, relative_path))
}) })
.collect(); .collect();
@@ -1547,79 +1731,110 @@ fn process_new_files(
}; };
let mut new_files_found = false; let mut new_files_found = false;
let mut files_needing_exif = Vec::new(); let mut files_needing_row = Vec::new();
// Check each file for missing thumbnail or EXIF data // Register every image/video file in image_exif. Rows without EXIF
// still carry library_id, rel_path, content_hash, and size_bytes so
// derivative dedup and DB-indexed sort/filter work for every file,
// not just photos with parseable EXIF.
for (file_path, relative_path) in &files { for (file_path, relative_path) in &files {
// Check if thumbnail exists
let thumb_path = thumbnail_directory.join(relative_path); let thumb_path = thumbnail_directory.join(relative_path);
let needs_thumbnail = !thumb_path.exists(); let needs_thumbnail = !thumb_path.exists();
let needs_row = !existing_exif_paths.contains_key(relative_path);
// Check if EXIF data exists (for supported files) if needs_thumbnail || needs_row {
let needs_exif = if exif::supports_exif(file_path) {
!existing_exif_paths.contains_key(relative_path)
} else {
false
};
if needs_thumbnail || needs_exif {
new_files_found = true; new_files_found = true;
if needs_thumbnail { if needs_thumbnail {
info!("New file detected (missing thumbnail): {}", relative_path); info!("New file detected (missing thumbnail): {}", relative_path);
} }
if needs_exif { if needs_row {
files_needing_exif.push((file_path.clone(), relative_path.clone())); files_needing_row.push((file_path.clone(), relative_path.clone()));
} }
} }
} }
// Process EXIF data for files that need it if !files_needing_row.is_empty() {
if !files_needing_exif.is_empty() {
info!( info!(
"Processing EXIF data for {} files", "Registering {} new files in image_exif",
files_needing_exif.len() files_needing_row.len()
); );
for (file_path, relative_path) in files_needing_exif { for (file_path, relative_path) in files_needing_row {
match exif::extract_exif_from_path(&file_path) {
Ok(exif_data) => {
let timestamp = Utc::now().timestamp(); let timestamp = Utc::now().timestamp();
// Hash the file bytes and record the size; always attempted so
// every file gets a content_hash, even when EXIF is absent.
let (content_hash, size_bytes) = match content_hash::compute(&file_path) {
Ok(id) => (Some(id.content_hash), Some(id.size_bytes)),
Err(e) => {
warn!("Failed to hash {}: {:?}", file_path.display(), e);
(None, None)
}
};
// EXIF is best-effort enrichment. When extraction fails (or the
// file type doesn't support EXIF) we still store a row with all
// EXIF fields NULL; the file remains visible to sort-by-date
// and tag queries via its rel_path and filesystem timestamps.
let exif_fields = if exif::supports_exif(&file_path) {
match exif::extract_exif_from_path(&file_path) {
Ok(data) => Some(data),
Err(e) => {
debug!(
"No EXIF or parse error for {}: {:?}",
file_path.display(),
e
);
None
}
}
} else {
None
};
let insert_exif = InsertImageExif { let insert_exif = InsertImageExif {
library_id: library.id,
file_path: relative_path.clone(), file_path: relative_path.clone(),
camera_make: exif_data.camera_make, camera_make: exif_fields.as_ref().and_then(|e| e.camera_make.clone()),
camera_model: exif_data.camera_model, camera_model: exif_fields.as_ref().and_then(|e| e.camera_model.clone()),
lens_model: exif_data.lens_model, lens_model: exif_fields.as_ref().and_then(|e| e.lens_model.clone()),
width: exif_data.width, width: exif_fields.as_ref().and_then(|e| e.width),
height: exif_data.height, height: exif_fields.as_ref().and_then(|e| e.height),
orientation: exif_data.orientation, orientation: exif_fields.as_ref().and_then(|e| e.orientation),
gps_latitude: exif_data.gps_latitude.map(|v| v as f32), gps_latitude: exif_fields
gps_longitude: exif_data.gps_longitude.map(|v| v as f32), .as_ref()
gps_altitude: exif_data.gps_altitude.map(|v| v as f32), .and_then(|e| e.gps_latitude.map(|v| v as f32)),
focal_length: exif_data.focal_length.map(|v| v as f32), gps_longitude: exif_fields
aperture: exif_data.aperture.map(|v| v as f32), .as_ref()
shutter_speed: exif_data.shutter_speed, .and_then(|e| e.gps_longitude.map(|v| v as f32)),
iso: exif_data.iso, gps_altitude: exif_fields
date_taken: exif_data.date_taken, .as_ref()
.and_then(|e| e.gps_altitude.map(|v| v as f32)),
focal_length: exif_fields
.as_ref()
.and_then(|e| e.focal_length.map(|v| v as f32)),
aperture: exif_fields
.as_ref()
.and_then(|e| e.aperture.map(|v| v as f32)),
shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()),
iso: exif_fields.as_ref().and_then(|e| e.iso),
date_taken: exif_fields.as_ref().and_then(|e| e.date_taken),
created_time: timestamp, created_time: timestamp,
last_modified: timestamp, last_modified: timestamp,
content_hash,
size_bytes,
}; };
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
if let Err(e) = dao.store_exif(&context, insert_exif) { if let Err(e) = dao.store_exif(&context, insert_exif) {
error!("Failed to store EXIF data for {}: {:?}", relative_path, e); error!(
} else { "Failed to register {} in image_exif: {:?}",
debug!("EXIF data stored for {}", relative_path); relative_path, e
}
}
Err(e) => {
debug!(
"No EXIF data or error extracting from {}: {:?}",
file_path.display(),
e
); );
} } else {
debug!("Registered {} in image_exif", relative_path);
} }
} }
} }
@@ -1702,7 +1917,49 @@ fn process_new_files(
// Generate thumbnails for all files that need them // Generate thumbnails for all files that need them
if new_files_found { if new_files_found {
info!("Processing thumbnails for new files..."); info!("Processing thumbnails for new files...");
create_thumbnails(); create_thumbnails(std::slice::from_ref(library));
}
// Reconciliation: on a full scan, prune image_exif rows whose rel_path no
// longer exists on disk for this library. Keeps the DB in parity with disk so
// downstream DB-backed listings (e.g. recursive /photos) don't return
// phantom files. Skipped on quick scans — those only look at recently
// modified files and can't distinguish "missing" from "unchanged".
if modified_since.is_none() {
let disk_paths: HashSet<String> = files.iter().map(|(_, rel)| rel.clone()).collect();
let db_paths: Vec<String> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_rel_paths_for_library(&context, library.id)
.unwrap_or_else(|e| {
error!(
"Reconciliation: failed to load image_exif rel_paths for lib {}: {:?}",
library.id, e
);
Vec::new()
})
};
let stale: Vec<String> = db_paths
.into_iter()
.filter(|p| !disk_paths.contains(p))
.collect();
if !stale.is_empty() {
info!(
"Reconciliation: pruning {} stale image_exif rows for library '{}'",
stale.len(),
library.name
);
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
for rel in &stale {
if let Err(e) = dao.delete_exif_by_library(&context, library.id, rel) {
warn!(
"Reconciliation: failed to delete {} (lib {}): {:?}",
rel, library.id, e
);
}
}
}
} }
} }

View File

@@ -16,6 +16,7 @@ use walkdir::WalkDir;
use crate::data::Claims; use crate::data::Claims;
use crate::database::ExifDao; use crate::database::ExifDao;
use crate::files::is_image_or_video; use crate::files::is_image_or_video;
use crate::libraries::Library;
use crate::otel::{extract_context_from_request, global_tracer}; use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState; use crate::state::AppState;
@@ -107,6 +108,9 @@ pub struct MemoriesRequest {
pub span: Option<MemoriesSpan>, pub span: Option<MemoriesSpan>,
/// Client timezone offset in minutes from UTC (e.g., -480 for PST, 60 for CET) /// Client timezone offset in minutes from UTC (e.g., -480 for PST, 60 for CET)
pub timezone_offset_minutes: Option<i32>, pub timezone_offset_minutes: Option<i32>,
/// Optional library filter. Accepts a library id (e.g. "1") or name
/// (e.g. "main"). When omitted, results span all libraries.
pub library: Option<String>,
} }
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
@@ -114,6 +118,9 @@ pub struct MemoryItem {
pub path: String, pub path: String,
pub created: Option<i64>, pub created: Option<i64>,
pub modified: Option<i64>, pub modified: Option<i64>,
/// Id of the library this memory belongs to. Allows clients to show a
/// per-item source badge in union mode.
pub library_id: i32,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@@ -363,6 +370,7 @@ fn collect_exif_memories(
exif_dao: &Data<Mutex<Box<dyn ExifDao>>>, exif_dao: &Data<Mutex<Box<dyn ExifDao>>>,
context: &opentelemetry::Context, context: &opentelemetry::Context,
base_path: &str, base_path: &str,
library_id: i32,
now: NaiveDate, now: NaiveDate,
span_mode: MemoriesSpan, span_mode: MemoriesSpan,
years_back: u32, years_back: u32,
@@ -371,7 +379,7 @@ fn collect_exif_memories(
) -> Vec<(MemoryItem, NaiveDate)> { ) -> Vec<(MemoryItem, NaiveDate)> {
// Query database for all files with date_taken // Query database for all files with date_taken
let exif_records = match exif_dao.lock() { let exif_records = match exif_dao.lock() {
Ok(mut dao) => match dao.get_all_with_date_taken(context) { Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) {
Ok(records) => records, Ok(records) => records,
Err(e) => { Err(e) => {
warn!("Failed to query EXIF database: {:?}", e); warn!("Failed to query EXIF database: {:?}", e);
@@ -417,6 +425,7 @@ fn collect_exif_memories(
path: file_path.clone(), path: file_path.clone(),
created, created,
modified, modified,
library_id,
}, },
file_date, file_date,
)) ))
@@ -427,6 +436,7 @@ fn collect_exif_memories(
/// Collect memories from file system scan (for files not in EXIF DB) /// Collect memories from file system scan (for files not in EXIF DB)
fn collect_filesystem_memories( fn collect_filesystem_memories(
base_path: &str, base_path: &str,
library_id: i32,
path_excluder: &PathExcluder, path_excluder: &PathExcluder,
skip_paths: &HashSet<PathBuf>, skip_paths: &HashSet<PathBuf>,
now: NaiveDate, now: NaiveDate,
@@ -478,6 +488,7 @@ fn collect_filesystem_memories(
path: path_relative, path: path_relative,
created, created,
modified, modified,
library_id,
}, },
file_date, file_date,
)) ))
@@ -526,16 +537,34 @@ pub async fn list_memories(
debug!("Now: {:?}", now); debug!("Now: {:?}", now);
let base = Path::new(&app_state.base_path); // Resolve the optional library filter. Unknown values are a 400; None
// means "all libraries" — currently equivalent to the primary library
// while only one is configured.
let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) {
Ok(lib) => lib,
Err(msg) => {
warn!("Rejecting /memories request: {}", msg);
return HttpResponse::BadRequest().body(msg);
}
};
// When `library` is `Some`, scope to that one library; otherwise union
// across every configured library and let the results interleave.
let libraries_to_scan: Vec<&Library> = match library {
Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(),
};
// Build the path excluder from base and env-configured exclusions let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new();
for lib in &libraries_to_scan {
let base = Path::new(&lib.root_path);
let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
// Phase 1: Query EXIF database
let exif_memories = collect_exif_memories( let exif_memories = collect_exif_memories(
&exif_dao, &exif_dao,
&span_context, &span_context,
&app_state.base_path, &lib.root_path,
lib.id,
now, now,
span_mode, span_mode,
years_back, years_back,
@@ -543,15 +572,14 @@ pub async fn list_memories(
&path_excluder, &path_excluder,
); );
// Build HashSet for deduplication
let exif_paths: HashSet<PathBuf> = exif_memories let exif_paths: HashSet<PathBuf> = exif_memories
.iter() .iter()
.map(|(item, _)| PathBuf::from(&app_state.base_path).join(&item.path)) .map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path))
.collect(); .collect();
// Phase 2: File system scan (skip EXIF files)
let fs_memories = collect_filesystem_memories( let fs_memories = collect_filesystem_memories(
&app_state.base_path, &lib.root_path,
lib.id,
&path_excluder, &path_excluder,
&exif_paths, &exif_paths,
now, now,
@@ -560,9 +588,9 @@ pub async fn list_memories(
&client_timezone, &client_timezone,
); );
// Phase 3: Merge and sort memories_with_dates.extend(exif_memories);
let mut memories_with_dates = exif_memories;
memories_with_dates.extend(fs_memories); memories_with_dates.extend(fs_memories);
}
match span_mode { match span_mode {
// Sort by absolute time for a more 'overview' // Sort by absolute time for a more 'overview'
@@ -795,7 +823,7 @@ mod tests {
// Verify timestamp is within expected range (should be around 1422489671) // Verify timestamp is within expected range (should be around 1422489671)
let timestamp = date_time.timestamp(); let timestamp = date_time.timestamp();
assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015 assert!((1422480000..=1422576000).contains(&timestamp)); // Jan 28-29, 2015
} }
#[test] #[test]
@@ -813,7 +841,7 @@ mod tests {
// Verify timestamp is within expected range (should be around 1422489664) // Verify timestamp is within expected range (should be around 1422489664)
let timestamp = date_time.timestamp(); let timestamp = date_time.timestamp();
assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015 assert!((1422480000..=1422576000).contains(&timestamp)); // Jan 28-29, 2015
} }
#[test] #[test]
@@ -1092,12 +1120,13 @@ mod tests {
.and_utc() .and_utc()
.timestamp(); .timestamp();
let mut memories_with_dates = vec![ let mut memories_with_dates = [
( (
MemoryItem { MemoryItem {
path: "photo1.jpg".to_string(), path: "photo1.jpg".to_string(),
created: Some(jan_15_2024_9am), created: Some(jan_15_2024_9am),
modified: Some(jan_15_2024_9am), modified: Some(jan_15_2024_9am),
library_id: 1,
}, },
NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(), NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
), ),
@@ -1106,6 +1135,7 @@ mod tests {
path: "photo2.jpg".to_string(), path: "photo2.jpg".to_string(),
created: Some(jan_15_2020_10am), created: Some(jan_15_2020_10am),
modified: Some(jan_15_2020_10am), modified: Some(jan_15_2020_10am),
library_id: 1,
}, },
NaiveDate::from_ymd_opt(2020, 1, 15).unwrap(), NaiveDate::from_ymd_opt(2020, 1, 15).unwrap(),
), ),
@@ -1114,6 +1144,7 @@ mod tests {
path: "photo3.jpg".to_string(), path: "photo3.jpg".to_string(),
created: Some(jan_16_2021_8am), created: Some(jan_16_2021_8am),
modified: Some(jan_16_2021_8am), modified: Some(jan_16_2021_8am),
library_id: 1,
}, },
NaiveDate::from_ymd_opt(2021, 1, 16).unwrap(), NaiveDate::from_ymd_opt(2021, 1, 16).unwrap(),
), ),

View File

@@ -3,8 +3,10 @@ use crate::database::{
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao, CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao,
SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao, SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
connect,
}; };
use crate::database::{PreviewDao, SqlitePreviewDao}; use crate::database::{PreviewDao, SqlitePreviewDao};
use crate::libraries::{self, Library};
use crate::tags::{SqliteTagDao, TagDao}; use crate::tags::{SqliteTagDao, TagDao};
use crate::video::actors::{ use crate::video::actors::{
PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager, PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager,
@@ -17,6 +19,11 @@ pub struct AppState {
pub stream_manager: Arc<Addr<StreamActor>>, pub stream_manager: Arc<Addr<StreamActor>>,
pub playlist_manager: Arc<Addr<VideoPlaylistManager>>, pub playlist_manager: Arc<Addr<VideoPlaylistManager>>,
pub preview_clip_generator: Arc<Addr<PreviewClipGenerator>>, pub preview_clip_generator: Arc<Addr<PreviewClipGenerator>>,
/// All configured media libraries. Ordered by `id` ascending; the first
/// entry is the primary library.
pub libraries: Vec<Library>,
/// Legacy shim equal to `libraries[0].root_path`. Phase 2 transitional —
/// new code should go through `primary_library()`.
pub base_path: String, pub base_path: String,
pub thumbnail_path: String, pub thumbnail_path: String,
pub video_path: String, pub video_path: String,
@@ -28,10 +35,26 @@ pub struct AppState {
pub insight_generator: InsightGenerator, pub insight_generator: InsightGenerator,
} }
impl AppState {
pub fn primary_library(&self) -> &Library {
self.libraries
.first()
.expect("AppState constructed without any libraries")
}
pub fn library_by_id(&self, id: i32) -> Option<&Library> {
self.libraries.iter().find(|l| l.id == id)
}
pub fn library_by_name(&self, name: &str) -> Option<&Library> {
self.libraries.iter().find(|l| l.name == name)
}
}
impl AppState { impl AppState {
pub fn new( pub fn new(
stream_manager: Arc<Addr<StreamActor>>, stream_manager: Arc<Addr<StreamActor>>,
base_path: String, libraries_vec: Vec<Library>,
thumbnail_path: String, thumbnail_path: String,
video_path: String, video_path: String,
gif_path: String, gif_path: String,
@@ -42,17 +65,26 @@ impl AppState {
insight_generator: InsightGenerator, insight_generator: InsightGenerator,
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
) -> Self { ) -> Self {
assert!(
!libraries_vec.is_empty(),
"AppState::new requires at least one library"
);
let base_path = libraries_vec[0].root_path.clone();
let playlist_generator = PlaylistGenerator::new(); let playlist_generator = PlaylistGenerator::new();
let video_playlist_manager = let video_playlist_manager =
VideoPlaylistManager::new(video_path.clone(), playlist_generator.start()); VideoPlaylistManager::new(video_path.clone(), playlist_generator.start());
let preview_clip_generator = let preview_clip_generator = PreviewClipGenerator::new(
PreviewClipGenerator::new(preview_clips_path.clone(), base_path.clone(), preview_dao); preview_clips_path.clone(),
libraries_vec.clone(),
preview_dao,
);
Self { Self {
stream_manager, stream_manager,
playlist_manager: Arc::new(video_playlist_manager.start()), playlist_manager: Arc::new(video_playlist_manager.start()),
preview_clip_generator: Arc::new(preview_clip_generator.start()), preview_clip_generator: Arc::new(preview_clip_generator.start()),
libraries: libraries_vec,
base_path, base_path,
thumbnail_path, thumbnail_path,
video_path, video_path,
@@ -122,8 +154,16 @@ impl Default for AppState {
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> = let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
// Load base path // Load base path and ensure the primary library row reflects it.
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
let mut seed_conn = connect();
libraries::seed_or_patch_from_env(&mut seed_conn, &base_path);
let libraries_vec = libraries::load_all(&mut seed_conn);
assert!(
!libraries_vec.is_empty(),
"libraries table is empty after seed_or_patch_from_env"
);
drop(seed_conn);
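`seed_or_patch_from_env` and `load_all` live outside this hunk. A sketch of the seed/patch step consistent with its use here, assuming a `libraries(id, name, root_path)` table, SQLite upsert semantics, and the "main" default name that the test fixtures below use:

use diesel::prelude::*;
use diesel::sql_query;
use diesel::sql_types::{Integer, Text};
use diesel::sqlite::SqliteConnection;

// Sketch only: make sure the primary row exists and its root_path
// tracks BASE_PATH across restarts.
pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) {
    sql_query(
        "INSERT INTO libraries (id, name, root_path) VALUES (?, 'main', ?) \
         ON CONFLICT(id) DO UPDATE SET root_path = excluded.root_path",
    )
    .bind::<Integer, _>(PRIMARY_LIBRARY_ID)
    .bind::<Text, _>(base_path)
    .execute(conn)
    .expect("Failed to seed the libraries table");
}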
// Initialize InsightGenerator with all data sources // Initialize InsightGenerator with all data sources
let insight_generator = InsightGenerator::new( let insight_generator = InsightGenerator::new(
@@ -137,7 +177,7 @@ impl Default for AppState {
search_dao.clone(), search_dao.clone(),
tag_dao.clone(), tag_dao.clone(),
knowledge_dao, knowledge_dao,
base_path.clone(), libraries_vec.clone(),
); );
// Ensure preview clips directory exists // Ensure preview clips directory exists
@@ -148,7 +188,7 @@ impl Default for AppState {
Self::new( Self::new(
Arc::new(StreamActor {}.start()), Arc::new(StreamActor {}.start()),
base_path, libraries_vec,
env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"), env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"),
env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"), env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"),
env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"), env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"),
@@ -208,6 +248,11 @@ impl AppState {
// Initialize test InsightGenerator with all data sources // Initialize test InsightGenerator with all data sources
let base_path_str = base_path.to_string_lossy().to_string(); let base_path_str = base_path.to_string_lossy().to_string();
let test_lib = Library {
id: crate::libraries::PRIMARY_LIBRARY_ID,
name: "main".to_string(),
root_path: base_path_str.clone(),
};
let insight_generator = InsightGenerator::new( let insight_generator = InsightGenerator::new(
ollama.clone(), ollama.clone(),
sms_client.clone(), sms_client.clone(),
@@ -219,7 +264,7 @@ impl AppState {
search_dao.clone(), search_dao.clone(),
tag_dao.clone(), tag_dao.clone(),
knowledge_dao, knowledge_dao,
base_path_str.clone(), vec![test_lib],
); );
// Initialize test preview DAO // Initialize test preview DAO
@@ -227,9 +272,14 @@ impl AppState {
Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new())));
// Create the AppState with the temporary paths // Create the AppState with the temporary paths
let test_libraries = vec![Library {
id: crate::libraries::PRIMARY_LIBRARY_ID,
name: "main".to_string(),
root_path: base_path_str.clone(),
}];
AppState::new( AppState::new(
Arc::new(StreamActor {}.start()), Arc::new(StreamActor {}.start()),
base_path_str, test_libraries,
thumbnail_path.to_string_lossy().to_string(), thumbnail_path.to_string_lossy().to_string(),
video_path.to_string_lossy().to_string(), video_path.to_string_lossy().to_string(),
gif_path.to_string_lossy().to_string(), gif_path.to_string_lossy().to_string(),

View File

@@ -1,5 +1,8 @@
use crate::data::GetTagsRequest; use crate::data::GetTagsRequest;
use crate::database::ExifDao;
use crate::libraries;
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call}; use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
use crate::state::AppState;
use crate::utils::normalize_path; use crate::utils::normalize_path;
use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema}; use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema};
use actix_web::dev::{ServiceFactory, ServiceRequest}; use actix_web::dev::{ServiceFactory, ServiceRequest};
@@ -71,15 +74,32 @@ async fn get_tags<D: TagDao>(
_: Claims, _: Claims,
http_request: HttpRequest, http_request: HttpRequest,
request: web::Query<ThumbnailRequest>, request: web::Query<ThumbnailRequest>,
app_state: web::Data<AppState>,
tag_dao: web::Data<Mutex<D>>, tag_dao: web::Data<Mutex<D>>,
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
) -> impl Responder { ) -> impl Responder {
let context = extract_context_from_request(&http_request); let context = extract_context_from_request(&http_request);
let span = global_tracer().start_with_context("get_tags", &context); let span = global_tracer().start_with_context("get_tags", &context);
let span_context = opentelemetry::Context::current_with_span(span); let span_context = opentelemetry::Context::current_with_span(span);
let normalized_path = normalize_path(&request.path); let normalized_path = normalize_path(&request.path);
// Expand the query set to every rel_path that shares content with
// this file, so tags added under one library show up under the
// others when they hold the same file. Falls back to direct rel_path
// match when the file hasn't been hashed yet.
let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
.ok()
.flatten()
.unwrap_or_else(|| app_state.primary_library());
let sibling_paths = {
let mut exif = exif_dao.lock().expect("Unable to get ExifDao");
exif.get_rel_paths_sharing_content(&span_context, library.id, &normalized_path)
.unwrap_or_else(|_| vec![normalized_path.clone()])
};
let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao"); let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
tag_dao tag_dao
.get_tags_for_path(&span_context, &normalized_path) .get_tags_for_paths(&span_context, &sibling_paths)
.map(|tags| { .map(|tags| {
span_context.span().set_status(Status::Ok); span_context.span().set_status(Status::Ok);
HttpResponse::Ok().json(tags) HttpResponse::Ok().json(tags)
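`ExifDao::get_rel_paths_sharing_content` is implemented in the DAO layer, not in this hunk; conceptually it is a self-join on `content_hash`. A sketch under the `image_exif` columns this PR introduces (`library_id`, `rel_path`, `content_hash`); the real method may use the diesel DSL rather than raw SQL:

use diesel::prelude::*;
use diesel::sql_types::{Integer, Text};
use diesel::sqlite::SqliteConnection;

#[derive(QueryableByName)]
struct RelPathRow {
    #[diesel(sql_type = Text)]
    rel_path: String,
}

// Sketch: every rel_path whose content_hash matches the row identified
// by (library_id, rel_path); falls back to the input path when that
// row is missing or has not been hashed yet.
fn rel_paths_sharing_content(
    conn: &mut SqliteConnection,
    library_id: i32,
    rel_path: &str,
) -> QueryResult<Vec<String>> {
    let rows: Vec<RelPathRow> = diesel::sql_query(
        "SELECT b.rel_path AS rel_path \
         FROM image_exif a \
         JOIN image_exif b ON b.content_hash = a.content_hash \
         WHERE a.library_id = ? AND a.rel_path = ? \
           AND a.content_hash IS NOT NULL",
    )
    .bind::<Integer, _>(library_id)
    .bind::<Text, _>(rel_path)
    .load(conn)?;
    if rows.is_empty() {
        Ok(vec![rel_path.to_string()])
    } else {
        Ok(rows.into_iter().map(|r| r.rel_path).collect())
    }
}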
@@ -254,6 +274,7 @@ pub struct InsertTag {
#[diesel(table_name = tagged_photo)] #[diesel(table_name = tagged_photo)]
pub struct InsertTaggedPhoto { pub struct InsertTaggedPhoto {
pub tag_id: i32, pub tag_id: i32,
#[diesel(column_name = rel_path)]
pub photo_name: String, pub photo_name: String,
pub created_time: i64, pub created_time: i64,
} }
@@ -263,6 +284,7 @@ pub struct TaggedPhoto {
#[allow(dead_code)] // Part of API contract #[allow(dead_code)] // Part of API contract
pub id: i32, pub id: i32,
#[allow(dead_code)] // Part of API contract #[allow(dead_code)] // Part of API contract
#[diesel(column_name = rel_path)]
pub photo_name: String, pub photo_name: String,
#[allow(dead_code)] // Part of API contract #[allow(dead_code)] // Part of API contract
pub tag_id: i32, pub tag_id: i32,
@@ -287,6 +309,14 @@ pub trait TagDao: Send + Sync {
context: &opentelemetry::Context, context: &opentelemetry::Context,
path: &str, path: &str,
) -> anyhow::Result<Vec<Tag>>; ) -> anyhow::Result<Vec<Tag>>;
/// Union of tags for every rel_path in `paths`. Used by content-hash
/// sharing: the caller resolves all rel_paths with the same content
/// via `ExifDao::get_rel_paths_sharing_content`, then passes them here.
fn get_tags_for_paths(
&mut self,
context: &opentelemetry::Context,
paths: &[String],
) -> anyhow::Result<Vec<Tag>>;
fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result<Tag>; fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result<Tag>;
fn remove_tag( fn remove_tag(
&mut self, &mut self,
@@ -312,12 +342,14 @@ pub trait TagDao: Send + Sync {
exclude_tag_ids: Vec<i32>, exclude_tag_ids: Vec<i32>,
context: &opentelemetry::Context, context: &opentelemetry::Context,
) -> anyhow::Result<Vec<FileWithTagCount>>; ) -> anyhow::Result<Vec<FileWithTagCount>>;
#[allow(dead_code)]
fn update_photo_name( fn update_photo_name(
&mut self, &mut self,
old_name: &str, old_name: &str,
new_name: &str, new_name: &str,
context: &opentelemetry::Context, context: &opentelemetry::Context,
) -> anyhow::Result<()>; ) -> anyhow::Result<()>;
#[allow(dead_code)]
fn get_all_photo_names( fn get_all_photo_names(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
@@ -334,6 +366,7 @@ pub struct SqliteTagDao {
} }
impl SqliteTagDao { impl SqliteTagDao {
#[allow(dead_code)]
pub(crate) fn new(connection: Arc<Mutex<SqliteConnection>>) -> Self { pub(crate) fn new(connection: Arc<Mutex<SqliteConnection>>) -> Self {
SqliteTagDao { connection } SqliteTagDao { connection }
} }
@@ -368,7 +401,7 @@ impl TagDao for SqliteTagDao {
.inner_join(tagged_photo::table) .inner_join(tagged_photo::table)
.group_by(tags::id) .group_by(tags::id)
.select((count_star(), id, name, created_time)) .select((count_star(), id, name, created_time))
.filter(tagged_photo::photo_name.like(path)) .filter(tagged_photo::rel_path.like(path))
.get_results(conn.deref_mut()) .get_results(conn.deref_mut())
.map::<Vec<(i64, Tag)>, _>(|tags_with_count: Vec<(i64, i32, String, i64)>| { .map::<Vec<(i64, Tag)>, _>(|tags_with_count: Vec<(i64, i32, String, i64)>| {
tags_with_count tags_with_count
@@ -404,13 +437,39 @@ impl TagDao for SqliteTagDao {
debug!("Getting Tags for path: {:?}", path); debug!("Getting Tags for path: {:?}", path);
tags::table tags::table
.left_join(tagged_photo::table) .left_join(tagged_photo::table)
.filter(tagged_photo::photo_name.eq(&path)) .filter(tagged_photo::rel_path.eq(&path))
.select((tags::id, tags::name, tags::created_time)) .select((tags::id, tags::name, tags::created_time))
.get_results::<Tag>(conn.deref_mut()) .get_results::<Tag>(conn.deref_mut())
.with_context(|| "Unable to get tags from Sqlite") .with_context(|| "Unable to get tags from Sqlite")
}) })
} }
fn get_tags_for_paths(
&mut self,
context: &opentelemetry::Context,
paths: &[String],
) -> anyhow::Result<Vec<Tag>> {
if paths.is_empty() {
return Ok(Vec::new());
}
let mut conn = self
.connection
.lock()
.expect("Unable to lock SqliteTagDao connection");
trace_db_call(context, "query", "get_tags_for_paths", |span| {
span.set_attribute(KeyValue::new("path_count", paths.len() as i64));
// DISTINCT across tag ids so two rel_paths carrying the same
// tag don't produce a duplicate entry in the response.
tags::table
.inner_join(tagged_photo::table)
.filter(tagged_photo::rel_path.eq_any(paths))
.select((tags::id, tags::name, tags::created_time))
.distinct()
.get_results::<Tag>(conn.deref_mut())
.with_context(|| "Unable to get tags from Sqlite")
})
}
fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result<Tag> { fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result<Tag> {
let mut conn = self let mut conn = self
.connection .connection
@@ -474,7 +533,7 @@ impl TagDao for SqliteTagDao {
diesel::delete( diesel::delete(
tagged_photo::table tagged_photo::table
.filter(tagged_photo::tag_id.eq(tag.id)) .filter(tagged_photo::tag_id.eq(tag.id))
.filter(tagged_photo::photo_name.eq(path)), .filter(tagged_photo::rel_path.eq(path)),
) )
.execute(conn.deref_mut()) .execute(conn.deref_mut())
.with_context(|| format!("Unable to delete tag: '{}'", &tag.name)) .with_context(|| format!("Unable to delete tag: '{}'", &tag.name))
@@ -558,23 +617,23 @@ impl TagDao for SqliteTagDao {
let query = sql_query(format!( let query = sql_query(format!(
r#" r#"
WITH filtered_photos AS ( WITH filtered_photos AS (
SELECT photo_name SELECT rel_path
FROM tagged_photo tp FROM tagged_photo tp
WHERE tp.tag_id IN ({}) WHERE tp.tag_id IN ({})
AND tp.photo_name NOT IN ( AND tp.rel_path NOT IN (
SELECT photo_name SELECT rel_path
FROM tagged_photo FROM tagged_photo
WHERE tag_id IN ({}) WHERE tag_id IN ({})
) )
GROUP BY photo_name GROUP BY rel_path
HAVING COUNT(DISTINCT tag_id) >= {} HAVING COUNT(DISTINCT tag_id) >= {}
) )
SELECT SELECT
fp.photo_name as file_name, fp.rel_path as file_name,
COUNT(DISTINCT tp2.tag_id) as tag_count COUNT(DISTINCT tp2.tag_id) as tag_count
FROM filtered_photos fp FROM filtered_photos fp
JOIN tagged_photo tp2 ON fp.photo_name = tp2.photo_name JOIN tagged_photo tp2 ON fp.rel_path = tp2.rel_path
GROUP BY fp.photo_name"#, GROUP BY fp.rel_path"#,
tag_placeholders, tag_placeholders,
exclude_placeholders, exclude_placeholders,
tag_ids.len() tag_ids.len()
@@ -618,21 +677,21 @@ impl TagDao for SqliteTagDao {
let query = sql_query(format!( let query = sql_query(format!(
r#" r#"
WITH filtered_photos AS ( WITH filtered_photos AS (
SELECT DISTINCT photo_name SELECT DISTINCT rel_path
FROM tagged_photo tp FROM tagged_photo tp
WHERE tp.tag_id IN ({}) WHERE tp.tag_id IN ({})
AND tp.photo_name NOT IN ( AND tp.rel_path NOT IN (
SELECT photo_name SELECT rel_path
FROM tagged_photo FROM tagged_photo
WHERE tag_id IN ({}) WHERE tag_id IN ({})
) )
) )
SELECT SELECT
fp.photo_name as file_name, fp.rel_path as file_name,
COUNT(DISTINCT tp2.tag_id) as tag_count COUNT(DISTINCT tp2.tag_id) as tag_count
FROM filtered_photos fp FROM filtered_photos fp
JOIN tagged_photo tp2 ON fp.photo_name = tp2.photo_name JOIN tagged_photo tp2 ON fp.rel_path = tp2.rel_path
GROUP BY fp.photo_name"#, GROUP BY fp.rel_path"#,
tag_placeholders, exclude_placeholders tag_placeholders, exclude_placeholders
)) ))
.into_boxed(); .into_boxed();
@@ -663,8 +722,8 @@ impl TagDao for SqliteTagDao {
.connection .connection
.lock() .lock()
.expect("Unable to lock SqliteTagDao connection"); .expect("Unable to lock SqliteTagDao connection");
diesel::update(tagged_photo.filter(photo_name.eq(old_name))) diesel::update(tagged_photo.filter(rel_path.eq(old_name)))
.set(photo_name.eq(new_name)) .set(rel_path.eq(new_name))
.execute(conn.deref_mut())?; .execute(conn.deref_mut())?;
Ok(()) Ok(())
} }
@@ -680,7 +739,7 @@ impl TagDao for SqliteTagDao {
.lock() .lock()
.expect("Unable to lock SqliteTagDao connection"); .expect("Unable to lock SqliteTagDao connection");
tagged_photo tagged_photo
.select(photo_name) .select(rel_path)
.distinct() .distinct()
.load(conn.deref_mut()) .load(conn.deref_mut())
.with_context(|| "Unable to get photo names") .with_context(|| "Unable to get photo names")
@@ -714,10 +773,10 @@ impl TagDao for SqliteTagDao {
let query_str = format!( let query_str = format!(
r#" r#"
SELECT photo_name, COUNT(DISTINCT tag_id) as tag_count SELECT rel_path AS photo_name, COUNT(DISTINCT tag_id) as tag_count
FROM tagged_photo FROM tagged_photo
WHERE photo_name IN ({}) WHERE rel_path IN ({})
GROUP BY photo_name GROUP BY rel_path
"#, "#,
placeholders placeholders
); );
@@ -815,6 +874,25 @@ mod tests {
.clone()) .clone())
} }
fn get_tags_for_paths(
&mut self,
_context: &opentelemetry::Context,
paths: &[String],
) -> anyhow::Result<Vec<Tag>> {
let tagged = self.tagged_photos.borrow();
let mut out: Vec<Tag> = Vec::new();
for p in paths {
if let Some(tags) = tagged.get(p) {
for t in tags {
if !out.iter().any(|existing| existing.id == t.id) {
out.push(t.clone());
}
}
}
}
Ok(out)
}
fn create_tag( fn create_tag(
&mut self, &mut self,
_context: &opentelemetry::Context, _context: &opentelemetry::Context,

View File

@@ -14,6 +14,12 @@ pub struct TestUserDao {
pub user_map: RefCell<Vec<User>>, pub user_map: RefCell<Vec<User>>,
} }
impl Default for TestUserDao {
fn default() -> Self {
Self::new()
}
}
impl TestUserDao { impl TestUserDao {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
@@ -71,6 +77,12 @@ pub struct TestPreviewDao {
next_id: StdMutex<i32>, next_id: StdMutex<i32>,
} }
impl Default for TestPreviewDao {
fn default() -> Self {
Self::new()
}
}
impl TestPreviewDao { impl TestPreviewDao {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
@@ -98,6 +110,7 @@ impl PreviewDao for TestPreviewDao {
file_path_val.to_string(), file_path_val.to_string(),
VideoPreviewClip { VideoPreviewClip {
id: *id, id: *id,
library_id: crate::libraries::PRIMARY_LIBRARY_ID,
file_path: file_path_val.to_string(), file_path: file_path_val.to_string(),
status: status_val.to_string(), status: status_val.to_string(),
duration_seconds: None, duration_seconds: None,

View File

@@ -1,5 +1,6 @@
use crate::database::PreviewDao; use crate::database::PreviewDao;
use crate::is_video; use crate::is_video;
use crate::libraries::Library;
use crate::otel::global_tracer; use crate::otel::global_tracer;
use crate::video::ffmpeg::generate_preview_clip; use crate::video::ffmpeg::generate_preview_clip;
use actix::prelude::*; use actix::prelude::*;
@@ -500,23 +501,38 @@ pub struct GeneratePreviewClipMessage {
pub struct PreviewClipGenerator { pub struct PreviewClipGenerator {
semaphore: Arc<Semaphore>, semaphore: Arc<Semaphore>,
preview_clips_dir: String, preview_clips_dir: String,
base_path: String, libraries: Vec<Library>,
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
} }
impl PreviewClipGenerator { impl PreviewClipGenerator {
pub fn new( pub fn new(
preview_clips_dir: String, preview_clips_dir: String,
base_path: String, libraries: Vec<Library>,
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>, preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
) -> Self { ) -> Self {
PreviewClipGenerator { PreviewClipGenerator {
semaphore: Arc::new(Semaphore::new(2)), semaphore: Arc::new(Semaphore::new(2)),
preview_clips_dir, preview_clips_dir,
base_path, libraries,
preview_dao, preview_dao,
} }
} }
/// Strip whichever library root actually contains `video_path`.
/// Falls back to the first library if none match, so we never
/// accidentally emit the absolute input path as the output path
/// (which ffmpeg rejects as "cannot edit existing files in place").
fn relativize(&self, video_path: &str) -> String {
for lib in &self.libraries {
if let Some(stripped) = video_path.strip_prefix(&lib.root_path) {
return stripped.trim_start_matches(['/', '\\']).replace('\\', "/");
}
}
video_path
.trim_start_matches(['/', '\\'])
.replace('\\', "/")
}
} }
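For illustration (both roots hypothetical), with libraries rooted at /mnt/photos and /mnt/archive:

// relativize("/mnt/archive/2020/trip.mp4") -> "2020/trip.mp4"
// relativize("/mnt/photos/clips/a.mp4")    -> "clips/a.mp4"
// When no root matches, leading separators are trimmed and backslashes
// normalized: relativize("/elsewhere/clip.mp4") -> "elsewhere/clip.mp4".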
impl Actor for PreviewClipGenerator { impl Actor for PreviewClipGenerator {
@@ -533,9 +549,10 @@ impl Handler<GeneratePreviewClipMessage> for PreviewClipGenerator {
) -> Self::Result { ) -> Self::Result {
let semaphore = self.semaphore.clone(); let semaphore = self.semaphore.clone();
let preview_clips_dir = self.preview_clips_dir.clone(); let preview_clips_dir = self.preview_clips_dir.clone();
let base_path = self.base_path.clone();
let preview_dao = self.preview_dao.clone(); let preview_dao = self.preview_dao.clone();
let video_path = msg.video_path; let video_path = msg.video_path;
// Resolve against whichever library actually owns this video.
let relative_path = self.relativize(&video_path);
Box::pin(async move { Box::pin(async move {
let permit = semaphore let permit = semaphore
@@ -543,13 +560,6 @@ impl Handler<GeneratePreviewClipMessage> for PreviewClipGenerator {
.await .await
.expect("Unable to acquire preview semaphore"); .expect("Unable to acquire preview semaphore");
// Compute relative path (from BASE_PATH) for DB operations, consistent with EXIF convention
let relative_path = video_path
.strip_prefix(&base_path)
.unwrap_or(&video_path)
.trim_start_matches(['/', '\\'])
.to_string();
// Update status to processing // Update status to processing
{ {
let otel_ctx = opentelemetry::Context::current(); let otel_ctx = opentelemetry::Context::current();

View File

@@ -40,7 +40,10 @@ pub struct Ffmpeg;
pub enum GifType { pub enum GifType {
Overview, Overview,
OverviewVideo { duration: u32 }, #[allow(dead_code)]
OverviewVideo {
duration: u32,
},
} }
impl Ffmpeg { impl Ffmpeg {