feature/exif-endpoint #44

Merged
cameron merged 29 commits from feature/exif-endpoint into master 2025-12-27 03:25:19 +00:00
2 changed files with 235 additions and 60 deletions
Showing only changes of commit c7fd328925 - Show all commits

View File

@@ -4,7 +4,9 @@ use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{Favorite, ImageExif, InsertFavorite, InsertImageExif, InsertUser, User};
use crate::database::models::{
Favorite, ImageExif, InsertFavorite, InsertImageExif, InsertUser, User,
};
pub mod models;
pub mod schema;
@@ -91,7 +93,8 @@ impl UserDao for SqliteUserDao {
!users
.filter(username.eq(user))
.load::<User>(&mut self.connection)
.unwrap_or_default().is_empty()
.unwrap_or_default()
.is_empty()
}
}
@@ -187,6 +190,7 @@ pub trait ExifDao: Sync + Send {
fn get_exif(&mut self, file_path: &str) -> Result<Option<ImageExif>, DbError>;
fn update_exif(&mut self, exif_data: InsertImageExif) -> Result<ImageExif, DbError>;
fn delete_exif(&mut self, file_path: &str) -> Result<(), DbError>;
fn get_all_with_date_taken(&mut self) -> Result<Vec<(String, i64)>, DbError>;
}
pub struct SqliteExifDao {
@@ -273,4 +277,22 @@ impl ExifDao for SqliteExifDao {
.map(|_| ())
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
/// Returns a `(file_path, date_taken)` pair for every `image_exif` row that has a
/// `date_taken` value.
///
/// # Errors
/// Any failure while loading the rows is reported as `DbErrorKind::QueryError`;
/// the underlying error is discarded.
///
/// # Panics
/// Panics with "Unable to get ExifDao" if locking `self.connection` fails.
fn get_all_with_date_taken(&mut self) -> Result<Vec<(String, i64)>, DbError> {
    use schema::image_exif::dsl::*;

    let mut conn_guard = self.connection.lock().expect("Unable to get ExifDao");

    let loaded = image_exif
        .select((file_path, date_taken))
        .filter(date_taken.is_not_null())
        .load::<(String, Option<i64>)>(conn_guard.deref_mut())
        .map_err(|_| DbError::new(DbErrorKind::QueryError))?;

    // Rows are decoded with an optional timestamp even though the query filters
    // out NULLs, so unwrap each one and drop any row that is still `None`.
    Ok(loaded
        .into_iter()
        .filter_map(|(path, maybe_ts)| Some((path, maybe_ts?)))
        .collect())
}
}

View File

@@ -7,11 +7,14 @@ use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Mutex;
use walkdir::WalkDir;
use crate::data::Claims;
use crate::database::ExifDao;
use crate::files::is_image_or_video;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
@@ -76,13 +79,14 @@ impl PathExcluder {
if !self.excluded_patterns.is_empty() {
for component in path.components() {
if let Some(comp_str) = component.as_os_str().to_str()
&& self.excluded_patterns.iter().any(|pat| pat == comp_str) {
debug!(
"PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})",
path, comp_str, self.excluded_patterns
);
return true;
}
&& self.excluded_patterns.iter().any(|pat| pat == comp_str)
{
debug!(
"PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})",
path, comp_str, self.excluded_patterns
);
return true;
}
}
}
@@ -186,6 +190,48 @@ fn get_file_date_info(
Some((date_in_timezone, metadata_created, metadata_modified))
}
/// Turn a Unix timestamp (whole seconds) into a calendar date.
///
/// The date is evaluated in `client_timezone` when one is supplied, otherwise in
/// the server's `Local` timezone. Returns `None` when
/// `DateTime::<Utc>::from_timestamp` rejects the timestamp.
fn timestamp_to_naive_date(
    timestamp: i64,
    client_timezone: &Option<FixedOffset>,
) -> Option<NaiveDate> {
    DateTime::<Utc>::from_timestamp(timestamp, 0).map(|instant| match client_timezone {
        Some(offset) => instant.with_timezone(offset).date_naive(),
        None => instant.with_timezone(&Local).date_naive(),
    })
}
/// Extract created/modified timestamps from file metadata
fn extract_metadata_timestamps(
metadata: &std::fs::Metadata,
client_timezone: &Option<FixedOffset>,
) -> (Option<i64>, Option<i64>) {
let created = metadata.created().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
let modified = metadata.modified().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
(created, modified)
}
fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset>> {
let build_date_from_ymd_capture =
|captures: &regex::Captures| -> Option<DateTime<FixedOffset>> {
@@ -267,9 +313,9 @@ fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset>> {
.ok()
.and_then(DateTime::from_timestamp_millis)
.map(|naive_dt| naive_dt.fixed_offset())
{
return Some(date_time);
}
{
return Some(date_time);
}
// Second timestamp (10 digits)
if timestamp_str.len() >= 10
@@ -278,20 +324,145 @@ fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset>> {
.ok()
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
.map(|naive_dt| naive_dt.fixed_offset())
{
return Some(date_time);
}
{
return Some(date_time);
}
}
None
}
/// Collect memories from EXIF database
///
/// Loads every `(file_path, date_taken)` record via
/// `ExifDao::get_all_with_date_taken`, then processes the records in parallel.
/// A record becomes a `(MemoryItem, NaiveDate)` pair when:
///
/// 1. `base_path` joined with the record's path is not rejected by `path_excluder`,
/// 2. that path currently resolves to a regular file,
/// 3. its `date_taken` converts to a date (in `client_timezone`, see
///    `timestamp_to_naive_date`), and
/// 4. that date satisfies `is_memories_match(date, now, span_mode, years_back)`.
///
/// `MemoryItem::path` is the path exactly as stored in the database record;
/// `created`/`modified` come from the file's metadata.
///
/// Returns an empty vector (after logging a warning) when the DAO mutex cannot
/// be locked or the query fails.
fn collect_exif_memories(
    exif_dao: &Data<Mutex<Box<dyn ExifDao>>>,
    base_path: &str,
    now: NaiveDate,
    span_mode: MemoriesSpan,
    years_back: u32,
    client_timezone: &Option<FixedOffset>,
    path_excluder: &PathExcluder,
) -> Vec<(MemoryItem, NaiveDate)> {
    // Query database for all files with date_taken. The guard lives only inside
    // this `match`, so the lock is released before the parallel phase starts.
    let exif_records = match exif_dao.lock() {
        Ok(mut dao) => match dao.get_all_with_date_taken() {
            Ok(records) => records,
            Err(e) => {
                warn!("Failed to query EXIF database: {:?}", e);
                return Vec::new(); // Graceful fallback
            }
        },
        Err(e) => {
            warn!("Failed to lock EXIF DAO: {:?}", e);
            return Vec::new();
        }
    };

    let base = Path::new(base_path);

    // Consume the records so each path string is moved into its `MemoryItem`
    // instead of being cloned.
    exif_records
        .into_par_iter()
        .filter_map(|(file_path, date_taken_ts)| {
            let full_path = base.join(&file_path);

            // Check exclusions
            if path_excluder.is_excluded(&full_path) {
                return None;
            }

            // A single metadata lookup both verifies that the path is an existing
            // regular file and supplies the timestamps used below. This replaces
            // separate `exists()`, `is_file()` and `metadata()` calls, which
            // stat'ed the file three times and silently dropped files that
            // disappeared between the checks.
            let metadata = match std::fs::metadata(&full_path) {
                Ok(metadata) if metadata.is_file() => metadata,
                _ => {
                    warn!("EXIF record exists but file not found: {:?}", full_path);
                    return None;
                }
            };

            // Convert timestamp to NaiveDate in client timezone
            let file_date = timestamp_to_naive_date(date_taken_ts, client_timezone)?;

            // Check if matches memory criteria
            if !is_memories_match(file_date, now, span_mode, years_back) {
                return None;
            }

            let (created, modified) = extract_metadata_timestamps(&metadata, client_timezone);

            Some((
                MemoryItem {
                    path: file_path,
                    created,
                    modified,
                },
                file_date,
            ))
        })
        .collect()
}
/// Collect memories by walking the file system under `base_path`.
///
/// Intended for files that were not already picked up from the EXIF database:
/// any walked path contained in `skip_paths` is ignored. Directory entries that
/// `WalkDir` fails to read are skipped. Remaining entries must pass
/// `path_excluder`, be regular files, and satisfy `is_image_or_video`.
///
/// Each surviving file is dated via `get_file_date_info` and kept when
/// `is_memories_match(date, now, span_mode, years_back)` holds. The returned
/// `MemoryItem::path` is relative to `base_path`; files whose relative path
/// cannot be computed or is not valid UTF-8 are dropped.
fn collect_filesystem_memories(
    base_path: &str,
    path_excluder: &PathExcluder,
    skip_paths: &HashSet<PathBuf>,
    now: NaiveDate,
    span_mode: MemoriesSpan,
    years_back: u32,
    client_timezone: &Option<FixedOffset>,
) -> Vec<(MemoryItem, NaiveDate)> {
    let root = Path::new(base_path);

    // Sequential walk first; the per-file date work is parallelised afterwards.
    let candidates: Vec<_> = WalkDir::new(root)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| {
            let candidate = entry.path();
            // Evaluation order matters and is short-circuiting: already-handled
            // EXIF paths, then exclusions, then the image/video file check.
            !skip_paths.contains(candidate)
                && !path_excluder.is_excluded(candidate)
                && entry.file_type().is_file()
                && is_image_or_video(candidate)
        })
        .collect();

    candidates
        .par_iter()
        .filter_map(|entry| {
            // Filename/metadata based date lookup; entries without a date are dropped.
            let (file_date, created, modified) =
                get_file_date_info(entry.path(), client_timezone)?;

            if !is_memories_match(file_date, now, span_mode, years_back) {
                return None;
            }

            let relative = entry.path().strip_prefix(root).ok()?.to_str()?.to_owned();

            Some((
                MemoryItem {
                    path: relative,
                    created,
                    modified,
                },
                file_date,
            ))
        })
        .collect()
}
#[get("/memories")]
pub async fn list_memories(
_claims: Claims,
request: HttpRequest,
q: web::Query<MemoriesRequest>,
app_state: Data<AppState>,
exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
) -> impl Responder {
let tracer = global_tracer();
let context = extract_context_from_request(&request);
@@ -326,55 +497,37 @@ pub async fn list_memories(
// Build the path excluder from base and env-configured exclusions
let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
let entries: Vec<_> = WalkDir::new(base)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| {
let path = e.path();
// Phase 1: Query EXIF database
let exif_memories = collect_exif_memories(
&exif_dao,
&app_state.base_path,
now,
span_mode,
years_back,
&client_timezone,
&path_excluder,
);
// Skip paths that should be excluded
if path_excluder.is_excluded(path) {
return false;
}
true
})
.filter(|e| e.file_type().is_file() && is_image_or_video(e.path()))
// Build HashSet for deduplication
let exif_paths: HashSet<PathBuf> = exif_memories
.iter()
.map(|(item, _)| PathBuf::from(&app_state.base_path).join(&item.path))
.collect();
let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = entries
.par_iter()
.filter_map(|entry| {
let path = entry.path();
// Phase 2: File system scan (skip EXIF files)
let fs_memories = collect_filesystem_memories(
&app_state.base_path,
&path_excluder,
&exif_paths,
now,
span_mode,
years_back,
&client_timezone,
);
// Get file date and timestamps in one operation
let (file_date, created, modified) = match get_file_date_info(path, &client_timezone) {
Some(info) => info,
None => {
warn!("No date info found for file: {:?}", path);
return None;
}
};
if is_memories_match(file_date, now, span_mode, years_back) {
return if let Ok(rel) = path.strip_prefix(base) {
Some((
MemoryItem {
path: rel.to_string_lossy().to_string(),
created,
modified,
},
file_date,
))
} else {
warn!("Failed to strip prefix from path: {:?}", path);
None
};
}
None
})
.collect();
// Phase 3: Merge and sort
let mut memories_with_dates = exif_memories;
memories_with_dates.extend(fs_memories);
match span_mode {
// Sort by absolute time for a more 'overview'