Files
ImageApi/src/memories.rs
2025-12-01 13:51:17 -05:00

764 lines
25 KiB
Rust

use actix_web::web::Data;
use actix_web::{HttpRequest, HttpResponse, Responder, get, web};
use chrono::LocalResult::{Ambiguous, Single};
use chrono::{DateTime, Datelike, FixedOffset, Local, LocalResult, NaiveDate, TimeZone, Utc};
use log::{debug, trace, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::path::PathBuf;
use walkdir::WalkDir;
use crate::data::Claims;
use crate::files::is_image_or_video;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
/// Helper that encapsulates path-exclusion semantics for the memories scan.
///
/// Two kinds of rules are supported (see [`PathExcluder::new`]): directory
/// rules anchored at the base path, and name patterns that must equal a whole
/// path component below the base path.
#[derive(Debug)]
struct PathExcluder {
    /// Root the rules were built against; pattern rules only look at the part
    /// of a path that lies below this directory.
    base: PathBuf,
    /// Directories (already joined onto `base`) whose whole subtree is excluded.
    excluded_dirs: Vec<PathBuf>,
    /// Names that exclude a path when one of its components is exactly equal
    /// to one of them.
    excluded_patterns: Vec<String>,
}

impl PathExcluder {
    /// Build from a `base` path and the raw exclusion entries.
    ///
    /// Rules:
    /// - Entries starting with '/' are interpreted as "absolute under base"
    ///   (e.g. "/photos/private" -> base/photos/private). A bare "/" is ignored.
    /// - Every other entry is a component pattern: it excludes a path when a
    ///   single directory or file name below `base` is exactly equal to it.
    ///   Patterns are not substring matches, and an entry that itself
    ///   contains '/' can never equal a single component.
    fn new(base: &Path, raw_excluded: &[String]) -> Self {
        let mut excluded_dirs = Vec::new();
        let mut excluded_patterns = Vec::new();

        for entry in raw_excluded {
            match entry.strip_prefix('/') {
                // A lone "/" would exclude the whole base; skip it.
                Some("") => {}
                // Absolute under base
                Some(rel) => excluded_dirs.push(base.join(rel)),
                // Component pattern anywhere under base
                None => excluded_patterns.push(entry.clone()),
            }
        }

        debug!(
            "PathExcluder created. dirs={:?}, patterns={:?}",
            excluded_dirs, excluded_patterns
        );

        Self {
            base: base.to_path_buf(),
            excluded_dirs,
            excluded_patterns,
        }
    }

    /// Returns true if `path` should be excluded.
    ///
    /// A path is excluded when it lies inside one of the excluded directories,
    /// or when one of its components below `base` equals an excluded pattern.
    /// Components of `base` itself are ignored, so a base such as
    /// `/tmp/library` is not wiped out by a `tmp` pattern. A path that is not
    /// under `base` is checked component-by-component in full.
    fn is_excluded(&self, path: &Path) -> bool {
        // Directory-based exclusions
        for excluded in &self.excluded_dirs {
            if path.starts_with(excluded) {
                debug!(
                    "PathExcluder: excluded by dir: {:?} (rule: {:?})",
                    path, excluded
                );
                return true;
            }
        }

        if self.excluded_patterns.is_empty() {
            return false;
        }

        // Pattern-based exclusions: match whole path components (dir or file
        // name), not substrings, and only below the base directory.
        let scoped = path.strip_prefix(&self.base).unwrap_or(path);
        for component in scoped.components() {
            // Non-UTF-8 components cannot equal a `String` pattern.
            let Some(comp_str) = component.as_os_str().to_str() else {
                continue;
            };
            if self.excluded_patterns.iter().any(|pat| pat == comp_str) {
                debug!(
                    "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})",
                    path, comp_str, self.excluded_patterns
                );
                return true;
            }
        }

        false
    }
}
/// Width of the "memories" window requested by the client.
///
/// Deserialized from the lowercase variant name (`day`, `week`, `month`).
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
#[serde(rename_all = "lowercase")]
pub enum MemoriesSpan {
/// Files whose month and day of month equal today's, in any year.
Day,
/// Files whose ISO week number equals today's, in any year.
Week,
/// Files whose month equals today's, in any year.
Month,
}
/// Query parameters accepted by the `/memories` endpoint.
#[derive(Debug, Deserialize)]
pub struct MemoriesRequest {
    /// Window to match against today's date; the handler uses
    /// [`MemoriesSpan::Day`] when this is omitted.
    pub span: Option<MemoriesSpan>,
    /// Client timezone offset in minutes from UTC (e.g., -480 for PST, 60 for CET).
    /// When omitted, the server's local timezone is used instead.
    pub timezone_offset_minutes: Option<i32>,
}
/// One media file returned by the memories endpoint.
#[derive(Debug, Serialize, Clone)]
pub struct MemoryItem {
/// File path relative to the configured base path.
pub path: String,
/// Unix timestamp in seconds: taken from the date embedded in the file name
/// when one is found, otherwise the filesystem creation time (if available).
pub created: Option<i64>,
/// Filesystem modification time as a Unix timestamp in seconds (if available).
pub modified: Option<i64>,
}
/// JSON body returned by the memories endpoint.
#[derive(Debug, Serialize)]
pub struct MemoriesResponse {
/// Matching files, already sorted by the handler (chronologically for the
/// month span; by day of month and then `created` for the other spans).
pub items: Vec<MemoryItem>,
}
fn get_file_date_info(
path: &Path,
client_timezone: &Option<FixedOffset>,
) -> Option<(NaiveDate, Option<i64>, Option<i64>)> {
// Read file metadata once
let meta = std::fs::metadata(path).ok()?;
// Extract metadata timestamps
let metadata_created = meta.created().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
let metadata_modified = meta.modified().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
// Try to get date from filename
if let Some(date_time) = path
.file_name()
.and_then(|filename| filename.to_str())
.and_then(extract_date_from_filename)
{
// Convert to client timezone if specified
let date_in_timezone = if let Some(tz) = client_timezone {
date_time.with_timezone(tz)
} else {
date_time.with_timezone(&Local).fixed_offset()
};
// Use the timestamp from the filename date
let created_ts = date_in_timezone.timestamp();
debug!(
"File date from file {:?} > {:?} = {:?}",
path.file_name(),
date_time,
date_in_timezone
);
return Some((
date_in_timezone.date_naive(),
Some(created_ts),
metadata_modified,
));
}
// Fall back to metadata if no date in filename
let system_time = meta.created().ok().or_else(|| meta.modified().ok())?;
let dt_utc: DateTime<Utc> = system_time.into();
let date_in_timezone = if let Some(tz) = client_timezone {
dt_utc.with_timezone(tz).date_naive()
} else {
dt_utc.with_timezone(&Local).date_naive()
};
trace!("Fallback metadata create date = {:?}", date_in_timezone);
Some((date_in_timezone, metadata_created, metadata_modified))
}
/// Try to recover a capture date/time from a media file name.
///
/// Patterns are tried in this order and the first one that yields a valid
/// date wins:
/// 1. `2014-06-01-20-44-50` (screenshot style, all dashes)
/// 2. `20140601_204450` / `20140601-204450`
/// 3. `2015-01-09_02-15-15`
/// 4. `2015-01-09-021515`
/// 5. `20140927101712` (fully compact)
/// 6. a 13-digit (milliseconds) or 10-digit (seconds) Unix timestamp
///    immediately followed by a `.`
///
/// Patterns 1-5 are interpreted as wall-clock time in the server's local
/// timezone (the earlier instant is used when the local time is ambiguous);
/// Unix timestamps are returned with a zero UTC offset.
///
/// Returns `None` when nothing in `filename` parses to a valid date.
fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset>> {
    use std::sync::LazyLock;

    // Compiled once per process: this function runs for every scanned file,
    // so rebuilding the regexes on each call is wasted work.
    static DATE_TIME_PATTERNS: LazyLock<Vec<regex::Regex>> = LazyLock::new(|| {
        [
            // 1. Screenshot format: Screenshot_2014-06-01-20-44-50.png
            r"(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})",
            // Screenshot format: Screenshot_20140601[_-]204450.png
            r"(\d{4})(\d{2})(\d{2})[_-](\d{2})(\d{2})(\d{2})",
            // 2. Dash format: 2015-01-09_02-15-15.jpg
            r"(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})",
            // Dash with compact time format: 2015-01-09-021515.jpg
            r"(\d{4})-(\d{2})-(\d{2})-(\d{2})(\d{2})(\d{2})",
            // 3. Compact format: 20140927101712.jpg
            r"(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})",
        ]
        .iter()
        .map(|pattern| {
            regex::Regex::new(pattern).expect("hard-coded date pattern must compile")
        })
        .collect()
    });

    // 4. Timestamp format: 1401638400.jpeg
    static UNIX_TIMESTAMP: LazyLock<regex::Regex> = LazyLock::new(|| {
        regex::Regex::new(r"(\d{10}|\d{13})\.").expect("hard-coded timestamp pattern must compile")
    });

    /// Build a local date/time from six capture groups (Y, M, D, h, m, s).
    fn local_date_time(
        captures: &regex::Captures,
        filename: &str,
    ) -> Option<DateTime<FixedOffset>> {
        let year = captures.get(1)?.as_str().parse::<i32>().ok()?;
        let field = |index: usize| captures.get(index)?.as_str().parse::<u32>().ok();
        let (month, day) = (field(2)?, field(3)?);
        let (hour, min, sec) = (field(4)?, field(5)?, field(6)?);

        // Rejects impossible dates/times such as month 13 or hour 25.
        let naive = NaiveDate::from_ymd_opt(year, month, day)?.and_hms_opt(hour, min, sec)?;

        match Local.from_local_datetime(&naive) {
            Single(dt) => Some(dt.fixed_offset()),
            // DST fold: pick the earlier of the two candidate instants.
            Ambiguous(early_dt, _) => Some(early_dt.fixed_offset()),
            // DST gap: the wall-clock time does not exist locally.
            LocalResult::None => {
                warn!("Weird local date: {:?}", filename);
                None
            }
        }
    }

    // Only the first match of each pattern is considered; if it does not form
    // a valid date, the next pattern is tried.
    let from_pattern = DATE_TIME_PATTERNS.iter().find_map(|re| {
        re.captures(filename)
            .and_then(|captures| local_date_time(&captures, filename))
    });
    if let Some(date_time) = from_pattern {
        return Some(date_time);
    }

    let captures = UNIX_TIMESTAMP.captures(filename)?;
    let timestamp_str = captures.get(1)?.as_str();

    // Millisecond timestamp (13 digits)
    let from_millis = timestamp_str
        .get(..13)
        .and_then(|digits| digits.parse::<i64>().ok())
        .and_then(DateTime::from_timestamp_millis);
    if let Some(date_time) = from_millis {
        return Some(date_time.fixed_offset());
    }

    // Second timestamp (10 digits)
    timestamp_str
        .get(..10)?
        .parse::<i64>()
        .ok()
        .and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
        .map(|date_time| date_time.fixed_offset())
}
/// `GET /memories`: list media files whose date falls on the same day, ISO
/// week or month as today in a previous (or the current) year.
///
/// Query parameters (see [`MemoriesRequest`]):
/// - `span`: `day` (default), `week` or `month`.
/// - `timezone_offset_minutes`: client UTC offset; "today" and file dates are
///   evaluated in that timezone. Without it the server's local timezone is
///   used. An offset that is out of range falls back to UTC.
///
/// The response is a [`MemoriesResponse`] whose items are sorted
/// chronologically for the month span, and by day of month and then creation
/// timestamp for the other spans.
///
/// NOTE(review): the directory walk and metadata reads below are synchronous
/// and run inside this async handler; consider moving them to a blocking
/// thread pool if scans become slow.
#[get("/memories")]
pub async fn list_memories(
    _claims: Claims,
    request: HttpRequest,
    q: web::Query<MemoriesRequest>,
    app_state: Data<AppState>,
) -> impl Responder {
    let tracer = global_tracer();
    let context = extract_context_from_request(&request);
    let mut span = tracer.start_with_context("list_memories", &context);

    let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
    let years_back: u32 = 15;

    // Create timezone from client offset, default to local timezone if not provided.
    // The offset is client-controlled, so the minutes -> seconds conversion must
    // not overflow; anything unrepresentable falls back to UTC.
    let client_timezone = q.timezone_offset_minutes.map(|offset_mins| {
        offset_mins
            .checked_mul(60)
            .and_then(FixedOffset::east_opt)
            .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap())
    });

    let now = if let Some(tz) = client_timezone {
        debug!("Client timezone: {:?}", tz);
        Utc::now().with_timezone(&tz).date_naive()
    } else {
        Local::now().date_naive()
    };
    debug!("Now: {:?}", now);

    let base = Path::new(&app_state.base_path);

    // Build the path excluder from base and env-configured exclusions
    let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);

    let entries: Vec<_> = WalkDir::new(base)
        .into_iter()
        // Prune excluded directories instead of walking them and discarding
        // every entry afterwards; exclusion of a directory implies exclusion
        // of everything below it, so the resulting set is the same.
        .filter_entry(|e| !path_excluder.is_excluded(e.path()))
        // Unreadable entries are skipped.
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file() && is_image_or_video(e.path()))
        .collect();

    let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = entries
        .par_iter()
        .filter_map(|entry| {
            let path = entry.path();

            // Get file date and timestamps in one operation
            let Some((file_date, created, modified)) = get_file_date_info(path, &client_timezone)
            else {
                warn!("No date info found for file: {:?}", path);
                return None;
            };

            if !is_memories_match(file_date, now, span_mode, years_back) {
                return None;
            }

            match path.strip_prefix(base) {
                Ok(rel) => Some((
                    MemoryItem {
                        path: rel.to_string_lossy().to_string(),
                        created,
                        modified,
                    },
                    file_date,
                )),
                Err(_) => {
                    warn!("Failed to strip prefix from path: {:?}", path);
                    None
                }
            }
        })
        .collect();

    match span_mode {
        // Sort by absolute time for a more 'overview'
        MemoriesSpan::Month => memories_with_dates.sort_by(|a, b| a.1.cmp(&b.1)),
        // Sort by day of the month and time (using the created timestamp);
        // items without a created timestamp go last within their day.
        MemoriesSpan::Day | MemoriesSpan::Week => {
            memories_with_dates.sort_by(|a, b| {
                a.1.day()
                    .cmp(&b.1.day())
                    .then_with(|| match (a.0.created, b.0.created) {
                        (Some(a_time), Some(b_time)) => a_time.cmp(&b_time),
                        (Some(_), None) => std::cmp::Ordering::Less,
                        (None, Some(_)) => std::cmp::Ordering::Greater,
                        (None, None) => std::cmp::Ordering::Equal,
                    })
            });
        }
    }

    let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect();

    span.add_event(
        "memories_scanned",
        vec![
            KeyValue::new("span", format!("{:?}", span_mode)),
            KeyValue::new("years_back", years_back.to_string()),
            KeyValue::new("result_count", items.len().to_string()),
            KeyValue::new(
                "client_timezone",
                format!(
                    "{:?}",
                    client_timezone.unwrap_or_else(|| FixedOffset::east_opt(0).unwrap())
                ),
            ),
            KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
        ],
    );
    span.set_status(Status::Ok);

    HttpResponse::Ok().json(MemoriesResponse { items })
}
fn is_memories_match(
file_date: NaiveDate,
today: NaiveDate,
span: MemoriesSpan,
years_back: u32,
) -> bool {
if file_date > today {
return false;
}
let years_diff = (today.year() - file_date.year()).unsigned_abs();
if years_diff > years_back {
warn!(
"File date is too far in the past: {:?} vs {:?}",
file_date, today
);
return false;
}
match span {
MemoriesSpan::Day => same_month_day_any_year(file_date, today),
MemoriesSpan::Week => same_week_any_year(file_date, today),
MemoriesSpan::Month => same_month_any_year(file_date, today),
}
}
/// True when both dates share month and day of month (year is ignored).
fn same_month_day_any_year(a: NaiveDate, b: NaiveDate) -> bool {
    (a.month(), a.day()) == (b.month(), b.day())
}
/// True when both dates fall in the same ISO week number (year is ignored).
///
/// Only the week number is compared; the weekday is not taken into account.
fn same_week_any_year(a: NaiveDate, b: NaiveDate) -> bool {
    let week_of = |date: NaiveDate| date.iso_week().week();
    week_of(a) == week_of(b)
}
/// True when both dates fall in the same calendar month (day and year are ignored).
fn same_month_any_year(a: NaiveDate, b: NaiveDate) -> bool {
    let (month_a, month_b) = (a.month(), b.month());
    month_a == month_b
}
// Unit tests for file-name date parsing, metadata fallback and path exclusion.
#[cfg(test)]
mod tests {
use super::*;
use chrono::Timelike;
use std::fs::{self, File};
use tempfile::tempdir;
// All-dash date and time: YYYY-MM-DD-hh-mm-ss.
#[test]
fn test_extract_date_from_filename_screenshot_format() {
let filename = "Screenshot_2014-06-01-20-44-50.png";
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2014);
assert_eq!(date_time.month(), 6);
assert_eq!(date_time.day(), 1);
assert_eq!(date_time.hour(), 20);
assert_eq!(date_time.minute(), 44);
assert_eq!(date_time.second(), 50);
}
// Compact date and compact time separated by a dash: YYYYMMDD-hhmmss.
#[test]
fn test_extract_date_from_filename_screenshot_less_dashes_format() {
let filename = "Screenshot_20140601-204450.png";
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2014);
assert_eq!(date_time.month(), 6);
assert_eq!(date_time.day(), 1);
assert_eq!(date_time.hour(), 20);
assert_eq!(date_time.minute(), 44);
assert_eq!(date_time.second(), 50);
}
// Compact date and compact time separated by an underscore: YYYYMMDD_hhmmss.
#[test]
fn test_extract_date_from_filename_screenshot_underscores_format() {
let filename = "20140601_204450.png";
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2014);
assert_eq!(date_time.month(), 6);
assert_eq!(date_time.day(), 1);
assert_eq!(date_time.hour(), 20);
assert_eq!(date_time.minute(), 44);
assert_eq!(date_time.second(), 50);
}
// Dashed date, underscore, dashed time: YYYY-MM-DD_hh-mm-ss.
#[test]
fn test_extract_date_from_filename_dash_format() {
let filename = "2015-01-09_02-15-15.jpg";
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2015);
assert_eq!(date_time.month(), 1);
assert_eq!(date_time.day(), 9);
assert_eq!(date_time.hour(), 2);
assert_eq!(date_time.minute(), 15);
assert_eq!(date_time.second(), 15);
}
// Dashed date followed by a compact time: YYYY-MM-DD-hhmmss.
#[test]
fn test_extract_date_from_filename_dash_compact_time_format() {
let filename = "2015-01-09-021515.jpg";
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2015);
assert_eq!(date_time.month(), 1);
assert_eq!(date_time.day(), 9);
assert_eq!(date_time.hour(), 2);
assert_eq!(date_time.minute(), 15);
assert_eq!(date_time.second(), 15);
}
// Fully compact 14-digit form: YYYYMMDDhhmmss.
#[test]
fn test_extract_date_from_filename_compact_format() {
let filename = "20140927101712.jpg";
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2014);
assert_eq!(date_time.month(), 9);
assert_eq!(date_time.day(), 27);
assert_eq!(date_time.hour(), 10);
assert_eq!(date_time.minute(), 17);
assert_eq!(date_time.second(), 12);
}
// 10-digit Unix timestamp in seconds, directly before the extension dot.
#[test]
fn test_extract_date_from_filename_timestamp_format() {
let filename = "xyz_1401638400.jpeg"; // Unix timestamp for 2014-06-01 16:00:00 UTC
// Timestamps are already in UTC, so timezone doesn't matter for this test
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2014);
assert_eq!(date_time.month(), 6);
assert_eq!(date_time.day(), 1);
assert_eq!(date_time.hour(), 16);
assert_eq!(date_time.minute(), 0);
assert_eq!(date_time.second(), 0);
}
// 13-digit Unix timestamp in milliseconds for the same instant as above.
#[test]
fn test_extract_date_from_filename_timestamp_millis_format() {
let filename = "xyz_1401638400000.jpeg"; // Unix timestamp in milliseconds
let date_time = extract_date_from_filename(filename).unwrap();
assert_eq!(date_time.year(), 2014);
assert_eq!(date_time.month(), 6);
assert_eq!(date_time.day(), 1);
assert_eq!(date_time.hour(), 16);
assert_eq!(date_time.minute(), 0);
assert_eq!(date_time.second(), 0);
}
// A date in the file name takes precedence over filesystem metadata.
#[test]
fn test_get_file_date_info_from_filename() {
let temp_dir = tempdir().unwrap();
let temp_file = temp_dir.path().join("Screenshot_2014-06-01-20-44-50.png");
File::create(&temp_file).unwrap();
// The client timezone passed in is the machine's current local UTC offset.
let (date, created, _) =
get_file_date_info(&temp_file, &Some(*Local::now().fixed_offset().offset())).unwrap();
// Check that date is from filename
assert_eq!(date.year(), 2014);
assert_eq!(date.month(), 6);
assert_eq!(date.day(), 1);
// Check that created timestamp matches the date from filename
assert!(created.is_some());
let ts = created.unwrap();
// The timestamp should be for 2014-06-01 20:44:50 in the LOCAL timezone
let dt_from_ts = Local.timestamp_opt(ts, 0).unwrap();
assert_eq!(dt_from_ts.year(), 2014);
assert_eq!(dt_from_ts.month(), 6);
assert_eq!(dt_from_ts.day(), 1);
assert_eq!(dt_from_ts.hour(), 20);
assert_eq!(dt_from_ts.minute(), 44);
assert_eq!(dt_from_ts.second(), 50);
}
// Without a date in the file name, date and timestamps come from metadata.
#[test]
fn test_get_file_date_info_from_metadata() {
let temp_dir = tempdir().unwrap();
let temp_file = temp_dir.path().join("regular_image.jpg");
File::create(&temp_file).unwrap();
let (date, created, modified) = get_file_date_info(&temp_file, &None).unwrap();
// Both date and timestamps should be from metadata (recent)
let today = Local::now().date_naive();
assert_eq!(date.year(), today.year());
assert_eq!(date.month(), today.month());
// Both timestamps should be valid
assert!(created.is_some());
assert!(modified.is_some());
// Check that timestamps are recent
let dt_created = DateTime::<Utc>::from_timestamp(created.unwrap(), 0).unwrap();
assert_eq!(dt_created.year(), today.year());
let dt_modified = DateTime::<Utc>::from_timestamp(modified.unwrap(), 0).unwrap();
assert_eq!(dt_modified.year(), today.year());
}
// Entries with a leading '/' exclude whole directories anchored at base.
#[test]
fn test_path_excluder_absolute_under_base() {
let tmp = tempdir().unwrap();
let base = tmp.path();
// Simulate structure:
// base/photos/private/secret.jpg
// base/photos/public/ok.jpg
// base/screenshots/img.png
let photos_private = base.join("photos/private");
let photos_public = base.join("photos/public");
let screenshots = base.join("screenshots");
fs::create_dir_all(&photos_private).unwrap();
fs::create_dir_all(&photos_public).unwrap();
fs::create_dir_all(&screenshots).unwrap();
let secret = photos_private.join("secret.jpg");
let ok = photos_public.join("ok.jpg");
let shot = screenshots.join("img.png");
File::create(&secret).unwrap();
File::create(&ok).unwrap();
File::create(&shot).unwrap();
// Exclude "/photos/private" and "/screenshots" under base
let excluded = vec![
String::from("/photos/private"),
String::from("/screenshots"),
];
let excluder = PathExcluder::new(base, &excluded);
assert!(excluder.is_excluded(&secret), "secret should be excluded");
assert!(
excluder.is_excluded(&shot),
"screenshots should be excluded"
);
assert!(
!excluder.is_excluded(&ok),
"public photo should NOT be excluded"
);
}
// Entries without a leading '/' match whole path components, not substrings.
#[test]
fn test_path_excluder_pattern_anywhere_under_base() {
let tmp = tempdir().unwrap();
let base = tmp.path();
// NOTE(review): `base` lives under the system temp directory. If
// `is_excluded` also considers the components of `base` itself, a temp
// root such as `/tmp` would make every path below match the "tmp"
// pattern and the negative assertions here would fail - confirm.
// Simulate:
// base/a/tmp_file.jpg
// base/b/normal.jpg
// base/c/tmp/file.jpg
let a = base.join("a");
let b = base.join("b");
let c = base.join("c/tmp");
fs::create_dir_all(&a).unwrap();
fs::create_dir_all(&b).unwrap();
fs::create_dir_all(&c).unwrap();
let tmp_file = a.join("tmp_file.jpg");
let normal = b.join("normal.jpg");
let tmp_dir_file = c.join("file.jpg");
File::create(&tmp_file).unwrap();
File::create(&normal).unwrap();
File::create(&tmp_dir_file).unwrap();
// Exclude any path that has a component exactly equal to "tmp"
let excluded = vec![String::from("tmp")];
let excluder = PathExcluder::new(base, &excluded);
assert!(
!excluder.is_excluded(&tmp_file),
"file with 'tmp' in name should NOT be excluded"
);
assert!(
excluder.is_excluded(&tmp_dir_file),
"file in directory with 'tmp' in path should be excluded"
);
assert!(
!excluder.is_excluded(&normal),
"file without 'tmp' in its path should NOT be excluded"
);
}
// Directory rules and component patterns can be combined in one excluder.
#[test]
fn test_path_excluder_mixed_absolute_and_pattern() {
let tmp = tempdir().unwrap();
let base = tmp.path();
// Simulate:
// base/photos/private/secret_tmp.jpg -> excluded by absolute dir rule
// base/photos/private/secret.jpg -> excluded by absolute dir rule
// base/photos/tmp/public.jpg -> excluded by pattern "tmp" (dir name)
// base/photos/public/tmp_public.jpg -> NOT excluded (file name contains "tmp" but not equal)
// base/other/keep.jpg -> NOT excluded
let photos_private = base.join("photos/private");
let photos_tmp = base.join("photos/tmp");
let photos_public = base.join("photos/public");
let other = base.join("other");
fs::create_dir_all(&photos_private).unwrap();
fs::create_dir_all(&photos_tmp).unwrap();
fs::create_dir_all(&photos_public).unwrap();
fs::create_dir_all(&other).unwrap();
let secret_tmp = photos_private.join("secret_tmp.jpg");
let secret = photos_private.join("secret.jpg");
let tmp_dir_file = photos_tmp.join("public.jpg");
let tmp_in_name = photos_public.join("tmp_public.jpg");
let keep = other.join("keep.jpg");
File::create(&secret_tmp).unwrap();
File::create(&secret).unwrap();
File::create(&tmp_dir_file).unwrap();
File::create(&tmp_in_name).unwrap();
File::create(&keep).unwrap();
// Mixed: exclude "/photos/private" (dir) and any component equal to "tmp"
let excluded = vec![String::from("/photos/private"), String::from("tmp")];
let excluder = PathExcluder::new(base, &excluded);
// Entire private tree is excluded by dir rule
assert!(excluder.is_excluded(&secret_tmp));
assert!(excluder.is_excluded(&secret));
// Dir 'tmp' under photos excluded by pattern
assert!(excluder.is_excluded(&tmp_dir_file));
// File name containing 'tmp' but not equal should NOT be excluded
assert!(!excluder.is_excluded(&tmp_in_name));
// keep.jpg doesn't match any rule
assert!(!excluder.is_excluded(&keep));
}
}