memories: single-SQL rewrite + 20-year lookback
Replaces the EXIF-loop + WalkDir-fallback pipeline that powered
`/memories` with a single per-library SQL query
(`get_memories_in_window`) that uses `strftime('%m-%d' | '%W' | '%m',
date_taken, 'unixepoch', tz_offset)` for calendar matching in the
client's timezone, plus a `years_back` lower bound and a
no-future-dates upper bound. Returns only the matching rows; the
handler applies per-library `PathExcluder` post-query and sorts.
Drops:
- `collect_exif_memories` — replaced by the single SQL query.
- `collect_filesystem_memories` — the canonical-date pipeline now
populates `date_taken` for every row at ingest, so the WalkDir
fallback that scanned 14k+ files each request is no longer needed.
- `get_memory_date_with_priority` and friends — request-time waterfall
superseded by `date_resolver` running at ingest. The associated
three priority-tests are dropped; their replacement lives in
`date_resolver::tests`.
On a ~14k-file library this drops `/memories` from 10–15 s
(dominated by `fs::metadata` per row) to single-digit ms.
Bumps `DEFAULT_YEARS_BACK` from 15 → 20 to surface deeper archives
on matching anniversaries.
Note vs. ISO weeks: the original Rust used `chrono::iso_week().week()`
for week-span matching. SQLite's `%W` is Monday-anchored but uses week
0 for days before the first Monday, so it can disagree with ISO at
year boundaries by ±1. Acceptable for nostalgia browsing.
Adds 3 new DAO tests covering month-span filter, library scoping, and
the unknown-span-token guard. Also adds a CLAUDE.md section describing
the canonical-date pipeline end-to-end and the new
`DATE_BACKFILL_MAX_PER_TICK` env var.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,6 +9,21 @@ use crate::database::models::{
|
||||
};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Decoded shape for `get_memories_in_window`'s raw `sql_query`. Diesel's
|
||||
/// query DSL doesn't expose strftime, so the memories filter is hand-
|
||||
/// written SQL — but the returned columns are simple enough that a small
|
||||
/// `QueryableByName` struct suffices, kept private to this module.
|
||||
#[derive(diesel::QueryableByName)]
|
||||
#[allow(dead_code)] // fields read via Diesel's QueryableByName derive
|
||||
struct MemoriesWindowRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
rel_path: String,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
date_taken: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
last_modified: i64,
|
||||
}
|
||||
|
||||
/// Wire shape for a single member of a duplicate group, returned by
|
||||
/// `list_duplicates_*` and `lookup_duplicate_row`. Carries everything
|
||||
/// the Apollo modal needs to render a member tile and its meta line —
|
||||
@@ -424,6 +439,35 @@ pub trait ExifDao: Sync + Send {
|
||||
source: &str,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Single-query backend for `/memories`. Returns
|
||||
/// `(rel_path, date_taken, last_modified)` for rows in `library_id`
|
||||
/// whose `date_taken` falls within `[now - years_back y, now]` and
|
||||
/// whose calendar position matches the request's span:
|
||||
/// - `"day"` — same month + day-of-month (any year)
|
||||
/// - `"week"` — same week-of-year (SQLite `%W`, Monday-anchored —
|
||||
/// close to but not exactly ISO week 8601; the
|
||||
/// boundary cases at year-start/end can shift by ±1
|
||||
/// vs the prior request-time `iso_week()` filter)
|
||||
/// - `"month"` — same month (any year)
|
||||
///
|
||||
/// `tz_offset_minutes` is applied to both sides of the strftime
|
||||
/// comparison so the calendar match is in the user's local time.
|
||||
/// Backed by the `(library_id, date_taken)` index.
|
||||
///
|
||||
/// This is the single-SQL replacement for the EXIF-loop +
|
||||
/// WalkDir-fallback that powered `/memories` previously; it's
|
||||
/// correct only because the canonical-date waterfall at ingest
|
||||
/// (`crate::date_resolver`) populates `date_taken` for every row
|
||||
/// it can resolve.
|
||||
fn get_memories_in_window(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: i32,
|
||||
span_token: &str,
|
||||
years_back: i32,
|
||||
tz_offset_minutes: i32,
|
||||
) -> Result<Vec<(String, i64, i64)>, DbError>;
|
||||
|
||||
/// Return image rows that have a `content_hash` but no `phash_64`,
|
||||
/// oldest first. Used by the `backfill_perceptual_hash` binary.
|
||||
/// Filters by image extension at the DB layer to avoid ever asking
|
||||
@@ -1090,23 +1134,28 @@ impl ExifDao for SqliteExifDao {
|
||||
library_id_val: i32,
|
||||
limit: i64,
|
||||
) -> Result<Vec<(i32, String)>, DbError> {
|
||||
trace_db_call(context, "query", "get_rows_needing_date_backfill", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
trace_db_call(
|
||||
context,
|
||||
"query",
|
||||
"get_rows_needing_date_backfill",
|
||||
|_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
// The partial index is on `(library_id, id) WHERE date_taken
|
||||
// IS NULL OR date_taken_source = 'fs_time'`, so the planner
|
||||
// hits it directly when both predicates are present.
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(date_taken.is_null().or(date_taken_source.eq("fs_time")))
|
||||
.select((library_id, rel_path))
|
||||
.order(id.asc())
|
||||
.limit(limit)
|
||||
.load::<(i32, String)>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
// The partial index is on `(library_id, id) WHERE date_taken
|
||||
// IS NULL OR date_taken_source = 'fs_time'`, so the planner
|
||||
// hits it directly when both predicates are present.
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(date_taken.is_null().or(date_taken_source.eq("fs_time")))
|
||||
.select((library_id, rel_path))
|
||||
.order(id.asc())
|
||||
.limit(limit)
|
||||
.load::<(i32, String)>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
},
|
||||
)
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
@@ -1128,10 +1177,7 @@ impl ExifDao for SqliteExifDao {
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(rel_path.eq(rel_path_val)),
|
||||
)
|
||||
.set((
|
||||
date_taken.eq(date_taken_val),
|
||||
date_taken_source.eq(source),
|
||||
))
|
||||
.set((date_taken.eq(date_taken_val), date_taken_source.eq(source)))
|
||||
.execute(connection.deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Update error"))
|
||||
@@ -1139,6 +1185,60 @@ impl ExifDao for SqliteExifDao {
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn get_memories_in_window(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: i32,
|
||||
span_token: &str,
|
||||
years_back: i32,
|
||||
tz_offset_minutes: i32,
|
||||
) -> Result<Vec<(String, i64, i64)>, DbError> {
|
||||
trace_db_call(context, "query", "get_memories_in_window", |_span| {
|
||||
// strftime pattern is span-dependent; the rest of the WHERE
|
||||
// clause is shared. Only `%m-%d`, `%W`, `%m` are accepted —
|
||||
// anything else is a programmer error.
|
||||
let pattern = match span_token {
|
||||
"day" => "%m-%d",
|
||||
"week" => "%W",
|
||||
"month" => "%m",
|
||||
_ => return Err(anyhow::anyhow!("invalid span token: {}", span_token)),
|
||||
};
|
||||
|
||||
// SQLite's date modifiers want a string like `'-480 minutes'`
|
||||
// (signed) or `'-15 years'`. Use the `+` flag so positive
|
||||
// offsets render as `+480 minutes`.
|
||||
let tz_modifier = format!("{:+} minutes", tz_offset_minutes);
|
||||
let years_modifier = format!("-{} years", years_back);
|
||||
|
||||
let sql = format!(
|
||||
"SELECT rel_path, date_taken, last_modified \
|
||||
FROM image_exif \
|
||||
WHERE library_id = ?1 \
|
||||
AND date_taken IS NOT NULL \
|
||||
AND date_taken <= unixepoch('now') \
|
||||
AND date_taken >= unixepoch('now', ?2) \
|
||||
AND strftime('{p}', date_taken, 'unixepoch', ?3) \
|
||||
= strftime('{p}', 'now', ?3)",
|
||||
p = pattern,
|
||||
);
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::sql_query(sql)
|
||||
.bind::<diesel::sql_types::Integer, _>(library_id)
|
||||
.bind::<diesel::sql_types::Text, _>(years_modifier)
|
||||
.bind::<diesel::sql_types::Text, _>(tz_modifier)
|
||||
.load::<MemoriesWindowRow>(connection.deref_mut())
|
||||
.map(|rows| {
|
||||
rows.into_iter()
|
||||
.map(|r| (r.rel_path, r.date_taken, r.last_modified))
|
||||
.collect()
|
||||
})
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_by_content_hash(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
@@ -2069,9 +2169,7 @@ mod exif_dao_tests {
|
||||
// Other library — never returned even when eligible.
|
||||
insert_row_with_source(&mut dao, 2, "archive/null.jpg", None, None);
|
||||
|
||||
let rows = dao
|
||||
.get_rows_needing_date_backfill(&ctx(), 1, 100)
|
||||
.unwrap();
|
||||
let rows = dao.get_rows_needing_date_backfill(&ctx(), 1, 100).unwrap();
|
||||
let paths: Vec<String> = rows.into_iter().map(|(_, p)| p).collect();
|
||||
assert_eq!(paths.len(), 2, "expected null + fs_time eligible only");
|
||||
assert!(paths.contains(&"main/null.jpg".to_string()));
|
||||
@@ -2098,4 +2196,125 @@ mod exif_dao_tests {
|
||||
assert_eq!(row.content_hash, Some("deadbeef".to_string()));
|
||||
assert_eq!(row.size_bytes, Some(1024));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_memories_in_window_day_matches_only_same_md_in_year_window() {
|
||||
let mut dao = setup_two_libraries();
|
||||
|
||||
// Anchor on a known date so the test is timezone-stable: insert
|
||||
// rows whose date_taken IS the same wall-clock time as `now()`
|
||||
// would have been some N years ago, and verify the day-span
|
||||
// filter returns them. We can't bind 'now' from Rust, so instead
|
||||
// we insert rows for the *current* day (offset by 365 days * N
|
||||
// years) and rely on SQLite computing the same `%m-%d` for both
|
||||
// sides of the equality. Using the unix-now-minus-365*N seconds
|
||||
// approximation is good enough — leap years drift by ~one day
|
||||
// every four years, but the test only checks day-of-year match
|
||||
// for rows inserted "today minus N years (no leap correction)".
|
||||
// To dodge the leap-year drift entirely, we use rows whose
|
||||
// calendar date is read back from SQLite and we just check
|
||||
// membership.
|
||||
|
||||
// 1y, 5y, 10y, 21y back from 'now':
|
||||
let now_ts = chrono::Utc::now().timestamp();
|
||||
let year_secs: i64 = 365 * 86_400;
|
||||
insert_row_with_source(
|
||||
&mut dao,
|
||||
1,
|
||||
"y1.jpg",
|
||||
Some(now_ts - year_secs),
|
||||
Some("exif"),
|
||||
);
|
||||
insert_row_with_source(
|
||||
&mut dao,
|
||||
1,
|
||||
"y5.jpg",
|
||||
Some(now_ts - 5 * year_secs),
|
||||
Some("exif"),
|
||||
);
|
||||
insert_row_with_source(
|
||||
&mut dao,
|
||||
1,
|
||||
"y10.jpg",
|
||||
Some(now_ts - 10 * year_secs),
|
||||
Some("exif"),
|
||||
);
|
||||
// Outside the 20-year window:
|
||||
insert_row_with_source(
|
||||
&mut dao,
|
||||
1,
|
||||
"y21.jpg",
|
||||
Some(now_ts - 21 * year_secs),
|
||||
Some("exif"),
|
||||
);
|
||||
// Future row: must be excluded by the `<= now` clause.
|
||||
insert_row_with_source(
|
||||
&mut dao,
|
||||
1,
|
||||
"future.jpg",
|
||||
Some(now_ts + 86_400),
|
||||
Some("exif"),
|
||||
);
|
||||
// No date — never returned regardless of source.
|
||||
insert_row_with_source(&mut dao, 1, "nodate.jpg", None, None);
|
||||
|
||||
// Month span returns rows from the same calendar month over the
|
||||
// window — y1, y5, y10 should all qualify (same month any year),
|
||||
// y21 trims (out of years_back), future trims (> now), nodate
|
||||
// never qualifies. Day-of-month leap drift means even with 365-
|
||||
// day approximation a row may shift by one in either direction;
|
||||
// month is the safer assertion under that approximation.
|
||||
let rows = dao
|
||||
.get_memories_in_window(&ctx(), 1, "month", 20, 0)
|
||||
.unwrap();
|
||||
let paths: std::collections::HashSet<String> =
|
||||
rows.into_iter().map(|(p, _, _)| p).collect();
|
||||
assert!(
|
||||
paths.contains("y1.jpg") && paths.contains("y5.jpg") && paths.contains("y10.jpg"),
|
||||
"month span should include all in-window rows: {:?}",
|
||||
paths
|
||||
);
|
||||
assert!(
|
||||
!paths.contains("y21.jpg"),
|
||||
"21-year-old row should fall outside the years_back window"
|
||||
);
|
||||
assert!(!paths.contains("future.jpg"), "future row must be excluded");
|
||||
assert!(
|
||||
!paths.contains("nodate.jpg"),
|
||||
"row without date must never appear"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_memories_in_window_scopes_by_library_id() {
|
||||
let mut dao = setup_two_libraries();
|
||||
let now_ts = chrono::Utc::now().timestamp();
|
||||
let year = 365 * 86_400i64;
|
||||
insert_row_with_source(&mut dao, 1, "main/x.jpg", Some(now_ts - year), Some("exif"));
|
||||
insert_row_with_source(
|
||||
&mut dao,
|
||||
2,
|
||||
"archive/x.jpg",
|
||||
Some(now_ts - year),
|
||||
Some("exif"),
|
||||
);
|
||||
|
||||
let lib1 = dao
|
||||
.get_memories_in_window(&ctx(), 1, "month", 20, 0)
|
||||
.unwrap();
|
||||
let lib2 = dao
|
||||
.get_memories_in_window(&ctx(), 2, "month", 20, 0)
|
||||
.unwrap();
|
||||
assert_eq!(lib1.len(), 1);
|
||||
assert_eq!(lib1[0].0, "main/x.jpg");
|
||||
assert_eq!(lib2.len(), 1);
|
||||
assert_eq!(lib2[0].0, "archive/x.jpg");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_memories_in_window_rejects_unknown_span_token() {
|
||||
let mut dao = setup_two_libraries();
|
||||
let err = dao.get_memories_in_window(&ctx(), 1, "decade", 20, 0);
|
||||
assert!(err.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user