Reels: bound disk/ledger growth (pre-gen prune + on-demand cache sweep)

Nothing reaped reels before, so the on-disk cache and ledger grew
unbounded — each night's daily reel is a new ~4MB file + ledger row that's
stale within ~26h.

- Pre-gen self-prune: after recording a reel, prune_superseded keeps the
  newest PREGEN_KEEP_PER_SCOPE (2) rows per (span, library) and unlinks the
  superseded reels' mp4+sidecar. Caps the ledger/disk at ~spans×libraries×2.
- On-disk sweeper (spawn_reel_cache_sweeper): every 24h, removes reel mp4s
  older than REEL_CACHE_MAX_AGE_DAYS (7) that have no ledger row and no live job —
  bounding the on-demand cache, which has no ledger row and otherwise grows
  forever — plus crashed-render cruft (.mp4.tmp/.concat.txt/orphan sidecars).
  Runs regardless of REEL_PREGEN_ENABLED; disable with REEL_CACHE_SWEEP_ENABLED=0.
- New DAO methods prune_superseded + all_cache_keys (with tests); env knobs
  documented in .env.example.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-06-13 23:27:32 -04:00
parent 664b3694f8
commit 7e21213181
4 changed files with 296 additions and 17 deletions
+118
View File
@@ -41,6 +41,23 @@ pub trait PrecomputedReelDao: Sync + Send {
render_version: i32,
min_generated_at: i64,
) -> Result<bool, DbError>;
/// Delete all but the newest `keep` rows for (span, library_key), returning
/// the deleted rows so the caller can unlink their output files. Used by the
/// nightly job to retire superseded reels (e.g. yesterday's daily).
///
/// # Arguments
/// * `context` - OpenTelemetry context supplied by the caller.
/// * `span` - Span component of the scope to prune (e.g. `"day"`).
/// * `library_key` - Library component of the scope to prune.
/// * `keep` - How many of the newest rows in the scope to retain.
///
/// # Returns
/// The rows that were deleted; empty when nothing needed pruning.
///
/// # Errors
/// Returns a [`DbError`] when the operation fails.
// NOTE(review): `dead_code` is allowed here although the description above
// names a caller — presumably some build configuration leaves it unused; confirm.
#[allow(dead_code)]
fn prune_superseded(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
keep: usize,
) -> Result<Vec<PrecomputedReel>, DbError>;
/// Every cache_key currently in the ledger. Used by the on-disk cache sweep
/// to protect files a ledger row still points at.
///
/// # Arguments
/// * `context` - OpenTelemetry context supplied by the caller.
///
/// # Returns
/// One `String` per ledger row; no ordering is promised by this signature.
///
/// # Errors
/// Returns a [`DbError`] when the operation fails.
#[allow(dead_code)]
fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError>;
}
pub struct SqlitePrecomputedReelDao {
@@ -148,6 +165,60 @@ impl PrecomputedReelDao for SqlitePrecomputedReelDao {
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
/// Deletes every `precomputed_reels` row for `(span, library_key)` except the
/// newest `keep`, and returns the deleted rows so the caller can clean up
/// whatever those rows referenced.
///
/// "Newest" means highest `generated_at`; rows sharing a timestamp are ordered
/// by `id` descending so the set of survivors is deterministic instead of
/// depending on whatever order the database happens to return ties in.
/// (Presumably `id` grows with insertion order — confirm against the schema.)
///
/// Passing `keep == 0` deletes every row in the scope. When the scope holds
/// `keep` rows or fewer, nothing is deleted and an empty `Vec` is returned.
///
/// # Errors
/// Returns a [`DbError`] of kind `UpdateError` if either the load or the
/// delete statement fails.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
fn prune_superseded(
    &mut self,
    context: &opentelemetry::Context,
    span: &str,
    library_key: &str,
    keep: usize,
) -> Result<Vec<PrecomputedReel>, DbError> {
    trace_db_call(context, "delete", "prune_superseded", |_span| {
        use schema::precomputed_reels::dsl;

        let mut connection = self
            .connection
            .lock()
            .expect("Unable to lock PrecomputedReelDao");

        // Newest first; everything past `keep` is superseded. The table
        // holds at most a handful of rows per (span, library), so loading
        // and slicing in Rust is cheaper than a correlated subquery.
        // The secondary `id` ordering breaks `generated_at` ties so the
        // same rows are kept on every run.
        let mut rows: Vec<PrecomputedReel> = dsl::precomputed_reels
            .filter(dsl::span.eq(span))
            .filter(dsl::library_key.eq(library_key))
            .order((dsl::generated_at.desc(), dsl::id.desc()))
            .load::<PrecomputedReel>(connection.deref_mut())
            .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?;

        // `min` clamps the split point so a scope with fewer than `keep`
        // rows yields an empty tail instead of panicking in `split_off`.
        let stale = rows.split_off(rows.len().min(keep));

        // Skip the DELETE entirely when there is nothing to retire.
        if !stale.is_empty() {
            let ids: Vec<i32> = stale.iter().map(|r| r.id).collect();
            diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids)))
                .execute(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?;
        }

        Ok(stale)
    })
    .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
/// Returns the `cache_key` column of every row in `precomputed_reels`.
///
/// The query applies no filter and no ordering.
///
/// # Errors
/// Returns a [`DbError`] of kind `QueryError` if the select fails.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError> {
    let traced = trace_db_call(context, "query", "all_cache_keys", |_span| {
        use schema::precomputed_reels::dsl;

        let mut guard = self
            .connection
            .lock()
            .expect("Unable to lock PrecomputedReelDao");

        // Project just the key column; the rest of each row is not needed.
        let key_column = dsl::precomputed_reels.select(dsl::cache_key);
        key_column
            .load::<String>(&mut *guard)
            .map_err(|err| anyhow::anyhow!("Failed to load cache keys: {}", err))
    });

    traced.map_err(|err| DbError::log(DbErrorKind::QueryError, err))
}
}
#[cfg(test)]
@@ -318,4 +389,51 @@ mod tests {
assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap());
assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap());
}
/// Pruning a scope that holds three reels down to two must delete only the
/// oldest one, hand it back to the caller, and leave other scopes alone.
#[test]
fn prune_superseded_keeps_newest_and_returns_deleted() {
    let mut dao = setup_dao();
    let ctx = ctx();

    // Three reels in the scope pruned below, oldest first. `sample_row()` is
    // expected to supply the ("day", "1") scope for these rows.
    let seeds = [
        ("k1", 1_000_000_i64),
        ("k2", 1_001_000_i64),
        ("k3", 1_002_000_i64),
    ];
    for &(cache_key, generated_at) in seeds.iter() {
        let row = InsertablePrecomputedReel {
            cache_key: cache_key.to_string(),
            generated_at,
            ..sample_row()
        };
        dao.record_reel(&ctx, &row).unwrap();
    }

    // A row in a different library that the prune must not touch.
    let unrelated = InsertablePrecomputedReel {
        library_key: "2".to_string(),
        cache_key: "other".to_string(),
        ..sample_row()
    };
    dao.record_reel(&ctx, &unrelated).unwrap();

    // Keeping two of the three means exactly the oldest (k1) is retired.
    let pruned = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
    let pruned_keys: Vec<&str> = pruned.iter().map(|reel| reel.cache_key.as_str()).collect();
    assert_eq!(pruned_keys, ["k1"]);

    // Survivors: the two newest in the pruned scope plus the unrelated row.
    // Sorted first because the DAO does not order its result.
    let mut remaining = dao.all_cache_keys(&ctx).unwrap();
    remaining.sort();
    assert_eq!(remaining, ["k2", "k3", "other"]);
}
/// A scope that already holds no more than `keep` rows must be left untouched.
#[test]
fn prune_superseded_noop_when_within_keep() {
    let mut dao = setup_dao();
    let ctx = ctx();

    // One row only — already under the keep budget of 2 used below.
    let only_row = sample_row();
    dao.record_reel(&ctx, &only_row).unwrap();

    let pruned = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
    assert_eq!(pruned.len(), 0);

    // The single ledger row is still there.
    let remaining = dao.all_cache_keys(&ctx).unwrap();
    assert_eq!(remaining.len(), 1);
}
}