From 78fabc2b3249b62139afc98f78abe51dfb2c425d Mon Sep 17 00:00:00 2001
From: Cameron Cordes
Date: Thu, 14 May 2026 15:43:13 -0400
Subject: [PATCH] hls: retire legacy basename-keyed HLS files on startup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds `video::legacy_migration::retire_legacy_hls_output`, called once
from `main` right after the diesel migrations run and before the actor
pipeline starts. Walks `$VIDEO_PATH` at depth 1, deletes every `.m3u8` /
`.m3u8.tmp` / `.m3u8.unsupported` / `.ts` file at root, and logs a
single info line with per-class counts. Skips directories (the new
layout's `<shard>/<hash>/` lives there) and unknown extensions, so an
operator's stashed README or `.tmp` from a different tool is safe.

Why this needs its own one-shot pass rather than letting the rewritten
`cleanup_orphaned_playlists` handle it: the cleanup walk deliberately
only looks at `<shard>/<hash>/` dirs (so it can't accidentally `rm`
operator-stashed content), so without this migration the legacy files
would sit at root forever, never served, never refreshed.

Operator complaint count from the previous IMG_NNNN.MOV collision: ~10
duplicate-basename hits on one library alone; total .m3u8 count was 699
vs a much larger video count — i.e. the loser of every collision was a
permanent orphan. This pass collects all of them, then the running
watcher writes hash-keyed playlists going forward.

Idempotent — a second boot finds nothing and reports zero deletions, so
the call site can stay in `main` across releases until the module is
removed in a later cleanup commit.

Tests cover the happy path (legacy artifacts gone, hash dir untouched,
unrelated files left alone), idempotency, and the missing-directory
case.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/main.rs                   |  10 ++
 src/video/legacy_migration.rs | 246 ++++++++++++++++++++++++++++++++++
 src/video/mod.rs              |   1 +
 3 files changed, 257 insertions(+)
 create mode 100644 src/video/legacy_migration.rs

diff --git a/src/main.rs b/src/main.rs
index ec5b3d0..023b57b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -72,6 +72,16 @@ fn main() -> std::io::Result<()> {
 
     run_migrations(&mut connect()).expect("Failed to run migrations");
 
+    // One-shot retirement of the pre-content-hash HLS layout. Idempotent
+    // — a second boot finds nothing and reports zero deletions, so it's
+    // safe to leave wired in until the module is removed in a later
+    // release. Runs before the actor pipeline starts so we never race a
+    // PlaylistGenerator write against this rm.
+    {
+        let video_path = env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env");
+        video::legacy_migration::retire_legacy_hls_output(std::path::Path::new(&video_path));
+    }
+
     let system = actix::System::new();
     system.block_on(async {
         // Just use basic logger when running a non-release build
diff --git a/src/video/legacy_migration.rs b/src/video/legacy_migration.rs
new file mode 100644
index 0000000..0db8e25
--- /dev/null
+++ b/src/video/legacy_migration.rs
@@ -0,0 +1,246 @@
+//! One-shot retirement of the pre-content-hash HLS output layout.
+//!
+//! Before the hash-keyed layout landed, the actor pipeline wrote every
+//! playlist as `$VIDEO_PATH/<basename>.m3u8` with sibling
+//! `<basename>_NNN.ts` segments and a `<basename>.m3u8.unsupported`
+//! sentinel on ffmpeg failure. The new pipeline (see
+//! [`crate::video::hls_paths`]) puts everything inside a hash-keyed
+//! subdirectory, so the legacy flat files are orphaned the moment the
+//! upgraded binary boots — they're not served, not refreshed, and not
+//! GC'd by the new orphan cleanup (which deliberately ignores anything
+//! that doesn't sit inside a `<shard>/<hash>/` dir).
+//!
+//! This migration runs once on startup. It walks `$VIDEO_PATH` at depth
+//! 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts`
+//! file, and reports a single info line. It is idempotent — a second
+//! run finds nothing and reports zero deletions, so it's safe to leave
+//! wired in across releases until the codebase finally drops the
+//! module.
+//!
+//! Sub-directories under `$VIDEO_PATH` are intentionally left alone:
+//! every legitimate child of `$VIDEO_PATH` in the new layout is a
+//! 2-char shard directory holding hash subdirs, and those are managed
+//! by `cleanup_orphaned_playlists`.
+
+use std::path::Path;
+
+use log::{info, warn};
+
+/// Counters for what the migration did this run.
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+pub struct RetireStats {
+    pub deleted_playlists: usize,
+    pub deleted_segments: usize,
+    pub deleted_sentinels: usize,
+    pub deleted_tmp: usize,
+    pub errors: usize,
+}
+
+impl RetireStats {
+    pub fn total_deleted(&self) -> usize {
+        self.deleted_playlists
+            + self.deleted_segments
+            + self.deleted_sentinels
+            + self.deleted_tmp
+    }
+}
+
+/// Delete every legacy basename-keyed HLS artifact at the root of
+/// `video_dir`. Hash dirs (children that are directories) are skipped.
+/// Returns counts so the caller can log a single line summary.
+pub fn retire_legacy_hls_output(video_dir: &Path) -> RetireStats {
+    let mut stats = RetireStats::default();
+
+    let read = match std::fs::read_dir(video_dir) {
+        Ok(r) => r,
+        Err(e) => {
+            warn!(
+                "Legacy HLS migration: cannot read {} ({}); skipping",
+                video_dir.display(),
+                e
+            );
+            return stats;
+        }
+    };
+
+    for entry in read.flatten() {
+        let file_type = match entry.file_type() {
+            Ok(t) => t,
+            Err(_) => continue,
+        };
+        if !file_type.is_file() {
+            // Hash shard directories live here in the new layout.
+            continue;
+        }
+        let path = entry.path();
+        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
+            continue;
+        };
+
+        let bucket = classify(name);
+        let Some(bucket) = bucket else {
+            continue;
+        };
+
+        match std::fs::remove_file(&path) {
+            Ok(()) => match bucket {
+                LegacyKind::Playlist => stats.deleted_playlists += 1,
+                LegacyKind::Segment => stats.deleted_segments += 1,
+                LegacyKind::Sentinel => stats.deleted_sentinels += 1,
+                LegacyKind::Tmp => stats.deleted_tmp += 1,
+            },
+            Err(e) => {
+                warn!(
+                    "Legacy HLS migration: failed to remove {}: {}",
+                    path.display(),
+                    e
+                );
+                stats.errors += 1;
+            }
+        }
+    }
+
+    if stats.total_deleted() > 0 || stats.errors > 0 {
+        info!(
+            "Legacy HLS migration: deleted {} playlist(s), {} segment(s), {} sentinel(s), {} tmp; {} error(s)",
+            stats.deleted_playlists,
+            stats.deleted_segments,
+            stats.deleted_sentinels,
+            stats.deleted_tmp,
+            stats.errors,
+        );
+    } else {
+        info!(
+            "Legacy HLS migration: nothing to do under {}",
+            video_dir.display()
+        );
+    }
+
+    stats
+}
+
+#[derive(Debug, Clone, Copy)]
+enum LegacyKind {
+    Playlist,
+    Segment,
+    Sentinel,
+    Tmp,
+}
+
+/// Decide whether a flat file at `$VIDEO_PATH` root is legacy HLS
+/// output. Returns `None` for anything else — operator-stashed files,
+/// new-layout files (which don't live here), etc. — so we don't rm them.
+fn classify(name: &str) -> Option<LegacyKind> {
+    // Order matters: sentinel and tmp are more specific suffixes that
+    // sit on top of the .m3u8 / .ts extensions, so check them first.
+    if name.ends_with(".m3u8.unsupported") {
+        return Some(LegacyKind::Sentinel);
+    }
+    if name.ends_with(".m3u8.tmp") {
+        return Some(LegacyKind::Tmp);
+    }
+    if name.ends_with(".m3u8") {
+        return Some(LegacyKind::Playlist);
+    }
+    if name.ends_with(".ts") {
+        return Some(LegacyKind::Segment);
+    }
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::tempdir;
+
+    #[test]
+    fn classify_recognises_each_legacy_artifact() {
+        assert!(matches!(
+            classify("IMG_0341.MOV.m3u8"),
+            Some(LegacyKind::Playlist)
+        ));
+        assert!(matches!(
+            classify("IMG_0341.MOV_000.ts"),
+            Some(LegacyKind::Segment)
+        ));
+        assert!(matches!(
+            classify("IMG_0341.MOV.m3u8.unsupported"),
+            Some(LegacyKind::Sentinel)
+        ));
+        assert!(matches!(
+            classify("IMG_0341.MOV.m3u8.tmp"),
+            Some(LegacyKind::Tmp)
+        ));
+
+        assert!(classify("README.md").is_none());
+        assert!(classify("ab").is_none()); // shard dir name
+        assert!(classify(".keep").is_none());
+    }
+
+    #[test]
+    fn retire_deletes_legacy_and_leaves_hash_dirs() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        // Legacy artifacts at root.
+        fs::write(root.join("IMG_0341.MOV.m3u8"), b"#EXTM3U").unwrap();
+        fs::write(root.join("IMG_0341.MOV_000.ts"), b"\x00").unwrap();
+        fs::write(root.join("IMG_0341.MOV_001.ts"), b"\x00").unwrap();
+        fs::write(root.join("clip.MP4.m3u8.unsupported"), b"").unwrap();
+        fs::write(root.join("partial.m3u8.tmp"), b"").unwrap();
+
+        // New-layout hash dir we must NOT touch.
+        let hash_dir = root.join("ab").join("a".repeat(64));
+        fs::create_dir_all(&hash_dir).unwrap();
+        fs::write(hash_dir.join("playlist.m3u8"), b"#EXTM3U").unwrap();
+        fs::write(hash_dir.join("segment_000.ts"), b"\x00").unwrap();
+
+        // Unrelated file we must NOT touch.
+        fs::write(root.join("README.md"), b"don't touch me").unwrap();
+
+        let stats = retire_legacy_hls_output(root);
+        assert_eq!(stats.deleted_playlists, 1);
+        assert_eq!(stats.deleted_segments, 2);
+        assert_eq!(stats.deleted_sentinels, 1);
+        assert_eq!(stats.deleted_tmp, 1);
+        assert_eq!(stats.errors, 0);
+
+        // Legacy artifacts gone.
+        assert!(!root.join("IMG_0341.MOV.m3u8").exists());
+        assert!(!root.join("IMG_0341.MOV_000.ts").exists());
+        assert!(!root.join("clip.MP4.m3u8.unsupported").exists());
+        assert!(!root.join("partial.m3u8.tmp").exists());
+        // Hash dir untouched.
+        assert!(hash_dir.join("playlist.m3u8").exists());
+        assert!(hash_dir.join("segment_000.ts").exists());
+        // Unrelated file untouched.
+        assert!(root.join("README.md").exists());
+    }
+
+    #[test]
+    fn retire_is_idempotent() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        fs::write(root.join("video.mp4.m3u8"), b"#EXTM3U").unwrap();
+        fs::write(root.join("video.mp4_000.ts"), b"\x00").unwrap();
+
+        let first = retire_legacy_hls_output(root);
+        assert_eq!(first.deleted_playlists + first.deleted_segments, 2);
+
+        let second = retire_legacy_hls_output(root);
+        assert_eq!(second.total_deleted(), 0);
+        assert_eq!(second.errors, 0);
+    }
+
+    #[test]
+    fn retire_handles_missing_dir() {
+        // No panic, no error count blowing up — just a warn + zero stats.
+        let tmp = tempdir().unwrap();
+        let missing = tmp.path().join("does_not_exist");
+        let stats = retire_legacy_hls_output(&missing);
+        assert_eq!(stats.total_deleted(), 0);
+        assert_eq!(stats.errors, 0);
+    }
+}
diff --git a/src/video/mod.rs b/src/video/mod.rs
index e7cfa67..f28d302 100644
--- a/src/video/mod.rs
+++ b/src/video/mod.rs
@@ -10,6 +10,7 @@ use walkdir::WalkDir;
 pub mod actors;
 pub mod ffmpeg;
 pub mod hls_paths;
+pub mod legacy_migration;
 
 #[allow(dead_code)]
 pub async fn generate_video_gifs() {