feature/hls-content-hash #95

Merged
cameron merged 10 commits from feature/hls-content-hash into master 2026-05-15 20:09:49 +00:00
3 changed files with 257 additions and 0 deletions
Showing only changes of commit 78fabc2b32 - Show all commits

View File

@@ -72,6 +72,16 @@ fn main() -> std::io::Result<()> {
run_migrations(&mut connect()).expect("Failed to run migrations");
// One-shot retirement of the pre-content-hash HLS layout. Idempotent
// — a second boot finds nothing and reports zero deletions, so it's
// safe to leave wired in until the module is removed in a later
// release. Runs before the actor pipeline starts so we never race a
// PlaylistGenerator write against this rm.
{
let video_path = env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env");
video::legacy_migration::retire_legacy_hls_output(std::path::Path::new(&video_path));
}
let system = actix::System::new();
system.block_on(async {
// Just use basic logger when running a non-release build

View File

@@ -0,0 +1,246 @@
//! One-shot retirement of the pre-content-hash HLS output layout.
//!
//! Before the hash-keyed layout landed, the actor pipeline wrote every
//! playlist as `$VIDEO_PATH/<source-basename>.m3u8` with sibling
//! `<source-basename>_NNN.ts` segments and a `<source-basename>.m3u8.unsupported`
//! sentinel on ffmpeg failure. The new pipeline (see
//! [`crate::video::hls_paths`]) puts everything inside a hash-keyed
//! subdirectory, so the legacy flat files are orphaned the moment the
//! upgraded binary boots — they're not served, not refreshed, and not
//! GC'd by the new orphan cleanup (which deliberately ignores anything
//! that doesn't sit inside a `<shard>/<hash>/` dir).
//!
//! This migration runs once on startup. It walks `$VIDEO_PATH` at depth
//! 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts`
//! file, and reports a single info line. It is idempotent — a second
//! run finds nothing and reports zero deletions, so it's safe to leave
//! wired in across releases until the codebase finally drops the
//! module.
//!
//! Sub-directories under `$VIDEO_PATH` are intentionally left alone:
//! every legitimate child of `$VIDEO_PATH` in the new layout is a
//! 2-char shard directory holding hash subdirs, and those are managed
//! by `cleanup_orphaned_playlists`.
use std::path::Path;
use log::{info, warn};
/// Tally of what a single run of the legacy-HLS retirement did.
///
/// All counters start at zero (`Default`); the type is `Copy`, so it can be
/// passed around and compared freely.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RetireStats {
    /// Number of playlist files removed.
    pub deleted_playlists: usize,
    /// Number of segment files removed.
    pub deleted_segments: usize,
    /// Number of sentinel files removed.
    pub deleted_sentinels: usize,
    /// Number of temporary files removed.
    pub deleted_tmp: usize,
    /// Number of failures encountered during the run.
    pub errors: usize,
}

impl RetireStats {
    /// Sum of every `deleted_*` counter. `errors` is deliberately excluded.
    pub fn total_deleted(&self) -> usize {
        // Exhaustive destructure: adding a field to the struct forces a
        // decision here about whether it belongs in the total.
        let Self {
            deleted_playlists,
            deleted_segments,
            deleted_sentinels,
            deleted_tmp,
            errors: _,
        } = *self;

        deleted_playlists + deleted_segments + deleted_sentinels + deleted_tmp
    }
}
/// Delete every legacy basename-keyed HLS artifact at the root of
/// `video_dir`.
///
/// Only direct children of `video_dir` are examined. An entry is removed
/// when its file name is recognised by [`classify`] *and* it is a regular
/// file; directories (and anything else that is not a plain file) are left
/// alone.
///
/// The function never panics and never returns an error: it is best-effort.
/// Failures are logged with `warn!` and tallied in [`RetireStats::errors`]:
///
/// * a directory entry that could not be read,
/// * a legacy-looking entry whose file type could not be determined,
/// * a legacy file that could not be removed.
///
/// If `video_dir` itself cannot be read, a warning is logged and all-zero
/// stats are returned.
///
/// Returns the per-kind deletion counts so the caller can inspect them; a
/// single `info!` summary line is also emitted here.
pub fn retire_legacy_hls_output(video_dir: &Path) -> RetireStats {
    let mut stats = RetireStats::default();

    let read = match std::fs::read_dir(video_dir) {
        Ok(r) => r,
        Err(e) => {
            warn!(
                "Legacy HLS migration: cannot read {} ({}); skipping",
                video_dir.display(),
                e
            );
            return stats;
        }
    };

    for entry in read {
        // Surface per-entry read failures instead of dropping them, so a
        // partially scanned directory is never reported as a clean run.
        let entry = match entry {
            Ok(entry) => entry,
            Err(e) => {
                warn!(
                    "Legacy HLS migration: failed to read an entry under {}: {}",
                    video_dir.display(),
                    e
                );
                stats.errors += 1;
                continue;
            }
        };

        let path = entry.path();

        // Classify by name first: it needs no I/O, and it keeps unrelated
        // files from ever contributing to the error count. Lossy conversion
        // is safe because only the ASCII suffix is inspected, and it lets
        // artifacts with non-UTF-8 basenames be retired too.
        let Some(bucket) = path
            .file_name()
            .and_then(|n| classify(&n.to_string_lossy()))
        else {
            continue;
        };

        let file_type = match entry.file_type() {
            Ok(t) => t,
            Err(e) => {
                warn!(
                    "Legacy HLS migration: cannot determine file type of {}: {}",
                    path.display(),
                    e
                );
                stats.errors += 1;
                continue;
            }
        };
        if !file_type.is_file() {
            // Hash shard directories live here in the new layout.
            continue;
        }

        match std::fs::remove_file(&path) {
            Ok(()) => match bucket {
                LegacyKind::Playlist => stats.deleted_playlists += 1,
                LegacyKind::Segment => stats.deleted_segments += 1,
                LegacyKind::Sentinel => stats.deleted_sentinels += 1,
                LegacyKind::Tmp => stats.deleted_tmp += 1,
            },
            Err(e) => {
                warn!(
                    "Legacy HLS migration: failed to remove {}: {}",
                    path.display(),
                    e
                );
                stats.errors += 1;
            }
        }
    }

    if stats.total_deleted() > 0 || stats.errors > 0 {
        info!(
            "Legacy HLS migration: deleted {} playlist(s), {} segment(s), {} sentinel(s), {} tmp; {} error(s)",
            stats.deleted_playlists,
            stats.deleted_segments,
            stats.deleted_sentinels,
            stats.deleted_tmp,
            stats.errors,
        );
    } else {
        info!(
            "Legacy HLS migration: nothing to do under {}",
            video_dir.display()
        );
    }

    stats
}
/// The kinds of legacy flat-layout HLS artifact that [`classify`] can
/// recognise from a file name.
#[derive(Debug, Clone, Copy)]
enum LegacyKind {
    /// A `*.m3u8` playlist.
    Playlist,
    /// A `*.ts` segment.
    Segment,
    /// A `*.m3u8.unsupported` marker.
    Sentinel,
    /// A `*.m3u8.tmp` file.
    Tmp,
}

/// Map a file name to the legacy artifact kind it represents, if any.
///
/// Matching is a case-sensitive suffix test on `name` alone; nothing on disk
/// is consulted. Names matching none of the known suffixes yield `None`, so
/// callers leave those files alone.
///
/// NOTE(review): any name ending in `.ts` is treated as a segment, not only
/// names of the `<basename>_NNN.ts` form.
fn classify(name: &str) -> Option<LegacyKind> {
    // The four suffixes are mutually exclusive (a name cannot end in more
    // than one of them), so the first hit is the only possible hit.
    const SUFFIXES: [(&str, LegacyKind); 4] = [
        (".m3u8.unsupported", LegacyKind::Sentinel),
        (".m3u8.tmp", LegacyKind::Tmp),
        (".m3u8", LegacyKind::Playlist),
        (".ts", LegacyKind::Segment),
    ];

    SUFFIXES
        .iter()
        .find(|entry| name.ends_with(entry.0))
        .map(|entry| entry.1)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Create `name` directly under `dir` with the given contents.
    fn put(dir: &std::path::Path, name: &str, contents: &[u8]) {
        fs::write(dir.join(name), contents).unwrap();
    }

    /// Every legacy suffix maps to its kind; unrelated names map to nothing.
    #[test]
    fn classify_recognises_each_legacy_artifact() {
        let playlist = classify("IMG_0341.MOV.m3u8");
        assert!(matches!(playlist, Some(LegacyKind::Playlist)));

        let segment = classify("IMG_0341.MOV_000.ts");
        assert!(matches!(segment, Some(LegacyKind::Segment)));

        let sentinel = classify("IMG_0341.MOV.m3u8.unsupported");
        assert!(matches!(sentinel, Some(LegacyKind::Sentinel)));

        let tmp = classify("IMG_0341.MOV.m3u8.tmp");
        assert!(matches!(tmp, Some(LegacyKind::Tmp)));

        // "ab" stands in for a shard directory name.
        for unrelated in ["README.md", "ab", ".keep"] {
            assert!(classify(unrelated).is_none());
        }
    }

    /// Root-level legacy files are removed; hash dirs and unrelated files stay.
    #[test]
    fn retire_deletes_legacy_and_leaves_hash_dirs() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();

        // Legacy artifacts at root.
        put(root, "IMG_0341.MOV.m3u8", b"#EXTM3U");
        put(root, "IMG_0341.MOV_000.ts", b"\x00");
        put(root, "IMG_0341.MOV_001.ts", b"\x00");
        put(root, "clip.MP4.m3u8.unsupported", b"");
        put(root, "partial.m3u8.tmp", b"");

        // New-layout hash dir we must NOT touch.
        let hash_dir = root.join("ab").join("a".repeat(64));
        fs::create_dir_all(&hash_dir).unwrap();
        put(&hash_dir, "playlist.m3u8", b"#EXTM3U");
        put(&hash_dir, "segment_000.ts", b"\x00");

        // Unrelated file we must NOT touch.
        put(root, "README.md", b"don't touch me");

        let stats = retire_legacy_hls_output(root);
        assert_eq!(stats.deleted_playlists, 1);
        assert_eq!(stats.deleted_segments, 2);
        assert_eq!(stats.deleted_sentinels, 1);
        assert_eq!(stats.deleted_tmp, 1);
        assert_eq!(stats.errors, 0);

        // Legacy artifacts gone.
        for gone in [
            "IMG_0341.MOV.m3u8",
            "IMG_0341.MOV_000.ts",
            "clip.MP4.m3u8.unsupported",
            "partial.m3u8.tmp",
        ] {
            assert!(!root.join(gone).exists());
        }

        // Hash dir untouched.
        for kept in ["playlist.m3u8", "segment_000.ts"] {
            assert!(hash_dir.join(kept).exists());
        }

        // Unrelated file untouched.
        assert!(root.join("README.md").exists());
    }

    /// A second run over the same directory deletes nothing and reports no errors.
    #[test]
    fn retire_is_idempotent() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        put(root, "video.mp4.m3u8", b"#EXTM3U");
        put(root, "video.mp4_000.ts", b"\x00");

        let first = retire_legacy_hls_output(root);
        assert_eq!(first.deleted_playlists + first.deleted_segments, 2);

        let second = retire_legacy_hls_output(root);
        assert_eq!(second.total_deleted(), 0);
        assert_eq!(second.errors, 0);
    }

    /// A nonexistent directory yields zero stats rather than a panic or an error count.
    #[test]
    fn retire_handles_missing_dir() {
        let tmp = tempdir().unwrap();
        let missing = tmp.path().join("does_not_exist");

        let stats = retire_legacy_hls_output(&missing);
        assert_eq!(stats.total_deleted(), 0);
        assert_eq!(stats.errors, 0);
    }
}

View File

@@ -10,6 +10,7 @@ use walkdir::WalkDir;
pub mod actors;
pub mod ffmpeg;
pub mod hls_paths;
pub mod legacy_migration;
#[allow(dead_code)]
pub async fn generate_video_gifs() {