hls: retire legacy basename-keyed HLS files on startup
Adds `video::legacy_migration::retire_legacy_hls_output`, called once from `main` right after the diesel migrations run and before the actor pipeline starts. Walks `$VIDEO_PATH` at depth 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts` file at root, and logs a single info line with per-class counts. Skips directories (the new layout's `<shard>/<hash>/` lives there) and unknown extensions, so an operator's stashed README or `.tmp` from a different tool is safe.

Why this needs its own one-shot pass rather than letting the rewritten `cleanup_orphaned_playlists` handle it: the cleanup walk deliberately only looks at `<shard>/<hash>/` dirs (so it can't accidentally `rm` operator-stashed content), so without this migration the legacy files would sit at root forever, never served, never refreshed.

Operator complaint count from the previous IMG_NNNN.MOV collision: ~10 duplicate-basename hits on one library alone; total .m3u8 count was 699 vs a much larger video count — i.e. the loser of every collision was a permanent orphan. This pass collects all of them, then the running watcher writes hash-keyed playlists going forward.

Idempotent — a second boot finds nothing and reports zero deletions, so the call site can stay in `main` across releases until the module is removed in a later cleanup commit.

Tests cover the happy path (legacy artifacts gone, hash dir untouched, unrelated files left alone), idempotency, and the missing-directory case.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
10
src/main.rs
10
src/main.rs
@@ -72,6 +72,16 @@ fn main() -> std::io::Result<()> {
|
|||||||
|
|
||||||
run_migrations(&mut connect()).expect("Failed to run migrations");
|
run_migrations(&mut connect()).expect("Failed to run migrations");
|
||||||
|
|
||||||
|
// One-shot retirement of the pre-content-hash HLS layout. Idempotent
|
||||||
|
// — a second boot finds nothing and reports zero deletions, so it's
|
||||||
|
// safe to leave wired in until the module is removed in a later
|
||||||
|
// release. Runs before the actor pipeline starts so we never race a
|
||||||
|
// PlaylistGenerator write against this rm.
|
||||||
|
{
|
||||||
|
let video_path = env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env");
|
||||||
|
video::legacy_migration::retire_legacy_hls_output(std::path::Path::new(&video_path));
|
||||||
|
}
|
||||||
|
|
||||||
let system = actix::System::new();
|
let system = actix::System::new();
|
||||||
system.block_on(async {
|
system.block_on(async {
|
||||||
// Just use basic logger when running a non-release build
|
// Just use basic logger when running a non-release build
|
||||||
|
|||||||
246
src/video/legacy_migration.rs
Normal file
246
src/video/legacy_migration.rs
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
//! One-shot retirement of the pre-content-hash HLS output layout.
//!
//! Before the hash-keyed layout landed, the actor pipeline wrote every
//! playlist as `$VIDEO_PATH/<source-basename>.m3u8` with sibling
//! `<source-basename>_NNN.ts` segments and a `<source-basename>.m3u8.unsupported`
//! sentinel on ffmpeg failure. The new pipeline (see
//! [`crate::video::hls_paths`]) puts everything inside a hash-keyed
//! subdirectory, so the legacy flat files are orphaned the moment the
//! upgraded binary boots — they're not served, not refreshed, and not
//! GC'd by the new orphan cleanup (which deliberately ignores anything
//! that doesn't sit inside a `<shard>/<hash>/` dir).
//!
//! This migration runs once on startup. It walks `$VIDEO_PATH` at depth
//! 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts`
//! file, and reports a single info line. It is idempotent — a second
//! run finds nothing and reports zero deletions, so it's safe to leave
//! wired in across releases until the codebase finally drops the
//! module.
//!
//! Sub-directories under `$VIDEO_PATH` are intentionally left alone:
//! every legitimate child of `$VIDEO_PATH` in the new layout is a
//! 2-char shard directory holding hash subdirs, and those are managed
//! by `cleanup_orphaned_playlists`.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use log::{info, warn};
|
||||||
|
|
||||||
|
/// Tally of what one migration pass did: one counter per kind of legacy
/// artifact removed, plus a failure counter.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RetireStats {
    /// Number of `.m3u8` playlists removed.
    pub deleted_playlists: usize,
    /// Number of `.ts` segments removed.
    pub deleted_segments: usize,
    /// Number of `.m3u8.unsupported` sentinels removed.
    pub deleted_sentinels: usize,
    /// Number of `.m3u8.tmp` files removed.
    pub deleted_tmp: usize,
    /// Number of failures logged during the pass.
    pub errors: usize,
}

impl RetireStats {
    /// Sum of the four deletion counters. `errors` is deliberately not part
    /// of the total.
    pub fn total_deleted(&self) -> usize {
        [
            self.deleted_playlists,
            self.deleted_segments,
            self.deleted_sentinels,
            self.deleted_tmp,
        ]
        .iter()
        .sum()
    }
}
|
||||||
|
|
||||||
|
/// Delete every legacy basename-keyed HLS artifact at the root of
|
||||||
|
/// `video_dir`. Hash dirs (children that are directories) are skipped.
|
||||||
|
/// Returns counts so the caller can log a single line summary.
|
||||||
|
pub fn retire_legacy_hls_output(video_dir: &Path) -> RetireStats {
|
||||||
|
let mut stats = RetireStats::default();
|
||||||
|
|
||||||
|
let read = match std::fs::read_dir(video_dir) {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
"Legacy HLS migration: cannot read {} ({}); skipping",
|
||||||
|
video_dir.display(),
|
||||||
|
e
|
||||||
|
);
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for entry in read.flatten() {
|
||||||
|
let file_type = match entry.file_type() {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if !file_type.is_file() {
|
||||||
|
// Hash shard directories live here in the new layout.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let path = entry.path();
|
||||||
|
let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let bucket = classify(name);
|
||||||
|
let Some(bucket) = bucket else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
match std::fs::remove_file(&path) {
|
||||||
|
Ok(()) => match bucket {
|
||||||
|
LegacyKind::Playlist => stats.deleted_playlists += 1,
|
||||||
|
LegacyKind::Segment => stats.deleted_segments += 1,
|
||||||
|
LegacyKind::Sentinel => stats.deleted_sentinels += 1,
|
||||||
|
LegacyKind::Tmp => stats.deleted_tmp += 1,
|
||||||
|
},
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
"Legacy HLS migration: failed to remove {}: {}",
|
||||||
|
path.display(),
|
||||||
|
e
|
||||||
|
);
|
||||||
|
stats.errors += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if stats.total_deleted() > 0 || stats.errors > 0 {
|
||||||
|
info!(
|
||||||
|
"Legacy HLS migration: deleted {} playlist(s), {} segment(s), {} sentinel(s), {} tmp; {} error(s)",
|
||||||
|
stats.deleted_playlists,
|
||||||
|
stats.deleted_segments,
|
||||||
|
stats.deleted_sentinels,
|
||||||
|
stats.deleted_tmp,
|
||||||
|
stats.errors,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
info!(
|
||||||
|
"Legacy HLS migration: nothing to do under {}",
|
||||||
|
video_dir.display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
stats
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kind of legacy HLS artifact a root-level file name denotes.
#[derive(Debug, Clone, Copy)]
enum LegacyKind {
    /// A `.m3u8` playlist.
    Playlist,
    /// A `.ts` segment.
    Segment,
    /// A `.m3u8.unsupported` sentinel.
    Sentinel,
    /// A `.m3u8.tmp` file.
    Tmp,
}

/// Map a file name found at the `$VIDEO_PATH` root to the legacy artifact
/// kind indicated by its suffix.
///
/// Returns `None` when the name carries none of the legacy suffixes, so the
/// caller leaves such files alone.
fn classify(name: &str) -> Option<LegacyKind> {
    // None of these suffixes is itself a suffix of another, so at most one
    // row can match any given name and the row order does not affect the
    // result.
    const SUFFIX_TABLE: [(&str, LegacyKind); 4] = [
        (".m3u8.unsupported", LegacyKind::Sentinel),
        (".m3u8.tmp", LegacyKind::Tmp),
        (".m3u8", LegacyKind::Playlist),
        (".ts", LegacyKind::Segment),
    ];

    SUFFIX_TABLE
        .iter()
        .find(|(suffix, _)| name.ends_with(*suffix))
        .map(|&(_, kind)| kind)
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Create (or overwrite) `name` directly under `dir` with `bytes`.
    fn put(dir: &std::path::Path, name: &str, bytes: &[u8]) {
        fs::write(dir.join(name), bytes).unwrap();
    }

    /// Every legacy suffix maps to its kind; unrelated names map to `None`.
    #[test]
    fn classify_recognises_each_legacy_artifact() {
        assert!(matches!(
            classify("IMG_0341.MOV.m3u8"),
            Some(LegacyKind::Playlist)
        ));
        assert!(matches!(
            classify("IMG_0341.MOV_000.ts"),
            Some(LegacyKind::Segment)
        ));
        assert!(matches!(
            classify("IMG_0341.MOV.m3u8.unsupported"),
            Some(LegacyKind::Sentinel)
        ));
        assert!(matches!(
            classify("IMG_0341.MOV.m3u8.tmp"),
            Some(LegacyKind::Tmp)
        ));

        assert!(classify("README.md").is_none());
        assert!(classify("ab").is_none()); // shard dir name
        assert!(classify(".keep").is_none());
    }

    /// Legacy files at the root are removed; the hash dir and unrelated
    /// files survive.
    #[test]
    fn retire_deletes_legacy_and_leaves_hash_dirs() {
        let scratch = tempdir().unwrap();
        let video_root = scratch.path();

        // Legacy artifacts at root.
        put(video_root, "IMG_0341.MOV.m3u8", b"#EXTM3U");
        put(video_root, "IMG_0341.MOV_000.ts", b"\x00");
        put(video_root, "IMG_0341.MOV_001.ts", b"\x00");
        put(video_root, "clip.MP4.m3u8.unsupported", b"");
        put(video_root, "partial.m3u8.tmp", b"");

        // New-layout hash dir we must NOT touch.
        let hashed = video_root.join("ab").join("a".repeat(64));
        fs::create_dir_all(&hashed).unwrap();
        put(&hashed, "playlist.m3u8", b"#EXTM3U");
        put(&hashed, "segment_000.ts", b"\x00");

        // Unrelated file we must NOT touch.
        put(video_root, "README.md", b"don't touch me");

        let outcome = retire_legacy_hls_output(video_root);
        assert_eq!(outcome.deleted_playlists, 1);
        assert_eq!(outcome.deleted_segments, 2);
        assert_eq!(outcome.deleted_sentinels, 1);
        assert_eq!(outcome.deleted_tmp, 1);
        assert_eq!(outcome.errors, 0);

        // Legacy artifacts gone.
        assert!(!video_root.join("IMG_0341.MOV.m3u8").exists());
        assert!(!video_root.join("IMG_0341.MOV_000.ts").exists());
        assert!(!video_root.join("clip.MP4.m3u8.unsupported").exists());
        assert!(!video_root.join("partial.m3u8.tmp").exists());
        // Hash dir untouched.
        assert!(hashed.join("playlist.m3u8").exists());
        assert!(hashed.join("segment_000.ts").exists());
        // Unrelated file untouched.
        assert!(video_root.join("README.md").exists());
    }

    /// A second pass over an already-cleaned directory deletes nothing and
    /// reports no errors.
    #[test]
    fn retire_is_idempotent() {
        let scratch = tempdir().unwrap();
        let video_root = scratch.path();

        put(video_root, "video.mp4.m3u8", b"#EXTM3U");
        put(video_root, "video.mp4_000.ts", b"\x00");

        let first_pass = retire_legacy_hls_output(video_root);
        assert_eq!(
            first_pass.deleted_playlists + first_pass.deleted_segments,
            2
        );

        let second_pass = retire_legacy_hls_output(video_root);
        assert_eq!(second_pass.total_deleted(), 0);
        assert_eq!(second_pass.errors, 0);
    }

    /// A non-existent directory yields zero stats: no panic, no error count.
    #[test]
    fn retire_handles_missing_dir() {
        let scratch = tempdir().unwrap();
        let absent = scratch.path().join("does_not_exist");

        let outcome = retire_legacy_hls_output(&absent);
        assert_eq!(outcome.total_deleted(), 0);
        assert_eq!(outcome.errors, 0);
    }
}
|
||||||
@@ -10,6 +10,7 @@ use walkdir::WalkDir;
|
|||||||
pub mod actors;
|
pub mod actors;
|
||||||
pub mod ffmpeg;
|
pub mod ffmpeg;
|
||||||
pub mod hls_paths;
|
pub mod hls_paths;
|
||||||
|
pub mod legacy_migration;
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub async fn generate_video_gifs() {
|
pub async fn generate_video_gifs() {
|
||||||
|
|||||||
Reference in New Issue
Block a user