diff --git a/src/handlers/video.rs b/src/handlers/video.rs index d00346c..21338b0 100644 --- a/src/handlers/video.rs +++ b/src/handlers/video.rs @@ -13,92 +13,298 @@ use actix_web::{ use log::{debug, error, info, warn}; use opentelemetry::trace::{Span, Status, Tracer}; use opentelemetry::{KeyValue, global}; +use serde::Serialize; +use crate::content_hash; use crate::data::{ Claims, PreviewClipRequest, PreviewStatusItem, PreviewStatusRequest, PreviewStatusResponse, ThumbnailRequest, }; -use crate::database::PreviewDao; +use crate::database::{ExifDao, PreviewDao}; use crate::files::is_valid_full_path; use crate::libraries; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; -use crate::video::actors::{GeneratePreviewClipMessage, ProcessMessage, create_playlist}; +use crate::video::actors::{GeneratePreviewClipMessage, QueueVideosMessage, VideoToQueue}; +use crate::video::hls_paths; + +/// Response body for `POST /video/generate`. New clients should consume +/// `playlist_url` (hash-keyed, stable across libraries and renames) and +/// poll for readiness via the URL itself. Legacy clients reading the +/// raw `playlist` string will be served the legacy basename-keyed path +/// for as long as the field exists — that field will be dropped once +/// every shipped client has migrated. +#[derive(Serialize, Debug)] +struct GenerateVideoResponse { + /// Hash-keyed URL to the HLS playlist. Resolves to + /// `$VIDEO_PATH///playlist.m3u8` server-side. Relative + /// segment refs inside the playlist resolve correctly because the + /// browser appends to this URL's path. + playlist_url: String, + /// blake3 content hash of the source video. Stable per byte content, + /// so duplicate uploads / archive ingests share one set of HLS + /// output. + content_hash: String, + /// `true` iff the playlist file is already on disk. `false` means a + /// transcode was queued; clients should retry the URL after a short + /// delay (or rely on HLS.js's own retry policy). + ready: bool, + /// Legacy basename-keyed playlist *path string*. Returned for older + /// clients that read the response body as a single string under the + /// pre-2026-05 wire format. New clients should ignore this field. + #[serde(rename = "playlist")] + legacy_path: String, +} #[post("/video/generate")] pub async fn generate_video( _claims: Claims, request: HttpRequest, app_state: Data, + exif_dao: Data>>, body: web::Json, ) -> impl Responder { let tracer = global_tracer(); - let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("generate_video", &context); - let filename = PathBuf::from(&body.path); + let filename_pb = PathBuf::from(&body.path); + let Some(filename) = filename_pb + .file_name() + .and_then(|n| n.to_str()) + .map(str::to_string) + else { + let message = format!("Unable to get file name: {:?}", &body.path); + error!("{}", message); + span.set_status(Status::error(message)); + return HttpResponse::BadRequest().finish(); + }; - if let Some(name) = filename.file_name() { - let filename = name.to_str().expect("Filename should convert to string"); - // KNOWN ISSUE (multi-library): playlist filename is the basename - // alone, so two source files with the same basename — whether in - // different libraries or different subdirs of one library — - // overwrite each other's playlists while ffmpeg runs. The - // hash-keyed `content_hash::hls_dir` is the long-term answer - // (see CLAUDE.md "Multi-library data model"); rewiring the - // actor pipeline to use it is out of scope for this branch. - // The orphan-cleanup job above already walks every library so - // it doesn't false-delete archive playlists. - let playlist = format!("{}/{}.m3u8", app_state.video_path, filename); - - let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + let preferred_library = + libraries::resolve_library_param(&app_state, body.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); - // Try the resolved library first, then fall back to any other library - // that actually contains the file — handles union-mode requests where - // the mobile client passes no library but the file lives in a - // non-primary library. - let resolved = is_valid_full_path(&library.root_path, &body.path, false) - .filter(|p| p.exists()) - .or_else(|| { - app_state.libraries.iter().find_map(|lib| { - if lib.id == library.id { - return None; - } - is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists()) - }) - }); + // Try the resolved library first, then fall back to any other library + // that actually contains the file — handles union-mode requests where + // the mobile client passes no library but the file lives in a + // non-primary library. Track which library won so the DB lookup is + // scoped correctly. + let resolved = is_valid_full_path(&preferred_library.root_path, &body.path, false) + .filter(|p| p.exists()) + .map(|p| (preferred_library.id, preferred_library.root_path.clone(), p)) + .or_else(|| { + app_state.libraries.iter().find_map(|lib| { + if lib.id == preferred_library.id { + return None; + } + is_valid_full_path(&lib.root_path, &body.path, false) + .filter(|p| p.exists()) + .map(|p| (lib.id, lib.root_path.clone(), p)) + }) + }); - if let Some(path) = resolved { - if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await { - span.add_event( - "playlist_created".to_string(), - vec![KeyValue::new("playlist-name", filename.to_string())], + let Some((resolved_library_id, resolved_root, full_path)) = resolved else { + span.set_status(Status::error(format!("invalid path {:?}", &body.path))); + return HttpResponse::BadRequest().finish(); + }; + + // Build the rel_path used to look up the row. + let full_path_str = full_path.to_string_lossy().to_string(); + let rel_path = full_path_str + .strip_prefix(&resolved_root) + .unwrap_or(full_path_str.as_str()) + .trim_start_matches(['/', '\\']) + .to_string(); + + // DB lookup first. Cheap and avoids re-reading the file off disk for + // already-ingested videos. + let hash_from_db: Option = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + match dao.get_exif_batch(&context, Some(resolved_library_id), &[rel_path.clone()]) { + Ok(rows) => rows.into_iter().next().and_then(|r| r.content_hash), + Err(e) => { + warn!( + "exif_dao.get_exif_batch failed for {} (lib {}): {:?}", + rel_path, resolved_library_id, e ); - - span.set_status(Status::Ok); - app_state.stream_manager.do_send(ProcessMessage( - playlist.clone(), - child, - // opentelemetry::Context::new().with_span(span), - )); + None } - } else { - span.set_status(Status::error(format!("invalid path {:?}", &body.path))); - return HttpResponse::BadRequest().finish(); } + }; - HttpResponse::Ok().json(playlist) + // Best-effort fallback: compute on-the-fly when the DB row hasn't + // been written or is mid-backfill. Read-only — no library mutation. + let content_hash_str = match hash_from_db { + Some(h) => h, + None => match content_hash::compute(&full_path) { + Ok(id) => id.content_hash, + Err(e) => { + error!( + "Failed to compute content_hash for {}: {}", + full_path.display(), + e + ); + span.set_status(Status::error(format!("hash compute failed: {}", e))); + return HttpResponse::InternalServerError().finish(); + } + }, + }; + + let video_dir = std::path::Path::new(&app_state.video_path); + let playlist_path = hls_paths::playlist_for_hash(video_dir, &content_hash_str); + let sentinel_path = hls_paths::sentinel_for_hash(video_dir, &content_hash_str); + let ready = playlist_path.exists(); + + if !ready && !sentinel_path.exists() { + // Kick off generation via the existing actor pipeline. Fire-and- + // forget — the playlist appears at `playlist_path` once ffmpeg + // + rename complete. The client polls the URL. + info!( + "/video/generate: queueing playlist for {} (hash={})", + full_path.display(), + &content_hash_str[..content_hash_str.len().min(16)] + ); + app_state.playlist_manager.do_send(QueueVideosMessage { + videos: vec![VideoToQueue { + video_path: full_path.clone(), + content_hash: content_hash_str.clone(), + }], + }); + span.add_event( + "playlist_queued", + vec![KeyValue::new("content_hash", content_hash_str.clone())], + ); + } else if ready { + span.add_event( + "playlist_already_present", + vec![KeyValue::new("content_hash", content_hash_str.clone())], + ); } else { - let message = format!("Unable to get file name: {:?}", filename); - error!("{}", message); - span.set_status(Status::error(message)); - - HttpResponse::BadRequest().finish() + // Sentinel present — past transcode attempt failed. Return the + // URL anyway (it'll 404 / 5xx at fetch time) so the client gets + // a deterministic answer. Operator must delete the sentinel to + // force a retry. + warn!( + "/video/generate: unsupported sentinel present for {} (hash={}); not re-queueing", + full_path.display(), + &content_hash_str[..content_hash_str.len().min(16)] + ); } + + let playlist_url = format!( + "/video/hls/{}/{}", + content_hash_str, + hls_paths::PLAYLIST_FILENAME + ); + let legacy_path = format!("{}/{}.m3u8", app_state.video_path, filename); + + span.set_status(Status::Ok); + HttpResponse::Ok().json(GenerateVideoResponse { + playlist_url, + content_hash: content_hash_str, + ready, + legacy_path, + }) +} + +/// Serve HLS playlist or segment files under the hash-keyed layout +/// `$VIDEO_PATH///`. The matched `{file}` must be +/// either `playlist.m3u8` or a `segment_NNN.ts` style segment; any other +/// shape is 400'd to defend against operators stashing other content in +/// the hash dir. +#[get("/video/hls/{hash}/{file}")] +pub async fn stream_hls_file( + request: HttpRequest, + _: Claims, + path: web::Path<(String, String)>, + app_state: Data, +) -> impl Responder { + let tracer = global_tracer(); + let context = extract_context_from_request(&request); + let mut span = tracer.start_with_context("stream_hls_file", &context); + + let (hash, file) = path.into_inner(); + if !is_valid_hash(&hash) { + span.set_status(Status::error("invalid hash")); + return HttpResponse::BadRequest().body("invalid hash"); + } + if !is_allowed_hls_filename(&file) { + span.set_status(Status::error("invalid file")); + return HttpResponse::BadRequest().body("invalid file"); + } + + let shard = &hash[..2]; + let file_path = PathBuf::from(&app_state.video_path) + .join(shard) + .join(&hash) + .join(&file); + + // Path-traversal guard: canonicalize both sides and require the file + // to live under `app_state.video_path`. `is_valid_hash` / + // `is_allowed_hls_filename` already block dangerous strings, but + // belt-and-suspenders here is cheap. + let canonical_base = match std::fs::canonicalize(&app_state.video_path) { + Ok(p) => p, + Err(e) => { + error!("Failed to canonicalize VIDEO_PATH: {:?}", e); + span.set_status(Status::error("VIDEO_PATH not canonicalisable")); + return HttpResponse::InternalServerError().finish(); + } + }; + let canonical_file = match std::fs::canonicalize(&file_path) { + Ok(p) => p, + Err(_) => { + debug!("HLS file not found: {}", file_path.display()); + span.set_status(Status::error("not found")); + return HttpResponse::NotFound().finish(); + } + }; + if !canonical_file.starts_with(&canonical_base) { + warn!( + "Path traversal attempt: {} resolved outside VIDEO_PATH", + file_path.display() + ); + span.set_status(Status::error("traversal")); + return HttpResponse::Forbidden().finish(); + } + + match NamedFile::open(&canonical_file) { + Ok(f) => { + span.set_status(Status::Ok); + f.into_response(&request) + } + Err(_) => { + span.set_status(Status::error("not found")); + HttpResponse::NotFound().finish() + } + } +} + +/// 64 lowercase-or-upper hex chars. Strict so we don't accept arbitrary +/// strings that might canonicalize into trouble. +fn is_valid_hash(s: &str) -> bool { + s.len() == 64 && s.bytes().all(|b| b.is_ascii_hexdigit()) +} + +/// Allowed file names inside a hash dir. `playlist.m3u8` plus segment +/// files matching the `segment_NNN.ts` template that `PlaylistGenerator` +/// writes via `hls_paths::SEGMENT_TEMPLATE`. Anything else (including +/// `.tmp`, `.unsupported`, dotfiles) returns 400 — these are internal +/// artifacts the client should never request. +fn is_allowed_hls_filename(name: &str) -> bool { + if name == hls_paths::PLAYLIST_FILENAME { + return true; + } + if let Some(rest) = name.strip_prefix("segment_") + && let Some(num) = rest.strip_suffix(".ts") + && !num.is_empty() + && num.bytes().all(|b| b.is_ascii_digit()) + { + return true; + } + false } #[get("/video/stream")] @@ -427,6 +633,41 @@ mod tests { use crate::testhelpers::TestPreviewDao; use actix_web::App; + #[test] + fn is_valid_hash_requires_64_ascii_hex() { + assert!(is_valid_hash(&"a".repeat(64))); + assert!(is_valid_hash(&"F".repeat(64))); + assert!(is_valid_hash(&format!("ab{}", "0".repeat(62)))); + + assert!(!is_valid_hash(&"a".repeat(63))); + assert!(!is_valid_hash(&"a".repeat(65))); + // Anything outside the hex alphabet — including '/', '.', '..' — + // is rejected up front so the path-traversal canonicalisation + // never has to defend the boundary alone. + assert!(!is_valid_hash(&format!("/{}", "a".repeat(63)))); + assert!(!is_valid_hash(&format!("..{}", "a".repeat(62)))); + assert!(!is_valid_hash(&"g".repeat(64))); + } + + #[test] + fn is_allowed_hls_filename_accepts_only_playlist_and_segments() { + assert!(is_allowed_hls_filename("playlist.m3u8")); + assert!(is_allowed_hls_filename("segment_000.ts")); + assert!(is_allowed_hls_filename("segment_999.ts")); + assert!(is_allowed_hls_filename("segment_0.ts")); + + // Internal artifacts the client should never request. + assert!(!is_allowed_hls_filename("playlist.m3u8.tmp")); + assert!(!is_allowed_hls_filename("playlist.unsupported")); + // Traversal / path components — defence in depth alongside + // the actix path matcher itself. + assert!(!is_allowed_hls_filename("..")); + assert!(!is_allowed_hls_filename("../etc/passwd")); + assert!(!is_allowed_hls_filename("segment_abc.ts")); + assert!(!is_allowed_hls_filename("segment_.ts")); + assert!(!is_allowed_hls_filename("")); + } + fn make_token() -> String { let claims = Claims::valid_user("1".to_string()); jsonwebtoken::encode( diff --git a/src/main.rs b/src/main.rs index 023b57b..cf493b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -266,6 +266,7 @@ fn main() -> std::io::Result<()> { .service(handlers::image::upload_image) .service(handlers::video::generate_video) .service(handlers::video::stream_video) + .service(handlers::video::stream_hls_file) .service(handlers::video::get_video_preview) .service(handlers::video::get_preview_status) .service(handlers::video::get_video_part) diff --git a/src/video/actors.rs b/src/video/actors.rs index d0ae04f..9f2df1b 100644 --- a/src/video/actors.rs +++ b/src/video/actors.rs @@ -5,12 +5,12 @@ use crate::otel::global_tracer; use crate::video::ffmpeg::{generate_preview_clip, get_duration_seconds_blocking}; use crate::video::hls_paths; use actix::prelude::*; -use log::{debug, error, info, trace, warn}; +use log::{debug, error, info, warn}; use opentelemetry::KeyValue; use opentelemetry::trace::{Span, Status, Tracer}; use std::io::Result; use std::path::{Path, PathBuf}; -use std::process::{Child, Command, ExitStatus, Stdio}; +use std::process::{Command, Stdio}; use std::sync::{Arc, Mutex}; use tokio::sync::Semaphore; // ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8 @@ -22,31 +22,6 @@ impl Actor for StreamActor { type Context = Context; } -pub struct ProcessMessage(pub String, pub Child); - -impl Message for ProcessMessage { - type Result = Result; -} - -impl Handler for StreamActor { - type Result = Result; - - fn handle(&mut self, msg: ProcessMessage, _ctx: &mut Self::Context) -> Self::Result { - trace!("Message received"); - let mut process = msg.1; - let result = process.wait(); - - debug!( - "Finished waiting for: {:?}. Code: {:?}", - msg.0, - result - .as_ref() - .map_or(-1, |status| status.code().unwrap_or(-1)) - ); - result - } -} - /// A video paired with its content hash, ready to be queued for HLS /// playlist generation. Hash is required because all output paths are /// keyed on it; callers that lack a hash (rows mid-backfill) must skip @@ -57,70 +32,6 @@ pub struct VideoToQueue { pub content_hash: String, } -/// Legacy basename-keyed playlist path. Retained for the one-shot startup -/// migration that retires pre-content-hash output; new playlist writes go -/// through [`hls_paths::playlist_for_hash`]. Will be removed once the -/// migration ships and runs to completion in production. -#[allow(dead_code)] -pub fn playlist_file_for(playlist_dir: &str, video_path: &Path) -> PathBuf { - let filename = video_path - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("unknown"); - PathBuf::from(format!("{}/{}.m3u8", playlist_dir, filename)) -} - -/// Legacy basename-keyed sentinel path. Same migration-only contract as -/// [`playlist_file_for`]. -#[allow(dead_code)] -pub fn playlist_unsupported_sentinel(playlist_file: &Path) -> PathBuf { - let mut s = playlist_file.as_os_str().to_owned(); - s.push(".unsupported"); - PathBuf::from(s) -} - -pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result { - if Path::new(playlist_file).exists() { - debug!("Playlist already exists: {}", playlist_file); - return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists)); - } - - let result = Command::new("ffmpeg") - .arg("-i") - .arg(video_path) - .arg("-c:v") - .arg("h264") - .arg("-crf") - .arg("21") - .arg("-preset") - .arg("veryfast") - .arg("-hls_time") - .arg("3") - .arg("-hls_list_size") - .arg("0") - .arg("-hls_playlist_type") - .arg("vod") - .arg("-vf") - .arg("scale='min(1080,iw)':-2,setsar=1:1") - .arg(playlist_file) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .spawn(); - - let start_time = std::time::Instant::now(); - loop { - actix::clock::sleep(std::time::Duration::from_secs(1)).await; - - if Path::new(playlist_file).exists() - || std::time::Instant::now() - start_time > std::time::Duration::from_secs(5) - { - break; - } - } - - result -} - pub fn generate_video_thumbnail(path: &Path, destination: &Path) -> std::io::Result<()> { // Probe duration up front and seek to ~50% — gives a more // representative frame than a fixed offset (skipping title cards on