Merge pull request 'feature/hls-content-hash' (#95) from feature/hls-content-hash into master

Reviewed-on: #95
This commit was merged in pull request #95.
This commit is contained in:
2026-05-15 20:09:48 +00:00
19 changed files with 1528 additions and 571 deletions
+11 -7
View File
@@ -1749,8 +1749,8 @@ Return ONLY the summary, nothing else."#,
.iter() .iter()
.enumerate() .enumerate()
.map(|(i, c)| { .map(|(i, c)| {
let trimmed = if c.len() > 1000 { let trimmed = if c.chars().count() > 1000 {
format!("{}", &c[..1000]) format!("{}", c.chars().take(1000).collect::<String>())
} else { } else {
c.clone() c.clone()
}; };
@@ -3406,8 +3406,8 @@ Return ONLY the summary, nothing else."#,
obj.iter() obj.iter()
.map(|(k, v)| { .map(|(k, v)| {
let rendered = match v { let rendered = match v {
serde_json::Value::String(s) if s.len() > 40 => { serde_json::Value::String(s) if s.chars().count() > 40 => {
format!("\"{}...\"", &s[..40]) format!("\"{}...\"", s.chars().take(40).collect::<String>())
} }
_ => v.to_string(), _ => v.to_string(),
}; };
@@ -4088,10 +4088,11 @@ Return ONLY the summary, nothing else."#,
let title = title_raw.trim().trim_matches('"').to_string(); let title = title_raw.trim().trim_matches('"').to_string();
log::info!("Agentic generated title: {}", title); log::info!("Agentic generated title: {}", title);
let summary_preview: String = final_content.chars().take(200).collect();
log::info!( log::info!(
"Agentic generated summary ({} chars): {}", "Agentic generated summary ({} chars): {}",
final_content.len(), final_content.len(),
&final_content[..final_content.len().min(200)] summary_preview
); );
// 14. Serialize the full message history for training data // 14. Serialize the full message history for training data
@@ -4548,7 +4549,10 @@ mod tests {
#[test] #[test]
fn strip_mark_tags_handles_common_patterns() { fn strip_mark_tags_handles_common_patterns() {
assert_eq!(InsightGenerator::strip_mark_tags("plain text"), "plain text"); assert_eq!(
InsightGenerator::strip_mark_tags("plain text"),
"plain text"
);
assert_eq!( assert_eq!(
InsightGenerator::strip_mark_tags("…the <mark>lake</mark>…"), InsightGenerator::strip_mark_tags("…the <mark>lake</mark>…"),
"…the lake…" "…the lake…"
@@ -4668,7 +4672,7 @@ mod tests {
assert!( assert!(
out.starts_with("You are a journal writer in first person, warm and reflective."), out.starts_with("You are a journal writer in first person, warm and reflective."),
"custom prompt must lead the system content; got: {}", "custom prompt must lead the system content; got: {}",
&out[..out.len().min(200)], out.chars().take(200).collect::<String>(),
); );
assert!( assert!(
!out.contains("personal photo memory assistant"), !out.contains("personal photo memory assistant"),
+3 -6
View File
@@ -52,12 +52,9 @@ pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
/// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`. /// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
/// The playlist lives at `playlist.m3u8` inside this directory and its /// The playlist lives at `playlist.m3u8` inside this directory and its
/// segments are co-located so HLS relative references Just Work. /// segments are co-located so HLS relative references Just Work. See
/// /// [`crate::video::hls_paths`] for the filename constants and the
/// Allow-dead until Branch B/C rewires the HLS pipeline to use it; the /// per-file helpers built on this dir.
/// helper lives here today so Branch A's path layout decisions stay
/// adjacent to thumbnail/legacy ones.
#[allow(dead_code)]
pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf { pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
let shard = shard_prefix(hash); let shard = shard_prefix(hash);
video_dir.join(shard).join(hash) video_dir.join(shard).join(hash)
+10 -4
View File
@@ -235,6 +235,7 @@ pub trait KnowledgeDao: Sync + Send {
/// - entity_type: optional, restricts nodes to one type /// - entity_type: optional, restricts nodes to one type
/// - node_limit: caps the number of nodes; lower-fact-count /// - node_limit: caps the number of nodes; lower-fact-count
/// entities drop first /// entities drop first
///
/// Edges between dropped entities are pruned. Persona scoping /// Edges between dropped entities are pruned. Persona scoping
/// affects fact_count + edge inclusion (rejected / superseded /// affects fact_count + edge inclusion (rejected / superseded
/// excluded; All vs Single mirrors the existing pattern). /// excluded; All vs Single mirrors the existing pattern).
@@ -937,7 +938,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
let mut conn = self.connection.lock().expect("KnowledgeDao lock"); let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut q = sql_query(sql).into_boxed(); let mut q = sql_query(sql).into_boxed();
match persona { match persona {
PersonaFilter::Single { user_id, persona_id } => { PersonaFilter::Single {
user_id,
persona_id,
} => {
q = q q = q
.bind::<Integer, _>(*user_id) .bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone()); .bind::<Text, _>(persona_id.clone());
@@ -977,7 +981,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// rows flip — REVIEWED survives so the curator can preserve // rows flip — REVIEWED survives so the curator can preserve
// a hand-approved exception under the same predicate. // a hand-approved exception under the same predicate.
let touched = match persona { let touched = match persona {
PersonaFilter::Single { user_id: uid, persona_id: pid } => diesel::update( PersonaFilter::Single {
user_id: uid,
persona_id: pid,
} => diesel::update(
entity_facts entity_facts
.filter(predicate.eq(target_predicate)) .filter(predicate.eq(target_predicate))
.filter(user_id.eq(*uid)) .filter(user_id.eq(*uid))
@@ -1282,8 +1289,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
Some(v) => v, Some(v) => v,
None => continue, None => continue,
}; };
for b in (a + 1)..indices.len() { for &ib in &indices[a + 1..] {
let ib = indices[b];
let vb = match &decoded[ib] { let vb = match &decoded[ib] {
Some(v) => v, Some(v) => v,
None => continue, None => continue,
+68 -3
View File
@@ -414,6 +414,27 @@ pub trait ExifDao: Sync + Send {
size_bytes: i64, size_bytes: i64,
) -> Result<(), DbError>; ) -> Result<(), DbError>;
/// Returns every distinct non-NULL `content_hash` across all libraries.
///
/// Used by HLS orphan cleanup to identify hash dirs under `$VIDEO_PATH`
/// whose source video no longer exists. The query itself is cheap (single
/// column, indexed) but the result is unbounded in size: it is meant to
/// back a `HashSet` membership check, so a 100k-photo library produces
/// ~100k strings.
///
/// # Errors
/// Returns a `DbError` when the query cannot be executed.
fn list_distinct_content_hashes(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<String>, DbError>;
/// Returns every row in `image_exif` for `library_id`, as
/// `(rel_path, content_hash)` pairs.
///
/// The hash is an `Option` because rows that are mid-backfill carry NULL.
/// Used by HLS readiness stats; callers filter by file extension on their
/// side because the DB schema doesn't carry media type.
///
/// # Errors
/// Returns a `DbError` when the query cannot be executed.
fn list_paths_and_hashes_for_library(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
) -> Result<Vec<(String, Option<String>)>, DbError>;
/// Return image_exif rows that need their `date_taken` resolved by the /// Return image_exif rows that need their `date_taken` resolved by the
/// canonical-date waterfall (see `crate::date_resolver`): `date_taken /// canonical-date waterfall (see `crate::date_resolver`): `date_taken
/// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to /// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to
@@ -481,9 +502,9 @@ pub trait ExifDao: Sync + Send {
/// whose calendar position matches the request's span: /// whose calendar position matches the request's span:
/// - `"day"` — same month + day-of-month (any year) /// - `"day"` — same month + day-of-month (any year)
/// - `"week"` — same week-of-year (SQLite `%W`, Monday-anchored — /// - `"week"` — same week-of-year (SQLite `%W`, Monday-anchored —
/// close to but not exactly ISO week 8601; the /// close to but not exactly ISO week 8601; the boundary cases
/// boundary cases at year-start/end can shift by ±1 /// at year-start/end can shift by ±1 vs the prior request-time
/// vs the prior request-time `iso_week()` filter) /// `iso_week()` filter)
/// - `"month"` — same month (any year) /// - `"month"` — same month (any year)
/// ///
/// `tz_offset_minutes` is applied to both sides of the strftime /// `tz_offset_minutes` is applied to both sides of the strftime
@@ -1231,6 +1252,50 @@ impl ExifDao for SqliteExifDao {
.map_err(|_| DbError::new(DbErrorKind::UpdateError)) .map_err(|_| DbError::new(DbErrorKind::UpdateError))
} }
/// SQLite implementation: selects the distinct non-NULL `content_hash`
/// values from `image_exif`, wrapped in a traced DB call.
///
/// Any failure is reported as `DbErrorKind::QueryError`; the underlying
/// error detail is discarded by the `map_err` calls below.
fn list_distinct_content_hashes(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<String>, DbError> {
trace_db_call(context, "query", "list_distinct_content_hashes", |_span| {
use schema::image_exif::dsl::*;
// `expect` panics if the connection lock cannot be taken
// (presumably a poisoned mutex — confirm against the field type).
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(content_hash.is_not_null())
.select(content_hash)
.distinct()
// Rows are loaded as `Option<String>` even though NULLs were
// filtered out above; `flatten` below unwraps them to `String`.
.load::<Option<String>>(connection.deref_mut())
.map(|rows| rows.into_iter().flatten().collect())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
/// SQLite implementation: loads `(rel_path, content_hash)` for every
/// `image_exif` row whose `library_id` equals `lib_id`, wrapped in a
/// traced DB call.
///
/// The parameter is named `lib_id` to keep it distinct from the
/// `library_id` column brought into scope by the `dsl::*` import.
/// Any failure is reported as `DbErrorKind::QueryError`; the underlying
/// error detail is discarded by the `map_err` calls below.
fn list_paths_and_hashes_for_library(
&mut self,
context: &opentelemetry::Context,
lib_id: i32,
) -> Result<Vec<(String, Option<String>)>, DbError> {
trace_db_call(
context,
"query",
"list_paths_and_hashes_for_library",
|_span| {
use schema::image_exif::dsl::*;
// `expect` panics if the connection lock cannot be taken
// (presumably a poisoned mutex — confirm against the field type).
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(library_id.eq(lib_id))
.select((rel_path, content_hash))
// The hash stays an `Option` here: NULL hashes are returned to
// the caller rather than filtered out.
.load::<(String, Option<String>)>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
},
)
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_rows_needing_date_backfill( fn get_rows_needing_date_backfill(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
+7 -9
View File
@@ -57,30 +57,28 @@ impl ReconcileStats {
/// watcher tick. Errors are logged but never propagated; reconciliation /// watcher tick. Errors are logged but never propagated; reconciliation
/// is best-effort and a transient DB hiccup must not stall the watcher. /// is best-effort and a transient DB hiccup must not stall the watcher.
pub fn run(conn: &mut SqliteConnection) -> ReconcileStats { pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
let mut stats = ReconcileStats::default(); let stats = ReconcileStats {
tagged_photo_hashes_filled: match backfill_tagged_photo_hashes(conn) {
stats.tagged_photo_hashes_filled = match backfill_tagged_photo_hashes(conn) {
Ok(n) => n, Ok(n) => n,
Err(e) => { Err(e) => {
warn!("reconcile: tagged_photo hash backfill failed: {:?}", e); warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
0 0
} }
}; },
photo_insights_hashes_filled: match backfill_photo_insights_hashes(conn) {
stats.photo_insights_hashes_filled = match backfill_photo_insights_hashes(conn) {
Ok(n) => n, Ok(n) => n,
Err(e) => { Err(e) => {
warn!("reconcile: photo_insights hash backfill failed: {:?}", e); warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
0 0
} }
}; },
photo_insights_demoted: match collapse_insight_currents(conn) {
stats.photo_insights_demoted = match collapse_insight_currents(conn) {
Ok(n) => n, Ok(n) => n,
Err(e) => { Err(e) => {
warn!("reconcile: photo_insights scalar merge failed: {:?}", e); warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
0 0
} }
},
}; };
if stats.changed() { if stats.changed() {
+4 -1
View File
@@ -2118,7 +2118,10 @@ async fn update_face_handler<D: FaceDao>(
// the short context string we surface in the response body — // the short context string we surface in the response body —
// SQLITE_BUSY here usually means another DAO's writer held the // SQLITE_BUSY here usually means another DAO's writer held the
// lock past `busy_timeout` (5s), which is invisible in `{}`. // lock past `busy_timeout` (5s), which is invisible in `{}`.
warn!("PATCH /image/faces/{}: 500 — update_face failed: {:#}", id, e); warn!(
"PATCH /image/faces/{}: 500 — update_face failed: {:#}",
id, e
);
return HttpResponse::InternalServerError().body(e.to_string()); return HttpResponse::InternalServerError().body(e.to_string());
} }
}; };
+15
View File
@@ -1689,6 +1689,21 @@ mod tests {
Ok(()) Ok(())
} }
/// Test stub: always reports an empty set of content hashes.
fn list_distinct_content_hashes(
    &mut self,
    _context: &opentelemetry::Context,
) -> Result<Vec<String>, DbError> {
    Ok(vec![])
}
/// Test stub: always reports that the library has no rows.
fn list_paths_and_hashes_for_library(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: i32,
) -> Result<Vec<(String, Option<String>)>, DbError> {
    Ok(vec![])
}
fn get_rows_needing_date_backfill( fn get_rows_needing_date_backfill(
&mut self, &mut self,
_context: &opentelemetry::Context, _context: &opentelemetry::Context,
+6 -5
View File
@@ -183,15 +183,16 @@ pub async fn get_image(
// review JPEG, ~12 MP). Falls through to NamedFile if no preview is // review JPEG, ~12 MP). Falls through to NamedFile if no preview is
// available, which preserves the historical behavior for callers // available, which preserves the historical behavior for callers
// that genuinely want the original bytes. // that genuinely want the original bytes.
if image_size == PhotoSize::Full && exif::is_tiff_raw(&path) { if image_size == PhotoSize::Full
if let Some(preview) = exif::extract_embedded_jpeg_preview(&path) { && exif::is_tiff_raw(&path)
&& let Some(preview) = exif::extract_embedded_jpeg_preview(&path)
{
span.set_status(Status::Ok); span.set_status(Status::Ok);
return HttpResponse::Ok() return HttpResponse::Ok()
.content_type("image/jpeg") .content_type("image/jpeg")
.insert_header(("Cache-Control", "public, max-age=3600")) .insert_header(("Cache-Control", "public, max-age=3600"))
.body(preview); .body(preview);
} }
}
if let Ok(file) = NamedFile::open(&path) { if let Ok(file) = NamedFile::open(&path) {
span.set_status(Status::Ok); span.set_status(Status::Ok);
@@ -706,7 +707,7 @@ pub async fn set_image_date(
Ok(row) => { Ok(row) => {
span.set_status(Status::Ok); span.set_status(Status::Ok);
HttpResponse::Ok().json(build_metadata_response_for_date_mutation( HttpResponse::Ok().json(build_metadata_response_for_date_mutation(
&library, library,
&normalized_path, &normalized_path,
row, row,
)) ))
@@ -757,7 +758,7 @@ pub async fn clear_image_date(
Ok(row) => { Ok(row) => {
span.set_status(Status::Ok); span.set_status(Status::Ok);
HttpResponse::Ok().json(build_metadata_response_for_date_mutation( HttpResponse::Ok().json(build_metadata_response_for_date_mutation(
&library, library,
&normalized_path, &normalized_path,
row, row,
)) ))
+239 -111
View File
@@ -11,48 +11,56 @@ use actix_web::{
web::{self, Data}, web::{self, Data},
}; };
use log::{debug, error, info, warn}; use log::{debug, error, info, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer}; use opentelemetry::trace::{Span, Status, Tracer};
use opentelemetry::{KeyValue, global}; use serde::Serialize;
use crate::content_hash;
use crate::data::{ use crate::data::{
Claims, PreviewClipRequest, PreviewStatusItem, PreviewStatusRequest, PreviewStatusResponse, Claims, PreviewClipRequest, PreviewStatusItem, PreviewStatusRequest, PreviewStatusResponse,
ThumbnailRequest, ThumbnailRequest,
}; };
use crate::database::PreviewDao; use crate::database::{ExifDao, PreviewDao};
use crate::files::is_valid_full_path; use crate::files::is_valid_full_path;
use crate::libraries; use crate::libraries;
use crate::otel::{extract_context_from_request, global_tracer}; use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState; use crate::state::AppState;
use crate::video::actors::{GeneratePreviewClipMessage, ProcessMessage, create_playlist}; use crate::video::actors::{GeneratePreviewClipMessage, QueueVideosMessage, VideoToQueue};
use crate::video::hls_paths;
/// Response body for `POST /video/generate`. Clients consume
/// `playlist_url` (hash-keyed, stable across libraries and renames)
/// and poll for readiness via the URL itself.
#[derive(Serialize, Debug)]
struct GenerateVideoResponse {
/// Hash-keyed URL to the HLS playlist. Resolves to
/// `$VIDEO_PATH/<shard>/<hash>/playlist.m3u8` server-side. Relative
/// segment refs inside the playlist resolve correctly because the
/// browser appends to this URL's path.
playlist_url: String,
/// blake3 content hash of the source video. Stable per byte content,
/// so duplicate uploads / archive ingests share one set of HLS
/// output.
content_hash: String,
/// `true` iff the playlist file is already on disk. `false` means the
/// playlist is not there yet. Usually a transcode was just queued and
/// clients should retry the URL after a short delay (or rely on
/// HLS.js's own retry policy). It is also `false` when an unsupported
/// sentinel from an earlier failed transcode is present: nothing is
/// re-queued in that case, so the URL keeps failing until an operator
/// deletes the sentinel.
ready: bool,
}
#[post("/video/generate")] #[post("/video/generate")]
pub async fn generate_video( pub async fn generate_video(
_claims: Claims, _claims: Claims,
request: HttpRequest, request: HttpRequest,
app_state: Data<AppState>, app_state: Data<AppState>,
exif_dao: Data<std::sync::Mutex<Box<dyn ExifDao>>>,
body: web::Json<ThumbnailRequest>, body: web::Json<ThumbnailRequest>,
) -> impl Responder { ) -> impl Responder {
let tracer = global_tracer(); let tracer = global_tracer();
let context = extract_context_from_request(&request); let context = extract_context_from_request(&request);
let mut span = tracer.start_with_context("generate_video", &context); let mut span = tracer.start_with_context("generate_video", &context);
let filename = PathBuf::from(&body.path); let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref())
if let Some(name) = filename.file_name() {
let filename = name.to_str().expect("Filename should convert to string");
// KNOWN ISSUE (multi-library): playlist filename is the basename
// alone, so two source files with the same basename — whether in
// different libraries or different subdirs of one library —
// overwrite each other's playlists while ffmpeg runs. The
// hash-keyed `content_hash::hls_dir` is the long-term answer
// (see CLAUDE.md "Multi-library data model"); rewiring the
// actor pipeline to use it is out of scope for this branch.
// The orphan-cleanup job above already walks every library so
// it doesn't false-delete archive playlists.
let playlist = format!("{}/{}.m3u8", app_state.video_path, filename);
let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
.ok() .ok()
.flatten() .flatten()
.unwrap_or_else(|| app_state.primary_library()); .unwrap_or_else(|| app_state.primary_library());
@@ -60,141 +68,226 @@ pub async fn generate_video(
// Try the resolved library first, then fall back to any other library // Try the resolved library first, then fall back to any other library
// that actually contains the file — handles union-mode requests where // that actually contains the file — handles union-mode requests where
// the mobile client passes no library but the file lives in a // the mobile client passes no library but the file lives in a
// non-primary library. // non-primary library. Track which library won so the DB lookup is
let resolved = is_valid_full_path(&library.root_path, &body.path, false) // scoped correctly.
let resolved = is_valid_full_path(&preferred_library.root_path, &body.path, false)
.filter(|p| p.exists()) .filter(|p| p.exists())
.map(|p| (preferred_library.id, preferred_library.root_path.clone(), p))
.or_else(|| { .or_else(|| {
app_state.libraries.iter().find_map(|lib| { app_state.libraries.iter().find_map(|lib| {
if lib.id == library.id { if lib.id == preferred_library.id {
return None; return None;
} }
is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists()) is_valid_full_path(&lib.root_path, &body.path, false)
.filter(|p| p.exists())
.map(|p| (lib.id, lib.root_path.clone(), p))
}) })
}); });
if let Some(path) = resolved { let Some((resolved_library_id, resolved_root, full_path)) = resolved else {
if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await { span.set_status(Status::error(format!("invalid path {:?}", &body.path)));
return HttpResponse::BadRequest().finish();
};
// Build the rel_path used to look up the row.
let full_path_str = full_path.to_string_lossy().to_string();
let rel_path = full_path_str
.strip_prefix(&resolved_root)
.unwrap_or(full_path_str.as_str())
.trim_start_matches(['/', '\\'])
.to_string();
// DB lookup first. Cheap and avoids re-reading the file off disk for
// already-ingested videos.
let hash_from_db: Option<String> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
match dao.get_exif_batch(
&context,
Some(resolved_library_id),
std::slice::from_ref(&rel_path),
) {
Ok(rows) => rows.into_iter().next().and_then(|r| r.content_hash),
Err(e) => {
warn!(
"exif_dao.get_exif_batch failed for {} (lib {}): {:?}",
rel_path, resolved_library_id, e
);
None
}
}
};
// Best-effort fallback: compute on-the-fly when the DB row hasn't
// been written or is mid-backfill. Read-only — no library mutation.
let content_hash_str = match hash_from_db {
Some(h) => h,
None => match content_hash::compute(&full_path) {
Ok(id) => id.content_hash,
Err(e) => {
error!(
"Failed to compute content_hash for {}: {}",
full_path.display(),
e
);
span.set_status(Status::error(format!("hash compute failed: {}", e)));
return HttpResponse::InternalServerError().finish();
}
},
};
let video_dir = std::path::Path::new(&app_state.video_path);
let playlist_path = hls_paths::playlist_for_hash(video_dir, &content_hash_str);
let sentinel_path = hls_paths::sentinel_for_hash(video_dir, &content_hash_str);
let ready = playlist_path.exists();
if !ready && !sentinel_path.exists() {
// Kick off generation via the existing actor pipeline. Fire-and-
// forget — the playlist appears at `playlist_path` once ffmpeg
// + rename complete. The client polls the URL.
info!(
"/video/generate: queueing playlist for {} (hash={})",
full_path.display(),
&content_hash_str[..content_hash_str.len().min(16)]
);
app_state.playlist_manager.do_send(QueueVideosMessage {
videos: vec![VideoToQueue {
video_path: full_path.clone(),
content_hash: content_hash_str.clone(),
}],
});
span.add_event( span.add_event(
"playlist_created".to_string(), "playlist_queued",
vec![KeyValue::new("playlist-name", filename.to_string())], vec![KeyValue::new("content_hash", content_hash_str.clone())],
);
} else if ready {
span.add_event(
"playlist_already_present",
vec![KeyValue::new("content_hash", content_hash_str.clone())],
);
} else {
// Sentinel present — past transcode attempt failed. Return the
// URL anyway (it'll 404 / 5xx at fetch time) so the client gets
// a deterministic answer. Operator must delete the sentinel to
// force a retry.
warn!(
"/video/generate: unsupported sentinel present for {} (hash={}); not re-queueing",
full_path.display(),
&content_hash_str[..content_hash_str.len().min(16)]
);
}
let playlist_url = format!(
"/video/hls/{}/{}",
content_hash_str,
hls_paths::PLAYLIST_FILENAME
); );
span.set_status(Status::Ok); span.set_status(Status::Ok);
app_state.stream_manager.do_send(ProcessMessage( HttpResponse::Ok().json(GenerateVideoResponse {
playlist.clone(), playlist_url,
child, content_hash: content_hash_str,
// opentelemetry::Context::new().with_span(span), ready,
)); })
}
} else {
span.set_status(Status::error(format!("invalid path {:?}", &body.path)));
return HttpResponse::BadRequest().finish();
}
HttpResponse::Ok().json(playlist)
} else {
let message = format!("Unable to get file name: {:?}", filename);
error!("{}", message);
span.set_status(Status::error(message));
HttpResponse::BadRequest().finish()
}
} }
#[get("/video/stream")] /// Serve HLS playlist or segment files under the hash-keyed layout
pub async fn stream_video( /// `$VIDEO_PATH/<shard>/<hash>/<file>`. The matched `{file}` must be
/// either `playlist.m3u8` or a `segment_NNN.ts` style segment; any other
/// shape is 400'd to defend against operators stashing other content in
/// the hash dir.
#[get("/video/hls/{hash}/{file}")]
pub async fn stream_hls_file(
request: HttpRequest, request: HttpRequest,
_: Claims, _: Claims,
path: web::Query<ThumbnailRequest>, path: web::Path<(String, String)>,
app_state: Data<AppState>,
) -> impl Responder {
let tracer = global::tracer("image-server");
let context = extract_context_from_request(&request);
let mut span = tracer.start_with_context("stream_video", &context);
let playlist = &path.path;
debug!("Playlist: {}", playlist);
// Only serve files under video_path (HLS playlists) or base_path (source videos)
if playlist.starts_with(&app_state.video_path)
|| is_valid_full_path(&app_state.base_path, playlist, false).is_some()
{
match NamedFile::open(playlist) {
Ok(file) => {
span.set_status(Status::Ok);
file.into_response(&request)
}
_ => {
span.set_status(Status::error(format!("playlist not found {}", playlist)));
HttpResponse::NotFound().finish()
}
}
} else {
span.set_status(Status::error(format!("playlist not valid {}", playlist)));
HttpResponse::BadRequest().finish()
}
}
#[get("/video/{path}")]
pub async fn get_video_part(
request: HttpRequest,
_: Claims,
path: web::Path<ThumbnailRequest>,
app_state: Data<AppState>, app_state: Data<AppState>,
) -> impl Responder { ) -> impl Responder {
let tracer = global_tracer(); let tracer = global_tracer();
let context = extract_context_from_request(&request); let context = extract_context_from_request(&request);
let mut span = tracer.start_with_context("get_video_part", &context); let mut span = tracer.start_with_context("stream_hls_file", &context);
let part = &path.path; let (hash, file) = path.into_inner();
debug!("Video part: {}", part); if !is_valid_hash(&hash) {
span.set_status(Status::error("invalid hash"));
return HttpResponse::BadRequest().body("invalid hash");
}
if !is_allowed_hls_filename(&file) {
span.set_status(Status::error("invalid file"));
return HttpResponse::BadRequest().body("invalid file");
}
let mut file_part = PathBuf::new(); let shard = &hash[..2];
file_part.push(app_state.video_path.clone()); let file_path = PathBuf::from(&app_state.video_path)
file_part.push(part); .join(shard)
.join(&hash)
.join(&file);
// Guard against directory traversal attacks // Path-traversal guard: canonicalize both sides and require the file
// to live under `app_state.video_path`. `is_valid_hash` /
// `is_allowed_hls_filename` already block dangerous strings, but
// belt-and-suspenders here is cheap.
let canonical_base = match std::fs::canonicalize(&app_state.video_path) { let canonical_base = match std::fs::canonicalize(&app_state.video_path) {
Ok(path) => path, Ok(p) => p,
Err(e) => { Err(e) => {
error!("Failed to canonicalize video path: {:?}", e); error!("Failed to canonicalize VIDEO_PATH: {:?}", e);
span.set_status(Status::error("Invalid video path configuration")); span.set_status(Status::error("VIDEO_PATH not canonicalisable"));
return HttpResponse::InternalServerError().finish(); return HttpResponse::InternalServerError().finish();
} }
}; };
let canonical_file = match std::fs::canonicalize(&file_path) {
let canonical_file = match std::fs::canonicalize(&file_part) { Ok(p) => p,
Ok(path) => path,
Err(_) => { Err(_) => {
warn!("Video part not found or invalid: {:?}", file_part); debug!("HLS file not found: {}", file_path.display());
span.set_status(Status::error(format!("Video part not found '{}'", part))); span.set_status(Status::error("not found"));
return HttpResponse::NotFound().finish(); return HttpResponse::NotFound().finish();
} }
}; };
// Ensure the resolved path is still within the video directory
if !canonical_file.starts_with(&canonical_base) { if !canonical_file.starts_with(&canonical_base) {
warn!("Directory traversal attempt detected: {:?}", part); warn!(
span.set_status(Status::error("Invalid video path")); "Path traversal attempt: {} resolved outside VIDEO_PATH",
file_path.display()
);
span.set_status(Status::error("traversal"));
return HttpResponse::Forbidden().finish(); return HttpResponse::Forbidden().finish();
} }
match NamedFile::open(&canonical_file) { match NamedFile::open(&canonical_file) {
Ok(file) => { Ok(f) => {
span.set_status(Status::Ok); span.set_status(Status::Ok);
file.into_response(&request) f.into_response(&request)
} }
_ => { Err(_) => {
error!("Video part not found: {:?}", file_part); span.set_status(Status::error("not found"));
span.set_status(Status::error(format!(
"Video part not found '{}'",
file_part.to_str().unwrap()
)));
HttpResponse::NotFound().finish() HttpResponse::NotFound().finish()
} }
} }
} }
/// Returns `true` only for a string of exactly 64 ASCII hex digits
/// (`0-9`, `a-f`, `A-F`). Deliberately strict so that arbitrary strings
/// which might canonicalize into trouble are never accepted.
fn is_valid_hash(s: &str) -> bool {
    const HASH_HEX_LEN: usize = 64;

    // Byte length first: any non-ASCII character fails the digit check
    // below, so a passing string is always 64 characters as well.
    if s.len() != HASH_HEX_LEN {
        return false;
    }
    s.chars().all(|c| c.is_ascii_hexdigit())
}
/// Allowed file names inside a hash dir. `playlist.m3u8` plus segment
/// files matching the `segment_NNN.ts` template that `PlaylistGenerator`
/// writes via `hls_paths::SEGMENT_TEMPLATE`. Anything else (including
/// `.tmp`, `.unsupported`, dotfiles) returns 400 — these are internal
/// artifacts the client should never request.
fn is_allowed_hls_filename(name: &str) -> bool {
if name == hls_paths::PLAYLIST_FILENAME {
return true;
}
if let Some(rest) = name.strip_prefix("segment_")
&& let Some(num) = rest.strip_suffix(".ts")
&& !num.is_empty()
&& num.bytes().all(|b| b.is_ascii_digit())
{
return true;
}
false
}
#[get("/video/preview")] #[get("/video/preview")]
pub async fn get_video_preview( pub async fn get_video_preview(
_claims: Claims, _claims: Claims,
@@ -427,6 +520,41 @@ mod tests {
use crate::testhelpers::TestPreviewDao; use crate::testhelpers::TestPreviewDao;
use actix_web::App; use actix_web::App;
#[test]
fn is_valid_hash_requires_64_ascii_hex() {
    // Exactly 64 hex digits, in either case, pass.
    let accepted = [
        "a".repeat(64),
        "F".repeat(64),
        format!("ab{}", "0".repeat(62)),
    ];
    for hash in &accepted {
        assert!(is_valid_hash(hash));
    }

    // Wrong length, or anything outside the hex alphabet — including
    // '/', '.', '..' — is rejected up front so the path-traversal
    // canonicalisation never has to defend the boundary alone.
    let rejected = [
        "a".repeat(63),
        "a".repeat(65),
        format!("/{}", "a".repeat(63)),
        format!("..{}", "a".repeat(62)),
        "g".repeat(64),
    ];
    for hash in &rejected {
        assert!(!is_valid_hash(hash));
    }
}
#[test]
fn is_allowed_hls_filename_accepts_only_playlist_and_segments() {
    let accepted = [
        "playlist.m3u8",
        "segment_000.ts",
        "segment_999.ts",
        "segment_0.ts",
    ];
    for name in accepted {
        assert!(is_allowed_hls_filename(name));
    }

    let rejected = [
        // Internal artifacts the client should never request.
        "playlist.m3u8.tmp",
        "playlist.unsupported",
        // Traversal / path components — defence in depth alongside
        // the actix path matcher itself.
        "..",
        "../etc/passwd",
        // Malformed segment names and the empty string.
        "segment_abc.ts",
        "segment_.ts",
        "",
    ];
    for name in rejected {
        assert!(!is_allowed_hls_filename(name));
    }
}
fn make_token() -> String { fn make_token() -> String {
let claims = Claims::valid_user("1".to_string()); let claims = Claims::valid_user("1".to_string());
jsonwebtoken::encode( jsonwebtoken::encode(
+409
View File
@@ -0,0 +1,409 @@
//! Per-library HLS readiness: Prometheus gauges + `/hls/stats` endpoint.
//!
//! The new hash-keyed pipeline transcodes lazily — most of a freshly
//! mounted library is "pending" for the first hour, and operators want
//! a live read on "how much work is left, am I CPU-bound, do I need to
//! bump `HLS_CONCURRENCY`." This module supplies both surfaces against
//! the same compute path:
//!
//! - **Prometheus gauges** `imageserver_hls_videos_total{library}`,
//! `..._with_playlist{library}`, `..._pending{library}`,
//! `..._unsupported{library}`. Updated every watcher full-scan tick
//! and on every `/hls/stats` request, so the freshness matches
//! whichever surface the operator is watching.
//!
//! - **`GET /hls/stats`** returns a JSON snapshot of the same counts
//! plus a top-level cross-library aggregate. Claims-protected
//! (matches every other authenticated read in this crate).
//!
//! Cost is O(distinct video hashes per library): each hash needs one
//! existence check on its playlist file, plus a second check on the
//! `.unsupported` sentinel when the playlist is missing. On a
//! 100k-video library that's noticeable; on a typical home library
//! (a few thousand videos) it's noise. We call from explicit triggers
//! only — the watcher's full-scan tick and `GET /hls/stats` — never
//! per-request from middleware, so the cost is bounded.
use std::collections::HashSet;
use std::path::Path;
use std::sync::{Arc, Mutex};
use actix_web::{HttpResponse, Responder, get, web};
use lazy_static::lazy_static;
use log::{info, warn};
use prometheus::IntGaugeVec;
use serde::Serialize;
use crate::data::Claims;
use crate::database::ExifDao;
use crate::file_types;
use crate::libraries::Library;
use crate::state::AppState;
use crate::video::hls_paths;
lazy_static! {
pub static ref HLS_VIDEOS_TOTAL: IntGaugeVec = IntGaugeVec::new(
prometheus::Opts::new(
"imageserver_hls_videos_total",
"Distinct video content hashes per library known to image_exif",
),
&["library"],
)
.expect("HLS_VIDEOS_TOTAL");
pub static ref HLS_VIDEOS_WITH_PLAYLIST: IntGaugeVec = IntGaugeVec::new(
prometheus::Opts::new(
"imageserver_hls_videos_with_playlist",
"Videos whose hash-keyed HLS playlist is already on disk",
),
&["library"],
)
.expect("HLS_VIDEOS_WITH_PLAYLIST");
pub static ref HLS_VIDEOS_PENDING: IntGaugeVec = IntGaugeVec::new(
prometheus::Opts::new(
"imageserver_hls_videos_pending",
"Videos whose hash-keyed HLS playlist is not yet on disk",
),
&["library"],
)
.expect("HLS_VIDEOS_PENDING");
pub static ref HLS_VIDEOS_UNSUPPORTED: IntGaugeVec = IntGaugeVec::new(
prometheus::Opts::new(
"imageserver_hls_videos_unsupported",
"Videos with an `.unsupported` sentinel — ffmpeg refused; \
operator must delete to retry",
),
&["library"],
)
.expect("HLS_VIDEOS_UNSUPPORTED");
}
/// Per-library HLS readiness snapshot.
///
/// Built by `stats_from_rows` and serialised as one element of
/// `HlsStatsResponse::libraries`. `total`, `with_playlist`, `pending`
/// and `unsupported` are also mirrored into the Prometheus gauges by
/// `publish_gauges`; `hashless_videos` is only reported in JSON and in
/// the log summary.
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
pub struct HlsLibraryStats {
/// Copied from `Library::id`.
pub library_id: i32,
/// Copied from `Library::name`; also used as the `library` label
/// value on the Prometheus gauges.
pub library: String,
/// Distinct video content hashes (dedupes intra-library bytes-at-N-paths).
pub total: usize,
/// Of `total`, hashes whose `playlist.m3u8` is on disk.
pub with_playlist: usize,
/// Of `total`, hashes whose ffmpeg attempt left a `.unsupported`
/// sentinel. Counted separately because they won't progress without
/// operator intervention (delete the sentinel to retry). A hash is
/// only counted here when its playlist is absent — the playlist
/// check wins if both files exist.
pub unsupported: usize,
/// `total - (with_playlist + unsupported)` — videos awaiting transcode.
pub pending: usize,
/// Number of video rows under this library whose content hash is
/// `None` (`image_exif.content_hash` still NULL, mid-backfill).
/// Counted once per row, with no deduplication by path.
/// These don't yet count toward `total` because they're invisible
/// to the hash-keyed pipeline; surfaced so the operator can see
/// "hash backfill, then transcode" pipeline depth.
pub hashless_videos: usize,
}
/// JSON response body for `GET /hls/stats`.
///
/// Carries the per-library snapshots plus cross-library aggregates;
/// each aggregate is the sum of the same-named field over `libraries`.
#[derive(Serialize, Debug)]
pub struct HlsStatsResponse {
/// One snapshot per configured library, in the order the libraries
/// were passed to `compute_and_publish`.
pub libraries: Vec<HlsLibraryStats>,
/// Sum of `HlsLibraryStats::total`. A hash present in two libraries
/// is counted once in each, so this is not a global distinct count.
pub total: usize,
/// Sum of `HlsLibraryStats::with_playlist`.
pub with_playlist: usize,
/// Sum of `HlsLibraryStats::pending`.
pub pending: usize,
/// Sum of `HlsLibraryStats::unsupported`.
pub unsupported: usize,
/// Sum of `HlsLibraryStats::hashless_videos`.
pub hashless_videos: usize,
}
/// Computes an HLS readiness snapshot for every library in `libraries`
/// and pushes each one to the Prometheus gauges as it is produced.
///
/// The snapshots are returned in input order so callers can serialise
/// or log them. Publishing simply overwrites the previous gauge value
/// for that library's label, so repeated calls are harmless.
pub fn compute_and_publish(
    libraries: &[Library],
    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
    video_dir: &Path,
) -> Vec<HlsLibraryStats> {
    // A fresh, empty context: this runs outside any request span.
    let ctx = opentelemetry::Context::new();
    libraries
        .iter()
        .map(|library| {
            let snapshot = compute_for_library(&ctx, library, exif_dao, video_dir);
            publish_gauges(&snapshot);
            snapshot
        })
        .collect()
}
fn publish_gauges(s: &HlsLibraryStats) {
HLS_VIDEOS_TOTAL
.with_label_values(&[s.library.as_str()])
.set(s.total as i64);
HLS_VIDEOS_WITH_PLAYLIST
.with_label_values(&[s.library.as_str()])
.set(s.with_playlist as i64);
HLS_VIDEOS_PENDING
.with_label_values(&[s.library.as_str()])
.set(s.pending as i64);
HLS_VIDEOS_UNSUPPORTED
.with_label_values(&[s.library.as_str()])
.set(s.unsupported as i64);
}
/// Loads the `(rel_path, content_hash)` rows for `lib` from the DAO and
/// turns them into a readiness snapshot via [`stats_from_rows`].
///
/// A DAO error is logged at `warn` and treated as "no rows", so the
/// resulting snapshot is all zeros rather than an error.
///
/// # Panics
/// Panics if the `ExifDao` mutex is poisoned.
fn compute_for_library(
    ctx: &opentelemetry::Context,
    lib: &Library,
    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
    video_dir: &Path,
) -> HlsLibraryStats {
    // The guard lives only inside this block, so the DAO lock is
    // released before the filesystem probes in `stats_from_rows`.
    let rows = {
        let mut guard = exif_dao.lock().expect("Unable to lock ExifDao");
        guard
            .list_paths_and_hashes_for_library(ctx, lib.id)
            .unwrap_or_else(|err| {
                warn!(
                    "hls_stats: list_paths_and_hashes_for_library failed for lib {}: {:?}",
                    lib.id, err
                );
                Vec::new()
            })
    };
    stats_from_rows(lib, &rows, video_dir)
}
/// Row-driven core of [`compute_for_library`]: takes already-loaded
/// `(rel_path, content_hash)` rows instead of querying the DAO, so
/// tests can exercise it without an `ExifDao` mock.
///
/// Rows whose path is not a video file are ignored. Video rows with a
/// hash are deduplicated by hash; video rows without one are tallied
/// as `hashless_videos`. Each distinct hash is then classified by
/// probing `video_dir` — playlist first, then the `.unsupported`
/// sentinel — and whatever remains is `pending`.
fn stats_from_rows(
    lib: &Library,
    rows: &[(String, Option<String>)],
    video_dir: &Path,
) -> HlsLibraryStats {
    let mut distinct: HashSet<String> = HashSet::new();
    let mut hashless_videos = 0usize;

    let video_rows = rows
        .iter()
        .filter(|(rel_path, _)| file_types::is_video_file(Path::new(rel_path)));
    for (_, maybe_hash) in video_rows {
        if let Some(hash) = maybe_hash {
            distinct.insert(hash.clone());
        } else {
            hashless_videos += 1;
        }
    }

    // The playlist check takes precedence: a hash with both files on
    // disk counts as ready, never as unsupported.
    let (with_playlist, unsupported) =
        distinct
            .iter()
            .fold((0usize, 0usize), |(ready, refused), hash| {
                if hls_paths::playlist_for_hash(video_dir, hash).exists() {
                    (ready + 1, refused)
                } else if hls_paths::sentinel_for_hash(video_dir, hash).exists() {
                    (ready, refused + 1)
                } else {
                    (ready, refused)
                }
            });

    let total = distinct.len();
    HlsLibraryStats {
        library_id: lib.id,
        library: lib.name.clone(),
        total,
        with_playlist,
        unsupported,
        pending: total.saturating_sub(with_playlist + unsupported),
        hashless_videos,
    }
}
/// Emits one `info` line with the cross-library readiness totals
/// followed by a per-library breakdown.
///
/// Intended for the watcher's full-scan tick, so anyone tailing the
/// log gets the headline numbers without scraping Prometheus.
pub fn log_summary(stats: &[HlsLibraryStats]) {
    // Sums one numeric field across every library snapshot.
    let sum_of =
        |field: fn(&HlsLibraryStats) -> usize| -> usize { stats.iter().map(field).sum() };

    let total = sum_of(|s| s.total);
    let with_playlist = sum_of(|s| s.with_playlist);
    let pending = sum_of(|s| s.pending);
    let unsupported = sum_of(|s| s.unsupported);
    let hashless = sum_of(|s| s.hashless_videos);

    let mut per_lib: Vec<String> = Vec::with_capacity(stats.len());
    for s in stats {
        per_lib.push(format!(
            "{}={}/{} pending={} unsupported={} hashless={}",
            s.library, s.with_playlist, s.total, s.pending, s.unsupported, s.hashless_videos,
        ));
    }

    info!(
        "HLS readiness: {}/{} playlists on disk, {} pending, {} unsupported, {} hashless videos | per-library: [{}]",
        with_playlist,
        total,
        pending,
        unsupported,
        hashless,
        per_lib.join(", "),
    );
}
/// `GET /hls/stats` — returns the per-library HLS readiness snapshots
/// plus cross-library totals as JSON, refreshing the Prometheus gauges
/// as a side effect.
///
/// Requires valid `Claims`; the value itself is unused. The DAO query
/// and filesystem probes run on the blocking pool.
///
/// If the blocking task itself fails, the handler responds with
/// `500 Internal Server Error` instead of a `200` carrying all-zero
/// counts, so clients can tell "computation failed" apart from
/// "no videos".
#[get("/hls/stats")]
pub async fn hls_stats_handler(
    _claims: Claims,
    app_state: web::Data<AppState>,
    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
) -> impl Responder {
    // Owned copies: the closure below must be `'static` to move onto
    // the blocking pool.
    let libraries = app_state.libraries.clone();
    let video_dir = std::path::PathBuf::from(&app_state.video_path);
    let exif_dao = exif_dao.into_inner();
    // Synchronous file IO + DB query — run on a blocking pool so the
    // actix worker thread stays free for other requests.
    let stats =
        match web::block(move || compute_and_publish(&libraries, &exif_dao, &video_dir)).await {
            Ok(s) => s,
            Err(e) => {
                warn!("/hls/stats: blocking task failed: {:?}", e);
                return HttpResponse::InternalServerError().body("failed to compute HLS stats");
            }
        };
    let total: usize = stats.iter().map(|s| s.total).sum();
    let with_playlist: usize = stats.iter().map(|s| s.with_playlist).sum();
    let pending: usize = stats.iter().map(|s| s.pending).sum();
    let unsupported: usize = stats.iter().map(|s| s.unsupported).sum();
    let hashless_videos: usize = stats.iter().map(|s| s.hashless_videos).sum();
    HttpResponse::Ok().json(HlsStatsResponse {
        libraries: stats,
        total,
        with_playlist,
        pending,
        unsupported,
        hashless_videos,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Minimal enabled library fixture with an empty root path and no
    /// excluded directories.
    fn lib(id: i32, name: &str) -> Library {
        Library {
            id,
            name: name.into(),
            root_path: String::new(),
            enabled: true,
            excluded_dirs: Vec::new(),
        }
    }

    /// Converts borrowed `(rel_path, hash)` pairs into the owned row
    /// shape `stats_from_rows` consumes.
    fn rows(vs: Vec<(&str, Option<&str>)>) -> Vec<(String, Option<String>)> {
        let mut owned = Vec::with_capacity(vs.len());
        for (path, hash) in vs {
            owned.push((path.to_string(), hash.map(|h| h.to_string())));
        }
        owned
    }

    /// Creates an empty file at `dir/rel`, creating parent directories
    /// as needed.
    fn touch(dir: &Path, rel: &str) {
        let target = dir.join(rel);
        let parent = target.parent().unwrap();
        std::fs::create_dir_all(parent).unwrap();
        std::fs::write(target, b"").unwrap();
    }

    #[test]
    fn videos_only_count_in_total() {
        let tmp = tempdir().unwrap();
        let image_hash = "a".repeat(64);
        let mp4_hash = "b".repeat(64);
        let mov_hash = "c".repeat(64);
        let r = rows(vec![
            // An image row: must not contribute to any counter.
            ("photos/IMG.jpg", Some(image_hash.as_str())),
            ("clip.mp4", Some(mp4_hash.as_str())),
            ("vid.mov", Some(mov_hash.as_str())),
        ]);
        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
        assert_eq!(stats.total, 2);
        assert_eq!(stats.with_playlist, 0);
        assert_eq!(stats.pending, 2);
        assert_eq!(stats.unsupported, 0);
        assert_eq!(stats.hashless_videos, 0);
    }

    #[test]
    fn hash_dedup_collapses_duplicate_rel_paths() {
        let tmp = tempdir().unwrap();
        let shared_hash = "a".repeat(64);
        let other_hash = "b".repeat(64);
        let r = rows(vec![
            ("a/clip.mp4", Some(shared_hash.as_str())),
            // Same bytes at a second path: same hash.
            ("b/clip.mp4", Some(shared_hash.as_str())),
            ("other.mp4", Some(other_hash.as_str())),
        ]);
        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
        assert_eq!(stats.total, 2, "duplicate hashes collapse");
    }

    #[test]
    fn playlist_existence_promotes_to_with_playlist() {
        let tmp = tempdir().unwrap();
        let hash = "a".repeat(64);
        let playlist_rel = format!("aa/{}/playlist.m3u8", hash);
        touch(tmp.path(), &playlist_rel);
        let r = rows(vec![("clip.mp4", Some(hash.as_str()))]);
        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
        assert_eq!(stats.total, 1);
        assert_eq!(stats.with_playlist, 1);
        assert_eq!(stats.pending, 0);
    }

    #[test]
    fn sentinel_existence_promotes_to_unsupported() {
        let tmp = tempdir().unwrap();
        let hash = "b".repeat(64);
        let sentinel_rel = format!("bb/{}/playlist.unsupported", hash);
        touch(tmp.path(), &sentinel_rel);
        let r = rows(vec![("clip.mov", Some(hash.as_str()))]);
        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
        assert_eq!(stats.total, 1);
        assert_eq!(stats.unsupported, 1);
        assert_eq!(stats.with_playlist, 0);
        assert_eq!(stats.pending, 0);
    }

    #[test]
    fn null_hash_videos_are_hashless_not_total() {
        let tmp = tempdir().unwrap();
        let known_hash = "a".repeat(64);
        let r = rows(vec![
            ("clip.mp4", None),
            ("other.mp4", Some(known_hash.as_str())),
        ]);
        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
        assert_eq!(stats.total, 1, "hashless row excluded from total");
        assert_eq!(stats.hashless_videos, 1);
    }

    #[test]
    fn publish_gauges_sets_per_library_value() {
        // A label unique to this test, since the gauges are
        // process-wide statics.
        let label = ["test_publish_a"];
        let s = HlsLibraryStats {
            library_id: 7,
            library: "test_publish_a".into(),
            total: 5,
            with_playlist: 2,
            pending: 3,
            unsupported: 0,
            hashless_videos: 0,
        };
        publish_gauges(&s);
        let total_reading = HLS_VIDEOS_TOTAL.with_label_values(&label).get();
        assert_eq!(total_reading, 5);
        let pending_reading = HLS_VIDEOS_PENDING.with_label_values(&label).get();
        assert_eq!(pending_reading, 3);
        let playlist_reading = HLS_VIDEOS_WITH_PLAYLIST.with_label_values(&label).get();
        assert_eq!(playlist_reading, 2);
    }
}
+3 -8
View File
@@ -444,8 +444,7 @@ where
) )
.service(web::resource("/graph").route(web::get().to(get_graph::<D>))) .service(web::resource("/graph").route(web::get().to(get_graph::<D>)))
.service( .service(
web::resource("/predicate-stats") web::resource("/predicate-stats").route(web::get().to(get_predicate_stats::<D>)),
.route(web::get().to(get_predicate_stats::<D>)),
) )
.service( .service(
web::resource("/predicates/{predicate}/bulk-reject") web::resource("/predicates/{predicate}/bulk-reject")
@@ -1261,12 +1260,8 @@ async fn bulk_reject_predicate<D: KnowledgeDao + 'static>(
let persona = resolve_persona_filter(&req, &claims, &persona_dao); let persona = resolve_persona_filter(&req, &claims, &persona_dao);
let cx = opentelemetry::Context::current(); let cx = opentelemetry::Context::current();
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao"); let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
match dao.bulk_reject_facts_by_predicate( match dao.bulk_reject_facts_by_predicate(&cx, &persona, &predicate, Some(("manual", "manual")))
&cx, {
&persona,
&predicate,
Some(("manual", "manual")),
) {
Ok(rejected) => HttpResponse::Ok().json(BulkRejectResponse { rejected }), Ok(rejected) => HttpResponse::Ok().json(BulkRejectResponse { rejected }),
Err(e) => { Err(e) => {
log::error!("bulk_reject_predicate error: {:?}", e); log::error!("bulk_reject_predicate error: {:?}", e);
+10 -11
View File
@@ -94,7 +94,7 @@ pub fn parse_excluded_dirs_column(raw: Option<&str>) -> Vec<String> {
match raw { match raw {
None => Vec::new(), None => Vec::new(),
Some(s) => s Some(s) => s
.split(|c: char| matches!(c, ',' | '\n' | '\r')) .split([',', '\n', '\r'])
.map(str::trim) .map(str::trim)
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
.map(String::from) .map(String::from)
@@ -148,10 +148,7 @@ pub fn validate_excluded_dirs_entry(entry: &str) -> Result<String, String> {
if let Some(rel) = trimmed.strip_prefix('/') { if let Some(rel) = trimmed.strip_prefix('/') {
// Path form. Reject `..` traversal — `base.join(\"../x\")` doesn't // Path form. Reject `..` traversal — `base.join(\"../x\")` doesn't
// canonicalise, so `path.starts_with(...)` never matches. // canonicalise, so `path.starts_with(...)` never matches.
if rel if rel.split('/').any(|seg| seg == "..") {
.split('/')
.any(|seg| seg == "..")
{
return Err(format!( return Err(format!(
"'{}': '..' segments don't normalise — the prefix-match never fires", "'{}': '..' segments don't normalise — the prefix-match never fires",
trimmed trimmed
@@ -542,7 +539,10 @@ pub async fn patch_library(
{ {
Ok(n) => affected = affected.max(n), Ok(n) => affected = affected.max(n),
Err(e) => { Err(e) => {
warn!("PATCH /libraries/{}: enabled update failed: {:?}", lib_id, e); warn!(
"PATCH /libraries/{}: enabled update failed: {:?}",
lib_id, e
);
return HttpResponse::InternalServerError().body(format!("{}", e)); return HttpResponse::InternalServerError().body(format!("{}", e));
} }
} }
@@ -600,7 +600,9 @@ pub async fn patch_library(
); );
HttpResponse::Ok().json(lib) HttpResponse::Ok().json(lib)
} }
None => HttpResponse::NotFound().body(format!("library id {} not found after update", lib_id)), None => {
HttpResponse::NotFound().body(format!("library id {} not found after update", lib_id))
}
} }
} }
@@ -930,10 +932,7 @@ mod tests {
#[test] #[test]
fn validate_strips_trailing_slash_on_path_entries() { fn validate_strips_trailing_slash_on_path_entries() {
assert_eq!( assert_eq!(validate_excluded_dirs_entry("/photos/").unwrap(), "/photos");
validate_excluded_dirs_entry("/photos/").unwrap(),
"/photos"
);
assert_eq!( assert_eq!(
validate_excluded_dirs_entry("/photos//").unwrap(), validate_excluded_dirs_entry("/photos//").unwrap(),
"/photos" "/photos"
+36 -9
View File
@@ -26,7 +26,6 @@ use crate::files::{RealFileSystem, move_file};
use crate::service::ServiceBuilder; use crate::service::ServiceBuilder;
use crate::state::AppState; use crate::state::AppState;
use crate::tags::*; use crate::tags::*;
use crate::video::actors::ScanDirectoryMessage;
use log::{error, info}; use log::{error, info};
mod ai; mod ai;
@@ -46,6 +45,7 @@ mod file_types;
mod files; mod files;
mod geo; mod geo;
mod handlers; mod handlers;
mod hls_stats;
mod libraries; mod libraries;
mod library_maintenance; mod library_maintenance;
mod perceptual_hash; mod perceptual_hash;
@@ -73,6 +73,16 @@ fn main() -> std::io::Result<()> {
run_migrations(&mut connect()).expect("Failed to run migrations"); run_migrations(&mut connect()).expect("Failed to run migrations");
// One-shot retirement of the pre-content-hash HLS layout. Idempotent
// — a second boot finds nothing and reports zero deletions, so it's
// safe to leave wired in until the module is removed in a later
// release. Runs before the actor pipeline starts so we never race a
// PlaylistGenerator write against this rm.
{
let video_path = env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env");
video::legacy_migration::retire_legacy_hls_output(std::path::Path::new(&video_path));
}
let system = actix::System::new(); let system = actix::System::new();
system.block_on(async { system.block_on(async {
// Just use basic logger when running a non-release build // Just use basic logger when running a non-release build
@@ -117,15 +127,32 @@ fn main() -> std::io::Result<()> {
.registry .registry
.register(Box::new(thumbnails::VIDEO_GAUGE.clone())) .register(Box::new(thumbnails::VIDEO_GAUGE.clone()))
.unwrap(); .unwrap();
// HLS readiness gauges. Updated by the watcher every full-scan
// tick and on every `/hls/stats` request. See `hls_stats`.
prometheus
.registry
.register(Box::new(hls_stats::HLS_VIDEOS_TOTAL.clone()))
.unwrap();
prometheus
.registry
.register(Box::new(hls_stats::HLS_VIDEOS_WITH_PLAYLIST.clone()))
.unwrap();
prometheus
.registry
.register(Box::new(hls_stats::HLS_VIDEOS_PENDING.clone()))
.unwrap();
prometheus
.registry
.register(Box::new(hls_stats::HLS_VIDEOS_UNSUPPORTED.clone()))
.unwrap();
let app_state = app_data.clone(); let app_state = app_data.clone();
for lib in &app_state.libraries {
app_state.playlist_manager.do_send(ScanDirectoryMessage {
directory: lib.root_path.clone(),
});
}
// Start file watcher with playlist manager and preview generator // Start file watcher with playlist manager and preview generator.
// The watcher's first tick is configured to be a full scan (see
// `watch_files`), so every library's missing HLS playlists are
// queued on that first iteration — no separate startup walk
// needed.
let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone(); let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone();
let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone(); let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone();
// Both background jobs read from the shared `live_libraries` lock // Both background jobs read from the shared `live_libraries` lock
@@ -257,10 +284,10 @@ fn main() -> std::io::Result<()> {
.service(handlers::image::get_image) .service(handlers::image::get_image)
.service(handlers::image::upload_image) .service(handlers::image::upload_image)
.service(handlers::video::generate_video) .service(handlers::video::generate_video)
.service(handlers::video::stream_video) .service(handlers::video::stream_hls_file)
.service(handlers::video::get_video_preview) .service(handlers::video::get_video_preview)
.service(handlers::video::get_preview_status) .service(handlers::video::get_preview_status)
.service(handlers::video::get_video_part) .service(hls_stats::hls_stats_handler)
.service(handlers::favorites::favorites) .service(handlers::favorites::favorites)
.service(handlers::favorites::put_add_favorite) .service(handlers::favorites::put_add_favorite)
.service(handlers::favorites::delete_favorite) .service(handlers::favorites::delete_favorite)
+1 -1
View File
@@ -111,7 +111,7 @@ impl AppState {
"AppState::new requires at least one library" "AppState::new requires at least one library"
); );
let base_path = libraries_vec[0].root_path.clone(); let base_path = libraries_vec[0].root_path.clone();
let playlist_generator = PlaylistGenerator::new(); let playlist_generator = PlaylistGenerator::new(video_path.clone());
let video_playlist_manager = let video_playlist_manager =
VideoPlaylistManager::new(video_path.clone(), playlist_generator.start()); VideoPlaylistManager::new(video_path.clone(), playlist_generator.start());
+110 -232
View File
@@ -1,18 +1,18 @@
use crate::content_hash;
use crate::database::PreviewDao; use crate::database::PreviewDao;
use crate::libraries::Library; use crate::libraries::Library;
use crate::otel::global_tracer; use crate::otel::global_tracer;
use crate::thumbnails::is_video;
use crate::video::ffmpeg::{generate_preview_clip, get_duration_seconds_blocking}; use crate::video::ffmpeg::{generate_preview_clip, get_duration_seconds_blocking};
use crate::video::hls_paths;
use actix::prelude::*; use actix::prelude::*;
use log::{debug, error, info, trace, warn}; use log::{debug, error, info, warn};
use opentelemetry::KeyValue; use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer}; use opentelemetry::trace::{Span, Status, Tracer};
use std::io::Result; use std::io::Result;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::{Child, Command, ExitStatus, Stdio}; use std::process::{Command, Stdio};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use walkdir::{DirEntry, WalkDir};
// ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8 // ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
// ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8 // ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8
@@ -22,89 +22,14 @@ impl Actor for StreamActor {
type Context = Context<Self>; type Context = Context<Self>;
} }
pub struct ProcessMessage(pub String, pub Child); /// A video paired with its content hash, ready to be queued for HLS
/// playlist generation. Hash is required because all output paths are
impl Message for ProcessMessage { /// keyed on it; callers that lack a hash (rows mid-backfill) must skip
type Result = Result<ExitStatus>; /// the video rather than fabricate one.
} #[derive(Debug, Clone)]
pub struct VideoToQueue {
impl Handler<ProcessMessage> for StreamActor { pub video_path: PathBuf,
type Result = Result<ExitStatus>; pub content_hash: String,
fn handle(&mut self, msg: ProcessMessage, _ctx: &mut Self::Context) -> Self::Result {
trace!("Message received");
let mut process = msg.1;
let result = process.wait();
debug!(
"Finished waiting for: {:?}. Code: {:?}",
msg.0,
result
.as_ref()
.map_or(-1, |status| status.code().unwrap_or(-1))
);
result
}
}
/// Builds the playlist path `"{playlist_dir}/{file_name}.m3u8"` for a
/// source video.
///
/// The file name is the final component of `video_path`; when there is
/// none, or it is not valid UTF-8, the literal `unknown` is used. The
/// directory and file name are joined with a plain `/`, exactly as
/// given — no normalisation is applied.
pub fn playlist_file_for(playlist_dir: &str, video_path: &Path) -> PathBuf {
    let leaf = match video_path.file_name() {
        Some(os_name) => os_name.to_str().unwrap_or("unknown"),
        None => "unknown",
    };
    let mut joined = String::with_capacity(playlist_dir.len() + leaf.len() + 6);
    joined.push_str(playlist_dir);
    joined.push('/');
    joined.push_str(leaf);
    joined.push_str(".m3u8");
    PathBuf::from(joined)
}
/// Returns the sentinel path for a playlist: the playlist path with
/// `.unsupported` appended to it verbatim.
///
/// The sentinel is written next to a would-be playlist when ffmpeg
/// cannot transcode the source (e.g. a truncated mp4 with no moov
/// atom). While it exists, scans skip the file instead of re-running
/// ffmpeg on every pass; delete the `.unsupported` file to force a
/// retry.
pub fn playlist_unsupported_sentinel(playlist_file: &Path) -> PathBuf {
    let mut raw = playlist_file.to_path_buf().into_os_string();
    raw.push(".unsupported");
    raw.into()
}
pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Child> {
if Path::new(playlist_file).exists() {
debug!("Playlist already exists: {}", playlist_file);
return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
}
let result = Command::new("ffmpeg")
.arg("-i")
.arg(video_path)
.arg("-c:v")
.arg("h264")
.arg("-crf")
.arg("21")
.arg("-preset")
.arg("veryfast")
.arg("-hls_time")
.arg("3")
.arg("-hls_list_size")
.arg("0")
.arg("-hls_playlist_type")
.arg("vod")
.arg("-vf")
.arg("scale='min(1080,iw)':-2,setsar=1:1")
.arg(playlist_file)
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn();
let start_time = std::time::Instant::now();
loop {
actix::clock::sleep(std::time::Duration::from_secs(1)).await;
if Path::new(playlist_file).exists()
|| std::time::Instant::now() - start_time > std::time::Duration::from_secs(5)
{
break;
}
}
result
} }
pub fn generate_video_thumbnail(path: &Path, destination: &Path) -> std::io::Result<()> { pub fn generate_video_thumbnail(path: &Path, destination: &Path) -> std::io::Result<()> {
@@ -331,17 +256,17 @@ async fn get_max_gop_seconds(video_path: &str) -> Option<f64> {
} }
pub struct VideoPlaylistManager { pub struct VideoPlaylistManager {
playlist_dir: PathBuf, video_dir: PathBuf,
playlist_generator: Addr<PlaylistGenerator>, playlist_generator: Addr<PlaylistGenerator>,
} }
impl VideoPlaylistManager { impl VideoPlaylistManager {
pub fn new<P: Into<PathBuf>>( pub fn new<P: Into<PathBuf>>(
playlist_dir: P, video_dir: P,
playlist_generator: Addr<PlaylistGenerator>, playlist_generator: Addr<PlaylistGenerator>,
) -> Self { ) -> Self {
Self { Self {
playlist_dir: playlist_dir.into(), video_dir: video_dir.into(),
playlist_generator, playlist_generator,
} }
} }
@@ -351,144 +276,68 @@ impl Actor for VideoPlaylistManager {
type Context = Context<Self>; type Context = Context<Self>;
} }
/// Handles a directory scan request: walks `msg.directory`, picks out
/// video files that have neither a playlist nor an `.unsupported`
/// sentinel in the playlist directory, and asks the `PlaylistGenerator`
/// to build a playlist for each one.
///
/// The directory walk and the existence checks run synchronously inside
/// `handle`; only the generation requests run in the returned future.
///
/// # Panics
/// Panics if the playlist directory or a video path is not valid UTF-8
/// (`to_str().unwrap()`), or if sending to the generator fails
/// (`expect`).
impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
type Result = ResponseFuture<()>;
fn handle(&mut self, msg: ScanDirectoryMessage, _ctx: &mut Self::Context) -> Self::Result {
let tracer = global_tracer();
let mut span = tracer.start("videoplaylistmanager.scan_directory");
let start = std::time::Instant::now();
info!(
"Starting scan directory for video playlist generation: {}",
msg.directory
);
let playlist_output_dir = self.playlist_dir.clone();
let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
// Collect the work list up front. Unreadable directory entries are
// silently dropped by `filter_map(|e| e.ok())`.
let video_files = WalkDir::new(&msg.directory)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(is_video)
.filter(|e| {
// Skip anything already transcoded or already marked unsupported.
let playlist = playlist_file_for(&playlist_dir_str, e.path());
!playlist.exists() && !playlist_unsupported_sentinel(&playlist).exists()
})
.collect::<Vec<DirEntry>>();
let scan_dir_name = msg.directory.clone();
let playlist_generator = self.playlist_generator.clone();
Box::pin(async move {
// Each request is awaited before the next one is sent, so this loop
// issues generation requests one at a time.
for e in video_files {
let path = e.path();
let path_as_str = path.to_str().unwrap();
debug!(
"Sending generate playlist message for path: {}",
path_as_str
);
match playlist_generator
.send(GeneratePlaylistMessage {
playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
video_path: PathBuf::from(path),
})
.await
.expect("Failed to send generate playlist message")
{
Ok(_) => {
span.add_event(
"Playlist generated",
vec![KeyValue::new("video_path", path_as_str.to_string())],
);
debug!(
"Successfully generated playlist for file: '{}'",
path_as_str
);
}
// The playlist appeared between the scan and the request; not an error.
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
debug!("Playlist already exists for '{:?}', skipping", path);
}
// Any other failure is logged and the scan moves on to the next file.
Err(e) => {
warn!("Failed to generate playlist for path '{:?}'. {:?}", path, e);
}
}
}
span.add_event(
"Finished directory scan",
vec![KeyValue::new("directory", scan_dir_name.to_string())],
);
info!(
"Finished directory scan of '{}' in {:?}",
scan_dir_name,
start.elapsed()
);
})
}
}
impl Handler<QueueVideosMessage> for VideoPlaylistManager { impl Handler<QueueVideosMessage> for VideoPlaylistManager {
type Result = (); type Result = ();
fn handle(&mut self, msg: QueueVideosMessage, _ctx: &mut Self::Context) -> Self::Result { fn handle(&mut self, msg: QueueVideosMessage, _ctx: &mut Self::Context) -> Self::Result {
if msg.video_paths.is_empty() { if msg.videos.is_empty() {
return; return;
} }
info!( let video_dir = self.video_dir.clone();
"Queueing {} videos for HLS playlist generation",
msg.video_paths.len()
);
let playlist_output_dir = self.playlist_dir.clone();
let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
let playlist_generator = self.playlist_generator.clone(); let playlist_generator = self.playlist_generator.clone();
for video_path in msg.video_paths { let mut queued = 0usize;
let playlist = playlist_file_for(&playlist_dir_str, &video_path); let mut already_present = 0usize;
if playlist.exists() || playlist_unsupported_sentinel(&playlist).exists() { for VideoToQueue {
video_path,
content_hash,
} in msg.videos
{
let playlist = hls_paths::playlist_for_hash(&video_dir, &content_hash);
let sentinel = hls_paths::sentinel_for_hash(&video_dir, &content_hash);
if playlist.exists() || sentinel.exists() {
already_present += 1;
continue; continue;
} }
let path_str = video_path.to_string_lossy().to_string(); debug!(
debug!("Queueing playlist generation for: {}", path_str); "Queueing playlist generation for {} (hash={})",
video_path.display(),
short_hash(&content_hash)
);
playlist_generator.do_send(GeneratePlaylistMessage { playlist_generator.do_send(GeneratePlaylistMessage {
playlist_path: playlist_dir_str.clone(),
video_path, video_path,
content_hash,
}); });
queued += 1;
} }
info!(
"Queue tick: {} queued, {} skipped (playlist or sentinel already on disk)",
queued, already_present
);
} }
} }
/// Actor message asking `VideoPlaylistManager` to walk a directory and
/// request HLS playlists for the videos in it that lack one. The
/// handler produces no reply value (`rtype` is `()`).
#[derive(Message)]
#[rtype(result = "()")]
pub struct ScanDirectoryMessage {
/// Root of the directory tree to walk.
pub(crate) directory: String,
}
#[derive(Message)] #[derive(Message)]
#[rtype(result = "()")] #[rtype(result = "()")]
pub struct QueueVideosMessage { pub struct QueueVideosMessage {
pub video_paths: Vec<PathBuf>, pub videos: Vec<VideoToQueue>,
} }
#[derive(Message)] #[derive(Message)]
#[rtype(result = "Result<()>")] #[rtype(result = "Result<()>")]
pub struct GeneratePlaylistMessage { pub struct GeneratePlaylistMessage {
pub video_path: PathBuf, pub video_path: PathBuf,
pub playlist_path: String, pub content_hash: String,
} }
pub struct PlaylistGenerator { pub struct PlaylistGenerator {
semaphore: Arc<Semaphore>, semaphore: Arc<Semaphore>,
video_dir: PathBuf,
} }
impl PlaylistGenerator { impl PlaylistGenerator {
pub(crate) fn new() -> Self { pub(crate) fn new<P: Into<PathBuf>>(video_dir: P) -> Self {
// Concurrency is tunable via HLS_CONCURRENCY so operators can dial // Concurrency is tunable via HLS_CONCURRENCY so operators can dial
// it to their hardware: 1 on weak Synology boxes to avoid thermal // it to their hardware: 1 on weak Synology boxes to avoid thermal
// throttling, higher on desktops with spare cores. // throttling, higher on desktops with spare cores.
@@ -500,6 +349,7 @@ impl PlaylistGenerator {
info!("PlaylistGenerator: concurrency={}", concurrency); info!("PlaylistGenerator: concurrency={}", concurrency);
PlaylistGenerator { PlaylistGenerator {
semaphore: Arc::new(Semaphore::new(concurrency)), semaphore: Arc::new(Semaphore::new(concurrency)),
video_dir: video_dir.into(),
} }
} }
} }
@@ -513,20 +363,23 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
fn handle(&mut self, msg: GeneratePlaylistMessage, _ctx: &mut Self::Context) -> Self::Result { fn handle(&mut self, msg: GeneratePlaylistMessage, _ctx: &mut Self::Context) -> Self::Result {
let video_file = msg.video_path.to_str().unwrap().to_owned(); let video_file = msg.video_path.to_str().unwrap().to_owned();
let playlist_path = msg.playlist_path.as_str().to_owned(); let content_hash_str = msg.content_hash.clone();
let semaphore = self.semaphore.clone(); let semaphore = self.semaphore.clone();
let video_dir = self.video_dir.clone();
let playlist_file = format!( let hash_dir = content_hash::hls_dir(&video_dir, &content_hash_str);
"{}/{}.m3u8", let playlist_path = hls_paths::playlist_for_hash(&video_dir, &content_hash_str);
playlist_path, let sentinel_path = hls_paths::sentinel_for_hash(&video_dir, &content_hash_str);
msg.video_path.file_name().unwrap().to_str().unwrap() let segment_template = hls_paths::segment_template_for_hash(&video_dir, &content_hash_str);
); let playlist_file = playlist_path.to_string_lossy().to_string();
let segment_pattern = segment_template.to_string_lossy().to_string();
let tracer = global_tracer(); let tracer = global_tracer();
let mut span = tracer let mut span = tracer
.span_builder("playlistgenerator.generate_playlist") .span_builder("playlistgenerator.generate_playlist")
.with_attributes(vec![ .with_attributes(vec![
KeyValue::new("video_file", video_file.clone()), KeyValue::new("video_file", video_file.clone()),
KeyValue::new("content_hash", content_hash_str.clone()),
KeyValue::new("playlist_file", playlist_file.clone()), KeyValue::new("playlist_file", playlist_file.clone()),
]) ])
.start(&tracer); .start(&tracer);
@@ -550,7 +403,7 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
)], )],
); );
if Path::new(&playlist_file).exists() { if playlist_path.exists() {
debug!("Playlist already exists: {}", playlist_file); debug!("Playlist already exists: {}", playlist_file);
span.set_status(Status::error(format!( span.set_status(Status::error(format!(
"Playlist already exists: {}", "Playlist already exists: {}",
@@ -559,6 +412,19 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists)); return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
} }
// Ensure the shard + hash directory exist. Idempotent — the
// dir may already be present from a prior attempt that wrote
// a sentinel before being cleared for retry.
if let Err(e) = tokio::fs::create_dir_all(&hash_dir).await {
error!(
"Failed to create HLS hash dir {}: {}",
hash_dir.display(),
e
);
span.set_status(Status::error(format!("mkdir failed: {}", e)));
return Err(e);
}
// One ffprobe call for codec + rotation metadata. // One ffprobe call for codec + rotation metadata.
let stream_meta = probe_video_stream_meta(&video_file).await; let stream_meta = probe_video_stream_meta(&video_file).await;
let is_h264 = stream_meta.is_h264; let is_h264 = stream_meta.is_h264;
@@ -619,16 +485,11 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
span.add_event("Transcoding to h264", vec![]); span.add_event("Transcoding to h264", vec![]);
} }
// Encode to a .tmp playlist and explicit segment names so a failed // Encode to a .tmp playlist alongside the final inside the
// encode leaves predictable artifacts we can clean up — and so a // hash dir, so a concurrent scan never sees a half-written
// concurrent scan doesn't see a half-written .m3u8 as "done". // .m3u8 as "done". Segments use the hash-keyed template;
// ffmpeg writes them next to the playlist (relative refs).
let playlist_tmp = format!("{}.tmp", playlist_file); let playlist_tmp = format!("{}.tmp", playlist_file);
let video_stem = msg
.video_path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("video");
let segment_pattern = format!("{}/{}_%03d.ts", playlist_path, video_stem);
let mut cmd = tokio::process::Command::new("ffmpeg"); let mut cmd = tokio::process::Command::new("ffmpeg");
cmd.arg("-y").arg("-i").arg(&video_file); cmd.arg("-y").arg("-i").arg(&video_file);
@@ -717,12 +578,12 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
let success = matches!(&ffmpeg_result, Ok(out) if out.status.success()); let success = matches!(&ffmpeg_result, Ok(out) if out.status.success());
if success { if success {
if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_file).await { if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_path).await {
error!( error!(
"ffmpeg succeeded but rename {} -> {} failed: {}", "ffmpeg succeeded but rename {} -> {} failed: {}",
playlist_tmp, playlist_file, e playlist_tmp, playlist_file, e
); );
cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await; cleanup_partial_hls(&hash_dir).await;
span.set_status(Status::error(format!("rename failed: {}", e))); span.set_status(Status::error(format!("rename failed: {}", e)));
return Err(e); return Err(e);
} }
@@ -739,18 +600,17 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
Err(e) => format!("ffmpeg failed: {}", e), Err(e) => format!("ffmpeg failed: {}", e),
}; };
error!("ffmpeg failed for {}: {}", video_file, detail); error!("ffmpeg failed for {}: {}", video_file, detail);
cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await; cleanup_partial_hls(&hash_dir).await;
let sentinel = playlist_unsupported_sentinel(Path::new(&playlist_file)); if let Err(se) = tokio::fs::write(&sentinel_path, b"").await {
if let Err(se) = tokio::fs::write(&sentinel, b"").await {
warn!( warn!(
"Failed to write playlist sentinel {}: {}", "Failed to write playlist sentinel {}: {}",
sentinel.display(), sentinel_path.display(),
se se
); );
} else { } else {
info!( info!(
"Wrote playlist sentinel {} so future scans skip {}", "Wrote playlist sentinel {} so future scans skip {}",
sentinel.display(), sentinel_path.display(),
video_file video_file
); );
} }
@@ -761,29 +621,47 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
} }
} }
/// Delete the temp playlist and any segment files that ffmpeg may have written /// Delete the partial playlist (.tmp) and any segment files left behind by
/// before failing. Called both on ffmpeg error and on rename failure so a /// a failed ffmpeg run. Wipes every non-sentinel file in the hash dir;
/// retry on the next scan starts from a clean slate. /// retains the sentinel if one has already been written by an earlier
async fn cleanup_partial_hls(playlist_tmp: &str, playlist_dir: &str, video_stem: &str) { /// caller in the same path (today there is none, but kept defensively so
let _ = tokio::fs::remove_file(playlist_tmp).await; /// the function is safe to call after sentinel write too).
async fn cleanup_partial_hls(hash_dir: &Path) {
let segment_prefix = format!("{}_", video_stem); let Ok(mut entries) = tokio::fs::read_dir(hash_dir).await else {
let Ok(mut entries) = tokio::fs::read_dir(playlist_dir).await else {
return; return;
}; };
while let Ok(Some(entry)) = entries.next_entry().await { while let Ok(Some(entry)) = entries.next_entry().await {
let Some(name) = entry.file_name().to_str().map(str::to_owned) else { let path = entry.path();
let is_sentinel = path
.file_name()
.and_then(|n| n.to_str())
.map(|n| n == hls_paths::UNSUPPORTED_SENTINEL_FILENAME)
.unwrap_or(false);
if is_sentinel {
continue; continue;
}; }
if name.starts_with(&segment_prefix) if let Err(e) = tokio::fs::remove_file(&path).await {
&& name.ends_with(".ts") warn!(
&& let Err(e) = tokio::fs::remove_file(entry.path()).await "Failed to remove partial HLS file {}: {}",
{ path.display(),
warn!("Failed to remove partial segment {}: {}", name, e); e
);
} }
} }
} }
/// Abbreviates a content hash for log output.
///
/// Returns the leading 16 characters of `hash`, or all of `hash` when it
/// has 16 characters or fewer. The cut is located with `char_indices`, so
/// the returned slice always ends on a character boundary and slicing
/// cannot panic, even for non-ASCII input.
fn short_hash(hash: &str) -> &str {
    // The 17th character (index 16), if present, starts at the byte
    // offset where the abbreviated slice must stop.
    match hash.char_indices().nth(16) {
        Some((cut, _)) => &hash[..cut],
        None => hash,
    }
}
#[derive(Message)] #[derive(Message)]
#[rtype(result = "()")] #[rtype(result = "()")]
pub struct GeneratePreviewClipMessage { pub struct GeneratePreviewClipMessage {
+84
View File
@@ -0,0 +1,84 @@
//! Path layout for hash-keyed HLS output.
//!
//! Source-of-truth is [`crate::content_hash::hls_dir`], which gives
//! `<video_dir>/<hash[..2]>/<hash>/`. The playlist, the per-segment files,
//! and the "ffmpeg refused" sentinel all live inside that directory so a
//! `.m3u8` written with relative segment references resolves correctly
//! at serve time without any URL rewriting.
use std::path::{Path, PathBuf};
use crate::content_hash;
/// Standard filename for the HLS playlist inside a hash dir. Fixed so
/// the URL contract is `playlist.m3u8` regardless of the source video's
/// original basename. Joined onto the hash dir by [`playlist_for_hash`].
pub const PLAYLIST_FILENAME: &str = "playlist.m3u8";
/// Sentinel filename written when ffmpeg refused to transcode the
/// source. Presence in the hash dir tells future scans to skip the file
/// instead of re-running ffmpeg every tick. Delete to force a retry.
/// Joined onto the hash dir by [`sentinel_for_hash`].
pub const UNSUPPORTED_SENTINEL_FILENAME: &str = "playlist.unsupported";
/// Segment-name template passed to ffmpeg via `-hls_segment_filename`.
/// Segments live inside the hash dir; the playlist's relative refs
/// resolve to siblings automatically. The `%03d` placeholder is the
/// zero-padded segment index, yielding names such as `segment_000.ts`.
pub const SEGMENT_TEMPLATE: &str = "segment_%03d.ts";
/// Location of the HLS playlist for the video whose content hash is `hash`.
///
/// The hash directory comes from [`content_hash::hls_dir`];
/// [`PLAYLIST_FILENAME`] is appended to it. For example, a `video_dir` of
/// `/var/video` and hash `abcdef0123456789` give
/// `/var/video/ab/abcdef0123456789/playlist.m3u8`.
pub fn playlist_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
    let hash_dir = content_hash::hls_dir(video_dir, hash);
    hash_dir.join(PLAYLIST_FILENAME)
}
/// Location of the "unsupported source" sentinel for the video whose
/// content hash is `hash`.
///
/// The sentinel is a sibling of the playlist: the hash directory from
/// [`content_hash::hls_dir`] with [`UNSUPPORTED_SENTINEL_FILENAME`]
/// appended, e.g. `/var/video/ab/abcdef0123456789/playlist.unsupported`.
pub fn sentinel_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
    let hash_dir = content_hash::hls_dir(video_dir, hash);
    hash_dir.join(UNSUPPORTED_SENTINEL_FILENAME)
}
/// Value to hand to ffmpeg's `-hls_segment_filename` for the video whose
/// content hash is `hash`.
///
/// This is the hash directory from [`content_hash::hls_dir`] with
/// [`SEGMENT_TEMPLATE`] appended, e.g.
/// `/var/video/ab/abcdef0123456789/segment_%03d.ts`. The result is
/// absolute whenever `video_dir` is.
pub fn segment_template_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
    let hash_dir = content_hash::hls_dir(video_dir, hash);
    hash_dir.join(SEGMENT_TEMPLATE)
}
// Unit tests pinning the on-disk layout contract:
// `<video_dir>/<first two hash chars>/<hash>/<fixed filename>`.
#[cfg(test)]
mod tests {
    use super::*;

    /// The playlist lands inside the sharded hash dir under the fixed
    /// `playlist.m3u8` name.
    #[test]
    fn playlist_path_lives_inside_sharded_hash_dir() {
        let video = Path::new("/var/video");
        let p = playlist_for_hash(video, "abcdef0123456789");
        assert_eq!(
            p,
            PathBuf::from("/var/video/ab/abcdef0123456789/playlist.m3u8")
        );
    }

    /// The unsupported-source sentinel is a sibling of the playlist.
    #[test]
    fn sentinel_path_lives_alongside_playlist() {
        let video = Path::new("/var/video");
        let s = sentinel_for_hash(video, "abcdef0123456789");
        assert_eq!(
            s,
            PathBuf::from("/var/video/ab/abcdef0123456789/playlist.unsupported")
        );
    }

    /// The segment template keeps its `%03d` placeholder verbatim and is
    /// also a sibling of the playlist.
    #[test]
    fn segment_template_lives_alongside_playlist() {
        let video = Path::new("/var/video");
        let t = segment_template_for_hash(video, "abcdef0123456789");
        assert_eq!(
            t,
            PathBuf::from("/var/video/ab/abcdef0123456789/segment_%03d.ts")
        );
    }

    /// Two different hashes must never share an output directory.
    #[test]
    fn distinct_hashes_yield_distinct_dirs() {
        let video = Path::new("/var/video");
        let a = playlist_for_hash(video, "1111aaaa");
        let b = playlist_for_hash(video, "2222bbbb");
        assert_ne!(a.parent(), b.parent());
    }
}
+243
View File
@@ -0,0 +1,243 @@
//! One-shot retirement of the pre-content-hash HLS output layout.
//!
//! Before the hash-keyed layout landed, the actor pipeline wrote every
//! playlist as `$VIDEO_PATH/<source-basename>.m3u8` with sibling
//! `<source-basename>_NNN.ts` segments and a `<source-basename>.m3u8.unsupported`
//! sentinel on ffmpeg failure. The new pipeline (see
//! [`crate::video::hls_paths`]) puts everything inside a hash-keyed
//! subdirectory, so the legacy flat files are orphaned the moment the
//! upgraded binary boots — they're not served, not refreshed, and not
//! GC'd by the new orphan cleanup (which deliberately ignores anything
//! that doesn't sit inside a `<shard>/<hash>/` dir).
//!
//! This migration runs once on startup. It walks `$VIDEO_PATH` at depth
//! 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts`
//! file, and reports a single info line. It is idempotent — a second
//! run finds nothing and reports zero deletions, so it's safe to leave
//! wired in across releases until the codebase finally drops the
//! module.
//!
//! Sub-directories under `$VIDEO_PATH` are intentionally left alone:
//! every legitimate child of `$VIDEO_PATH` in the new layout is a
//! 2-char shard directory holding hash subdirs, and those are managed
//! by `cleanup_orphaned_playlists`.
use std::path::Path;
use log::{info, warn};
/// Tally of what a single run of the legacy-HLS migration did.
///
/// Each `deleted_*` field counts successful removals of one artifact
/// kind; `errors` counts removals that failed.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RetireStats {
    /// Legacy `.m3u8` playlists removed.
    pub deleted_playlists: usize,
    /// Legacy `.ts` segments removed.
    pub deleted_segments: usize,
    /// Legacy `.m3u8.unsupported` sentinels removed.
    pub deleted_sentinels: usize,
    /// Legacy `.m3u8.tmp` partial playlists removed.
    pub deleted_tmp: usize,
    /// Files that matched a legacy pattern but could not be removed.
    pub errors: usize,
}

impl RetireStats {
    /// Number of files removed across all four artifact kinds.
    ///
    /// Failed removals (`errors`) are not part of the total.
    pub fn total_deleted(&self) -> usize {
        // Destructure so that adding a field forces a decision here.
        let Self {
            deleted_playlists,
            deleted_segments,
            deleted_sentinels,
            deleted_tmp,
            errors: _,
        } = *self;
        deleted_playlists + deleted_segments + deleted_sentinels + deleted_tmp
    }
}
/// Delete every legacy basename-keyed HLS artifact at the root of
/// `video_dir`. Hash dirs (children that are directories) are skipped.
/// Returns counts so the caller can log a single line summary.
pub fn retire_legacy_hls_output(video_dir: &Path) -> RetireStats {
let mut stats = RetireStats::default();
let read = match std::fs::read_dir(video_dir) {
Ok(r) => r,
Err(e) => {
warn!(
"Legacy HLS migration: cannot read {} ({}); skipping",
video_dir.display(),
e
);
return stats;
}
};
for entry in read.flatten() {
let file_type = match entry.file_type() {
Ok(t) => t,
Err(_) => continue,
};
if !file_type.is_file() {
// Hash shard directories live here in the new layout.
continue;
}
let path = entry.path();
let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
continue;
};
let bucket = classify(name);
let Some(bucket) = bucket else {
continue;
};
match std::fs::remove_file(&path) {
Ok(()) => match bucket {
LegacyKind::Playlist => stats.deleted_playlists += 1,
LegacyKind::Segment => stats.deleted_segments += 1,
LegacyKind::Sentinel => stats.deleted_sentinels += 1,
LegacyKind::Tmp => stats.deleted_tmp += 1,
},
Err(e) => {
warn!(
"Legacy HLS migration: failed to remove {}: {}",
path.display(),
e
);
stats.errors += 1;
}
}
}
if stats.total_deleted() > 0 || stats.errors > 0 {
info!(
"Legacy HLS migration: deleted {} playlist(s), {} segment(s), {} sentinel(s), {} tmp; {} error(s)",
stats.deleted_playlists,
stats.deleted_segments,
stats.deleted_sentinels,
stats.deleted_tmp,
stats.errors,
);
} else {
info!(
"Legacy HLS migration: nothing to do under {}",
video_dir.display()
);
}
stats
}
/// The kinds of legacy (pre-content-hash) HLS artifact the migration
/// recognises at the root of `$VIDEO_PATH`.
#[derive(Debug, Clone, Copy)]
enum LegacyKind {
    /// A `.m3u8` playlist.
    Playlist,
    /// A `.ts` media segment.
    Segment,
    /// A `.m3u8.unsupported` "ffmpeg refused" marker.
    Sentinel,
    /// A `.m3u8.tmp` partially written playlist.
    Tmp,
}

/// Map a file name found at the root of `$VIDEO_PATH` to the legacy HLS
/// artifact kind it represents.
///
/// Matching is by case-sensitive suffix only. Returns `None` for every
/// other name — operator-stashed files, new-layout files (which don't
/// live here), etc. — so the caller never removes them.
fn classify(name: &str) -> Option<LegacyKind> {
    // Listed most-specific first for readability. The suffixes are
    // mutually exclusive under `ends_with`, so at most one entry matches.
    const SUFFIXES: [(&str, LegacyKind); 4] = [
        (".m3u8.unsupported", LegacyKind::Sentinel),
        (".m3u8.tmp", LegacyKind::Tmp),
        (".m3u8", LegacyKind::Playlist),
        (".ts", LegacyKind::Segment),
    ];

    SUFFIXES
        .iter()
        .find(|(suffix, _)| name.ends_with(*suffix))
        .map(|&(_, kind)| kind)
}
// Unit tests for the legacy-HLS retirement pass. The filesystem tests
// run against a throwaway `tempdir` standing in for `$VIDEO_PATH`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Each legacy suffix maps to its kind; unrelated names map to `None`.
    #[test]
    fn classify_recognises_each_legacy_artifact() {
        assert!(matches!(
            classify("IMG_0341.MOV.m3u8"),
            Some(LegacyKind::Playlist)
        ));
        assert!(matches!(
            classify("IMG_0341.MOV_000.ts"),
            Some(LegacyKind::Segment)
        ));
        assert!(matches!(
            classify("IMG_0341.MOV.m3u8.unsupported"),
            Some(LegacyKind::Sentinel)
        ));
        assert!(matches!(
            classify("IMG_0341.MOV.m3u8.tmp"),
            Some(LegacyKind::Tmp)
        ));
        assert!(classify("README.md").is_none());
        assert!(classify("ab").is_none()); // shard dir name
        assert!(classify(".keep").is_none());
    }

    /// Root-level legacy files are removed and counted per kind, while
    /// the new hash-dir layout and unrelated files survive.
    #[test]
    fn retire_deletes_legacy_and_leaves_hash_dirs() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        // Legacy artifacts at root.
        fs::write(root.join("IMG_0341.MOV.m3u8"), b"#EXTM3U").unwrap();
        fs::write(root.join("IMG_0341.MOV_000.ts"), b"\x00").unwrap();
        fs::write(root.join("IMG_0341.MOV_001.ts"), b"\x00").unwrap();
        fs::write(root.join("clip.MP4.m3u8.unsupported"), b"").unwrap();
        fs::write(root.join("partial.m3u8.tmp"), b"").unwrap();
        // New-layout hash dir we must NOT touch.
        let hash_dir = root.join("ab").join("a".repeat(64));
        fs::create_dir_all(&hash_dir).unwrap();
        fs::write(hash_dir.join("playlist.m3u8"), b"#EXTM3U").unwrap();
        fs::write(hash_dir.join("segment_000.ts"), b"\x00").unwrap();
        // Unrelated file we must NOT touch.
        fs::write(root.join("README.md"), b"don't touch me").unwrap();
        let stats = retire_legacy_hls_output(root);
        assert_eq!(stats.deleted_playlists, 1);
        assert_eq!(stats.deleted_segments, 2);
        assert_eq!(stats.deleted_sentinels, 1);
        assert_eq!(stats.deleted_tmp, 1);
        assert_eq!(stats.errors, 0);
        // Legacy artifacts gone.
        assert!(!root.join("IMG_0341.MOV.m3u8").exists());
        assert!(!root.join("IMG_0341.MOV_000.ts").exists());
        assert!(!root.join("clip.MP4.m3u8.unsupported").exists());
        assert!(!root.join("partial.m3u8.tmp").exists());
        // Hash dir untouched.
        assert!(hash_dir.join("playlist.m3u8").exists());
        assert!(hash_dir.join("segment_000.ts").exists());
        // Unrelated file untouched.
        assert!(root.join("README.md").exists());
    }

    /// A second run over an already-cleaned directory deletes nothing
    /// and reports no errors.
    #[test]
    fn retire_is_idempotent() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("video.mp4.m3u8"), b"#EXTM3U").unwrap();
        fs::write(root.join("video.mp4_000.ts"), b"\x00").unwrap();
        let first = retire_legacy_hls_output(root);
        assert_eq!(first.deleted_playlists + first.deleted_segments, 2);
        let second = retire_legacy_hls_output(root);
        assert_eq!(second.total_deleted(), 0);
        assert_eq!(second.errors, 0);
    }

    /// An unreadable (here: nonexistent) directory is not counted as an
    /// error; the run just returns zeroed stats.
    #[test]
    fn retire_handles_missing_dir() {
        // No panic, no error count blowing up — just a warn + zero stats.
        let tmp = tempdir().unwrap();
        let missing = tmp.path().join("does_not_exist");
        let stats = retire_legacy_hls_output(&missing);
        assert_eq!(stats.total_deleted(), 0);
        assert_eq!(stats.errors, 0);
    }
}
+2
View File
@@ -9,6 +9,8 @@ use walkdir::WalkDir;
pub mod actors; pub mod actors;
pub mod ffmpeg; pub mod ffmpeg;
pub mod hls_paths;
pub mod legacy_migration;
#[allow(dead_code)] #[allow(dead_code)]
pub async fn generate_video_gifs() { pub async fn generate_video_gifs() {
+240 -137
View File
@@ -22,7 +22,6 @@ use std::time::{Duration, SystemTime};
use actix::Addr; use actix::Addr;
use chrono::Utc; use chrono::Utc;
use log::{debug, error, info, warn}; use log::{debug, error, info, warn};
use walkdir::WalkDir;
use crate::backfill; use crate::backfill;
use crate::content_hash; use crate::content_hash;
@@ -33,6 +32,7 @@ use crate::exif;
use crate::face_watch; use crate::face_watch;
use crate::faces; use crate::faces;
use crate::file_types; use crate::file_types;
use crate::hls_stats;
use crate::libraries; use crate::libraries;
use crate::library_maintenance; use crate::library_maintenance;
use crate::perceptual_hash; use crate::perceptual_hash;
@@ -40,20 +40,34 @@ use crate::tags;
use crate::tags::SqliteTagDao; use crate::tags::SqliteTagDao;
use crate::thumbnails; use crate::thumbnails;
use crate::video; use crate::video;
use crate::video::actors::{GeneratePreviewClipMessage, QueueVideosMessage, VideoPlaylistManager}; use crate::video::actors::{
GeneratePreviewClipMessage, QueueVideosMessage, VideoPlaylistManager, VideoToQueue,
};
use crate::video::hls_paths;
/// Clean up orphaned HLS playlists and segments whose source videos no longer exist. /// Clean up orphaned HLS hash directories under `$VIDEO_PATH` whose
/// content_hash no longer appears in `image_exif`.
///
/// Walks `<video_path>/<shard>/<hash>/` — the layout written by the
/// hash-keyed `PlaylistGenerator` — and deletes any hash directory whose
/// hash isn't in the current DISTINCT set of `image_exif.content_hash`
/// values. Empty shard parents are reaped on the same pass.
///
/// Legacy basename-keyed files at `$VIDEO_PATH` root (from the
/// pre-content-hash layout) are left alone here; the one-shot startup
/// migration is responsible for retiring those.
/// ///
/// `libs_lock` is the shared live view of the libraries table — read at the /// `libs_lock` is the shared live view of the libraries table — read at the
/// top of each cleanup pass so a PATCH /libraries/{id} that disables or /// top of each cleanup pass so a PATCH /libraries/{id} that disables or
/// re-mounts a library is picked up without a restart. /// re-mounts a library is picked up without a restart.
pub fn cleanup_orphaned_playlists( pub fn cleanup_orphaned_playlists(
libs_lock: Arc<RwLock<Vec<libraries::Library>>>, libs_lock: Arc<RwLock<Vec<libraries::Library>>>,
excluded_dirs: Vec<String>, _excluded_dirs: Vec<String>,
library_health: libraries::LibraryHealthMap, library_health: libraries::LibraryHealthMap,
) { ) {
std::thread::spawn(move || { std::thread::spawn(move || {
let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set"); let video_path_str = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
let video_path = PathBuf::from(&video_path_str);
// Get cleanup interval from environment (default: 24 hours) // Get cleanup interval from environment (default: 24 hours)
let cleanup_interval_secs = dotenv::var("PLAYLIST_CLEANUP_INTERVAL_SECONDS") let cleanup_interval_secs = dotenv::var("PLAYLIST_CLEANUP_INTERVAL_SECONDS")
@@ -61,18 +75,14 @@ pub fn cleanup_orphaned_playlists(
.and_then(|s| s.parse::<u64>().ok()) .and_then(|s| s.parse::<u64>().ok())
.unwrap_or(86400); // 24 hours .unwrap_or(86400); // 24 hours
info!("Starting orphaned playlist cleanup job"); info!("Starting orphaned HLS cleanup job");
info!(" Cleanup interval: {} seconds", cleanup_interval_secs); info!(" Cleanup interval: {} seconds", cleanup_interval_secs);
info!(" Playlist directory: {}", video_path); info!(" HLS directory: {}", video_path.display());
{
let libs = libs_lock.read().unwrap_or_else(|e| e.into_inner()); let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(
for lib in libs.iter() { SqliteExifDao::new(),
info!( )
" Checking sources under '{}' at {}", as Box<dyn ExifDao>));
lib.name, lib.root_path
);
}
}
loop { loop {
std::thread::sleep(Duration::from_secs(cleanup_interval_secs)); std::thread::sleep(Duration::from_secs(cleanup_interval_secs));
@@ -83,22 +93,27 @@ pub fn cleanup_orphaned_playlists(
let libs: Vec<libraries::Library> = let libs: Vec<libraries::Library> =
libs_lock.read().unwrap_or_else(|e| e.into_inner()).clone(); libs_lock.read().unwrap_or_else(|e| e.into_inner()).clone();
// Safety gate: skip the cleanup cycle if any library is // Safety gate: skip the cleanup cycle if any (enabled)
// stale. A missing source video on a stale library is // library is stale. With hash-keyed layout the orphan
// indistinguishable from a transient unmount, and the // decision is a pure DB query, but the upstream
// cleanup is destructive — we'd rather leak a few playlist // missing-file scan that *removes* image_exif rows already
// files for a tick than delete one whose source is briefly // pauses for stale libraries — so a stale tick can hold
// unreachable. The cycle re-runs on the next interval. // hashes alive that would otherwise have been GC'd. The
// safety is then mostly belt-and-suspenders: a hash that
// should have been retired is just kept one tick longer.
// We'd rather leak a few hash dirs for 24h than wipe a
// hash dir whose source was briefly unreachable.
{ {
let guard = library_health.read().unwrap_or_else(|e| e.into_inner()); let guard = library_health.read().unwrap_or_else(|e| e.into_inner());
let stale: Vec<String> = libs let stale: Vec<String> = libs
.iter() .iter()
.filter(|lib| lib.enabled)
.filter(|lib| guard.get(&lib.id).map(|h| !h.is_online()).unwrap_or(false)) .filter(|lib| guard.get(&lib.id).map(|h| !h.is_online()).unwrap_or(false))
.map(|lib| lib.name.clone()) .map(|lib| lib.name.clone())
.collect(); .collect();
if !stale.is_empty() { if !stale.is_empty() {
warn!( warn!(
"Skipping orphaned-playlist cleanup: {} library(ies) stale: [{}]", "Skipping orphaned-HLS cleanup: {} library(ies) stale: [{}]",
stale.len(), stale.len(),
stale.join(", ") stale.join(", ")
); );
@@ -106,116 +121,129 @@ pub fn cleanup_orphaned_playlists(
} }
} }
info!("Running orphaned playlist cleanup"); info!("Running orphaned HLS cleanup");
let start = std::time::Instant::now(); let start = std::time::Instant::now();
let mut deleted_count = 0;
let mut error_count = 0;
// Find all .m3u8 files in VIDEO_PATH // Snapshot every live content_hash currently in image_exif.
let playlists: Vec<PathBuf> = WalkDir::new(&video_path) // We intentionally don't filter by library here — a hash that
.into_iter() // lives in any library is alive, even if the library a given
.filter_map(|e| e.ok()) // download attributed it to has since been disabled.
.filter(|e| e.file_type().is_file()) let alive_hashes: HashSet<String> = {
.filter(|e| { let context = opentelemetry::Context::new();
e.path() let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
.extension() match dao.list_distinct_content_hashes(&context) {
.and_then(|s| s.to_str()) Ok(hashes) => hashes.into_iter().collect(),
.map(|ext| ext.eq_ignore_ascii_case("m3u8")) Err(e) => {
.unwrap_or(false) error!(
}) "Failed to load distinct content hashes; skipping HLS cleanup: {:?}",
.map(|e| e.path().to_path_buf()) e
.collect();
info!("Found {} playlist files to check", playlists.len());
for playlist_path in playlists {
// Extract the original video filename from playlist name
// Playlist format: {VIDEO_PATH}/{original_filename}.m3u8
if let Some(filename) = playlist_path.file_stem() {
let video_filename = filename.to_string_lossy();
// Search for this video file across every configured
// library, respecting EXCLUDED_DIRS so we don't
// false-resurrect playlists for videos that only
// exist inside an excluded subtree. As soon as one
// library has a matching source, we're done — the
// playlist isn't orphaned.
let mut video_exists = false;
'libs: for lib in &libs {
let effective = lib.effective_excluded_dirs(&excluded_dirs);
for entry in image_api::file_scan::walk_library_files(
Path::new(&lib.root_path),
&effective,
) {
if let Some(entry_stem) = entry.path().file_stem()
&& entry_stem == filename
&& file_types::is_video_file(entry.path())
{
video_exists = true;
break 'libs;
}
}
}
if !video_exists {
debug!(
"Source video for playlist {} no longer exists, deleting",
playlist_path.display()
); );
continue;
}
}
};
// Delete the playlist file let mut deleted_count = 0usize;
if let Err(e) = std::fs::remove_file(&playlist_path) { let mut error_count = 0usize;
let mut inspected = 0usize;
// Walk top-level entries of VIDEO_PATH. Each is either a
// legacy basename-keyed `.m3u8` / `.ts` (skip — migration
// owns those) or a 2-char shard directory.
let read_root = match std::fs::read_dir(&video_path) {
Ok(r) => r,
Err(e) => {
error!(
"HLS cleanup: failed to read VIDEO_PATH {}: {}",
video_path.display(),
e
);
continue;
}
};
for shard_entry in read_root.flatten() {
let shard_path = shard_entry.path();
if !shard_entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
continue;
}
let shard_name = match shard_path.file_name().and_then(|n| n.to_str()) {
Some(n) => n.to_owned(),
None => continue,
};
if !is_hash_shard(&shard_name) {
continue;
}
// Hash dirs inside this shard.
let read_shard = match std::fs::read_dir(&shard_path) {
Ok(r) => r,
Err(e) => {
warn!( warn!(
"Failed to delete playlist {}: {}", "HLS cleanup: failed to read shard {}: {}",
playlist_path.display(), shard_path.display(),
e
);
continue;
}
};
let mut shard_emptied = true;
for hash_entry in read_shard.flatten() {
let hash_path = hash_entry.path();
if !hash_entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
shard_emptied = false;
continue;
}
let Some(hash_name) = hash_path
.file_name()
.and_then(|n| n.to_str())
.map(|n| n.to_owned())
else {
shard_emptied = false;
continue;
};
if !is_full_hash(&hash_name) {
shard_emptied = false;
continue;
}
inspected += 1;
if alive_hashes.contains(&hash_name) {
shard_emptied = false;
continue;
}
debug!(
"HLS cleanup: removing orphan hash dir {}",
hash_path.display()
);
match std::fs::remove_dir_all(&hash_path) {
Ok(()) => deleted_count += 1,
Err(e) => {
warn!(
"Failed to delete orphan hash dir {}: {}",
hash_path.display(),
e e
); );
error_count += 1; error_count += 1;
} else { shard_emptied = false;
deleted_count += 1; }
}
}
// Also try to delete associated .ts segment files // If this shard now has no surviving hash dirs, reap
// They are typically named {filename}N.ts in the same directory // the (empty) shard dir too. remove_dir fails if non-
if let Some(parent_dir) = playlist_path.parent() { // empty, which is the guard.
for entry in WalkDir::new(parent_dir) if shard_emptied {
.max_depth(1) let _ = std::fs::remove_dir(&shard_path);
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
{
let entry_path = entry.path();
if let Some(ext) = entry_path.extension()
&& ext.eq_ignore_ascii_case("ts")
{
// Check if this .ts file belongs to our playlist
if let Some(ts_stem) = entry_path.file_stem() {
let ts_name = ts_stem.to_string_lossy();
if ts_name.starts_with(&*video_filename) {
if let Err(e) = std::fs::remove_file(entry_path) {
debug!(
"Failed to delete segment {}: {}",
entry_path.display(),
e
);
} else {
debug!(
"Deleted segment: {}",
entry_path.display()
);
}
}
}
}
}
}
}
}
} }
} }
info!( info!(
"Orphaned playlist cleanup completed in {:?}: deleted {} playlists, {} errors", "Orphaned HLS cleanup completed in {:?}: inspected {} hash dirs, deleted {} orphans, {} errors",
start.elapsed(), start.elapsed(),
inspected,
deleted_count, deleted_count,
error_count error_count
); );
@@ -223,6 +251,18 @@ pub fn cleanup_orphaned_playlists(
}); });
} }
/// True iff `s` is a two-character lowercase-hex shard prefix
/// (`[0-9a-f]{2}`).
///
/// Uppercase hex digits are rejected on purpose. The shard name gates a
/// destructive directory walk, so a directory such as `AB` that an
/// operator stashed under VIDEO_PATH must not be mistaken for a shard.
/// `u8::is_ascii_hexdigit` is not used because it also accepts `A-F`.
fn is_hash_shard(s: &str) -> bool {
    s.len() == 2 && s.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}
/// True iff `s` looks like a full blake3 hex digest: exactly 64
/// lowercase hex characters (`[0-9a-f]{64}`).
///
/// Be strict so we don't accidentally rm a non-HLS directory operators
/// have stashed under VIDEO_PATH. Uppercase digits are rejected: the
/// orphan check is an exact string lookup against the stored hashes, so
/// a differently-cased directory name could never be found "alive" and
/// would always be deleted.
// NOTE(review): assumes stored content hashes are lowercase hex, as the
// lowercase shard-prefix convention implies — confirm against the hasher.
fn is_full_hash(s: &str) -> bool {
    s.len() == 64 && s.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}
pub fn watch_files( pub fn watch_files(
libs_lock: Arc<RwLock<Vec<libraries::Library>>>, libs_lock: Arc<RwLock<Vec<libraries::Library>>>,
playlist_manager: Addr<VideoPlaylistManager>, playlist_manager: Addr<VideoPlaylistManager>,
@@ -288,7 +328,12 @@ pub fn watch_files(
)); ));
let mut last_quick_scan = SystemTime::now(); let mut last_quick_scan = SystemTime::now();
let mut last_full_scan = SystemTime::now(); // Initialize to UNIX_EPOCH so the *first* tick is treated as a
// full scan. That replaces the legacy startup ScanDirectoryMessage
// walk for HLS playlists: every library's existing media gets
// checked once at watcher boot, instead of waiting up to
// full_interval_secs (1h default) for the first natural full scan.
let mut last_full_scan = SystemTime::UNIX_EPOCH;
let mut scan_count = 0u64; let mut scan_count = 0u64;
// Per-library cursor for the missing-file scan. Each tick reads // Per-library cursor for the missing-file scan. Each tick reads
@@ -531,6 +576,16 @@ pub fn watch_files(
} }
if is_full_scan { if is_full_scan {
// End-of-full-scan HLS readiness summary: log a single
// info line + refresh the Prometheus gauges. Skipped on
// quick scans because the cost is non-trivial on big
// libraries and the data only meaningfully changes on
// full passes.
let video_dir_str = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
let stats =
hls_stats::compute_and_publish(&libs, &exif_dao, Path::new(&video_dir_str));
hls_stats::log_summary(&stats);
last_full_scan = now; last_full_scan = now;
} }
last_quick_scan = now; last_quick_scan = now;
@@ -600,14 +655,18 @@ pub fn process_new_files(
// Batch query: Get all EXIF data for these files in one query // Batch query: Get all EXIF data for these files in one query
let file_paths: Vec<String> = files.iter().map(|(_, rel_path)| rel_path.clone()).collect(); let file_paths: Vec<String> = files.iter().map(|(_, rel_path)| rel_path.clone()).collect();
let existing_exif_paths: HashMap<String, bool> = { // Map of rel_path -> Option<content_hash>. The presence of the key
// tells us "row exists"; the Option value carries the hash for the
// HLS pipeline so video files without a hash (mid-backfill) skip
// this tick rather than fall back to a basename-colliding playlist.
let existing_exif: HashMap<String, Option<String>> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
// Walk is per-library, so scope the lookup so a same-named file // Walk is per-library, so scope the lookup so a same-named file
// in another library doesn't make this one look already-indexed. // in another library doesn't make this one look already-indexed.
match dao.get_exif_batch(&context, Some(library.id), &file_paths) { match dao.get_exif_batch(&context, Some(library.id), &file_paths) {
Ok(exif_records) => exif_records Ok(exif_records) => exif_records
.into_iter() .into_iter()
.map(|record| (record.file_path, true)) .map(|record| (record.file_path, record.content_hash))
.collect(), .collect(),
Err(e) => { Err(e) => {
error!("Error batch querying EXIF data: {:?}", e); error!("Error batch querying EXIF data: {:?}", e);
@@ -637,7 +696,7 @@ pub fn process_new_files(
&& !bare_legacy_thumb_path.exists() && !bare_legacy_thumb_path.exists()
&& !thumbnails::unsupported_thumbnail_sentinel(&scoped_thumb_path).exists() && !thumbnails::unsupported_thumbnail_sentinel(&scoped_thumb_path).exists()
&& !thumbnails::unsupported_thumbnail_sentinel(&bare_legacy_thumb_path).exists(); && !thumbnails::unsupported_thumbnail_sentinel(&bare_legacy_thumb_path).exists();
let needs_row = !existing_exif_paths.contains_key(relative_path); let needs_row = !existing_exif.contains_key(relative_path);
if needs_thumbnail || needs_row { if needs_thumbnail || needs_row {
new_files_found = true; new_files_found = true;
@@ -796,28 +855,45 @@ pub fn process_new_files(
} }
} }
// Check for videos that need HLS playlists // Check for videos that need HLS playlists. All output is keyed on
// `content_hash` (see `crate::video::hls_paths`), so files whose
// `image_exif.content_hash` is still NULL — typically mid-backfill —
// are skipped this tick and picked up after the unhashed backlog
// drain populates the hash on a subsequent tick. Skipping is the
// correct call: queuing without a hash would either fall back to
// basename keying (the bug this refactor fixes) or fabricate one.
let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set"); let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
let mut videos_needing_playlists = Vec::new(); let video_dir = Path::new(&video_path_base);
let mut videos_needing_playlists: Vec<VideoToQueue> = Vec::new();
let mut hashless_video_count = 0usize;
for (file_path, _relative_path) in &files { for (file_path, relative_path) in &files {
if file_types::is_video_file(file_path) { if !file_types::is_video_file(file_path) {
// Construct expected playlist path continue;
let playlist_filename = }
format!("{}.m3u8", file_path.file_name().unwrap().to_string_lossy()); let Some(hash) = existing_exif.get(relative_path).and_then(|h| h.clone()) else {
let playlist_path = Path::new(&video_path_base).join(&playlist_filename); hashless_video_count += 1;
continue;
// Check if playlist needs (re)generation };
let playlist_path = hls_paths::playlist_for_hash(video_dir, &hash);
if playlist_needs_generation(file_path, &playlist_path) { if playlist_needs_generation(file_path, &playlist_path) {
videos_needing_playlists.push(file_path.clone()); videos_needing_playlists.push(VideoToQueue {
} video_path: file_path.clone(),
content_hash: hash,
});
} }
} }
// Send queue request to playlist manager if hashless_video_count > 0 {
debug!(
"Watcher tick for '{}': skipped {} video(s) with NULL content_hash (will retry after backfill)",
library.name, hashless_video_count
);
}
if !videos_needing_playlists.is_empty() { if !videos_needing_playlists.is_empty() {
playlist_manager.do_send(QueueVideosMessage { playlist_manager.do_send(QueueVideosMessage {
video_paths: videos_needing_playlists, videos: videos_needing_playlists,
}); });
} }
@@ -962,6 +1038,33 @@ mod tests {
assert!(playlist_needs_generation(&video, &playlist)); assert!(playlist_needs_generation(&video, &playlist));
} }
#[test]
fn is_hash_shard_accepts_only_two_hex_chars() {
    // Inputs the shard check must accept. "FF" is here because ASCII
    // hexdigit covers upper-case too.
    let accepted = ["ab", "00", "FF"];
    // Inputs the shard check must reject: too short, too long, non-hex,
    // empty, and a hex char followed by a path separator.
    let rejected = ["a", "abc", "zz", "", "a/"];

    for candidate in accepted {
        assert!(
            is_hash_shard(candidate),
            "expected {candidate:?} to be accepted"
        );
    }
    for candidate in rejected {
        assert!(
            !is_hash_shard(candidate),
            "expected {candidate:?} to be rejected"
        );
    }
}
#[test]
fn is_full_hash_accepts_only_64_hex_chars() {
    // Exactly 64 hex characters pass, whether uniform or mixed digits.
    let accepted = ["a".repeat(64), format!("ab{}", "0".repeat(62))];
    for candidate in &accepted {
        assert!(
            is_full_hash(candidate.as_str()),
            "expected {candidate:?} to be accepted"
        );
    }

    let rejected = [
        // One character short of 64.
        "a".repeat(63),
        // One character over 64.
        "a".repeat(65),
        // Correct length, but the leading character is not a hex digit.
        format!("z{}", "a".repeat(63)),
        // Defends against operator stashing e.g. ".tmp" or "Plex" under
        // VIDEO_PATH — neither passes the full-hash gate.
        ".tmp".to_owned(),
        "Plex".to_owned(),
    ];
    for candidate in &rejected {
        assert!(
            !is_full_hash(candidate.as_str()),
            "expected {candidate:?} to be rejected"
        );
    }
}
#[test] #[test]
fn playlist_needs_generation_true_when_video_missing_metadata() { fn playlist_needs_generation_true_when_video_missing_metadata() {
// Video doesn't exist; metadata fails for it. Falls through to the // Video doesn't exist; metadata fails for it. Falls through to the