@@ -6,7 +6,7 @@
// (audio read directly; video has its audio track extracted via ffmpeg).
use actix_multipart ::Multipart ;
use actix_web ::{ HttpRequest , HttpResponse , Responder , get , post , web } ;
use actix_web ::{ HttpRequest , HttpResponse , Responder , delete , get , post , web } ;
use anyhow ::Context ;
use base64 ::Engine ;
use bytes ::{ BufMut , BytesMut } ;
@@ -15,10 +15,13 @@ use opentelemetry::KeyValue;
use opentelemetry ::trace ::{ Span , Status , Tracer } ;
use regex ::Regex ;
use serde ::{ Deserialize , Serialize } ;
use serde_json ::json ;
use serde_json ::{ Value , json } ;
use std ::collections ::HashMap ;
use std ::path ::Path ;
use std ::sync ::LazyLock ;
use std ::sync ::{ LazyLock , Mutex as StdMutex } ;
use std ::time ::{ Duration , Instant } ;
use tokio ::sync ::Semaphore ;
use uuid ::Uuid ;
use crate ::data ::Claims ;
use crate ::file_types ::{ is_audio_file , is_video_file } ;
@@ -40,6 +43,105 @@ const MAX_VOICE_UPLOAD_BYTES: usize = 25 * 1024 * 1024; // 25 MB
/// finishes — that's a wrapper limitation; the chunked-queue plan fixes it.)
static TTS_PERMIT : LazyLock < Semaphore > = LazyLock ::new ( | | Semaphore ::new ( 1 ) ) ;
// --- Voice-list cache --------------------------------------------------------
/// Cached raw voice-library JSON. llama-swap's `/upstream/<model>/voices`
/// passthrough spins the TTS model up just to answer a listing — which can
/// evict the resident LLM — so we serve a cached copy and only hit upstream on
/// a cold cache, an explicit `?refresh=1`, or after a voice create/delete
/// invalidates it (the TTS model is already loaded right then anyway).
///
/// `None` means the cache is cold (the initial state, and the state after
/// `invalidate_voices_cache`). Process-wide and in-memory only; access goes
/// through `cached_voices` / `store_voices_cache` / `invalidate_voices_cache`.
static VOICES_CACHE : LazyLock < StdMutex < Option < Value > > > = LazyLock ::new ( | | StdMutex ::new ( None ) ) ;
/// Return a copy of the cached voice list, or `None` when the cache is cold.
fn cached_voices() -> Option<Value> {
    let slot = VOICES_CACHE.lock().unwrap();
    (*slot).clone()
}
/// Replace the cached voice list with a copy of `v`.
fn store_voices_cache(v: &Value) {
    // Clone before taking the lock so the critical section is just the swap.
    let snapshot = v.clone();
    let mut slot = VOICES_CACHE.lock().unwrap();
    *slot = Some(snapshot);
}
/// Empty the voice-list cache so the next listing goes back to upstream.
fn invalidate_voices_cache() {
    let mut slot = VOICES_CACHE.lock().unwrap();
    drop(slot.take());
}
// --- Async speech jobs -------------------------------------------------------
//
// Synthesizing a long insight can take minutes — too long to hang one HTTP
// request from a phone that may background the app or drop the connection.
// Durable variant: POST /tts/speech/jobs returns a job id immediately, the
// synth runs in a spawned task (queuing on TTS_PERMIT instead of fast-failing
// 429), and the client polls GET /tts/speech/jobs/{id} until it collects the
// audio. State is in-memory only (deliberately lighter than the chat
// TurnRegistry): a restart loses jobs, the client surfaces that and retries.
/// Lifecycle state of an async speech job, as reported to polling clients.
/// `Done`, `Error` and `Cancelled` are terminal (see `is_terminal`).
#[ derive(Clone, Copy, PartialEq, Eq, Debug, Serialize) ]
#[ serde(rename_all = " snake_case " ) ]
pub enum TtsJobStatus {
/// Registered and waiting for the TTS permit; synthesis has not started.
Queued ,
/// The spawned task holds the permit and is synthesizing.
Running ,
/// Terminal: synthesis succeeded and the audio is available.
Done ,
/// Terminal: synthesis failed; the job carries an error message.
Error ,
/// Terminal: the job was cancelled through the cancel endpoint.
Cancelled ,
}
impl TtsJobStatus {
    /// Whether the job has reached a final state and will not change again.
    fn is_terminal(self) -> bool {
        // Spelled out exhaustively so a new variant forces a decision here.
        match self {
            Self::Done | Self::Error | Self::Cancelled => true,
            Self::Queued | Self::Running => false,
        }
    }
}
/// In-memory record for one async speech job, stored in `TTS_JOBS`.
struct TtsJob {
/// Current lifecycle state.
status : TtsJobStatus ,
/// Audio format chosen at creation; echoed back in status responses.
format : String ,
/// Base64-encoded audio, set when synthesis succeeds.
audio_base64 : Option < String > ,
/// Failure message, set when the job ends in `Error`.
error : Option < String > ,
/// When the job was registered; compared against `TTS_JOB_MAX_AGE`.
created_at : Instant ,
/// When the job reached a terminal state; compared against
/// `TTS_JOB_RESULT_TTL`. `None` while queued or running.
finished_at : Option < Instant > ,
/// Handle used to abort the spawned synthesis task on cancel or sweep.
/// Cleared once the job finishes.
abort : Option < tokio ::task ::AbortHandle > ,
}
/// How long a finished job's result is retained, measured from `finished_at`,
/// so a client that lost connectivity can still collect it on a later poll.
/// Entries are removed lazily by `sweep_stale_jobs`, not by a timer, so a
/// result may outlive this TTL until the next sweep runs.
const TTS_JOB_RESULT_TTL : Duration = Duration ::from_secs ( 10 * 60 ) ;
/// Hard cap on a job's total lifetime, measured from `created_at`. Anything
/// older is dropped outright by `sweep_stale_jobs` (aborted first if it still
/// holds an abort handle), whether or not it ever finished.
const TTS_JOB_MAX_AGE : Duration = Duration ::from_secs ( 30 * 60 ) ;
/// Process-wide registry of async speech jobs, keyed by job id. In-memory
/// only, so its contents do not survive a restart. Guarded by a std (blocking)
/// mutex, which is why callers take it only for short, non-async sections.
static TTS_JOBS : LazyLock < StdMutex < HashMap < Uuid , TtsJob > > > =
LazyLock ::new ( | | StdMutex ::new ( HashMap ::new ( ) ) ) ;
fn sweep_stale_jobs ( jobs : & mut HashMap < Uuid , TtsJob > , now : Instant ) {
jobs . retain ( | _ , job | {
let result_expired = job
. finished_at
. is_some_and ( | t | now . duration_since ( t ) > = TTS_JOB_RESULT_TTL ) ;
let too_old = now . duration_since ( job . created_at ) > = TTS_JOB_MAX_AGE ;
if too_old & & let Some ( h ) = job . abort . take ( ) {
h . abort ( ) ;
}
! ( result_expired | | too_old )
} ) ;
}
/// Apply `f` to the job registered under `id` while holding the registry
/// lock. Returns `None` (without calling `f`) when no such job exists.
fn with_job<R>(id: Uuid, f: impl FnOnce(&mut TtsJob) -> R) -> Option<R> {
    let mut registry = TTS_JOBS.lock().unwrap();
    let job = registry.get_mut(&id)?;
    Some(f(job))
}
/// Move a job to a terminal state (first terminal write wins — a cancel that
/// raced a completion keeps the cancel).
fn finish_job ( id : Uuid , status : TtsJobStatus , audio_base64 : Option < String > , error : Option < String > ) {
with_job ( id , | job | {
if job . status . is_terminal ( ) {
return ;
}
job . status = status ;
job . audio_base64 = audio_base64 ;
job . error = error ;
job . finished_at = Some ( Instant ::now ( ) ) ;
job . abort = None ;
} ) ;
}
/// Sanitize a user-supplied voice name. The name is forwarded to Chatterbox
/// where it becomes a filename in the voice-library directory, so we restrict
/// it to a safe charset (alphanumerics, dash, underscore) — no path
@@ -64,6 +166,33 @@ fn sanitize_voice_name(raw: &str) -> Option<String> {
Some ( cleaned . chars ( ) . take ( 64 ) . collect ( ) )
}
/// Maximum reference-clip length, in seconds, used for voice cloning.
///
/// Read from `LLAMA_SWAP_TTS_REF_SECONDS`; falls back to 30 when the variable
/// is unset, is not a valid `u32` after trimming, or is zero. (Chatterbox is
/// zero-shot — a clean ~10–20s sample is the sweet spot and more rarely helps.)
fn tts_ref_seconds() -> u32 {
    const DEFAULT_REF_SECONDS: u32 = 30;
    match std::env::var("LLAMA_SWAP_TTS_REF_SECONDS") {
        Ok(raw) => match raw.trim().parse::<u32>() {
            Ok(secs) if secs > 0 => secs,
            _ => DEFAULT_REF_SECONDS,
        },
        Err(_) => DEFAULT_REF_SECONDS,
    }
}
/// Suffix a (sanitized) voice name with the reference-clip cap it was cloned
/// with, e.g. `grandma` → `grandma-30s`, so clones of one source at different
/// caps can be told apart in the voice list.
///
/// A name that already ends in the same tag is returned unchanged. Otherwise
/// the base name is cut to leave room for the tag within 64 characters (the
/// tag itself is never shortened), trailing dashes are removed from the base,
/// and the tag is appended.
fn append_ref_seconds(name: &str, secs: u32) -> String {
    let tag = format!("-{secs}s");
    if name.ends_with(tag.as_str()) {
        return name.to_owned();
    }
    // Character budget left for the base once the tag is accounted for.
    let budget = 64usize.saturating_sub(tag.len());
    let mut tagged: String = name.chars().take(budget).collect();
    // Avoid a doubled dash where the base meets the tag.
    let kept = tagged.trim_end_matches('-').len();
    tagged.truncate(kept);
    tagged.push_str(&tag);
    tagged
}
/// Optional default voice for synthesis when the request doesn't name one.
/// Set `LLAMA_SWAP_TTS_VOICE=m` to read insights in a cloned voice by default.
fn default_voice ( ) -> Option < String > {
@@ -137,15 +266,9 @@ async fn run_ffmpeg_to_wav(input_path: &str) -> anyhow::Result<Vec<u8>> {
. context ( " creating temp wav " ) ? ;
let out_s = out . path ( ) . to_string_lossy ( ) . to_string ( ) ;
// Cap the reference clip length. Chatterbox is zero-shot — a clean ~10– 20s
// sample is the sweet spot and more rarely helps — so we use the first N
// seconds. Tune via LLAMA_SWAP_TTS_REF_SECONDS (default 30).
let secs = std ::env ::var ( " LLAMA_SWAP_TTS_REF_SECONDS " )
. ok ( )
. and_then ( | s | s . trim ( ) . parse ::< u32 > ( ) . ok ( ) )
. filter ( | n | * n > 0 )
. unwrap_or ( 30 )
. to_string ( ) ;
// Cap the reference clip length — we use the first N seconds (see
// tts_ref_seconds).
let secs = tts_ref_seconds ( ) . to_string ( ) ;
let output = tokio ::process ::Command ::new ( " ffmpeg " )
. args ( [
@@ -276,16 +399,277 @@ pub async fn tts_speech_handler(
}
}
/// Body of the 202 response returned by POST /tts/speech/jobs: the id the
/// client should poll, plus the job's status at creation time.
#[ derive(Debug, Serialize) ]
pub struct TtsJobCreatedResponse {
/// Job id rendered as a string (the UUID's `to_string()` form).
pub job_id : String ,
/// Status at the moment the response is built.
pub status : TtsJobStatus ,
}
/// Body of a speech-job poll response (GET /tts/speech/jobs/{id}).
#[ derive(Debug, Serialize) ]
pub struct TtsJobStatusResponse {
/// Job id rendered as a string.
pub job_id : String ,
/// Current lifecycle state of the job.
pub status : TtsJobStatus ,
/// Audio format recorded when the job was created.
pub format : String ,
/// Base64-encoded audio; left out of the JSON while it is `None`.
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub audio_base64 : Option < String > ,
/// Failure message; left out of the JSON while it is `None`.
#[ serde(skip_serializing_if = " Option::is_none " ) ]
pub error : Option < String > ,
}
/// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
/// Returns 202 + a job id immediately; the synth queues on the single GPU
/// permit (instead of fast-failing 429) and the client polls the job until
/// the audio is ready.
///
/// Responds 400 when the cleaned text is empty and 503 when no TTS backend is
/// configured; in both cases no job is registered. Otherwise the job is
/// inserted as `Queued`, a task is spawned to run it, and the task's abort
/// handle is stored on the job so it can be cancelled or swept later.
#[ post( " /tts/speech/jobs " ) ]
pub async fn create_speech_job_handler (
http_request : HttpRequest ,
_claims : Claims ,
req : web ::Json < TtsSpeechRequest > ,
app_state : web ::Data < AppState > ,
) -> impl Responder {
let parent_context = extract_context_from_request ( & http_request ) ;
let mut span =
global_tracer ( ) . start_with_context ( " http.tts.speech_job.create " , & parent_context ) ;
// Validate up front so a bad request never creates a job.
let text = clean_for_tts ( & req . text ) ;
if text . is_empty ( ) {
span . set_status ( Status ::error ( " text is required " ) ) ;
return HttpResponse ::BadRequest ( ) . json ( json! ( { " error " : " text is required " } ) ) ;
}
if app_state . llamacpp . is_none ( ) {
span . set_status ( Status ::error ( " tts backend not configured " ) ) ;
return HttpResponse ::ServiceUnavailable ( )
. json ( json! ( { " error " : " TTS backend not configured (set LLAMA_SWAP_URL) " } ) ) ;
}
// An absent or empty format falls back to the default literal below.
let format = req
. format
. as_deref ( )
. filter ( | s | ! s . is_empty ( ) )
. unwrap_or ( " mp3 " )
. to_string ( ) ;
// An absent or empty voice falls back to `default_voice()`, which may
// itself yield `None`.
let voice = req
. voice
. clone ( )
. filter ( | s | ! s . is_empty ( ) )
. or_else ( default_voice ) ;
// Clamp generation knobs to Chatterbox's documented ranges before forwarding.
let exaggeration = req . exaggeration . map ( | x | x . clamp ( 0.25 , 2.0 ) ) ;
let cfg_weight = req . cfg_weight . map ( | x | x . clamp ( 0.0 , 1.0 ) ) ;
let temperature = req . temperature . map ( | x | x . clamp ( 0.05 , 5.0 ) ) ;
span . set_attribute ( KeyValue ::new ( " tts.format " , format . clone ( ) ) ) ;
span . set_attribute ( KeyValue ::new ( " tts.has_voice " , voice . is_some ( ) ) ) ;
span . set_attribute ( KeyValue ::new ( " tts.text_len " , text . len ( ) as i64 ) ) ;
let job_id = Uuid ::new_v4 ( ) ;
// Register the job before spawning so the task always finds its entry.
// The scoped block releases the registry lock before the spawn; stale
// jobs are swept here, on each new dispatch.
{
let mut jobs = TTS_JOBS . lock ( ) . unwrap ( ) ;
sweep_stale_jobs ( & mut jobs , Instant ::now ( ) ) ;
jobs . insert (
job_id ,
TtsJob {
status : TtsJobStatus ::Queued ,
format : format . clone ( ) ,
audio_base64 : None ,
error : None ,
created_at : Instant ::now ( ) ,
finished_at : None ,
abort : None ,
} ,
) ;
}
let state = app_state . clone ( ) ;
let handle = tokio ::spawn ( async move {
// Queue rather than fast-fail: jobs wait their turn for the GPU.
// `_permit` is held until the task returns or is aborted.
let _permit = match TTS_PERMIT . acquire ( ) . await {
Ok ( p ) = > p ,
Err ( _ ) = > {
finish_job (
job_id ,
TtsJobStatus ::Error ,
None ,
Some ( " TTS queue closed " . to_string ( ) ) ,
) ;
return ;
}
} ;
// Cancelled while queued — release the permit without synthesizing.
// Only a job still in `Queued` is promoted to `Running`; any other
// state, or a job that no longer exists, counts as cancelled.
let cancelled = with_job ( job_id , | job | {
if job . status = = TtsJobStatus ::Queued {
job . status = TtsJobStatus ::Running ;
false
} else {
true
}
} )
. unwrap_or ( true ) ;
if cancelled {
return ;
}
let Some ( client ) = state . llamacpp . as_ref ( ) else {
finish_job (
job_id ,
TtsJobStatus ::Error ,
None ,
Some ( " TTS backend not configured " . to_string ( ) ) ,
) ;
return ;
} ;
match client
. text_to_speech (
& text ,
voice . as_deref ( ) ,
& format ,
exaggeration ,
cfg_weight ,
temperature ,
)
. await
{
Ok ( bytes ) = > {
// Results are stored base64-encoded, ready to go into the poll response.
let audio = base64 ::engine ::general_purpose ::STANDARD . encode ( & bytes ) ;
finish_job ( job_id , TtsJobStatus ::Done , Some ( audio ) , None ) ;
}
Err ( e ) = > {
log ::error! ( " TTS job {job_id} failed: {:?} " , e ) ;
finish_job (
job_id ,
TtsJobStatus ::Error ,
None ,
Some ( format! ( " TTS failed: {e} " ) ) ,
) ;
}
}
} ) ;
// Install the abort handle now that the task exists. If the job already
// reached a terminal state there is nothing left to abort, so the handle
// is not stored.
with_job ( job_id , | job | {
if ! job . status . is_terminal ( ) {
job . abort = Some ( handle . abort_handle ( ) ) ;
}
} ) ;
span . set_status ( Status ::Ok ) ;
// The response always reports `Queued`, whatever the task has done since.
HttpResponse ::Accepted ( ) . json ( TtsJobCreatedResponse {
job_id : job_id . to_string ( ) ,
status : TtsJobStatus ::Queued ,
} )
}
/// GET /tts/speech/jobs/{id} — poll a speech job; returns the audio once done.
/// 404s after the job expires (results are kept ~10 min past completion).
#[ get( " /tts/speech/jobs/{id} " ) ]
pub async fn speech_job_status_handler (
http_request : HttpRequest ,
_claims : Claims ,
path : web ::Path < String > ,
) -> impl Responder {
let parent_context = extract_context_from_request ( & http_request ) ;
let mut span =
global_tracer ( ) . start_with_context ( " http.tts.speech_job.status " , & parent_context ) ;
let Ok ( id ) = Uuid ::parse_str ( & path . into_inner ( ) ) else {
span . set_status ( Status ::error ( " invalid job id " ) ) ;
return HttpResponse ::BadRequest ( ) . json ( json! ( { " error " : " invalid job id " } ) ) ;
} ;
let resp = {
let jobs = TTS_JOBS . lock ( ) . unwrap ( ) ;
jobs . get ( & id ) . map ( | job | TtsJobStatusResponse {
job_id : id . to_string ( ) ,
status : job . status ,
format : job . format . clone ( ) ,
audio_base64 : job . audio_base64 . clone ( ) ,
error : job . error . clone ( ) ,
} )
} ;
match resp {
Some ( r ) = > {
span . set_status ( Status ::Ok ) ;
HttpResponse ::Ok ( ) . json ( r )
}
None = > {
span . set_status ( Status ::error ( " job not found " ) ) ;
HttpResponse ::NotFound ( )
. json ( json! ( { " error " : " TTS job not found (it may have expired) " } ) )
}
}
}
/// DELETE /tts/speech/jobs/{id} — cancel a queued/running speech job. Once the
/// upstream GPU job has started it can't be interrupted (same wrapper
/// limitation as the sync path); cancelling stops the wait and discards the
/// result. Cancelling an already-finished job leaves it terminal.
#[ delete( " /tts/speech/jobs/{id} " ) ]
pub async fn cancel_speech_job_handler (
http_request : HttpRequest ,
_claims : Claims ,
path : web ::Path < String > ,
) -> impl Responder {
let parent_context = extract_context_from_request ( & http_request ) ;
let mut span =
global_tracer ( ) . start_with_context ( " http.tts.speech_job.cancel " , & parent_context ) ;
let Ok ( id ) = Uuid ::parse_str ( & path . into_inner ( ) ) else {
span . set_status ( Status ::error ( " invalid job id " ) ) ;
return HttpResponse ::BadRequest ( ) . json ( json! ( { " error " : " invalid job id " } ) ) ;
} ;
let status = with_job ( id , | job | {
if ! job . status . is_terminal ( ) {
if let Some ( h ) = job . abort . take ( ) {
h . abort ( ) ;
}
job . status = TtsJobStatus ::Cancelled ;
job . finished_at = Some ( Instant ::now ( ) ) ;
}
job . status
} ) ;
match status {
Some ( s ) = > {
span . set_status ( Status ::Ok ) ;
HttpResponse ::Ok ( ) . json ( json! ( { " job_id " : id . to_string ( ) , " status " : s } ) )
}
None = > {
span . set_status ( Status ::error ( " job not found " ) ) ;
HttpResponse ::NotFound ( )
. json ( json! ( { " error " : " TTS job not found (it may have expired) " } ) )
}
}
}
/// Query-string parameters accepted by GET /tts/voices.
#[ derive(Debug, Deserialize) ]
pub struct ListVoicesQuery {
/// `?refresh=1` bypasses the voice-list cache and re-queries upstream
/// (which may spin up the TTS model). Kept as a raw string; the handler
/// decides which values count as "on". Absent means no forced refresh.
#[ serde(default) ]
pub refresh : Option < String > ,
}
/// GET /tts/voices — list the Chatterbox voice library. Served from an
/// in-memory cache when possible so browsing settings doesn't make llama-swap
/// load the TTS model (and evict the resident LLM); see VOICES_CACHE.
#[ get( " /tts/voices " ) ]
pub async fn list_voices_handler (
http_request : HttpRequest ,
_claims : Claims ,
query : web ::Query < ListVoicesQuery > ,
app_state : web ::Data < AppState > ,
) -> impl Responder {
let parent_context = extract_context_from_request ( & http_request ) ;
let mut span = global_tracer ( ) . start_with_context ( " http.tts.voices.list " , & parent_context ) ;
let force = query
. refresh
. as_deref ( )
. is_some_and ( | v | matches! ( v , " 1 " | " true " | " yes " ) ) ;
if ! force & & let Some ( v ) = cached_voices ( ) {
span . set_attribute ( KeyValue ::new ( " tts.voices_cache_hit " , true ) ) ;
span . set_status ( Status ::Ok ) ;
return HttpResponse ::Ok ( ) . json ( v ) ;
}
let Some ( client ) = app_state . llamacpp . as_ref ( ) else {
span . set_status ( Status ::error ( " tts backend not configured " ) ) ;
return HttpResponse ::ServiceUnavailable ( )
@@ -293,6 +677,8 @@ pub async fn list_voices_handler(
} ;
match client . list_voices ( ) . await {
Ok ( v ) = > {
store_voices_cache ( & v ) ;
span . set_attribute ( KeyValue ::new ( " tts.voices_cache_hit " , false ) ) ;
span . set_status ( Status ::Ok ) ;
HttpResponse ::Ok ( ) . json ( v )
}
@@ -304,6 +690,48 @@ pub async fn list_voices_handler(
}
}
/// DELETE /tts/voices/{name} — remove a cloned voice from the library.
#[ delete( " /tts/voices/{name} " ) ]
pub async fn delete_voice_handler (
http_request : HttpRequest ,
_claims : Claims ,
path : web ::Path < String > ,
app_state : web ::Data < AppState > ,
) -> impl Responder {
let parent_context = extract_context_from_request ( & http_request ) ;
let mut span = global_tracer ( ) . start_with_context ( " http.tts.voices.delete " , & parent_context ) ;
let Some ( client ) = app_state . llamacpp . as_ref ( ) else {
span . set_status ( Status ::error ( " tts backend not configured " ) ) ;
return HttpResponse ::ServiceUnavailable ( )
. json ( json! ( { " error " : " TTS backend not configured " } ) ) ;
} ;
// Same charset rule as creation — a name that sanitizes differently was
// never a voice we created, and must not reach the upstream URL.
let raw = path . into_inner ( ) ;
let name = match sanitize_voice_name ( & raw ) {
Some ( n ) if n = = raw = > n ,
_ = > {
span . set_status ( Status ::error ( " invalid voice name " ) ) ;
return HttpResponse ::BadRequest ( ) . json ( json! ( { " error " : " invalid voice name " } ) ) ;
}
} ;
span . set_attribute ( KeyValue ::new ( " tts.voice_name " , name . clone ( ) ) ) ;
match client . delete_voice ( & name ) . await {
Ok ( v ) = > {
invalidate_voices_cache ( ) ;
span . set_status ( Status ::Ok ) ;
HttpResponse ::Ok ( ) . json ( v )
}
Err ( e ) = > {
span . set_status ( Status ::error ( " delete_voice failed " ) ) ;
log ::error! ( " delete_voice failed: {:?} " , e ) ;
HttpResponse ::BadGateway ( ) . json ( json! ( { " error " : format ! ( " {e} " ) } ) )
}
}
}
/// POST /tts/voices/upload — register a cloned voice from an uploaded audio
/// clip. Multipart fields: `voice_name` (text) + a file part (`voice_file`).
#[ post( " /tts/voices/upload " ) ]
@@ -363,6 +791,9 @@ pub async fn create_voice_upload_handler(
return HttpResponse ::BadRequest ( )
. json ( json! ( { " error " : " voice_name is required (alphanumerics, - and _ only) " } ) ) ;
} ;
// Tag the name with the ref-clip cap (e.g. `grandma-30s`) so the library
// shows which reference length produced each clone.
let name = append_ref_seconds ( & name , tts_ref_seconds ( ) ) ;
if file_bytes . is_empty ( ) {
span . set_status ( Status ::error ( " voice_file is required " ) ) ;
return HttpResponse ::BadRequest ( ) . json ( json! ( { " error " : " voice_file is required " } ) ) ;
@@ -388,6 +819,7 @@ pub async fn create_voice_upload_handler(
. await
{
Ok ( v ) = > {
invalidate_voices_cache ( ) ;
span . set_status ( Status ::Ok ) ;
HttpResponse ::Ok ( ) . json ( v )
}
@@ -432,6 +864,9 @@ pub async fn create_voice_from_library_handler(
return HttpResponse ::BadRequest ( )
. json ( json! ( { " error " : " voice_name is required (alphanumerics, - and _ only) " } ) ) ;
} ;
// Tag the name with the ref-clip cap (e.g. `grandma-30s`) so the library
// shows which reference length produced each clone.
let voice_name = append_ref_seconds ( & voice_name , tts_ref_seconds ( ) ) ;
let library = match libraries ::resolve_library_param ( & app_state , req . library . as_deref ( ) ) {
Ok ( Some ( l ) ) = > l ,
@@ -475,6 +910,7 @@ pub async fn create_voice_from_library_handler(
. await
{
Ok ( v ) = > {
invalidate_voices_cache ( ) ;
span . set_status ( Status ::Ok ) ;
HttpResponse ::Ok ( ) . json ( v )
}
@@ -534,6 +970,95 @@ mod tests {
assert_eq! ( sanitize_voice_name ( & long ) . unwrap ( ) . len ( ) , 64 ) ;
}
/// A plain name gets the `-<secs>s` tag appended.
#[test]
fn append_ref_seconds_tags_name() {
    let cases = [("grandma", 30, "grandma-30s"), ("voice_01", 15, "voice_01-15s")];
    for (name, secs, expected) in cases {
        assert_eq!(append_ref_seconds(name, secs), expected);
    }
}
/// Re-tagging with the same cap is a no-op; a different cap still appends.
#[test]
fn append_ref_seconds_is_idempotent_for_same_cap() {
    let already_tagged = append_ref_seconds("grandma-30s", 30);
    assert_eq!(already_tagged, "grandma-30s");

    // A different cap still appends — that's the comparison use-case.
    let other_cap = append_ref_seconds("grandma-15s", 30);
    assert_eq!(other_cap, "grandma-15s-30s");
}
/// A maximum-length name is shortened so the tagged result still fits in 64.
#[test]
fn append_ref_seconds_keeps_64_char_bound() {
    let base = "a".repeat(64);
    let tagged = append_ref_seconds(base.as_str(), 30);
    assert!(tagged.ends_with("-30s"));
    assert_eq!(tagged.len(), 64);
}
/// Finished jobs past the result TTL are swept; running jobs and recently
/// finished ones are kept.
#[test]
fn sweep_drops_expired_results_and_keeps_live_jobs() {
    // `Instant - Duration` may panic when the result is not representable
    // (platform-dependent, e.g. shortly after boot). Anchor at the real clock
    // and sweep at a synthetic future instant instead, so every timestamp
    // below is at or after `anchor`.
    let anchor = Instant::now();
    let now = anchor + TTS_JOB_MAX_AGE;

    let mk = |status: TtsJobStatus, created: Instant, finished: Option<Instant>| TtsJob {
        status,
        format: "mp3".into(),
        audio_base64: None,
        error: None,
        created_at: created,
        finished_at: finished,
        abort: None,
    };

    let mut jobs = HashMap::new();
    let live = Uuid::new_v4();
    let fresh_done = Uuid::new_v4();
    let stale_done = Uuid::new_v4();

    jobs.insert(live, mk(TtsJobStatus::Running, now, None));
    jobs.insert(
        fresh_done,
        mk(TtsJobStatus::Done, now, Some(now - Duration::from_secs(60))),
    );
    jobs.insert(
        stale_done,
        mk(
            TtsJobStatus::Done,
            now - TTS_JOB_MAX_AGE / 2,
            Some(now - TTS_JOB_RESULT_TTL),
        ),
    );

    sweep_stale_jobs(&mut jobs, now);

    assert!(jobs.contains_key(&live));
    assert!(jobs.contains_key(&fresh_done));
    assert!(!jobs.contains_key(&stale_done));
}
/// A job that has reached the max-age cap is swept even if it never finished.
#[test]
fn sweep_drops_jobs_past_max_age_even_if_unfinished() {
    // Build the "ancient" job at the real clock and sweep at a synthetic
    // future instant, rather than subtracting from `Instant::now()`, which
    // may panic when the result is not representable on the platform.
    let created = Instant::now();
    let now = created + TTS_JOB_MAX_AGE;

    let mut jobs = HashMap::new();
    let ancient = Uuid::new_v4();
    jobs.insert(
        ancient,
        TtsJob {
            status: TtsJobStatus::Running,
            format: "mp3".into(),
            audio_base64: None,
            error: None,
            created_at: created,
            finished_at: None,
            abort: None,
        },
    );

    sweep_stale_jobs(&mut jobs, now);

    assert!(jobs.is_empty());
}
/// Storing makes the value readable; invalidating empties the cache again.
#[test]
fn voices_cache_roundtrip_and_invalidation() {
    let sample = json!({ "voices": [{ "name": "m-30s" }], "count": 1 });

    invalidate_voices_cache();
    assert_eq!(cached_voices(), None);

    store_voices_cache(&sample);
    assert_eq!(cached_voices().as_ref(), Some(&sample));

    invalidate_voices_cache();
    assert_eq!(cached_voices(), None);
}
#[ test ]
fn clean_for_tts_strips_markdown ( ) {
assert_eq! (