From ccacfe1113f74ac5e96231366ffbef26f8180e33 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Tue, 2 Jun 2026 23:10:43 -0400 Subject: [PATCH] Instrument TTS handlers with OTel spans (codebase standard) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each /tts handler now opens an http.tts.* span via extract_context_from_request + global_tracer().start_with_context, sets Status::Ok / Status::error on every outcome, and records useful attributes (model, format, voice_name, byte counts) — matching the insight handlers. Prometheus request metrics were already covered by the app-wide actix-web-prom middleware. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/tts.rs | 69 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 59b4a80..9c98bee 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -6,11 +6,13 @@ // (audio read directly; video has its audio track extracted via ffmpeg). use actix_multipart::Multipart; -use actix_web::{HttpResponse, Responder, get, post, web}; +use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; use anyhow::Context; use base64::Engine; use bytes::{BufMut, BytesMut}; use futures::StreamExt; +use opentelemetry::KeyValue; +use opentelemetry::trace::{Span, Status, Tracer}; use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::json; @@ -21,6 +23,7 @@ use crate::data::Claims; use crate::file_types::{is_audio_file, is_video_file}; use crate::files::is_valid_full_path; use crate::libraries; +use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; /// Hard cap on an uploaded voice-reference clip. Chatterbox itself caps the @@ -191,15 +194,21 @@ pub struct TtsSpeechResponse { /// return base64-encoded audio for `data:` URI playback on the client. #[post("/tts/speech")] pub async fn tts_speech_handler( + http_request: HttpRequest, _claims: Claims, req: web::Json, app_state: web::Data, ) -> impl Responder { + let parent_context = extract_context_from_request(&http_request); + let mut span = global_tracer().start_with_context("http.tts.speech", &parent_context); + let text = clean_for_tts(&req.text); if text.is_empty() { + span.set_status(Status::error("text is required")); return HttpResponse::BadRequest().json(json!({ "error": "text is required" })); } let Some(client) = app_state.llamacpp.as_ref() else { + span.set_status(Status::error("tts backend not configured")); return HttpResponse::ServiceUnavailable() .json(json!({ "error": "TTS backend not configured (set LLAMA_SWAP_URL)" })); }; @@ -216,6 +225,11 @@ pub async fn tts_speech_handler( .filter(|s| !s.is_empty()) .or(dv.as_deref()); + span.set_attribute(KeyValue::new("tts.model", client.tts_model.clone())); + span.set_attribute(KeyValue::new("tts.format", format.to_string())); + span.set_attribute(KeyValue::new("tts.has_voice", voice.is_some())); + span.set_attribute(KeyValue::new("tts.text_len", text.len() as i64)); + // Clamp generation knobs to Chatterbox's documented ranges before forwarding. let exaggeration = req.exaggeration.map(|x| x.clamp(0.25, 2.0)); let cfg_weight = req.cfg_weight.map(|x| x.clamp(0.0, 1.0)); @@ -226,6 +240,8 @@ pub async fn tts_speech_handler( .await { Ok(bytes) => { + span.set_attribute(KeyValue::new("tts.audio_bytes", bytes.len() as i64)); + span.set_status(Status::Ok); let audio_base64 = base64::engine::general_purpose::STANDARD.encode(&bytes); HttpResponse::Ok().json(TtsSpeechResponse { audio_base64, @@ -233,6 +249,7 @@ pub async fn tts_speech_handler( }) } Err(e) => { + span.set_status(Status::error("tts synthesis failed")); log::error!("TTS synth failed: {:?}", e); HttpResponse::BadGateway().json(json!({ "error": format!("TTS failed: {e}") })) } @@ -242,16 +259,25 @@ pub async fn tts_speech_handler( /// GET /tts/voices — list the Chatterbox voice library (raw passthrough). #[get("/tts/voices")] pub async fn list_voices_handler( + http_request: HttpRequest, _claims: Claims, app_state: web::Data, ) -> impl Responder { + let parent_context = extract_context_from_request(&http_request); + let mut span = global_tracer().start_with_context("http.tts.voices.list", &parent_context); + let Some(client) = app_state.llamacpp.as_ref() else { + span.set_status(Status::error("tts backend not configured")); return HttpResponse::ServiceUnavailable() .json(json!({ "error": "TTS backend not configured" })); }; match client.list_voices().await { - Ok(v) => HttpResponse::Ok().json(v), + Ok(v) => { + span.set_status(Status::Ok); + HttpResponse::Ok().json(v) + } Err(e) => { + span.set_status(Status::error("list_voices failed")); log::error!("list_voices failed: {:?}", e); HttpResponse::BadGateway().json(json!({ "error": format!("{e}") })) } @@ -262,11 +288,16 @@ pub async fn list_voices_handler( /// clip. Multipart fields: `voice_name` (text) + a file part (`voice_file`). #[post("/tts/voices/upload")] pub async fn create_voice_upload_handler( + http_request: HttpRequest, _claims: Claims, mut payload: Multipart, app_state: web::Data, ) -> impl Responder { + let parent_context = extract_context_from_request(&http_request); + let mut span = global_tracer().start_with_context("http.tts.voices.upload", &parent_context); + let Some(client) = app_state.llamacpp.as_ref() else { + span.set_status(Status::error("tts backend not configured")); return HttpResponse::ServiceUnavailable() .json(json!({ "error": "TTS backend not configured" })); }; @@ -290,6 +321,7 @@ pub async fn create_voice_upload_handler( filename = fname; while let Some(Ok(data)) = part.next().await { if file_bytes.len() + data.len() > MAX_VOICE_UPLOAD_BYTES { + span.set_status(Status::error("voice clip exceeds limit")); return HttpResponse::PayloadTooLarge() .json(json!({ "error": "voice clip exceeds 25 MB" })); } @@ -307,12 +339,16 @@ pub async fn create_voice_upload_handler( } let Some(name) = voice_name.as_deref().and_then(sanitize_voice_name) else { + span.set_status(Status::error("voice_name is required")); return HttpResponse::BadRequest() .json(json!({ "error": "voice_name is required (alphanumerics, - and _ only)" })); }; if file_bytes.is_empty() { + span.set_status(Status::error("voice_file is required")); return HttpResponse::BadRequest().json(json!({ "error": "voice_file is required" })); } + span.set_attribute(KeyValue::new("tts.voice_name", name.clone())); + span.set_attribute(KeyValue::new("tts.upload_bytes", file_bytes.len() as i64)); // Normalize to WAV so any device format (e.g. .aac / .opus, which Chatterbox // rejects by extension) is accepted. @@ -320,6 +356,7 @@ pub async fn create_voice_upload_handler( let wav = match transcode_bytes_to_wav(file_bytes.as_ref(), src_ext).await { Ok(w) => w, Err(e) => { + span.set_status(Status::error("audio decode failed")); log::error!("voice upload transcode failed: {:?}", e); return HttpResponse::BadRequest() .json(json!({ "error": "couldn't decode that audio file" })); @@ -330,8 +367,12 @@ pub async fn create_voice_upload_handler( .create_voice(&name, wav, "reference.wav", "audio/wav") .await { - Ok(v) => HttpResponse::Ok().json(v), + Ok(v) => { + span.set_status(Status::Ok); + HttpResponse::Ok().json(v) + } Err(e) => { + span.set_status(Status::error("create_voice failed")); log::error!("create_voice (upload) failed: {:?}", e); HttpResponse::BadGateway().json(json!({ "error": format!("{e}") })) } @@ -352,15 +393,22 @@ pub struct CreateVoiceFromLibraryRequest { /// WAV reference clip (length capped by LLAMA_SWAP_TTS_REF_SECONDS). #[post("/tts/voices/from-library")] pub async fn create_voice_from_library_handler( + http_request: HttpRequest, _claims: Claims, req: web::Json, app_state: web::Data, ) -> impl Responder { + let parent_context = extract_context_from_request(&http_request); + let mut span = + global_tracer().start_with_context("http.tts.voices.from_library", &parent_context); + let Some(client) = app_state.llamacpp.as_ref() else { + span.set_status(Status::error("tts backend not configured")); return HttpResponse::ServiceUnavailable() .json(json!({ "error": "TTS backend not configured" })); }; let Some(voice_name) = sanitize_voice_name(&req.voice_name) else { + span.set_status(Status::error("voice_name is required")); return HttpResponse::BadRequest() .json(json!({ "error": "voice_name is required (alphanumerics, - and _ only)" })); }; @@ -368,13 +416,17 @@ pub async fn create_voice_from_library_handler( let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { Ok(Some(l)) => l, Ok(None) => app_state.primary_library(), - Err(msg) => return HttpResponse::BadRequest().json(json!({ "error": msg })), + Err(msg) => { + span.set_status(Status::error("invalid library")); + return HttpResponse::BadRequest().json(json!({ "error": msg })); + } }; // is_valid_full_path confines the path to the library root (no traversal). let abs = match is_valid_full_path(&library.root_path, &req.path, false) { Some(p) if p.exists() => p, _ => { + span.set_status(Status::error("file not found")); return HttpResponse::NotFound().json(json!({ "error": "file not found in library" })); } }; @@ -382,13 +434,16 @@ pub async fn create_voice_from_library_handler( // Only real audio/video sources are valid voice references — refuse to // slurp arbitrary library files into memory / ffmpeg. if !is_audio_file(&abs) && !is_video_file(&abs) { + span.set_status(Status::error("not an audio/video file")); return HttpResponse::BadRequest() .json(json!({ "error": "file is not an audio or video file" })); } + span.set_attribute(KeyValue::new("tts.voice_name", voice_name.clone())); let wav = match prepare_reference_audio(&abs).await { Ok(b) => b, Err(e) => { + span.set_status(Status::error("audio decode failed")); log::error!("voice reference prep failed for {:?}: {:?}", abs, e); return HttpResponse::BadRequest() .json(json!({ "error": "couldn't decode that file's audio" })); @@ -399,8 +454,12 @@ pub async fn create_voice_from_library_handler( .create_voice(&voice_name, wav, "reference.wav", "audio/wav") .await { - Ok(v) => HttpResponse::Ok().json(v), + Ok(v) => { + span.set_status(Status::Ok); + HttpResponse::Ok().json(v) + } Err(e) => { + span.set_status(Status::error("create_voice failed")); log::error!("create_voice (from-library) failed: {:?}", e); HttpResponse::BadGateway().json(json!({ "error": format!("{e}") })) }