Instrument TTS handlers with OTel spans (codebase standard)
Each /tts handler now opens an http.tts.* span via extract_context_from_request + global_tracer().start_with_context, sets Status::Ok or Status::error on every outcome, and records useful attributes (model, format, voice_name, byte counts), matching the insight handlers. Prometheus request metrics were already covered by the app-wide actix-web-prom middleware.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+64
-5
@@ -6,11 +6,13 @@
|
||||
// (audio read directly; video has its audio track extracted via ffmpeg).
|
||||
|
||||
use actix_multipart::Multipart;
|
||||
use actix_web::{HttpResponse, Responder, get, post, web};
|
||||
use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web};
|
||||
use anyhow::Context;
|
||||
use base64::Engine;
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use futures::StreamExt;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::{Span, Status, Tracer};
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
@@ -21,6 +23,7 @@ use crate::data::Claims;
|
||||
use crate::file_types::{is_audio_file, is_video_file};
|
||||
use crate::files::is_valid_full_path;
|
||||
use crate::libraries;
|
||||
use crate::otel::{extract_context_from_request, global_tracer};
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Hard cap on an uploaded voice-reference clip. Chatterbox itself caps the
|
||||
@@ -191,15 +194,21 @@ pub struct TtsSpeechResponse {
|
||||
/// return base64-encoded audio for `data:` URI playback on the client.
|
||||
#[post("/tts/speech")]
|
||||
pub async fn tts_speech_handler(
|
||||
http_request: HttpRequest,
|
||||
_claims: Claims,
|
||||
req: web::Json<TtsSpeechRequest>,
|
||||
app_state: web::Data<AppState>,
|
||||
) -> impl Responder {
|
||||
let parent_context = extract_context_from_request(&http_request);
|
||||
let mut span = global_tracer().start_with_context("http.tts.speech", &parent_context);
|
||||
|
||||
let text = clean_for_tts(&req.text);
|
||||
if text.is_empty() {
|
||||
span.set_status(Status::error("text is required"));
|
||||
return HttpResponse::BadRequest().json(json!({ "error": "text is required" }));
|
||||
}
|
||||
let Some(client) = app_state.llamacpp.as_ref() else {
|
||||
span.set_status(Status::error("tts backend not configured"));
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.json(json!({ "error": "TTS backend not configured (set LLAMA_SWAP_URL)" }));
|
||||
};
|
||||
@@ -216,6 +225,11 @@ pub async fn tts_speech_handler(
|
||||
.filter(|s| !s.is_empty())
|
||||
.or(dv.as_deref());
|
||||
|
||||
span.set_attribute(KeyValue::new("tts.model", client.tts_model.clone()));
|
||||
span.set_attribute(KeyValue::new("tts.format", format.to_string()));
|
||||
span.set_attribute(KeyValue::new("tts.has_voice", voice.is_some()));
|
||||
span.set_attribute(KeyValue::new("tts.text_len", text.len() as i64));
|
||||
|
||||
// Clamp generation knobs to Chatterbox's documented ranges before forwarding.
|
||||
let exaggeration = req.exaggeration.map(|x| x.clamp(0.25, 2.0));
|
||||
let cfg_weight = req.cfg_weight.map(|x| x.clamp(0.0, 1.0));
|
||||
@@ -226,6 +240,8 @@ pub async fn tts_speech_handler(
|
||||
.await
|
||||
{
|
||||
Ok(bytes) => {
|
||||
span.set_attribute(KeyValue::new("tts.audio_bytes", bytes.len() as i64));
|
||||
span.set_status(Status::Ok);
|
||||
let audio_base64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
|
||||
HttpResponse::Ok().json(TtsSpeechResponse {
|
||||
audio_base64,
|
||||
@@ -233,6 +249,7 @@ pub async fn tts_speech_handler(
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
span.set_status(Status::error("tts synthesis failed"));
|
||||
log::error!("TTS synth failed: {:?}", e);
|
||||
HttpResponse::BadGateway().json(json!({ "error": format!("TTS failed: {e}") }))
|
||||
}
|
||||
@@ -242,16 +259,25 @@ pub async fn tts_speech_handler(
|
||||
/// GET /tts/voices — list the Chatterbox voice library (raw passthrough).
|
||||
#[get("/tts/voices")]
|
||||
pub async fn list_voices_handler(
|
||||
http_request: HttpRequest,
|
||||
_claims: Claims,
|
||||
app_state: web::Data<AppState>,
|
||||
) -> impl Responder {
|
||||
let parent_context = extract_context_from_request(&http_request);
|
||||
let mut span = global_tracer().start_with_context("http.tts.voices.list", &parent_context);
|
||||
|
||||
let Some(client) = app_state.llamacpp.as_ref() else {
|
||||
span.set_status(Status::error("tts backend not configured"));
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.json(json!({ "error": "TTS backend not configured" }));
|
||||
};
|
||||
match client.list_voices().await {
|
||||
Ok(v) => HttpResponse::Ok().json(v),
|
||||
Ok(v) => {
|
||||
span.set_status(Status::Ok);
|
||||
HttpResponse::Ok().json(v)
|
||||
}
|
||||
Err(e) => {
|
||||
span.set_status(Status::error("list_voices failed"));
|
||||
log::error!("list_voices failed: {:?}", e);
|
||||
HttpResponse::BadGateway().json(json!({ "error": format!("{e}") }))
|
||||
}
|
||||
@@ -262,11 +288,16 @@ pub async fn list_voices_handler(
|
||||
/// clip. Multipart fields: `voice_name` (text) + a file part (`voice_file`).
|
||||
#[post("/tts/voices/upload")]
|
||||
pub async fn create_voice_upload_handler(
|
||||
http_request: HttpRequest,
|
||||
_claims: Claims,
|
||||
mut payload: Multipart,
|
||||
app_state: web::Data<AppState>,
|
||||
) -> impl Responder {
|
||||
let parent_context = extract_context_from_request(&http_request);
|
||||
let mut span = global_tracer().start_with_context("http.tts.voices.upload", &parent_context);
|
||||
|
||||
let Some(client) = app_state.llamacpp.as_ref() else {
|
||||
span.set_status(Status::error("tts backend not configured"));
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.json(json!({ "error": "TTS backend not configured" }));
|
||||
};
|
||||
@@ -290,6 +321,7 @@ pub async fn create_voice_upload_handler(
|
||||
filename = fname;
|
||||
while let Some(Ok(data)) = part.next().await {
|
||||
if file_bytes.len() + data.len() > MAX_VOICE_UPLOAD_BYTES {
|
||||
span.set_status(Status::error("voice clip exceeds limit"));
|
||||
return HttpResponse::PayloadTooLarge()
|
||||
.json(json!({ "error": "voice clip exceeds 25 MB" }));
|
||||
}
|
||||
@@ -307,12 +339,16 @@ pub async fn create_voice_upload_handler(
|
||||
}
|
||||
|
||||
let Some(name) = voice_name.as_deref().and_then(sanitize_voice_name) else {
|
||||
span.set_status(Status::error("voice_name is required"));
|
||||
return HttpResponse::BadRequest()
|
||||
.json(json!({ "error": "voice_name is required (alphanumerics, - and _ only)" }));
|
||||
};
|
||||
if file_bytes.is_empty() {
|
||||
span.set_status(Status::error("voice_file is required"));
|
||||
return HttpResponse::BadRequest().json(json!({ "error": "voice_file is required" }));
|
||||
}
|
||||
span.set_attribute(KeyValue::new("tts.voice_name", name.clone()));
|
||||
span.set_attribute(KeyValue::new("tts.upload_bytes", file_bytes.len() as i64));
|
||||
|
||||
// Normalize to WAV so any device format (e.g. .aac / .opus, which Chatterbox
|
||||
// rejects by extension) is accepted.
|
||||
@@ -320,6 +356,7 @@ pub async fn create_voice_upload_handler(
|
||||
let wav = match transcode_bytes_to_wav(file_bytes.as_ref(), src_ext).await {
|
||||
Ok(w) => w,
|
||||
Err(e) => {
|
||||
span.set_status(Status::error("audio decode failed"));
|
||||
log::error!("voice upload transcode failed: {:?}", e);
|
||||
return HttpResponse::BadRequest()
|
||||
.json(json!({ "error": "couldn't decode that audio file" }));
|
||||
@@ -330,8 +367,12 @@ pub async fn create_voice_upload_handler(
|
||||
.create_voice(&name, wav, "reference.wav", "audio/wav")
|
||||
.await
|
||||
{
|
||||
Ok(v) => HttpResponse::Ok().json(v),
|
||||
Ok(v) => {
|
||||
span.set_status(Status::Ok);
|
||||
HttpResponse::Ok().json(v)
|
||||
}
|
||||
Err(e) => {
|
||||
span.set_status(Status::error("create_voice failed"));
|
||||
log::error!("create_voice (upload) failed: {:?}", e);
|
||||
HttpResponse::BadGateway().json(json!({ "error": format!("{e}") }))
|
||||
}
|
||||
@@ -352,15 +393,22 @@ pub struct CreateVoiceFromLibraryRequest {
|
||||
/// WAV reference clip (length capped by LLAMA_SWAP_TTS_REF_SECONDS).
|
||||
#[post("/tts/voices/from-library")]
|
||||
pub async fn create_voice_from_library_handler(
|
||||
http_request: HttpRequest,
|
||||
_claims: Claims,
|
||||
req: web::Json<CreateVoiceFromLibraryRequest>,
|
||||
app_state: web::Data<AppState>,
|
||||
) -> impl Responder {
|
||||
let parent_context = extract_context_from_request(&http_request);
|
||||
let mut span =
|
||||
global_tracer().start_with_context("http.tts.voices.from_library", &parent_context);
|
||||
|
||||
let Some(client) = app_state.llamacpp.as_ref() else {
|
||||
span.set_status(Status::error("tts backend not configured"));
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.json(json!({ "error": "TTS backend not configured" }));
|
||||
};
|
||||
let Some(voice_name) = sanitize_voice_name(&req.voice_name) else {
|
||||
span.set_status(Status::error("voice_name is required"));
|
||||
return HttpResponse::BadRequest()
|
||||
.json(json!({ "error": "voice_name is required (alphanumerics, - and _ only)" }));
|
||||
};
|
||||
@@ -368,13 +416,17 @@ pub async fn create_voice_from_library_handler(
|
||||
let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
|
||||
Ok(Some(l)) => l,
|
||||
Ok(None) => app_state.primary_library(),
|
||||
Err(msg) => return HttpResponse::BadRequest().json(json!({ "error": msg })),
|
||||
Err(msg) => {
|
||||
span.set_status(Status::error("invalid library"));
|
||||
return HttpResponse::BadRequest().json(json!({ "error": msg }));
|
||||
}
|
||||
};
|
||||
|
||||
// is_valid_full_path confines the path to the library root (no traversal).
|
||||
let abs = match is_valid_full_path(&library.root_path, &req.path, false) {
|
||||
Some(p) if p.exists() => p,
|
||||
_ => {
|
||||
span.set_status(Status::error("file not found"));
|
||||
return HttpResponse::NotFound().json(json!({ "error": "file not found in library" }));
|
||||
}
|
||||
};
|
||||
@@ -382,13 +434,16 @@ pub async fn create_voice_from_library_handler(
|
||||
// Only real audio/video sources are valid voice references — refuse to
|
||||
// slurp arbitrary library files into memory / ffmpeg.
|
||||
if !is_audio_file(&abs) && !is_video_file(&abs) {
|
||||
span.set_status(Status::error("not an audio/video file"));
|
||||
return HttpResponse::BadRequest()
|
||||
.json(json!({ "error": "file is not an audio or video file" }));
|
||||
}
|
||||
span.set_attribute(KeyValue::new("tts.voice_name", voice_name.clone()));
|
||||
|
||||
let wav = match prepare_reference_audio(&abs).await {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
span.set_status(Status::error("audio decode failed"));
|
||||
log::error!("voice reference prep failed for {:?}: {:?}", abs, e);
|
||||
return HttpResponse::BadRequest()
|
||||
.json(json!({ "error": "couldn't decode that file's audio" }));
|
||||
@@ -399,8 +454,12 @@ pub async fn create_voice_from_library_handler(
|
||||
.create_voice(&voice_name, wav, "reference.wav", "audio/wav")
|
||||
.await
|
||||
{
|
||||
Ok(v) => HttpResponse::Ok().json(v),
|
||||
Ok(v) => {
|
||||
span.set_status(Status::Ok);
|
||||
HttpResponse::Ok().json(v)
|
||||
}
|
||||
Err(e) => {
|
||||
span.set_status(Status::error("create_voice failed"));
|
||||
log::error!("create_voice (from-library) failed: {:?}", e);
|
||||
HttpResponse::BadGateway().json(json!({ "error": format!("{e}") }))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user