inference/tts: fix speedup calculation

stintel · stintel · commit 1e09cea8605a · 2026-02-19T17:06:27.000+02:00
VitsTts returns the duration in seconds, but our speedup calculation
divides it by the elapsed time in milliseconds, making the speedup 1000x
too low. Dividing by the elapsed time in seconds instead would fix the
unit mismatch, but it would not be very precise, and because VitsTts can
return a duration of 0 for very short audio, the speedup could end up as
0. Instead, calculate the duration in milliseconds from the number of
samples and the sample rate, and use that to calculate the speedup.

Signed-off-by: Stijn Tintel <stijn@linux-ipv6.be>
diff --git a/src/inference/tts.rs b/src/inference/tts.rs
@@ -56,15 +56,16 @@ impl TtsEngine {
 
         let time = start.elapsed().as_secs_f64();
         let time_ms = time * 1000.0;
+        let duration = (speech.samples.len() as u64 * 1000) / u64::from(speech.sample_rate);
         #[allow(clippy::cast_precision_loss)]
-        let speedup = if time_ms > 0.0 {
-            (f64::from(speech.duration)) / time_ms
+        let speedup = if time > 0.0 {
+            duration as f64 / time_ms
         } else {
             0.0
         };
 
         let result = InferenceResult {
-            duration: u64::try_from(speech.duration).unwrap_or(0),
+            duration,
             output: speech,
             speedup,
             time,