[TTS] Fix second generation for Speech T5 TTS (#1389)

rkazants · mvafin · commit 91c3f5b6dae8 · 2025-08-12T16:54:18.000+02:00
Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
diff --git a/optimum/intel/openvino/modeling_text2speech.py b/optimum/intel/openvino/modeling_text2speech.py
@@ -97,6 +97,10 @@ def forward(self, inputs_embeds, speaker_embeddings, encoder_last_hidden_state,
         prob = torch.from_numpy(result[2])
         return ModelOutput(output_sequence_out=output_sequence_out, spectrum=spectrum, prob=prob)
 
+    def reset_state(self) -> None:
+        if self.request:
+            self.request.reset_state()
+
 
 class OVTextToSpeechPostNet(OVModelPart):
     _model_name = "postnet"
@@ -472,6 +476,9 @@ def generate(
         idx = 0
         result_spectrogram = {}
 
+        # clean-up decoder states for new generation
+        self.decoder.reset_state()
+
         while True:
             idx += 1