@@ -126,7 +126,7 @@ def infer_duration(self, sequence, speaker_embedding=None, alpha=1.0, non_empty_
126
126
"input_mask" : input_mask ,
127
127
"pos_mask" : pos_mask }
128
128
if speaker_embedding is not None :
129
- inputs ["speaker_embedding" ] = np .array ([ speaker_embedding ] )
129
+ inputs ["speaker_embedding" ] = np .array (speaker_embedding )
130
130
self .duration_predictor_request .infer (inputs )
131
131
else :
132
132
self .duration_predictor_request .infer (inputs = {"input_seq" : sequence })
@@ -154,7 +154,7 @@ def infer_mel(self, aligned_emb, non_empty_symbols, speaker_embedding=None):
154
154
"data_mask" : data_mask ,
155
155
"pos_mask" : pos_mask }
156
156
if speaker_embedding is not None :
157
- inputs ["speaker_embedding" ] = np .array ([ speaker_embedding ] )
157
+ inputs ["speaker_embedding" ] = np .array (speaker_embedding )
158
158
self .forward_request .infer (inputs )
159
159
else :
160
160
self .forward_request .infer (inputs = {"data" : aligned_emb })
@@ -215,7 +215,7 @@ def forward(self, text, alpha=1.0, speaker_id=19, speaker_emb=None):
215
215
if speaker_emb is not None :
216
216
speaker_embedding = speaker_emb
217
217
else :
218
- speaker_embedding = self .speaker_embeddings [speaker_id , :]
218
+ speaker_embedding = [ self .speaker_embeddings [speaker_id , :] ]
219
219
220
220
aligned_emb = self .forward_duration_prediction_by_delimiters (text , speaker_embedding , alpha )
221
221
0 commit comments