1
1
"""
2
- Copyright (c) 2020 Intel Corporation
2
+ Copyright (c) 2020-2022 Intel Corporation
3
3
4
4
Licensed under the Apache License, Version 2.0 (the "License");
5
5
you may not use this file except in compliance with the License.
19
19
20
20
import numpy as np
21
21
22
+ from openvino .runtime import PartialShape , set_batch , Layout
22
23
from utils .wav_processing import (
23
24
fold_with_overlap , infer_from_discretized_mix_logistic , pad_tensor , xfade_and_unfold ,
24
25
)
@@ -49,37 +50,41 @@ def __init__(self, model_upsample, model_rnn, ie, target=11000, overlap=550, hop
49
50
self .batch_sizes = [1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 , 256 ]
50
51
self .ie = ie
51
52
52
- self .upsample_net = self .load_network (model_upsample )
53
+ self .upsample_model = self .load_network (model_upsample )
53
54
if upsampler_width > 0 :
54
- orig_shape = self .upsample_net . input_info [ 'mels' ]. input_data .shape
55
- self .upsample_net .reshape ({"mels" : ( orig_shape [0 ], upsampler_width , orig_shape [2 ])})
55
+ orig_shape = self .upsample_model . input ( 'mels' ) .shape
56
+ self .upsample_model .reshape ({"mels" : PartialShape ([ orig_shape [0 ], upsampler_width , orig_shape [2 ] ])})
56
57
57
- self .upsample_exec = self .create_exec_network (self .upsample_net , model_upsample )
58
+ self .upsample_request = self .create_infer_requests (self .upsample_model , model_upsample )
58
59
59
- self .rnn_net = self .load_network (model_rnn )
60
- self .rnn_exec = self .create_exec_network (self .rnn_net , model_rnn , batch_sizes = self .batch_sizes )
60
+ self .rnn_model = self .load_network (model_rnn )
61
+ self .rnn_requests = self .create_infer_requests (self .rnn_model , model_rnn , batch_sizes = self .batch_sizes )
61
62
62
63
# fixed number of mels in the mel-spectrogram
63
- self .mel_len = self .upsample_net . input_info [ 'mels' ]. input_data .shape [1 ] - 2 * self .pad
64
- self .rnn_width = self .rnn_net . input_info [ 'x' ]. input_data .shape [1 ]
64
+ self .mel_len = self .upsample_model . input ( 'mels' ) .shape [1 ] - 2 * self .pad
65
+ self .rnn_width = self .rnn_model . input ( 'h1.1' ) .shape [1 ]
65
66
66
67
def load_network(self, model_xml):
    """Read the WaveRNN model description and its weights through ``self.ie``.

    The weights file is expected next to ``model_xml`` under the same name
    with the final extension replaced by ``.bin``.

    Args:
        model_xml: Path to the model description file.

    Returns:
        Whatever ``self.ie.read_model`` returns for the given pair of files.
    """
    # splitext only strips the last extension of the final path component,
    # so "a.b.xml" -> "a.b.bin" and an extension-less "a" -> "a.bin"
    # (splitting on '.' by hand turned the latter into a bare ".bin").
    model_bin = osp.splitext(model_xml)[0] + ".bin"
    log.info('Reading WaveRNN model {}'.format(model_xml))
    return self.ie.read_model(model=model_xml, weights=model_bin)
72
73
73
def create_infer_requests(self, model, path, batch_sizes=None):
    """Compile ``model`` on ``self.device`` and create inference request(s).

    Args:
        model: Model object as returned by ``load_network``.
        path: Model path; used only in the log message.
        batch_sizes: Optional iterable of batch sizes. When given, every
            model parameter gets the ``"BC"`` layout and the model is
            compiled once per batch size.

    Returns:
        A single infer request when ``batch_sizes`` is ``None``; otherwise a
        list of infer requests, one per entry of ``batch_sizes`` and in the
        same order.
    """
    if batch_sizes is None:
        compiled_model = self.ie.compile_model(model, device_name=self.device)
        requests = compiled_model.create_infer_request()
    else:
        # The layout has to be set before set_batch() is called so that a
        # batch dimension is defined on every input.
        for parameter in model.get_parameters():
            parameter.set_layout(Layout("BC"))

        requests = []
        for b_s in batch_sizes:
            set_batch(model, b_s)
            compiled_model = self.ie.compile_model(model, device_name=self.device)
            requests.append(compiled_model.create_infer_request())

    log.info('The WaveRNN model {} is loaded to {}'.format(path, self.device))
    return requests
83
88
84
89
@staticmethod
85
90
def get_rnn_init_states (b_size = 1 , rnn_dims = 328 ):
@@ -133,8 +138,9 @@ def forward(self, mels):
133
138
def forward_upsample(self, mels):
    """Run the upsampling model on ``mels`` and return its two outputs.

    Args:
        mels: Input that is passed through ``pad_tensor(mels, pad=self.pad)``
            and then fed to the model input named ``"mels"``.

    Returns:
        A ``(upsample_mels, aux)`` tuple: the ``"upsample_mels"`` output with
        ``self.indent`` entries trimmed from both ends of axis 1, and the
        complete ``"aux"`` output. Both are copies of the request's data.
    """
    mels = pad_tensor(mels, pad=self.pad)

    request = self.upsample_request
    request.infer(inputs={"mels": mels})

    upsampled = request.get_tensor("upsample_mels").data
    # An explicit end index keeps the trim correct for self.indent == 0;
    # the slice ``0:-0`` would select nothing.
    trim_end = upsampled.shape[1] - self.indent
    # NOTE(review): slicing ``.data`` yields views of the tensor's array, which
    # presumably shares memory with the request's output buffer; copy so the
    # results stay valid after the next infer() call - confirm against the
    # OpenVINO Tensor documentation.
    upsample_mels = upsampled[:, self.indent:trim_end, :].copy()
    aux = request.get_tensor("aux").data.copy()
    return upsample_mels, aux
139
145
140
146
def forward_rnn (self , mels , upsampled_mels , aux ):
@@ -160,13 +166,12 @@ def forward_rnn(self, mels, upsampled_mels, aux):
160
166
161
167
a1_t , a2_t , a3_t , a4_t = \
162
168
(a [:, i , :] for a in aux_split )
169
+ self .rnn_requests [active_network ].infer (inputs = {"m_t" : m_t , "a1_t" : a1_t , "a2_t" : a2_t , "a3_t" : a3_t ,
170
+ "a4_t" : a4_t , "h1.1" : h1 , "h2.1" : h2 , "x" : x })
163
171
164
- out = self .rnn_exec [active_network ].infer (inputs = {"m_t" : m_t , "a1_t" : a1_t , "a2_t" : a2_t , "a3_t" : a3_t ,
165
- "a4_t" : a4_t , "h1.1" : h1 , "h2.1" : h2 , "x" : x })
166
-
167
- logits = out ["logits" ]
168
- h1 = out ["h1" ]
169
- h2 = out ["h2" ]
172
+ logits = self .rnn_requests [active_network ].get_tensor ('logits' ).data [:]
173
+ h1 = self .rnn_requests [active_network ].get_tensor ('h1' ).data [:]
174
+ h2 = self .rnn_requests [active_network ].get_tensor ('h2' ).data [:]
170
175
171
176
sample = infer_from_discretized_mix_logistic (logits )
172
177
@@ -204,38 +209,40 @@ def __init__(self, model, ie, device='CPU', default_width=800):
204
209
self .scales = 4
205
210
self .hop_length = 256
206
211
207
- self .net = self .load_network (model )
208
- if self .net . input_info [ 'mel' ]. input_data .shape [2 ] != default_width :
209
- orig_shape = self .net . input_info [ 'mel' ]. input_data .shape
212
+ self .model = self .load_network (model )
213
+ if self .model . input ( 'mel' ) .shape [2 ] != default_width :
214
+ orig_shape = self .model . input ( 'mel' ) .shape
210
215
new_shape = (orig_shape [0 ], orig_shape [1 ], default_width )
211
- self .net .reshape ({"mel" : new_shape })
216
+ self .model .reshape ({"mel" : PartialShape ([ new_shape [ 0 ], new_shape [ 1 ], new_shape [ 2 ]]) })
212
217
213
- self .exec_net = self .create_exec_network (self .net , self .scales )
218
+ self .requests = self .create_infer_requests (self .model , model , self .scales )
214
219
215
220
# fixed number of columns in the mel-spectrogram
216
- self .mel_len = self .net . input_info [ 'mel' ]. input_data .shape [2 ]
221
+ self .mel_len = self .model . input ( 'mel' ) .shape [2 ]
217
222
self .widths = [self .mel_len * (i + 1 ) for i in range (self .scales )]
218
223
219
224
def load_network(self, model_xml):
    """Read the MelGAN model description and its weights through ``self.ie``.

    The weights file is expected next to ``model_xml`` under the same name
    with the final extension replaced by ``.bin``.

    Args:
        model_xml: Path to the model description file.

    Returns:
        Whatever ``self.ie.read_model`` returns for the given pair of files.
    """
    # splitext only strips the last extension of the final path component,
    # so "a.b.xml" -> "a.b.bin" and an extension-less "a" -> "a.bin"
    # (splitting on '.' by hand turned the latter into a bare ".bin").
    model_bin = osp.splitext(model_xml)[0] + ".bin"
    log.info('Reading MelGAN model {}'.format(model_xml))
    return self.ie.read_model(model=model_xml, weights=model_bin)
225
230
226
def create_infer_requests(self, model, path, scales=None):
    """Compile ``model`` on ``self.device`` and create inference request(s).

    Args:
        model: Model object as returned by ``load_network``; it must expose
            an input named ``"mel"`` when ``scales`` is given.
        path: Model path; used only in the log message.
        scales: Optional number of width multipliers. When given, the model
            is compiled ``scales`` times with the last dimension of the
            ``"mel"`` input multiplied by 1, 2, ..., ``scales``.

    Returns:
        A single infer request when ``scales`` is ``None``; otherwise a list
        of infer requests ordered by increasing input width.
    """
    if scales is None:
        compiled_model = self.ie.compile_model(model, device_name=self.device)
        requests = compiled_model.create_infer_request()
    else:
        orig_shape = model.input('mel').shape
        dim0, dim1, base_width = orig_shape[0], orig_shape[1], orig_shape[2]

        requests = []
        try:
            for multiplier in range(1, scales + 1):
                model.reshape({"mel": PartialShape([dim0, dim1, base_width * multiplier])})
                compiled_model = self.ie.compile_model(model, device_name=self.device)
                requests.append(compiled_model.create_infer_request())
        finally:
            # Restore the caller's model once, including when reshaping or
            # compilation fails part-way through the loop.
            model.reshape({"mel": PartialShape([dim0, dim1, base_width])})

    log.info('The MelGAN model {} is loaded to {}'.format(path, self.device))
    return requests
239
246
240
247
def forward (self , mel ):
241
248
mel = np .expand_dims (mel , axis = 0 )
@@ -261,7 +268,8 @@ def forward(self, mel):
261
268
c_begin = 0
262
269
c_end = cur_w
263
270
while c_begin < cols :
264
- audio = self .exec_net [active_net ].infer (inputs = {"mel" : mel [:, :, c_begin :c_end ]})["audio" ]
271
+ self .requests [active_net ].infer (inputs = {"mel" : mel [:, :, c_begin :c_end ]})
272
+ audio = self .requests [active_net ].get_tensor ("audio" ).data [:]
265
273
res_audio .extend (audio )
266
274
267
275
c_begin = c_end
0 commit comments