Skip to content

Commit 12cb694

Browse files
nithinraok authored and chtruong814 committed
update subprocess cmd (#15218)
* update subprocess cmd
  Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
* common voice script
  Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
* Apply isort and black reformatting
  Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
---------
Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
Co-authored-by: nithinraok <nithinraok@users.noreply.github.com>
Signed-off-by: Charlie Truong <chtruong@nvidia.com>
1 parent 9521da0 commit 12cb694

File tree

2 files changed

+40
-32
lines changed

2 files changed

+40
-32
lines changed

scripts/dataset_processing/get_commonvoice_data.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def create_manifest(data: List[tuple], output_name: str, manifest_path: str):
9797

9898

9999
def process_files(csv_file, data_root, num_workers):
100-
""" Read *.csv file description, convert mp3 to wav, process text.
100+
"""Read *.csv file description, convert mp3 to wav, process text.
101101
Save results to data_root.
102102
103103
Args:
@@ -165,14 +165,13 @@ def main():
165165
commands = [
166166
'wget',
167167
'--user-agent',
168-
'"Mozilla/5.0 (Windows NT 10.0; WOW64) '
169-
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"',
168+
'Mozilla/5.0 (Windows NT 10.0; WOW64) '
169+
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
170170
'-O',
171171
output_archive_filename,
172-
f'{COMMON_VOICE_URL}',
172+
COMMON_VOICE_URL,
173173
]
174-
commands = " ".join(commands)
175-
subprocess.run(commands, shell=True, stderr=sys.stderr, stdout=sys.stdout, capture_output=False)
174+
subprocess.run(commands, shell=False, stderr=sys.stderr, stdout=sys.stdout, capture_output=False)
176175
filename = f"{args.language}.tar.gz"
177176
target_file = os.path.join(data_root, os.path.basename(filename))
178177

tools/asr_evaluator/utils.py

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -173,23 +173,26 @@ def run_chunked_inference(cfg: DictConfig) -> DictConfig:
173173
# 1) change TranscriptionConfig on top of the executed scripts such as speech_to_text_buffered_infer_rnnt.py, or
174174
# 2) add command as "decoding.strategy=greedy_batch " to below script
175175

176-
base_cmd = f"python {script_path} \
177-
calculate_wer=False \
178-
model_path={cfg.model_path} \
179-
pretrained_name={cfg.pretrained_name} \
180-
dataset_manifest={cfg.test_ds.manifest_filepath} \
181-
output_filename={cfg.output_filename} \
182-
random_seed={cfg.random_seed} \
183-
batch_size={cfg.test_ds.batch_size} \
184-
++num_workers={cfg.test_ds.num_workers} \
185-
chunk_len_in_secs={cfg.inference.chunk_len_in_secs} \
186-
++total_buffer_in_secs={cfg.inference.total_buffer_in_secs} \
187-
model_stride={cfg.inference.model_stride} \
188-
++timestamps={cfg.inference.timestamps}"
176+
base_cmd = [
177+
"python",
178+
str(script_path),
179+
"calculate_wer=False",
180+
f"model_path={cfg.model_path}",
181+
f"pretrained_name={cfg.pretrained_name}",
182+
f"dataset_manifest={cfg.test_ds.manifest_filepath}",
183+
f"output_filename={cfg.output_filename}",
184+
f"random_seed={cfg.random_seed}",
185+
f"batch_size={cfg.test_ds.batch_size}",
186+
f"++num_workers={cfg.test_ds.num_workers}",
187+
f"chunk_len_in_secs={cfg.inference.chunk_len_in_secs}",
188+
f"++total_buffer_in_secs={cfg.inference.total_buffer_in_secs}",
189+
f"model_stride={cfg.inference.model_stride}",
190+
f"++timestamps={cfg.inference.timestamps}",
191+
]
189192

190193
subprocess.run(
191194
base_cmd,
192-
shell=True,
195+
shell=False,
193196
check=True,
194197
)
195198
return cfg
@@ -229,19 +232,25 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig:
229232
# If need to change other config such as decoding strategy, could either:
230233
# 1) change TranscriptionConfig on top of the executed scripts such as transcribe_speech.py in examples/asr, or
231234
# 2) add command as "rnnt_decoding.strategy=greedy_batch " to below script
235+
base_cmd = [
236+
"python",
237+
str(script_path),
238+
"calculate_wer=False",
239+
f"model_path={cfg.model_path}",
240+
f"pretrained_name={cfg.pretrained_name}",
241+
f"dataset_manifest={cfg.test_ds.manifest_filepath}",
242+
f"output_filename={cfg.output_filename}",
243+
f"batch_size={cfg.test_ds.batch_size}",
244+
f"num_workers={cfg.test_ds.num_workers}",
245+
f"random_seed={cfg.random_seed}",
246+
f"eval_config_yaml={f.name}",
247+
f"decoder_type={cfg.inference.decoder_type}",
248+
]
249+
if hydra_overrides:
250+
base_cmd.extend(hydra_overrides.split())
232251
subprocess.run(
233-
f"python {script_path} "
234-
f"calculate_wer=False "
235-
f"model_path={cfg.model_path} "
236-
f"pretrained_name={cfg.pretrained_name} "
237-
f"dataset_manifest={cfg.test_ds.manifest_filepath} "
238-
f"output_filename={cfg.output_filename} "
239-
f"batch_size={cfg.test_ds.batch_size} "
240-
f"num_workers={cfg.test_ds.num_workers} "
241-
f"random_seed={cfg.random_seed} "
242-
f"eval_config_yaml={f.name} "
243-
f"decoder_type={cfg.inference.decoder_type} {hydra_overrides}",
244-
shell=True,
252+
base_cmd,
253+
shell=False,
245254
check=True,
246255
)
247256

0 commit comments

Comments
 (0)