Skip to content

Commit 4de2018

Browse files
authored
update subprocess cmd (#15218)
* update subprocess cmd
  Signed-off-by: nithinraok <[email protected]>
* common voice script
  Signed-off-by: nithinraok <[email protected]>
* Apply isort and black reformatting
  Signed-off-by: nithinraok <[email protected]>
---------
Signed-off-by: nithinraok <[email protected]>
Signed-off-by: nithinraok <[email protected]>
Co-authored-by: nithinraok <[email protected]>
1 parent 661af02 commit 4de2018

File tree

2 files changed

+40
-32
lines changed

2 files changed

+40
-32
lines changed

scripts/dataset_processing/get_commonvoice_data.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def create_manifest(data: List[tuple], output_name: str, manifest_path: str):
9797

9898

9999
def process_files(csv_file, data_root, num_workers):
100-
""" Read *.csv file description, convert mp3 to wav, process text.
100+
"""Read *.csv file description, convert mp3 to wav, process text.
101101
Save results to data_root.
102102
103103
Args:
@@ -165,14 +165,13 @@ def main():
165165
commands = [
166166
'wget',
167167
'--user-agent',
168-
'"Mozilla/5.0 (Windows NT 10.0; WOW64) '
169-
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"',
168+
'Mozilla/5.0 (Windows NT 10.0; WOW64) '
169+
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
170170
'-O',
171171
output_archive_filename,
172-
f'{COMMON_VOICE_URL}',
172+
COMMON_VOICE_URL,
173173
]
174-
commands = " ".join(commands)
175-
subprocess.run(commands, shell=True, stderr=sys.stderr, stdout=sys.stdout, capture_output=False)
174+
subprocess.run(commands, shell=False, stderr=sys.stderr, stdout=sys.stdout, capture_output=False)
176175
filename = f"{args.language}.tar.gz"
177176
target_file = os.path.join(data_root, os.path.basename(filename))
178177

tools/asr_evaluator/utils.py

Lines changed: 35 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -183,23 +183,26 @@ def run_chunked_inference(cfg: DictConfig) -> DictConfig:
183183
# 1) change TranscriptionConfig on top of the executed scripts such as speech_to_text_buffered_infer_rnnt.py, or
184184
# 2) add command as "decoding.strategy=greedy_batch " to below script
185185

186-
base_cmd = f"python {script_path} \
187-
calculate_wer=False \
188-
model_path={cfg.model_path} \
189-
pretrained_name={cfg.pretrained_name} \
190-
dataset_manifest={cfg.test_ds.manifest_filepath} \
191-
output_filename={cfg.output_filename} \
192-
random_seed={cfg.random_seed} \
193-
batch_size={cfg.test_ds.batch_size} \
194-
++num_workers={cfg.test_ds.num_workers} \
195-
chunk_len_in_secs={cfg.inference.chunk_len_in_secs} \
196-
++total_buffer_in_secs={cfg.inference.total_buffer_in_secs} \
197-
model_stride={cfg.inference.model_stride} \
198-
++timestamps={cfg.inference.timestamps}"
186+
base_cmd = [
187+
"python",
188+
str(script_path),
189+
"calculate_wer=False",
190+
f"model_path={cfg.model_path}",
191+
f"pretrained_name={cfg.pretrained_name}",
192+
f"dataset_manifest={cfg.test_ds.manifest_filepath}",
193+
f"output_filename={cfg.output_filename}",
194+
f"random_seed={cfg.random_seed}",
195+
f"batch_size={cfg.test_ds.batch_size}",
196+
f"++num_workers={cfg.test_ds.num_workers}",
197+
f"chunk_len_in_secs={cfg.inference.chunk_len_in_secs}",
198+
f"++total_buffer_in_secs={cfg.inference.total_buffer_in_secs}",
199+
f"model_stride={cfg.inference.model_stride}",
200+
f"++timestamps={cfg.inference.timestamps}",
201+
]
199202

200203
subprocess.run(
201204
base_cmd,
202-
shell=True,
205+
shell=False,
203206
check=True,
204207
)
205208
return cfg
@@ -239,19 +242,25 @@ def run_offline_inference(cfg: DictConfig) -> DictConfig:
239242
# If need to change other config such as decoding strategy, could either:
240243
# 1) change TranscriptionConfig on top of the executed scripts such as transcribe_speech.py in examples/asr, or
241244
# 2) add command as "rnnt_decoding.strategy=greedy_batch " to below script
245+
base_cmd = [
246+
"python",
247+
str(script_path),
248+
"calculate_wer=False",
249+
f"model_path={cfg.model_path}",
250+
f"pretrained_name={cfg.pretrained_name}",
251+
f"dataset_manifest={cfg.test_ds.manifest_filepath}",
252+
f"output_filename={cfg.output_filename}",
253+
f"batch_size={cfg.test_ds.batch_size}",
254+
f"num_workers={cfg.test_ds.num_workers}",
255+
f"random_seed={cfg.random_seed}",
256+
f"eval_config_yaml={f.name}",
257+
f"decoder_type={cfg.inference.decoder_type}",
258+
]
259+
if hydra_overrides:
260+
base_cmd.extend(hydra_overrides.split())
242261
subprocess.run(
243-
f"python {script_path} "
244-
f"calculate_wer=False "
245-
f"model_path={cfg.model_path} "
246-
f"pretrained_name={cfg.pretrained_name} "
247-
f"dataset_manifest={cfg.test_ds.manifest_filepath} "
248-
f"output_filename={cfg.output_filename} "
249-
f"batch_size={cfg.test_ds.batch_size} "
250-
f"num_workers={cfg.test_ds.num_workers} "
251-
f"random_seed={cfg.random_seed} "
252-
f"eval_config_yaml={f.name} "
253-
f"decoder_type={cfg.inference.decoder_type} {hydra_overrides}",
254-
shell=True,
262+
base_cmd,
263+
shell=False,
255264
check=True,
256265
)
257266

0 commit comments

Comments
 (0)