Skip to content

Commit 9690d23

Browse files
committed
nemo pipeline support from json input
1 parent abe76f7 commit 9690d23

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

tools/nemo/generate_dataset_list_files.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import json
23
import logging
34
import os
45
import re
@@ -13,9 +14,21 @@ def generate_dataset_list_files(dataset_list, dataset_folder, dest, mode, subset
1314
with open(dest) as f:
1415
return f.read().strip().split("\n")
1516
new_list = []
16-
with open(dataset_list) as f:
17-
datasets = f.read().strip().split("\n")
18-
17+
if dataset_list.endswith(".json"):
18+
with open(dataset_list) as f:
19+
data = json.load(f)
20+
datasets = []
21+
for d in data:
22+
if data[d]:
23+
if mode == "dev" and data[d].get("valid", False):
24+
datasets.append(data[d].get("kaldi_subpath", d))
25+
elif mode == "train":
26+
datasets.append(data[d].get("kaldi_subpath", d))
27+
elif mode == "train":
28+
datasets.append(d)
29+
else:
30+
with open(dataset_list) as f:
31+
datasets = f.read().strip().split("\n")
1932
patterns = ""
2033
if mode == "train":
2134
patterns = r"train$|split\d$"

0 commit comments

Comments
 (0)