Skip to content

Commit 0d37de8

Browse files
committed
add tool_dir as an arg
1 parent dad4323 commit 0d37de8

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

ssak/utils/kaldi.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,10 @@ def parse_line(line):
5858
}
5959

6060

61-
def check_kaldi_dir(dirname, language=None, strict_sort=False):
61+
def check_kaldi_dir(dirname, language=None, strict_sort=False, tool_dir=None):
6262
strict_sort = "true" if strict_sort else "false"
63-
tool_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), "tools", "kaldi", "utils")
64-
63+
if not tool_dir:
64+
tool_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), "tools", "kaldi", "utils")
6565
if os.path.isfile(os.path.join(dirname, "text")):
6666
with open(os.path.join(dirname, "text")) as f:
6767
texts = dict(parse_line(line) for line in f)

tools/kaldi/datasets2kaldi/yodas2kaldi.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
parser = argparse.ArgumentParser(description="Convert yodas dataset to Kaldi format")
1414
parser.add_argument("--force", action="store_true", default=True)
1515
parser.add_argument("--input", type=str, default="/data-server/datasets/audio/transcript/fr/YODAS/fr000")
16-
parser.add_argument("--output", type=str, default="/data-server/datasets/audio/kaldi/fr/YODAS/fr000_2")
16+
parser.add_argument("--output", type=str, default="/data-server/datasets/audio/kaldi/fr/YODAS/fr000")
1717
args = parser.parse_args()
1818

1919
input_dataset = args.input
@@ -51,7 +51,7 @@
5151
audio_ids = Row2Info("id", ["audio_id"], 3, None, None)
5252
spk_ids = Row2Info("id", ["speaker"], 4, None, None)
5353
dev_reader = Reader2Kaldi(input_dataset, processors=[texts, durations, audios, audio_ids, spk_ids])
54-
dataset = dev_reader.load(debug=False, accept_missing_speaker=True)
54+
dataset = dev_reader.load(debug=False, accept_missing_speaker=False)
5555

5656
def filter(row):
5757
if row.id.startswith("E--pPwqi_50-"):

0 commit comments

Comments
 (0)