Skip to content

Commit 5a58866

Browse files
committed
Conform CMU ARCTIC parsing code to the interface of the other datasets
1 parent d97877e commit 5a58866

File tree

1 file changed

+7
-11
lines changed

1 file changed

+7
-11
lines changed

scripts/data_loaders/L1ARCTIC.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@
2929
"slt": {"sex": "female", "lang": "US English", "accent": "US"},
3030
}
3131

32+
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "..", ".data", "CMU_ARCTIC")
33+
3234

3335
class L1ArcticDataset(BaseDataset):
3436
"""
@@ -37,12 +39,10 @@ class L1ArcticDataset(BaseDataset):
3739

3840
def __init__(
3941
self,
40-
data_dir=".data/CMU_ARCTIC",
4142
include_speaker_info=False,
4243
include_text=True,
4344
speaker_list=None,
4445
):
45-
self.data_dir = data_dir
4646
self.include_speaker_info = include_speaker_info
4747
self.include_text = include_text
4848

@@ -58,7 +58,7 @@ def _build_index(self):
5858

5959
# Process each speaker directory
6060
for speaker in self.speaker_list:
61-
speaker_dir = os.path.join(self.data_dir, f"cmu_us_{speaker}_arctic")
61+
speaker_dir = os.path.join(DATA_DIR, f"cmu_us_{speaker}_arctic")
6262

6363
# Skip if speaker directory doesn't exist
6464
if not os.path.exists(speaker_dir):
@@ -129,21 +129,18 @@ def _get_ix(self, idx):
129129
audio = audio_bytes_to_array(f.read())
130130

131131
result = [None, audio]
132-
if self.include_text:
133-
result.append(sample["text"])
134132
if self.include_speaker_info:
135133
speaker_info = SPEAKERS[sample["speaker"]]
136-
result.append(speaker_info)
137-
134+
result.append({**speaker_info, "speaker": sample["speaker"]})
135+
if self.include_text:
136+
result.append(sample["text"])
138137
return tuple(result)
139138

140139

141140
# Example usage
142141
if __name__ == "__main__":
143142
# Create the dataset with all speakers
144-
dataset = L1ArcticDataset(
145-
data_dir=".data/CMU_ARCTIC", include_speaker_info=True, include_text=True
146-
)
143+
dataset = L1ArcticDataset(include_speaker_info=True, include_text=True)
147144

148145
# Get the first sample
149146
sample = dataset[0]
@@ -156,7 +153,6 @@ def _get_ix(self, idx):
156153

157154
# Example of getting a specific speaker
158155
bdl_dataset = L1ArcticDataset(
159-
data_dir=".data/CMU_ARCTIC",
160156
include_speaker_info=True,
161157
include_text=True,
162158
speaker_list=["bdl"],

0 commit comments

Comments
 (0)