2929 "slt" : {"sex" : "female" , "lang" : "US English" , "accent" : "US" },
3030}
3131
32+ DATA_DIR = os .path .join (os .path .dirname (__file__ ), ".." , ".." , ".data" , "CMU_ARCTIC" )
33+
3234
3335class L1ArcticDataset (BaseDataset ):
3436 """
@@ -37,12 +39,10 @@ class L1ArcticDataset(BaseDataset):
3739
3840 def __init__ (
3941 self ,
40- data_dir = ".data/CMU_ARCTIC" ,
4142 include_speaker_info = False ,
4243 include_text = True ,
4344 speaker_list = None ,
4445 ):
45- self .data_dir = data_dir
4646 self .include_speaker_info = include_speaker_info
4747 self .include_text = include_text
4848
@@ -58,7 +58,7 @@ def _build_index(self):
5858
5959 # Process each speaker directory
6060 for speaker in self .speaker_list :
61- speaker_dir = os .path .join (self . data_dir , f"cmu_us_{ speaker } _arctic" )
61+ speaker_dir = os .path .join (DATA_DIR , f"cmu_us_{ speaker } _arctic" )
6262
6363 # Skip if speaker directory doesn't exist
6464 if not os .path .exists (speaker_dir ):
@@ -129,21 +129,18 @@ def _get_ix(self, idx):
129129 audio = audio_bytes_to_array (f .read ())
130130
131131 result = [None , audio ]
132- if self .include_text :
133- result .append (sample ["text" ])
134132 if self .include_speaker_info :
135133 speaker_info = SPEAKERS [sample ["speaker" ]]
136- result .append (speaker_info )
137-
134+ result .append ({** speaker_info , "speaker" : sample ["speaker" ]})
135+ if self .include_text :
136+ result .append (sample ["text" ])
138137 return tuple (result )
139138
140139
141140# Example usage
142141if __name__ == "__main__" :
143142 # Create the dataset with all speakers
144- dataset = L1ArcticDataset (
145- data_dir = ".data/CMU_ARCTIC" , include_speaker_info = True , include_text = True
146- )
143+ dataset = L1ArcticDataset (include_speaker_info = True , include_text = True )
147144
148145 # Get the first sample
149146 sample = dataset [0 ]
@@ -156,7 +153,6 @@ def _get_ix(self, idx):
156153
157154 # Example of getting a specific speaker
158155 bdl_dataset = L1ArcticDataset (
159- data_dir = ".data/CMU_ARCTIC" ,
160156 include_speaker_info = True ,
161157 include_text = True ,
162158 speaker_list = ["bdl" ],
0 commit comments