1
1
import os
2
+ import posixpath
2
3
import tempfile
3
4
from pathlib import Path
4
5
from unittest import TestCase
@@ -103,8 +104,8 @@ def test_read_files(self):
103
104
reader = ReaderTest (tmp_dir , info )
104
105
105
106
files = [
106
- {"filename" : os . path .join (tmp_dir , "train" )},
107
- {"filename" : os . path .join (tmp_dir , "test" ), "skip" : 10 , "take" : 10 },
107
+ {"filename" : posixpath .join (tmp_dir , "train" )},
108
+ {"filename" : posixpath .join (tmp_dir , "test" ), "skip" : 10 , "take" : 10 },
108
109
]
109
110
dset = Dataset (** reader .read_files (files , original_instructions = "train+test[10:20]" ))
110
111
self .assertEqual (dset .num_rows , 110 )
@@ -169,18 +170,18 @@ def test_make_file_instructions_basic():
169
170
assert isinstance (file_instructions , FileInstructions )
170
171
assert file_instructions .num_examples == 33
171
172
assert file_instructions .file_instructions == [
172
- {"filename" : os . path .join (prefix_path , f"{ name } -train.arrow" ), "skip" : 0 , "take" : 33 }
173
+ {"filename" : posixpath .join (prefix_path , f"{ name } -train.arrow" ), "skip" : 0 , "take" : 33 }
173
174
]
174
175
175
176
split_infos = [SplitInfo (name = "train" , num_examples = 100 , shard_lengths = [10 ] * 10 )]
176
177
file_instructions = make_file_instructions (name , split_infos , instruction , filetype_suffix , prefix_path )
177
178
assert isinstance (file_instructions , FileInstructions )
178
179
assert file_instructions .num_examples == 33
179
180
assert file_instructions .file_instructions == [
180
- {"filename" : os . path .join (prefix_path , f"{ name } -train-00000-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
181
- {"filename" : os . path .join (prefix_path , f"{ name } -train-00001-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
182
- {"filename" : os . path .join (prefix_path , f"{ name } -train-00002-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
183
- {"filename" : os . path .join (prefix_path , f"{ name } -train-00003-of-00010.arrow" ), "skip" : 0 , "take" : 3 },
181
+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00000-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
182
+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00001-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
183
+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00002-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
184
+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00003-of-00010.arrow" ), "skip" : 0 , "take" : 3 },
184
185
]
185
186
186
187
@@ -217,7 +218,7 @@ def test_make_file_instructions(split_name, instruction, shard_lengths, read_ran
217
218
if not isinstance (shard_lengths , list ):
218
219
assert file_instructions .file_instructions == [
219
220
{
220
- "filename" : os . path .join (prefix_path , f"{ name } -{ split_name } .arrow" ),
221
+ "filename" : posixpath .join (prefix_path , f"{ name } -{ split_name } .arrow" ),
221
222
"skip" : read_range [0 ],
222
223
"take" : read_range [1 ] - read_range [0 ],
223
224
}
@@ -226,7 +227,9 @@ def test_make_file_instructions(split_name, instruction, shard_lengths, read_ran
226
227
file_instructions_list = []
227
228
shard_offset = 0
228
229
for i , shard_length in enumerate (shard_lengths ):
229
- filename = os .path .join (prefix_path , f"{ name } -{ split_name } -{ i :05d} -of-{ len (shard_lengths ):05d} .arrow" )
230
+ filename = posixpath .join (
231
+ prefix_path , f"{ name } -{ split_name } -{ i :05d} -of-{ len (shard_lengths ):05d} .arrow"
232
+ )
230
233
if shard_offset <= read_range [0 ] < shard_offset + shard_length :
231
234
file_instructions_list .append (
232
235
{
0 commit comments