11import os
2+ import posixpath
23import tempfile
34from pathlib import Path
45from unittest import TestCase
@@ -103,8 +104,8 @@ def test_read_files(self):
103104 reader = ReaderTest (tmp_dir , info )
104105
105106 files = [
106- {"filename" : os . path .join (tmp_dir , "train" )},
107- {"filename" : os . path .join (tmp_dir , "test" ), "skip" : 10 , "take" : 10 },
107+ {"filename" : posixpath .join (tmp_dir , "train" )},
108+ {"filename" : posixpath .join (tmp_dir , "test" ), "skip" : 10 , "take" : 10 },
108109 ]
109110 dset = Dataset (** reader .read_files (files , original_instructions = "train+test[10:20]" ))
110111 self .assertEqual (dset .num_rows , 110 )
@@ -169,18 +170,18 @@ def test_make_file_instructions_basic():
169170 assert isinstance (file_instructions , FileInstructions )
170171 assert file_instructions .num_examples == 33
171172 assert file_instructions .file_instructions == [
172- {"filename" : os . path .join (prefix_path , f"{ name } -train.arrow" ), "skip" : 0 , "take" : 33 }
173+ {"filename" : posixpath .join (prefix_path , f"{ name } -train.arrow" ), "skip" : 0 , "take" : 33 }
173174 ]
174175
175176 split_infos = [SplitInfo (name = "train" , num_examples = 100 , shard_lengths = [10 ] * 10 )]
176177 file_instructions = make_file_instructions (name , split_infos , instruction , filetype_suffix , prefix_path )
177178 assert isinstance (file_instructions , FileInstructions )
178179 assert file_instructions .num_examples == 33
179180 assert file_instructions .file_instructions == [
180- {"filename" : os . path .join (prefix_path , f"{ name } -train-00000-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
181- {"filename" : os . path .join (prefix_path , f"{ name } -train-00001-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
182- {"filename" : os . path .join (prefix_path , f"{ name } -train-00002-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
183- {"filename" : os . path .join (prefix_path , f"{ name } -train-00003-of-00010.arrow" ), "skip" : 0 , "take" : 3 },
181+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00000-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
182+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00001-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
183+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00002-of-00010.arrow" ), "skip" : 0 , "take" : - 1 },
184+ {"filename" : posixpath .join (prefix_path , f"{ name } -train-00003-of-00010.arrow" ), "skip" : 0 , "take" : 3 },
184185 ]
185186
186187
@@ -217,7 +218,7 @@ def test_make_file_instructions(split_name, instruction, shard_lengths, read_ran
217218 if not isinstance (shard_lengths , list ):
218219 assert file_instructions .file_instructions == [
219220 {
220- "filename" : os . path .join (prefix_path , f"{ name } -{ split_name } .arrow" ),
221+ "filename" : posixpath .join (prefix_path , f"{ name } -{ split_name } .arrow" ),
221222 "skip" : read_range [0 ],
222223 "take" : read_range [1 ] - read_range [0 ],
223224 }
@@ -226,7 +227,9 @@ def test_make_file_instructions(split_name, instruction, shard_lengths, read_ran
226227 file_instructions_list = []
227228 shard_offset = 0
228229 for i , shard_length in enumerate (shard_lengths ):
229- filename = os .path .join (prefix_path , f"{ name } -{ split_name } -{ i :05d} -of-{ len (shard_lengths ):05d} .arrow" )
230+ filename = posixpath .join (
231+ prefix_path , f"{ name } -{ split_name } -{ i :05d} -of-{ len (shard_lengths ):05d} .arrow"
232+ )
230233 if shard_offset <= read_range [0 ] < shard_offset + shard_length :
231234 file_instructions_list .append (
232235 {
0 commit comments