Skip to content

Commit 678d287

Browse files
authored
Update test_arrow_reader.py
1 parent 17ac716 commit 678d287

File tree

1 file changed

+12
-9
lines changed

1 file changed

+12
-9
lines changed

tests/test_arrow_reader.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import posixpath
23
import tempfile
34
from pathlib import Path
45
from unittest import TestCase
@@ -103,8 +104,8 @@ def test_read_files(self):
103104
reader = ReaderTest(tmp_dir, info)
104105

105106
files = [
106-
{"filename": os.path.join(tmp_dir, "train")},
107-
{"filename": os.path.join(tmp_dir, "test"), "skip": 10, "take": 10},
107+
{"filename": posixpath.join(tmp_dir, "train")},
108+
{"filename": posixpath.join(tmp_dir, "test"), "skip": 10, "take": 10},
108109
]
109110
dset = Dataset(**reader.read_files(files, original_instructions="train+test[10:20]"))
110111
self.assertEqual(dset.num_rows, 110)
@@ -169,18 +170,18 @@ def test_make_file_instructions_basic():
169170
assert isinstance(file_instructions, FileInstructions)
170171
assert file_instructions.num_examples == 33
171172
assert file_instructions.file_instructions == [
172-
{"filename": os.path.join(prefix_path, f"{name}-train.arrow"), "skip": 0, "take": 33}
173+
{"filename": posixpath.join(prefix_path, f"{name}-train.arrow"), "skip": 0, "take": 33}
173174
]
174175

175176
split_infos = [SplitInfo(name="train", num_examples=100, shard_lengths=[10] * 10)]
176177
file_instructions = make_file_instructions(name, split_infos, instruction, filetype_suffix, prefix_path)
177178
assert isinstance(file_instructions, FileInstructions)
178179
assert file_instructions.num_examples == 33
179180
assert file_instructions.file_instructions == [
180-
{"filename": os.path.join(prefix_path, f"{name}-train-00000-of-00010.arrow"), "skip": 0, "take": -1},
181-
{"filename": os.path.join(prefix_path, f"{name}-train-00001-of-00010.arrow"), "skip": 0, "take": -1},
182-
{"filename": os.path.join(prefix_path, f"{name}-train-00002-of-00010.arrow"), "skip": 0, "take": -1},
183-
{"filename": os.path.join(prefix_path, f"{name}-train-00003-of-00010.arrow"), "skip": 0, "take": 3},
181+
{"filename": posixpath.join(prefix_path, f"{name}-train-00000-of-00010.arrow"), "skip": 0, "take": -1},
182+
{"filename": posixpath.join(prefix_path, f"{name}-train-00001-of-00010.arrow"), "skip": 0, "take": -1},
183+
{"filename": posixpath.join(prefix_path, f"{name}-train-00002-of-00010.arrow"), "skip": 0, "take": -1},
184+
{"filename": posixpath.join(prefix_path, f"{name}-train-00003-of-00010.arrow"), "skip": 0, "take": 3},
184185
]
185186

186187

@@ -217,7 +218,7 @@ def test_make_file_instructions(split_name, instruction, shard_lengths, read_ran
217218
if not isinstance(shard_lengths, list):
218219
assert file_instructions.file_instructions == [
219220
{
220-
"filename": os.path.join(prefix_path, f"{name}-{split_name}.arrow"),
221+
"filename": posixpath.join(prefix_path, f"{name}-{split_name}.arrow"),
221222
"skip": read_range[0],
222223
"take": read_range[1] - read_range[0],
223224
}
@@ -226,7 +227,9 @@ def test_make_file_instructions(split_name, instruction, shard_lengths, read_ran
226227
file_instructions_list = []
227228
shard_offset = 0
228229
for i, shard_length in enumerate(shard_lengths):
229-
filename = os.path.join(prefix_path, f"{name}-{split_name}-{i:05d}-of-{len(shard_lengths):05d}.arrow")
230+
filename = posixpath.join(
231+
prefix_path, f"{name}-{split_name}-{i:05d}-of-{len(shard_lengths):05d}.arrow"
232+
)
230233
if shard_offset <= read_range[0] < shard_offset + shard_length:
231234
file_instructions_list.append(
232235
{

0 commit comments

Comments
 (0)