Skip to content

Commit 7ec20a2

Browse files
committed
Fixed issue with reading out of order lists, added exception for pyarrow.
1 parent 8c0d2d4 commit 7ec20a2

File tree

2 files changed

+17
-13
lines changed

2 files changed

+17
-13
lines changed

pandas/io/parsers/readers.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1535,16 +1535,15 @@ def read(self, nrows: int | None = None) -> DataFrame:
15351535
else:
15361536
dtype = None
15371537

1538-
if dtype is None and get_option("future.usecols_use_order"):
1538+
if get_option("future.usecols_use_order"):
15391539
if usecols is None or isfunction(usecols):
15401540
# Doesn't change anything if function or None gets passed
15411541
pass
15421542
elif len(usecols) == len(columns):
15431543
# ranks each value in usecols by size to pick the corresponding column
1544-
usecols_sorted = sorted(
1545-
range(len(usecols)), key=lambda i: usecols[i]
1546-
)
1547-
columns = [columns[i] for i in usecols_sorted]
1544+
value_ranked = {v: i for i, v in enumerate(sorted(usecols))}
1545+
usecols_pressed = [value_ranked[v] for v in usecols]
1546+
columns = [columns[i] for i in usecols_pressed]
15481547
col_dict = {k: col_dict[k] for k in columns}
15491548

15501549
if dtype is not None:

pandas/tests/io/parser/usecols/test_usecols_basic.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -554,22 +554,26 @@ def test_usecols_dtype(all_parsers):
554554
def test_usecols_order(all_parsers, usecols, usecols_use_order):
555555
# TODO: add portion in doc for 3.0 transition
556556
parser = all_parsers
557+
pyarrow_flag = False
557558
data = """\
558559
a,b,c,d
559560
1,2,3,0
560561
4,5,6,0
561562
7,8,9,0
562563
10,11,12,13"""
563564

564-
msg = "The pyarrow engine does not allow 'usecols' to be integer column positions"
565-
if parser.engine == "pyarrow" and isinstance(usecols[0], int):
566-
with pytest.raises(ValueError, match=msg):
567-
parser.read_csv(StringIO(data), usecols=usecols)
568-
return
569-
570-
result = parser.read_csv(StringIO(data), usecols=usecols)
565+
if parser.engine == "pyarrow":
566+
if isinstance(usecols[0], int):
567+
msg = "The pyarrow engine does not allow 'usecols' to be integer column"
568+
with pytest.raises(ValueError, match=msg):
569+
parser.read_csv(StringIO(data), usecols=usecols)
570+
return
571+
else:
572+
# The pyarrow engine appears to preserve the usecols column order by default.
573+
# Set a flag so the order-preserving expected DataFrame is selected below.
574+
pyarrow_flag = True
571575

572-
if usecols_use_order:
576+
if usecols_use_order or pyarrow_flag:
573577
expected = DataFrame(
574578
{"d": [0, 0, 0, 13], "a": [1, 4, 7, 10], "c": [3, 6, 9, 12]}
575579
)
@@ -579,4 +583,5 @@ def test_usecols_order(all_parsers, usecols, usecols_use_order):
579583
)
580584

581585
with option_context("future.usecols_use_order", usecols_use_order):
586+
result = parser.read_csv(StringIO(data), usecols=usecols)
582587
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)