Skip to content

Commit 737ff7c

Browse files
author
root
committed
fix: preserve row count for zero-column DataFrames in Arrow export
`DataFrameStreamIterator` used `first_col_n_chunks()` to determine how many batches to emit. For zero-column DataFrames this returns 0, so the iterator produced no batches and the resulting Arrow table had 0 rows. Use `usize::max(1, df.first_col_n_chunks())` so that zero-column frames still emit one batch. Also store the DataFrame height so that, when there are no columns, the batch length comes from the frame height instead of from `batch_cols[0].len()`, which would index into an empty vec. Fixes #26834.
1 parent 28dda55 commit 737ff7c

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

crates/polars-python/src/interop/arrow/to_py.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ pub struct DataFrameStreamIterator {
120120
dtype: ArrowDataType,
121121
idx: usize,
122122
n_chunks: usize,
123+
height: usize,
123124
}
124125

125126
impl DataFrameStreamIterator {
@@ -135,7 +136,8 @@ impl DataFrameStreamIterator {
135136
.collect(),
136137
dtype,
137138
idx: 0,
138-
n_chunks: df.first_col_n_chunks(),
139+
n_chunks: usize::max(1, df.first_col_n_chunks()),
140+
height: df.height(),
139141
}
140142
}
141143

@@ -151,17 +153,17 @@ impl Iterator for DataFrameStreamIterator {
151153
if self.idx >= self.n_chunks {
152154
None
153155
} else {
154-
// create a batch of the columns with the same chunk no.
155-
let batch_cols = self
156+
let batch_cols: Vec<ArrayRef> = self
156157
.columns
157158
.iter()
158159
.map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
159-
.collect::<Vec<_>>();
160+
.collect();
160161
self.idx += 1;
161162

163+
let len = batch_cols.first().map_or(self.height, |c| c.len());
162164
let array = arrow::array::StructArray::new(
163165
self.dtype.clone(),
164-
batch_cols[0].len(),
166+
len,
165167
batch_cols,
166168
None,
167169
);

py-polars/tests/unit/interop/test_interop.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,17 @@ def test_arrow_empty_dataframe() -> None:
7070
assert df2.shape == (0, 1)
7171

7272

73+
74+
def test_arrow_zero_column_dataframe_preserves_rows() -> None:
75+
df = pl.DataFrame(height=5)
76+
assert df.shape == (5, 0)
77+
78+
tbl = pa.table(df)
79+
assert tbl.shape == (5, 0)
80+
81+
tbl2 = df.to_arrow()
82+
assert tbl2.shape == (5, 0)
83+
7384
def test_arrow_dict_to_polars() -> None:
7485
pa_dict = pa.DictionaryArray.from_arrays(
7586
indices=np.array([0, 1, 2, 3, 1, 0, 2, 3, 3, 2]),

0 commit comments

Comments
 (0)