Skip to content

Commit 2630309

Browse files
committed
Fix reading of parquet files with nulls
Committing without tests to deploy the fix ASAP.
1 parent 3ce50a3 commit 2630309

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

parquet/src/arrow/arrow_array_reader.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use arrow::{
3131
buffer::MutableBuffer,
3232
datatypes::{DataType as ArrowType, ToByteSlice},
3333
};
34+
use std::cmp::max;
3435
use std::{any::Any, collections::VecDeque, marker::PhantomData};
3536
use std::{cell::RefCell, rc::Rc};
3637

@@ -359,7 +360,7 @@ impl<'a, C: ArrayConverter + 'a> ArrowArrayReader<'a, C> {
359360
buf,
360361
num_values,
361362
encoding,
362-
num_nulls: _,
363+
num_nulls,
363364
num_rows: _,
364365
def_levels_byte_len,
365366
rep_levels_byte_len,
@@ -410,7 +411,7 @@ impl<'a, C: ArrayConverter + 'a> ArrowArrayReader<'a, C> {
410411
let values_buffer = buf.start_from(offset);
411412
let value_iter = Self::get_value_decoder(
412413
values_buffer,
413-
num_values as usize,
414+
max(num_values - num_nulls, 0) as usize,
414415
encoding,
415416
column_desc,
416417
column_chunk_context,

parquet/src/column/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ impl<T: DataType> ColumnReaderImpl<T> {
334334
buf,
335335
num_values,
336336
encoding,
337-
num_nulls: _,
337+
num_nulls,
338338
num_rows: _,
339339
def_levels_byte_len,
340340
rep_levels_byte_len,
@@ -380,7 +380,7 @@ impl<T: DataType> ColumnReaderImpl<T> {
380380
encoding,
381381
&buf,
382382
offset,
383-
num_values as usize,
383+
max(num_values - num_nulls, 0) as usize,
384384
)?;
385385
return Ok(true);
386386
}

0 commit comments

Comments
 (0)