Skip to content

Commit 593fe47

Browse files
committed
GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader
1 parent 222fac7 commit 593fe47

File tree

4 files changed

+30
-11
lines changed

4 files changed

+30
-11
lines changed

cpp/src/arrow/ipc/reader.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ class ArrayLoader {
245245
}
246246

247247
Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
248-
auto buffers = metadata_->buffers();
248+
auto* buffers = metadata_->buffers();
249249
CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
250250
if (buffer_index >= static_cast<int>(buffers->size())) {
251251
return Status::IOError("buffer_index out of range.");
@@ -262,7 +262,9 @@ class ArrayLoader {
262262

263263
Result<int64_t> GetVariadicCount(int i) {
264264
auto* variadic_counts = metadata_->variadicBufferCounts();
265+
auto* buffers = metadata_->buffers();
265266
CHECK_FLATBUFFERS_NOT_NULL(variadic_counts, "RecordBatch.variadicBufferCounts");
267+
CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
266268
if (i >= static_cast<int>(variadic_counts->size())) {
267269
return Status::IOError("variadic_count_index out of range.");
268270
}
@@ -272,8 +274,7 @@ class ArrayLoader {
272274
}
273275
// Detect an excessive variadic buffer count to avoid potential memory blowup
274276
// (GH-48900).
275-
const auto max_buffer_count =
276-
static_cast<int64_t>(metadata_->buffers()->size()) - buffer_index_;
277+
const auto max_buffer_count = static_cast<int64_t>(buffers->size()) - buffer_index_;
277278
if (count > max_buffer_count) {
278279
return Status::IOError("variadic buffer count exceeds available number of buffers");
279280
}

cpp/src/arrow/record_batch.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,13 @@ Result<std::shared_ptr<RecordBatch>> RecordBatch::FromStructArray(
266266
namespace {
267267

268268
Status ValidateColumnLength(const RecordBatch& batch, int i) {
269-
const auto& array = *batch.column(i);
270-
if (ARROW_PREDICT_FALSE(array.length() != batch.num_rows())) {
269+
// This function is part of the validation code path and should
270+
// be robust against invalid data, but `column()` would call MakeArray()
271+
// that can abort on invalid data.
272+
const auto& array = *batch.column_data(i);
273+
if (ARROW_PREDICT_FALSE(array.length != batch.num_rows())) {
271274
return Status::Invalid("Number of rows in column ", i,
272-
" did not match batch: ", array.length(), " vs ",
275+
" did not match batch: ", array.length, " vs ",
273276
batch.num_rows());
274277
}
275278
return Status::OK();
@@ -455,11 +458,12 @@ namespace {
455458
Status ValidateBatch(const RecordBatch& batch, bool full_validation) {
456459
for (int i = 0; i < batch.num_columns(); ++i) {
457460
RETURN_NOT_OK(ValidateColumnLength(batch, i));
458-
const auto& array = *batch.column(i);
461+
// See ValidateColumnLength about avoiding a ArrayData -> Array conversion
462+
const auto& array = *batch.column_data(i);
459463
const auto& schema_type = batch.schema()->field(i)->type();
460-
if (!array.type()->Equals(schema_type)) {
464+
if (!array.type->Equals(schema_type)) {
461465
return Status::Invalid("Column ", i,
462-
" type not match schema: ", array.type()->ToString(), " vs ",
466+
" type not match schema: ", array.type->ToString(), " vs ",
463467
schema_type->ToString());
464468
}
465469
const auto st = full_validation ? internal::ValidateArrayFull(array)

cpp/src/arrow/record_batch_test.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,6 @@ TEST_F(TestRecordBatch, Validate) {
318318
auto a3 = gen.ArrayOf(int16(), 5);
319319

320320
auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2});
321-
322321
ASSERT_OK(b1->ValidateFull());
323322

324323
// Length mismatch
@@ -328,6 +327,21 @@ TEST_F(TestRecordBatch, Validate) {
328327
// Type mismatch
329328
auto b3 = RecordBatch::Make(schema, length, {a0, a1, a0});
330329
ASSERT_RAISES(Invalid, b3->ValidateFull());
330+
331+
// Invalid column data (nulls in map key array) that would abort on MakeArray
332+
auto map_field = field("f", map(utf8(), int32()));
333+
schema = ::arrow::schema({map_field});
334+
auto map_key_data = ArrayFromJSON(utf8(), "[null]")->data();
335+
auto map_item_data = ArrayFromJSON(int32(), "[null]")->data();
336+
auto map_data = ArrayData::Make(map_field->type(), /*length=*/1, /*buffers=*/{nullptr},
337+
/*child_data=*/{map_key_data, map_item_data});
338+
339+
auto b4 = RecordBatch::Make(schema, /*num_rows=*/map_data->length, {map_data});
340+
ASSERT_RAISES(Invalid, b4->ValidateFull());
341+
342+
// Length mismatch with a column data that would also fail on MakeArray
343+
auto b5 = RecordBatch::Make(schema, /*num_rows=*/1 + map_data->length, {map_data});
344+
ASSERT_RAISES(Invalid, b5->Validate());
331345
}
332346

333347
TEST_F(TestRecordBatch, Slice) {

0 commit comments

Comments
 (0)