Skip to content

Commit 450410a

Browse files
committed
apacheGH-41667: [C++][Parquet] Refuse writing non-nullable column that contains nulls
A non-nullable column that contains nulls would result in an invalid Parquet file.
1 parent 50f2d6e commit 450410a

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3349,6 +3349,27 @@ TEST(TestArrowWrite, CheckChunkSize) {
33493349
WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size));
33503350
}
33513351

3352+
void CheckWritingNonNullableColumnWithNulls(std::shared_ptr<::arrow::Field> field,
3353+
std::string json_batch) {
3354+
ARROW_SCOPED_TRACE("field = ", field, ", json_batch = ", json_batch);
3355+
auto schema = ::arrow::schema({field});
3356+
auto table = ::arrow::TableFromJSON(schema, {json_batch});
3357+
auto sink = CreateOutputStream();
3358+
EXPECT_RAISES_WITH_MESSAGE_THAT(
3359+
Invalid, ::testing::HasSubstr("is declared non-nullable but contains nulls"),
3360+
WriteTable(*table, ::arrow::default_memory_pool(), sink));
3361+
}
3362+
3363+
TEST(TestArrowWrite, InvalidSchema) {
3364+
// GH-41667: nulls in non-nullable column
3365+
CheckWritingNonNullableColumnWithNulls(
3366+
::arrow::field("a", ::arrow::int32(), /*nullable=*/false),
3367+
R"([{"a": 456}, {"a": null}])");
3368+
CheckWritingNonNullableColumnWithNulls(
3369+
::arrow::field("a", ::arrow::utf8(), /*nullable=*/false),
3370+
R"([{"a": "foo"}, {"a": null}])");
3371+
}
3372+
33523373
void DoNestedValidate(const std::shared_ptr<::arrow::DataType>& inner_type,
33533374
const std::shared_ptr<::arrow::Field>& outer_field,
33543375
const std::shared_ptr<Buffer>& buffer,

cpp/src/parquet/column_writer.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1301,6 +1301,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
13011301
bool single_nullable_element =
13021302
(level_info_.def_level == level_info_.repeated_ancestor_def_level + 1) &&
13031303
leaf_field_nullable;
1304+
if (!leaf_field_nullable && leaf_array.null_count() != 0) {
1305+
return Status::Invalid("Column '", descr_->name(),
1306+
"' is declared non-nullable but contains nulls");
1307+
}
13041308
bool maybe_parent_nulls = level_info_.HasNullableValues() && !single_nullable_element;
13051309
if (maybe_parent_nulls) {
13061310
ARROW_ASSIGN_OR_RAISE(

0 commit comments

Comments
 (0)