Skip to content

Commit c64591c

Browse files
authored
Fixed the scenario where a list is an optional column but its list element is required and the value is an empty list (#1263)
1 parent 6300bc4 commit c64591c

File tree

3 files changed

+132
-5
lines changed

3 files changed

+132
-5
lines changed

src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/DremelShredder.php

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,11 @@ public function shred(Column $column, array $row) : FlatColumnData
5050
return $flatData;
5151
}
5252

53-
private function shredFlat(FlatColumn $column, mixed $value, int $definitionLevel, int $repetitionLevel, FlatColumnData $data) : void
53+
private function shredFlat(FlatColumn $column, mixed $value, int $definitionLevel, int $repetitionLevel, FlatColumnData $data, bool $validate = true) : void
5454
{
55-
$this->validator->validate($column, $value);
55+
if ($validate) {
56+
$this->validator->validate($column, $value);
57+
}
5658

5759
if (!$column->repetition()?->isRequired() && $value !== null) {
5860
$definitionLevel++;
@@ -84,7 +86,7 @@ private function shredList(NestedColumn $column, ?array $listValue, int $definit
8486

8587
if ($listElementColumn instanceof FlatColumn) {
8688
if ($listValue === null) {
87-
$this->shredFlat($listElementColumn, null, $definitionLevel, $repetitionLevel - 1, $data);
89+
$this->shredFlat($listElementColumn, null, $definitionLevel, $repetitionLevel - 1, $data, false);
8890

8991
return;
9092
}
@@ -94,7 +96,7 @@ private function shredList(NestedColumn $column, ?array $listValue, int $definit
9496
}
9597

9698
if (!\count($listValue)) {
97-
$this->shredFlat($listElementColumn, null, $definitionLevel, $repetitionLevel - 1, $data);
99+
$this->shredFlat($listElementColumn, null, $definitionLevel, $repetitionLevel - 1, $data, false);
98100

99101
return;
100102
}
@@ -208,7 +210,7 @@ private function shredMap(NestedColumn $column, ?array $mapValue, int $definitio
208210

209211
if (!\count($mapValue)) {
210212
$this->shredFlat($keyColumn->makeOptional(), null, $definitionLevel, $repetitionLevel - 1, $data);
211-
$this->shredFlat($valueColumn, null, $definitionLevel, $repetitionLevel - 1, $data);
213+
$this->shredFlat($valueColumn, null, $definitionLevel, $repetitionLevel - 1, $data, false);
212214

213215
return;
214216
}

src/lib/parquet/tests/Flow/Parquet/Tests/Unit/ParquetFile/RowGroupBuilder/Dremel/DremelListsTest.php

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,6 +1396,54 @@ public function test_optional_list_optional_struct_optional_struct_optional_int3
13961396
self::assertEquals($rows, (new DremelAssembler(DataConverter::initialize(Options::default())))->assemble($schema->get('l'), $flatData));
13971397
}
13981398

1399+
#[TestWith([
1400+
[
1401+
['l' => []],
1402+
],
1403+
[
1404+
'l.list.element' => [
1405+
'repetition_levels' => [0],
1406+
'definition_levels' => [1],
1407+
'values' => [],
1408+
],
1409+
],
1410+
])]
1411+
#[TestWith([
1412+
[
1413+
['l' => [1, 2]],
1414+
],
1415+
[
1416+
'l.list.element' => [
1417+
'repetition_levels' => [0, 1],
1418+
'definition_levels' => [2, 2],
1419+
'values' => [1, 2],
1420+
],
1421+
],
1422+
])]
1423+
public function test_optional_list_required_int32(array $rows, array $expectedColumnData) : void
1424+
{
1425+
$schema = Schema::with(NestedColumn::list('l', ListElement::int32(true)));
1426+
1427+
$dremel = new DremelShredder(new ColumnDataValidator(), DataConverter::initialize(Options::default()));
1428+
1429+
self::assertEquals('OPTIONAL,REPEATED,REQUIRED', $schema->get('l.list.element')->repetitions());
1430+
self::assertEquals(2, $schema->get('l.list.element')->repetitions()->maxDefinitionLevel());
1431+
self::assertEquals(1, $schema->get('l.list.element')->repetitions()->maxRepetitionLevel());
1432+
1433+
/**
1434+
* @var ?FlatColumnData $flatData
1435+
*/
1436+
$flatData = \array_reduce(
1437+
$rows,
1438+
static fn (?FlatColumnData $flatData, array $row) => $flatData === null
1439+
? $dremel->shred($schema->get('l'), $row)
1440+
: $flatData->merge($dremel->shred($schema->get('l'), $row))
1441+
);
1442+
1443+
self::assertEquals($expectedColumnData, $flatData->normalize());
1444+
self::assertEquals($rows, (new DremelAssembler(DataConverter::initialize(Options::default())))->assemble($schema->get('l'), $flatData));
1445+
}
1446+
13991447
#[TestWith([
14001448
[
14011449
['l' => [1, 2, 3]],

src/lib/parquet/tests/Flow/Parquet/Tests/Unit/ParquetFile/RowGroupBuilder/Dremel/DremelMapsTest.php

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,83 @@ public function test_optional_map_string_optional_struct_optional_list_optional_
894894
}
895895
}
896896

897+
#[TestWith(
898+
[
899+
[
900+
['m' => []],
901+
],
902+
[
903+
'm.key_value.key' => [
904+
'repetition_levels' => [0],
905+
'definition_levels' => [1],
906+
'values' => [],
907+
],
908+
'm.key_value.value' => [
909+
'repetition_levels' => [0],
910+
'definition_levels' => [1],
911+
'values' => [],
912+
],
913+
],
914+
]
915+
)]
916+
#[TestWith(
917+
[
918+
[
919+
['m' => ['a' => 1, 'b' => 2]],
920+
],
921+
[
922+
'm.key_value.key' => [
923+
'repetition_levels' => [0, 1],
924+
'definition_levels' => [2, 2],
925+
'values' => ['a', 'b'],
926+
],
927+
'm.key_value.value' => [
928+
'repetition_levels' => [0, 1],
929+
'definition_levels' => [2, 2],
930+
'values' => [1, 2],
931+
],
932+
],
933+
]
934+
)]
935+
public function test_optional_map_string_required_int32(array $rows, array $expectedFlatData, ?string $exceptionMessage = null) : void
936+
{
937+
$schema = Schema::with(NestedColumn::map('m', MapKey::string(), MapValue::int32(true)));
938+
939+
$dremel = new DremelShredder(new ColumnDataValidator(), DataConverter::initialize(Options::default()));
940+
941+
self::assertEquals('OPTIONAL,REPEATED,REQUIRED', $schema->get('m.key_value.key')->repetitions());
942+
self::assertEquals('OPTIONAL,REPEATED,REQUIRED', $schema->get('m.key_value.value')->repetitions());
943+
944+
self::assertEquals(2, $schema->get('m.key_value.key')->repetitions()->maxDefinitionLevel());
945+
self::assertEquals(1, $schema->get('m.key_value.key')->repetitions()->maxRepetitionLevel());
946+
947+
self::assertEquals(2, $schema->get('m.key_value.value')->repetitions()->maxDefinitionLevel());
948+
self::assertEquals(1, $schema->get('m.key_value.value')->repetitions()->maxRepetitionLevel());
949+
950+
if ($exceptionMessage) {
951+
$this->expectExceptionMessage($exceptionMessage);
952+
\array_reduce(
953+
$rows,
954+
static fn (?FlatColumnData $flatData, array $row) => $flatData === null
955+
? $dremel->shred($schema->get('m'), $row)
956+
: $flatData->merge($dremel->shred($schema->get('m'), $row))
957+
);
958+
} else {
959+
/**
960+
* @var ?FlatColumnData $flatData
961+
*/
962+
$flatData = \array_reduce(
963+
$rows,
964+
static fn (?FlatColumnData $flatData, array $row) => $flatData === null
965+
? $dremel->shred($schema->get('m'), $row)
966+
: $flatData->merge($dremel->shred($schema->get('m'), $row))
967+
);
968+
969+
self::assertEquals($expectedFlatData, $flatData->normalize());
970+
self::assertEquals($rows, (new DremelAssembler(DataConverter::initialize(Options::default())))->assemble($schema->get('m'), $flatData));
971+
}
972+
}
973+
897974
#[TestWith([
898975
[
899976
['m' => null],

0 commit comments

Comments
 (0)