Skip to content

Commit f0555ba

Browse files
committed
Support Time/Date in Parquet Schema Converter
1 parent 5c637dc commit f0555ba

File tree

3 files changed

+65
-7
lines changed

3 files changed

+65
-7
lines changed

src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/SchemaConverter.php

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
namespace Flow\ETL\Adapter\Parquet;
66

77
use function Flow\ETL\DSL\{bool_schema,
8+
date_schema,
89
datetime_schema,
910
float_schema,
1011
int_schema,
@@ -15,13 +16,23 @@
1516
struct_schema,
1617
struct_type,
1718
structure_element,
19+
time_schema,
1820
type_list,
1921
type_map,
2022
uuid_schema};
2123
use Flow\ETL\Exception\RuntimeException;
2224
use Flow\ETL\PHP\Type\Logical\Map\{MapKey, MapValue};
2325
use Flow\ETL\PHP\Type\Logical\Structure\StructureElement;
24-
use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, XMLElementType, XMLType};
26+
use Flow\ETL\PHP\Type\Logical\{DateTimeType,
27+
DateType,
28+
JsonType,
29+
ListType,
30+
MapType,
31+
StructureType,
32+
TimeType,
33+
UuidType,
34+
XMLElementType,
35+
XMLType};
2536
use Flow\ETL\PHP\Type\Native\{ObjectType, ScalarType};
2637
use Flow\ETL\PHP\Type\Type;
2738
use Flow\ETL\PHP\Value\Uuid;
@@ -76,6 +87,10 @@ private function flowListToParquetList(ListType $type) : ListElement
7687
break;
7788
case DateTimeType::class:
7889
return ListElement::datetime(!$element->nullable());
90+
case DateType::class:
91+
return ListElement::date(!$element->nullable());
92+
case TimeType::class:
93+
return ListElement::time(!$element->nullable());
7994
case UuidType::class:
8095
return ListElement::uuid(!$element->nullable());
8196
case JsonType::class:
@@ -115,6 +130,10 @@ private function flowMapKeyToParquetMapKey(MapKey $mapKey) : ParquetSchema\MapKe
115130
return ParquetSchema\MapKey::uuid();
116131
case DateTimeType::class:
117132
return ParquetSchema\MapKey::datetime();
133+
case DateType::class:
134+
return ParquetSchema\MapKey::date();
135+
case TimeType::class:
136+
return ParquetSchema\MapKey::time();
118137
case ScalarType::class:
119138
switch ($mapKeyType->type()) {
120139
case ScalarType::FLOAT:
@@ -153,6 +172,10 @@ private function flowMapValueToParquetMapValue(MapValue $mapValue) : ParquetSche
153172
break;
154173
case UuidType::class:
155174
return ParquetSchema\MapValue::uuid(!$mapValueType->nullable());
175+
case DateType::class:
176+
return ParquetSchema\MapValue::date(!$mapValueType->nullable());
177+
case TimeType::class:
178+
return ParquetSchema\MapValue::time(!$mapValueType->nullable());
156179
case DateTimeType::class:
157180
return ParquetSchema\MapValue::datetime(!$mapValueType->nullable());
158181
case JsonType::class:
@@ -235,6 +258,10 @@ private function flowTypeToParquetType(string $name, Type $type) : Column
235258
switch ($type::class) {
236259
case ScalarType::class:
237260
return $this->flowScalarToParquetFlat($type, $name);
261+
case TimeType::class:
262+
return FlatColumn::time($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
263+
case DateType::class:
264+
return FlatColumn::date($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
238265
case DateTimeType::class:
239266
return FlatColumn::datetime($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
240267
case UuidType::class:
@@ -281,7 +308,7 @@ private function parquetFlatToFlowType(FlatColumn $column) : Schema\Definition
281308
if ($logicalType === null) {
282309
return match ($column->type()) {
283310
ParquetSchema\PhysicalType::INT32 => match ($column->convertedType()) {
284-
ParquetSchema\ConvertedType::DATE => datetime_schema($column->name(), $nullable),
311+
ParquetSchema\ConvertedType::DATE => date_schema($column->name(), $nullable),
285312
default => int_schema($column->name(), $nullable),
286313
},
287314
ParquetSchema\PhysicalType::INT64 => int_schema($column->name(), $nullable),
@@ -295,8 +322,8 @@ private function parquetFlatToFlowType(FlatColumn $column) : Schema\Definition
295322

296323
return match ($logicalType->name()) {
297324
ParquetSchema\LogicalType::STRING => str_schema($column->name(), $nullable),
298-
ParquetSchema\LogicalType::DATE => datetime_schema($column->name(), $nullable),
299-
ParquetSchema\LogicalType::TIME => int_schema($column->name(), $nullable),
325+
ParquetSchema\LogicalType::TIME => time_schema($column->name(), $nullable),
326+
ParquetSchema\LogicalType::DATE => date_schema($column->name(), $nullable),
300327
ParquetSchema\LogicalType::TIMESTAMP => datetime_schema($column->name(), $nullable),
301328
ParquetSchema\LogicalType::UUID => uuid_schema($column->name(), $nullable),
302329
ParquetSchema\LogicalType::JSON => json_schema($column->name(), $nullable),

src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/ParquetToFlowSchemaTest.php

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,26 @@
44

55
namespace Flow\ETL\Adapter\Parquet\Tests\Unit;
66

7-
use function Flow\ETL\DSL\{bool_schema, datetime_schema, float_schema, int_schema, json_schema, list_schema, map_schema, str_schema, struct_element, struct_schema, type_boolean, type_int, type_list, type_map, type_string, type_structure, type_uuid, uuid_schema};
7+
use function Flow\ETL\DSL\{bool_schema,
8+
date_schema,
9+
datetime_schema,
10+
float_schema,
11+
int_schema,
12+
json_schema,
13+
list_schema,
14+
map_schema,
15+
str_schema,
16+
struct_element,
17+
struct_schema,
18+
time_schema,
19+
type_boolean,
20+
type_int,
21+
type_list,
22+
type_map,
23+
type_string,
24+
type_structure,
25+
type_uuid,
26+
uuid_schema};
827
use Flow\ETL\Adapter\Parquet\SchemaConverter;
928
use Flow\Parquet\ParquetFile\Schema;
1029
use Flow\Parquet\ParquetFile\Schema\{MapKey, MapValue};
@@ -40,8 +59,8 @@ public function test_converting_flat_fields_to_flow_schema() : void
4059
float_schema('double', true),
4160
float_schema('decimal', true),
4261
bool_schema('boolean', true),
43-
datetime_schema('date', true),
44-
int_schema('time', true),
62+
date_schema('date', true),
63+
time_schema('time', true),
4564
datetime_schema('datetime', true),
4665
uuid_schema('uuid', true),
4766
json_schema('json', true),

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,18 @@ function datetime_schema(string $name, bool $nullable = false, ?Schema\Metadata
13021302
return Definition::datetime($name, $nullable, $metadata);
13031303
}
13041304

1305+
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
1306+
function time_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition
1307+
{
1308+
return Definition::time($name, $nullable, $metadata);
1309+
}
1310+
1311+
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
1312+
function date_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition
1313+
{
1314+
return Definition::date($name, $nullable, $metadata);
1315+
}
1316+
13051317
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
13061318
function json_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition
13071319
{

0 commit comments

Comments
 (0)