Skip to content

Commit b7cd1f5

Browse files
matthias-Q and kevinjqliu
authored and committed
feat: add schema conversion from avro timestamp-millis and uuid (apache#2173)
# Rationale for this change

The schema conversion util from Avro schema to Iceberg schema ignored `timestamp-millis`. This change adds that conversion, and also adds conversion from `uuid`.

# Are these changes tested?

Added tests for `timestamp-millis` and `timestamp-micros`, as the latter was missing.

# Are there any user-facing changes?

No.

---------

Co-authored-by: Kevin Liu <[email protected]>
1 parent ea1059d commit b7cd1f5

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

pyiceberg/utils/schema_conversion.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@
6969
# Lookup table from (Avro logical type, underlying Avro type) pairs to the
# Iceberg primitive type they convert to.
LOGICAL_FIELD_TYPE_MAPPING: Dict[Tuple[str, str], PrimitiveType] = {
    ("date", "int"): DateType(),
    ("time-micros", "long"): TimeType(),
    # The Avro specification defines timestamp-millis as an annotation on
    # `long`; without this entry spec-conformant schemas have no match here.
    ("timestamp-millis", "long"): TimestampType(),
    # Kept for backward compatibility with schemas that annotate an `int`.
    # NOTE(review): this combination is not defined by the Avro spec - confirm
    # whether it should remain supported.
    ("timestamp-millis", "int"): TimestampType(),
    ("timestamp-micros", "long"): TimestampType(),
    ("uuid", "fixed"): UUIDType(),
    ("uuid", "string"): UUIDType(),
}
7577

7678
# Type alias used for Avro type values: a plain string or any other object.
AvroType = Union[str, Any]

tests/utils/test_schema_conversion.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
NestedField,
3434
StringType,
3535
StructType,
36+
TimestampType,
3637
UnknownType,
38+
UUIDType,
3739
)
3840
from pyiceberg.utils.schema_conversion import AvroSchemaConversion
3941

@@ -327,6 +329,30 @@ def test_convert_date_type() -> None:
327329
assert actual == DateType()
328330

329331

332+
def test_convert_uuid_str_type() -> None:
    """A ``string`` annotated with the ``uuid`` logical type converts to ``UUIDType``."""
    converter = AvroSchemaConversion()
    uuid_as_string = {"type": "string", "logicalType": "uuid"}

    converted = converter._convert_logical_type(uuid_as_string)

    assert converted == UUIDType()
336+
337+
338+
def test_convert_uuid_fixed_type() -> None:
    """A ``fixed`` annotated with the ``uuid`` logical type converts to ``UUIDType``."""
    converter = AvroSchemaConversion()
    uuid_as_fixed = {"type": "fixed", "logicalType": "uuid"}

    converted = converter._convert_logical_type(uuid_as_fixed)

    assert converted == UUIDType()
342+
343+
344+
def test_convert_timestamp_millis_type() -> None:
    """An ``int`` annotated with ``timestamp-millis`` converts to ``TimestampType``.

    NOTE(review): the Avro specification defines ``timestamp-millis`` on
    ``long``; this test pins the ``int`` combination - confirm it is intended.
    """
    converter = AvroSchemaConversion()
    millis_schema = {"type": "int", "logicalType": "timestamp-millis"}

    converted = converter._convert_logical_type(millis_schema)

    assert converted == TimestampType()
348+
349+
350+
def test_convert_timestamp_micros_type() -> None:
    """A ``long`` annotated with ``timestamp-micros`` converts to ``TimestampType``.

    The physical type is ``long`` because that is the type the Avro
    specification assigns to ``timestamp-micros``.
    """
    avro_logical_type = {"type": "long", "logicalType": "timestamp-micros"}
    actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
    assert actual == TimestampType()
354+
355+
330356
def test_unknown_logical_type() -> None:
331357
"""Test raising a ValueError when converting an unknown logical type as part of an Avro schema conversion"""
332358
avro_logical_type = {"type": "bytes", "logicalType": "date"}

0 commit comments

Comments
 (0)