Skip to content

Commit 33d97ba

Browse files
authored
Add UUID support for the Avro schema (#1706)
## Which issue does this PR close?

`Fixed[16]`-encoded UUIDs are now supported in avro-rs: apache/avro-rs#255

With that change, the UUID logical type is handled automatically for both String and `Fixed[16]` schemas, provided the `logicalType` annotation is set correctly 👍

## What changes are included in this PR?

<!-- Provide a summary of the modifications in this PR. List the main changes such as new features, bug fixes, refactoring, or any other updates. -->

## Are these changes tested?

<!-- Specify what test covers (unit test, integration test, etc.). If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->
1 parent 921f389 commit 33d97ba

File tree

1 file changed

+27
-38
lines changed

1 file changed

+27
-38
lines changed

crates/iceberg/src/avro/schema.rs

Lines changed: 27 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,7 @@ const ELEMENT_ID: &str = "element-id";
3636
const FIELD_ID_PROP: &str = "field-id";
3737
const KEY_ID: &str = "key-id";
3838
const VALUE_ID: &str = "value-id";
39-
const UUID_BYTES: usize = 16;
40-
const UUID_LOGICAL_TYPE: &str = "uuid";
4139
const MAP_LOGICAL_TYPE: &str = "map";
42-
// # TODO: https://github.com/apache/iceberg-rust/issues/86
4340
// This const may better to maintain in avro-rs.
4441
const LOGICAL_TYPE: &str = "logicalType";
4542

@@ -237,8 +234,8 @@ impl SchemaVisitor for SchemaToAvroSchema {
237234
PrimitiveType::TimestampNs => AvroSchema::TimestampNanos,
238235
PrimitiveType::TimestamptzNs => AvroSchema::TimestampNanos,
239236
PrimitiveType::String => AvroSchema::String,
240-
PrimitiveType::Uuid => avro_fixed_schema(UUID_BYTES, Some(UUID_LOGICAL_TYPE))?,
241-
PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize, None)?,
237+
PrimitiveType::Uuid => AvroSchema::Uuid,
238+
PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize)?,
242239
PrimitiveType::Binary => AvroSchema::Bytes,
243240
PrimitiveType::Decimal { precision, scale } => {
244241
avro_decimal_schema(*precision as usize, *scale as usize)?
@@ -274,21 +271,13 @@ fn avro_record_schema(name: &str, fields: Vec<AvroRecordField>) -> Result<AvroSc
274271
}))
275272
}
276273

277-
pub(crate) fn avro_fixed_schema(len: usize, logical_type: Option<&str>) -> Result<AvroSchema> {
278-
let attributes = if let Some(logical_type) = logical_type {
279-
BTreeMap::from([(
280-
LOGICAL_TYPE.to_string(),
281-
Value::String(logical_type.to_string()),
282-
)])
283-
} else {
284-
Default::default()
285-
};
274+
pub(crate) fn avro_fixed_schema(len: usize) -> Result<AvroSchema> {
286275
Ok(AvroSchema::Fixed(FixedSchema {
287276
name: Name::new(format!("fixed_{len}").as_str())?,
288277
aliases: None,
289278
doc: None,
290279
size: len,
291-
attributes,
280+
attributes: Default::default(),
292281
default: None,
293282
}))
294283
}
@@ -533,30 +522,9 @@ impl AvroSchemaVisitor for AvroSchemaToSchema {
533522
AvroSchema::Long => Type::Primitive(PrimitiveType::Long),
534523
AvroSchema::Float => Type::Primitive(PrimitiveType::Float),
535524
AvroSchema::Double => Type::Primitive(PrimitiveType::Double),
525+
AvroSchema::Uuid => Type::Primitive(PrimitiveType::Uuid),
536526
AvroSchema::String | AvroSchema::Enum(_) => Type::Primitive(PrimitiveType::String),
537-
AvroSchema::Fixed(fixed) => {
538-
if let Some(logical_type) = fixed.attributes.get(LOGICAL_TYPE) {
539-
let logical_type = logical_type.as_str().ok_or_else(|| {
540-
Error::new(
541-
ErrorKind::DataInvalid,
542-
"logicalType in attributes of avro schema is not a string type",
543-
)
544-
})?;
545-
match logical_type {
546-
UUID_LOGICAL_TYPE => Type::Primitive(PrimitiveType::Uuid),
547-
ty => {
548-
return Err(Error::new(
549-
ErrorKind::FeatureUnsupported,
550-
format!(
551-
"Logical type {ty} is not support in iceberg primitive type.",
552-
),
553-
));
554-
}
555-
}
556-
} else {
557-
Type::Primitive(PrimitiveType::Fixed(fixed.size as u64))
558-
}
559-
}
527+
AvroSchema::Fixed(fixed) => Type::Primitive(PrimitiveType::Fixed(fixed.size as u64)),
560528
AvroSchema::Bytes => Type::Primitive(PrimitiveType::Binary),
561529
AvroSchema::Null => return Ok(None),
562530
_ => {
@@ -1223,4 +1191,25 @@ mod tests {
12231191
converter.primitive(&AvroSchema::Date).unwrap().unwrap()
12241192
);
12251193
}
1194+
1195+
#[test]
1196+
fn test_uuid_type() {
1197+
let avro_schema = {
1198+
AvroSchema::parse_str(
1199+
r#"
1200+
{"name": "test", "type": "fixed", "size": 16, "logicalType": "uuid"}
1201+
"#,
1202+
)
1203+
.unwrap()
1204+
};
1205+
1206+
let mut converter = AvroSchemaToSchema;
1207+
1208+
let iceberg_type = Type::from(PrimitiveType::Uuid);
1209+
1210+
assert_eq!(
1211+
iceberg_type,
1212+
converter.primitive(&avro_schema).unwrap().unwrap()
1213+
);
1214+
}
12261215
}

0 commit comments

Comments
 (0)