Skip to content

Commit 1b0f5ee

Browse files
committed
better test
1 parent 1f3287a commit 1b0f5ee

File tree

1 file changed

+126
-160
lines changed

1 file changed

+126
-160
lines changed

crates/iceberg/src/arrow/schema.rs

Lines changed: 126 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,32 +1988,7 @@ mod tests {
19881988
fn test_arrow_schema_to_schema_with_field_id() {
19891989
// Create a complex Arrow schema without field ID metadata
19901990
// Including: primitives, list, nested struct, map, and nested list of structs
1991-
1992-
// Nested struct: address { street: string, city: string, zip: int }
1993-
let address_fields = Fields::from(vec![
1994-
Field::new("street", DataType::Utf8, true),
1995-
Field::new("city", DataType::Utf8, false),
1996-
Field::new("zip", DataType::Int32, true),
1997-
]);
1998-
1999-
// Map: attributes { key: string, value: string }
2000-
let map_struct = DataType::Struct(Fields::from(vec![
2001-
Field::new("key", DataType::Utf8, false),
2002-
Field::new("value", DataType::Utf8, true),
2003-
]));
2004-
let map_type = DataType::Map(
2005-
Arc::new(Field::new(DEFAULT_MAP_FIELD_NAME, map_struct, false)),
2006-
false,
2007-
);
2008-
2009-
// Nested list of structs: orders [{ order_id: long, amount: double }]
2010-
let order_struct = DataType::Struct(Fields::from(vec![
2011-
Field::new("order_id", DataType::Int64, false),
2012-
Field::new("amount", DataType::Float64, false),
2013-
]));
2014-
20151991
let arrow_schema = ArrowSchema::new(vec![
2016-
// Primitive fields
20171992
Field::new("id", DataType::Int64, false),
20181993
Field::new("name", DataType::Utf8, true),
20191994
Field::new("price", DataType::Decimal128(10, 2), false),
@@ -2022,154 +1997,145 @@ mod tests {
20221997
DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
20231998
true,
20241999
),
2025-
// Simple list
20262000
Field::new(
20272001
"tags",
20282002
DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))),
20292003
true,
20302004
),
2031-
// Nested struct
2032-
Field::new("address", DataType::Struct(address_fields), true),
2033-
// Map type
2034-
Field::new("attributes", map_type, true),
2035-
// List of structs
2005+
Field::new(
2006+
"address",
2007+
DataType::Struct(Fields::from(vec![
2008+
Field::new("street", DataType::Utf8, true),
2009+
Field::new("city", DataType::Utf8, false),
2010+
Field::new("zip", DataType::Int32, true),
2011+
])),
2012+
true,
2013+
),
2014+
Field::new(
2015+
"attributes",
2016+
DataType::Map(
2017+
Arc::new(Field::new(
2018+
DEFAULT_MAP_FIELD_NAME,
2019+
DataType::Struct(Fields::from(vec![
2020+
Field::new("key", DataType::Utf8, false),
2021+
Field::new("value", DataType::Utf8, true),
2022+
])),
2023+
false,
2024+
)),
2025+
false,
2026+
),
2027+
true,
2028+
),
20362029
Field::new(
20372030
"orders",
2038-
DataType::List(Arc::new(Field::new("element", order_struct, true))),
2031+
DataType::List(Arc::new(Field::new(
2032+
"element",
2033+
DataType::Struct(Fields::from(vec![
2034+
Field::new("order_id", DataType::Int64, false),
2035+
Field::new("amount", DataType::Float64, false),
2036+
])),
2037+
true,
2038+
))),
20392039
true,
20402040
),
20412041
]);
20422042

20432043
let schema = arrow_schema_to_schema_auto_assign_ids(&arrow_schema).unwrap();
20442044

2045-
// Verify top-level field count
2046-
let fields = schema.as_struct().fields();
2047-
assert_eq!(fields.len(), 8);
2048-
2049-
// Check primitive fields
2050-
assert_eq!(fields[0].name, "id");
2051-
assert!(matches!(
2052-
fields[0].field_type.as_ref(),
2053-
Type::Primitive(PrimitiveType::Long)
2054-
));
2055-
assert!(fields[0].required);
2056-
2057-
assert_eq!(fields[1].name, "name");
2058-
assert!(matches!(
2059-
fields[1].field_type.as_ref(),
2060-
Type::Primitive(PrimitiveType::String)
2061-
));
2062-
2063-
assert_eq!(fields[2].name, "price");
2064-
assert!(matches!(
2065-
fields[2].field_type.as_ref(),
2066-
Type::Primitive(PrimitiveType::Decimal { .. })
2067-
));
2068-
2069-
assert_eq!(fields[3].name, "created_at");
2070-
assert!(matches!(
2071-
fields[3].field_type.as_ref(),
2072-
Type::Primitive(PrimitiveType::Timestamptz)
2073-
));
2074-
2075-
// Check simple list
2076-
assert_eq!(fields[4].name, "tags");
2077-
assert!(matches!(fields[4].field_type.as_ref(), Type::List(_)));
2078-
2079-
// Check nested struct
2080-
assert_eq!(fields[5].name, "address");
2081-
if let Type::Struct(struct_type) = fields[5].field_type.as_ref() {
2082-
assert_eq!(struct_type.fields().len(), 3);
2083-
assert_eq!(struct_type.fields()[0].name, "street");
2084-
assert_eq!(struct_type.fields()[1].name, "city");
2085-
assert_eq!(struct_type.fields()[2].name, "zip");
2086-
} else {
2087-
panic!("Expected struct type for address field");
2088-
}
2089-
2090-
// Check map type
2091-
assert_eq!(fields[6].name, "attributes");
2092-
if let Type::Map(map_type) = fields[6].field_type.as_ref() {
2093-
assert!(matches!(
2094-
map_type.key_field.field_type.as_ref(),
2095-
Type::Primitive(PrimitiveType::String)
2096-
));
2097-
assert!(matches!(
2098-
map_type.value_field.field_type.as_ref(),
2099-
Type::Primitive(PrimitiveType::String)
2100-
));
2101-
} else {
2102-
panic!("Expected map type for attributes field");
2103-
}
2104-
2105-
// Check list of structs
2106-
assert_eq!(fields[7].name, "orders");
2107-
if let Type::List(list_type) = fields[7].field_type.as_ref() {
2108-
if let Type::Struct(order_struct) = list_type.element_field.field_type.as_ref() {
2109-
assert_eq!(order_struct.fields().len(), 2);
2110-
assert_eq!(order_struct.fields()[0].name, "order_id");
2111-
assert_eq!(order_struct.fields()[1].name, "amount");
2112-
} else {
2113-
panic!("Expected struct type for orders list element");
2114-
}
2115-
} else {
2116-
panic!("Expected list type for orders field");
2117-
}
2118-
2119-
// Collect ALL field IDs (including deeply nested ones) and verify uniqueness
2120-
fn collect_field_ids(field_type: &Type, ids: &mut Vec<i32>) {
2121-
match field_type {
2122-
Type::Struct(s) => {
2123-
for f in s.fields() {
2124-
ids.push(f.id);
2125-
collect_field_ids(f.field_type.as_ref(), ids);
2126-
}
2127-
}
2128-
Type::List(l) => {
2129-
ids.push(l.element_field.id);
2130-
collect_field_ids(l.element_field.field_type.as_ref(), ids);
2131-
}
2132-
Type::Map(m) => {
2133-
ids.push(m.key_field.id);
2134-
ids.push(m.value_field.id);
2135-
collect_field_ids(m.key_field.field_type.as_ref(), ids);
2136-
collect_field_ids(m.value_field.field_type.as_ref(), ids);
2137-
}
2138-
Type::Primitive(_) => {}
2139-
}
2140-
}
2141-
2142-
let mut all_field_ids: Vec<i32> = fields.iter().map(|f| f.id).collect();
2143-
for field in fields {
2144-
collect_field_ids(field.field_type.as_ref(), &mut all_field_ids);
2145-
}
2146-
2147-
// All IDs should be positive
2148-
assert!(
2149-
all_field_ids.iter().all(|&id| id > 0),
2150-
"All field IDs should be positive, got: {all_field_ids:?}",
2151-
);
2152-
2153-
// All IDs should be unique
2154-
let unique_ids: std::collections::HashSet<_> = all_field_ids.iter().collect();
2155-
assert_eq!(
2156-
unique_ids.len(),
2157-
all_field_ids.len(),
2158-
"Field IDs should be unique, got duplicates in: {all_field_ids:?}",
2159-
);
2045+
// Build expected schema with exact field IDs following level-order assignment:
2046+
// Level 0: id=1, name=2, price=3, created_at=4, tags=5, address=6, attributes=7, orders=8
2047+
// Level 1: tags.element=9, address.{street=10,city=11,zip=12}, attributes.{key=13,value=14}, orders.element=15
2048+
// Level 2: orders.element.{order_id=16,amount=17}
2049+
let expected = Schema::builder()
2050+
.with_fields(vec![
2051+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(),
2052+
NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(),
2053+
NestedField::required(
2054+
3,
2055+
"price",
2056+
Type::Primitive(PrimitiveType::Decimal {
2057+
precision: 10,
2058+
scale: 2,
2059+
}),
2060+
)
2061+
.into(),
2062+
NestedField::optional(4, "created_at", Type::Primitive(PrimitiveType::Timestamptz))
2063+
.into(),
2064+
NestedField::optional(
2065+
5,
2066+
"tags",
2067+
Type::List(ListType {
2068+
element_field: NestedField::list_element(
2069+
9,
2070+
Type::Primitive(PrimitiveType::String),
2071+
false,
2072+
)
2073+
.into(),
2074+
}),
2075+
)
2076+
.into(),
2077+
NestedField::optional(
2078+
6,
2079+
"address",
2080+
Type::Struct(StructType::new(vec![
2081+
NestedField::optional(10, "street", Type::Primitive(PrimitiveType::String))
2082+
.into(),
2083+
NestedField::required(11, "city", Type::Primitive(PrimitiveType::String))
2084+
.into(),
2085+
NestedField::optional(12, "zip", Type::Primitive(PrimitiveType::Int))
2086+
.into(),
2087+
])),
2088+
)
2089+
.into(),
2090+
NestedField::optional(
2091+
7,
2092+
"attributes",
2093+
Type::Map(MapType {
2094+
key_field: NestedField::map_key_element(
2095+
13,
2096+
Type::Primitive(PrimitiveType::String),
2097+
)
2098+
.into(),
2099+
value_field: NestedField::map_value_element(
2100+
14,
2101+
Type::Primitive(PrimitiveType::String),
2102+
false,
2103+
)
2104+
.into(),
2105+
}),
2106+
)
2107+
.into(),
2108+
NestedField::optional(
2109+
8,
2110+
"orders",
2111+
Type::List(ListType {
2112+
element_field: NestedField::list_element(
2113+
15,
2114+
Type::Struct(StructType::new(vec![
2115+
NestedField::required(
2116+
16,
2117+
"order_id",
2118+
Type::Primitive(PrimitiveType::Long),
2119+
)
2120+
.into(),
2121+
NestedField::required(
2122+
17,
2123+
"amount",
2124+
Type::Primitive(PrimitiveType::Double),
2125+
)
2126+
.into(),
2127+
])),
2128+
false,
2129+
)
2130+
.into(),
2131+
}),
2132+
)
2133+
.into(),
2134+
])
2135+
.build()
2136+
.unwrap();
21602137

2161-
// Verify we have the expected number of fields (8 top-level + nested)
2162-
// Top-level: 8
2163-
// tags list element: 1
2164-
// address struct fields: 3
2165-
// attributes map key + value: 2
2166-
// orders list element: 1, order struct fields: 2
2167-
// Total: 8 + 1 + 3 + 2 + 1 + 2 = 17
2168-
assert_eq!(
2169-
all_field_ids.len(),
2170-
17,
2171-
"Expected 17 total fields, got {}",
2172-
all_field_ids.len()
2173-
);
2138+
pretty_assertions::assert_eq!(schema, expected);
2139+
assert_eq!(schema.highest_field_id(), 17);
21742140
}
21752141
}

0 commit comments

Comments
 (0)