@@ -1988,32 +1988,7 @@ mod tests {
// Test, shown as a diff: converts an Arrow schema that carries no field-ID metadata with
// `arrow_schema_to_schema_auto_assign_ids` and checks the IDs assigned to every field,
// including fields nested inside the list, struct and map types.
// Lines marked `-` belong to the previous version of the test; lines marked `+` replace them.
19881988 fn test_arrow_schema_to_schema_with_field_id ( ) {
19891989 // Create a complex Arrow schema without field ID metadata
19901990 // Including: primitives, list, nested struct, map, and nested list of structs
// Removed below: the nested struct, map and order-struct types used to be built as separate
// locals (`address_fields`, `map_type`, `order_struct`) ahead of the schema literal.
1991-
1992- // Nested struct: address { street: string, city: string, zip: int }
1993- let address_fields = Fields :: from ( vec ! [
1994- Field :: new( "street" , DataType :: Utf8 , true ) ,
1995- Field :: new( "city" , DataType :: Utf8 , false ) ,
1996- Field :: new( "zip" , DataType :: Int32 , true ) ,
1997- ] ) ;
1998-
1999- // Map: attributes { key: string, value: string }
2000- let map_struct = DataType :: Struct ( Fields :: from ( vec ! [
2001- Field :: new( "key" , DataType :: Utf8 , false ) ,
2002- Field :: new( "value" , DataType :: Utf8 , true ) ,
2003- ] ) ) ;
2004- let map_type = DataType :: Map (
2005- Arc :: new ( Field :: new ( DEFAULT_MAP_FIELD_NAME , map_struct, false ) ) ,
2006- false ,
2007- ) ;
2008-
2009- // Nested list of structs: orders [{ order_id: long, amount: double }]
2010- let order_struct = DataType :: Struct ( Fields :: from ( vec ! [
2011- Field :: new( "order_id" , DataType :: Int64 , false ) ,
2012- Field :: new( "amount" , DataType :: Float64 , false ) ,
2013- ] ) ) ;
2014-
20151991 let arrow_schema = ArrowSchema :: new ( vec ! [
2016- // Primitive fields
20171992 Field :: new( "id" , DataType :: Int64 , false ) ,
20181993 Field :: new( "name" , DataType :: Utf8 , true ) ,
20191994 Field :: new( "price" , DataType :: Decimal128 ( 10 , 2 ) , false ) ,
@@ -2022,154 +1997,145 @@ mod tests {
// Hunk boundary: the lines that open the fourth field (`created_at`) fall between the two
// hunks and are not shown; the next three lines are the tail of that field.
20221997 DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( "+00:00" . into( ) ) ) ,
20231998 true ,
20241999 ) ,
2025- // Simple list
20262000 Field :: new(
20272001 "tags" ,
20282002 DataType :: List ( Arc :: new( Field :: new( "item" , DataType :: Utf8 , true ) ) ) ,
20292003 true ,
20302004 ) ,
2031- // Nested struct
2032- Field :: new( "address" , DataType :: Struct ( address_fields) , true ) ,
2033- // Map type
2034- Field :: new( "attributes" , map_type, true ) ,
2035- // List of structs
// Added below: the same `address`, `attributes` and `orders` types, now constructed inline
// inside the schema literal with the same names, data types and nullability flags.
2005+ Field :: new(
2006+ "address" ,
2007+ DataType :: Struct ( Fields :: from( vec![
2008+ Field :: new( "street" , DataType :: Utf8 , true ) ,
2009+ Field :: new( "city" , DataType :: Utf8 , false ) ,
2010+ Field :: new( "zip" , DataType :: Int32 , true ) ,
2011+ ] ) ) ,
2012+ true ,
2013+ ) ,
2014+ Field :: new(
2015+ "attributes" ,
2016+ DataType :: Map (
2017+ Arc :: new( Field :: new(
2018+ DEFAULT_MAP_FIELD_NAME ,
2019+ DataType :: Struct ( Fields :: from( vec![
2020+ Field :: new( "key" , DataType :: Utf8 , false ) ,
2021+ Field :: new( "value" , DataType :: Utf8 , true ) ,
2022+ ] ) ) ,
2023+ false ,
2024+ ) ) ,
2025+ false ,
2026+ ) ,
2027+ true ,
2028+ ) ,
20362029 Field :: new(
20372030 "orders" ,
2038- DataType :: List ( Arc :: new( Field :: new( "element" , order_struct, true ) ) ) ,
2031+ DataType :: List ( Arc :: new( Field :: new(
2032+ "element" ,
2033+ DataType :: Struct ( Fields :: from( vec![
2034+ Field :: new( "order_id" , DataType :: Int64 , false ) ,
2035+ Field :: new( "amount" , DataType :: Float64 , false ) ,
2036+ ] ) ) ,
2037+ true ,
2038+ ) ) ) ,
20392039 true ,
20402040 ) ,
20412041 ] ) ;
20422042
20432043 let schema = arrow_schema_to_schema_auto_assign_ids ( & arrow_schema) . unwrap ( ) ;
20442044
// Removed below: the previous assertions. They checked each top-level field's name and type
// individually, then collected every field ID recursively and only asserted that the IDs were
// positive, unique and 17 in total; the concrete ID given to each field was not checked.
2045- // Verify top-level field count
2046- let fields = schema. as_struct ( ) . fields ( ) ;
2047- assert_eq ! ( fields. len( ) , 8 ) ;
2048-
2049- // Check primitive fields
2050- assert_eq ! ( fields[ 0 ] . name, "id" ) ;
2051- assert ! ( matches!(
2052- fields[ 0 ] . field_type. as_ref( ) ,
2053- Type :: Primitive ( PrimitiveType :: Long )
2054- ) ) ;
2055- assert ! ( fields[ 0 ] . required) ;
2056-
2057- assert_eq ! ( fields[ 1 ] . name, "name" ) ;
2058- assert ! ( matches!(
2059- fields[ 1 ] . field_type. as_ref( ) ,
2060- Type :: Primitive ( PrimitiveType :: String )
2061- ) ) ;
2062-
2063- assert_eq ! ( fields[ 2 ] . name, "price" ) ;
2064- assert ! ( matches!(
2065- fields[ 2 ] . field_type. as_ref( ) ,
2066- Type :: Primitive ( PrimitiveType :: Decimal { .. } )
2067- ) ) ;
2068-
2069- assert_eq ! ( fields[ 3 ] . name, "created_at" ) ;
2070- assert ! ( matches!(
2071- fields[ 3 ] . field_type. as_ref( ) ,
2072- Type :: Primitive ( PrimitiveType :: Timestamptz )
2073- ) ) ;
2074-
2075- // Check simple list
2076- assert_eq ! ( fields[ 4 ] . name, "tags" ) ;
2077- assert ! ( matches!( fields[ 4 ] . field_type. as_ref( ) , Type :: List ( _) ) ) ;
2078-
2079- // Check nested struct
2080- assert_eq ! ( fields[ 5 ] . name, "address" ) ;
2081- if let Type :: Struct ( struct_type) = fields[ 5 ] . field_type . as_ref ( ) {
2082- assert_eq ! ( struct_type. fields( ) . len( ) , 3 ) ;
2083- assert_eq ! ( struct_type. fields( ) [ 0 ] . name, "street" ) ;
2084- assert_eq ! ( struct_type. fields( ) [ 1 ] . name, "city" ) ;
2085- assert_eq ! ( struct_type. fields( ) [ 2 ] . name, "zip" ) ;
2086- } else {
2087- panic ! ( "Expected struct type for address field" ) ;
2088- }
2089-
2090- // Check map type
2091- assert_eq ! ( fields[ 6 ] . name, "attributes" ) ;
2092- if let Type :: Map ( map_type) = fields[ 6 ] . field_type . as_ref ( ) {
2093- assert ! ( matches!(
2094- map_type. key_field. field_type. as_ref( ) ,
2095- Type :: Primitive ( PrimitiveType :: String )
2096- ) ) ;
2097- assert ! ( matches!(
2098- map_type. value_field. field_type. as_ref( ) ,
2099- Type :: Primitive ( PrimitiveType :: String )
2100- ) ) ;
2101- } else {
2102- panic ! ( "Expected map type for attributes field" ) ;
2103- }
2104-
2105- // Check list of structs
2106- assert_eq ! ( fields[ 7 ] . name, "orders" ) ;
2107- if let Type :: List ( list_type) = fields[ 7 ] . field_type . as_ref ( ) {
2108- if let Type :: Struct ( order_struct) = list_type. element_field . field_type . as_ref ( ) {
2109- assert_eq ! ( order_struct. fields( ) . len( ) , 2 ) ;
2110- assert_eq ! ( order_struct. fields( ) [ 0 ] . name, "order_id" ) ;
2111- assert_eq ! ( order_struct. fields( ) [ 1 ] . name, "amount" ) ;
2112- } else {
2113- panic ! ( "Expected struct type for orders list element" ) ;
2114- }
2115- } else {
2116- panic ! ( "Expected list type for orders field" ) ;
2117- }
2118-
2119- // Collect ALL field IDs (including deeply nested ones) and verify uniqueness
2120- fn collect_field_ids ( field_type : & Type , ids : & mut Vec < i32 > ) {
2121- match field_type {
2122- Type :: Struct ( s) => {
2123- for f in s. fields ( ) {
2124- ids. push ( f. id ) ;
2125- collect_field_ids ( f. field_type . as_ref ( ) , ids) ;
2126- }
2127- }
2128- Type :: List ( l) => {
2129- ids. push ( l. element_field . id ) ;
2130- collect_field_ids ( l. element_field . field_type . as_ref ( ) , ids) ;
2131- }
2132- Type :: Map ( m) => {
2133- ids. push ( m. key_field . id ) ;
2134- ids. push ( m. value_field . id ) ;
2135- collect_field_ids ( m. key_field . field_type . as_ref ( ) , ids) ;
2136- collect_field_ids ( m. value_field . field_type . as_ref ( ) , ids) ;
2137- }
2138- Type :: Primitive ( _) => { }
2139- }
2140- }
2141-
2142- let mut all_field_ids: Vec < i32 > = fields. iter ( ) . map ( |f| f. id ) . collect ( ) ;
2143- for field in fields {
2144- collect_field_ids ( field. field_type . as_ref ( ) , & mut all_field_ids) ;
2145- }
2146-
2147- // All IDs should be positive
2148- assert ! (
2149- all_field_ids. iter( ) . all( |& id| id > 0 ) ,
2150- "All field IDs should be positive, got: {all_field_ids:?}" ,
2151- ) ;
2152-
2153- // All IDs should be unique
2154- let unique_ids: std:: collections:: HashSet < _ > = all_field_ids. iter ( ) . collect ( ) ;
2155- assert_eq ! (
2156- unique_ids. len( ) ,
2157- all_field_ids. len( ) ,
2158- "Field IDs should be unique, got duplicates in: {all_field_ids:?}" ,
2159- ) ;
2045+ // Build expected schema with exact field IDs following level-order assignment:
2046+ // Level 0: id=1, name=2, price=3, created_at=4, tags=5, address=6, attributes=7, orders=8
2047+ // Level 1: tags.element=9, address.{street=10,city=11,zip=12}, attributes.{key=13,value=14}, orders.element=15
2048+ // Level 2: orders.element.{order_id=16,amount=17}
// In the expected schema, Arrow fields declared non-nullable (`false`) appear as
// `NestedField::required` and nullable ones (`true`) as `NestedField::optional`.
// NOTE(review): the trailing `false` passed to `list_element` / `map_value_element` lines up
// with the nullable Arrow `item`, `value` and `element` fields; presumably it is the
// `required` flag. Confirm against the `NestedField` constructors.
2049+ let expected = Schema :: builder ( )
2050+ . with_fields ( vec ! [
2051+ NestedField :: required( 1 , "id" , Type :: Primitive ( PrimitiveType :: Long ) ) . into( ) ,
2052+ NestedField :: optional( 2 , "name" , Type :: Primitive ( PrimitiveType :: String ) ) . into( ) ,
2053+ NestedField :: required(
2054+ 3 ,
2055+ "price" ,
2056+ Type :: Primitive ( PrimitiveType :: Decimal {
2057+ precision: 10 ,
2058+ scale: 2 ,
2059+ } ) ,
2060+ )
2061+ . into( ) ,
2062+ NestedField :: optional( 4 , "created_at" , Type :: Primitive ( PrimitiveType :: Timestamptz ) )
2063+ . into( ) ,
2064+ NestedField :: optional(
2065+ 5 ,
2066+ "tags" ,
2067+ Type :: List ( ListType {
2068+ element_field: NestedField :: list_element(
2069+ 9 ,
2070+ Type :: Primitive ( PrimitiveType :: String ) ,
2071+ false ,
2072+ )
2073+ . into( ) ,
2074+ } ) ,
2075+ )
2076+ . into( ) ,
2077+ NestedField :: optional(
2078+ 6 ,
2079+ "address" ,
2080+ Type :: Struct ( StructType :: new( vec![
2081+ NestedField :: optional( 10 , "street" , Type :: Primitive ( PrimitiveType :: String ) )
2082+ . into( ) ,
2083+ NestedField :: required( 11 , "city" , Type :: Primitive ( PrimitiveType :: String ) )
2084+ . into( ) ,
2085+ NestedField :: optional( 12 , "zip" , Type :: Primitive ( PrimitiveType :: Int ) )
2086+ . into( ) ,
2087+ ] ) ) ,
2088+ )
2089+ . into( ) ,
2090+ NestedField :: optional(
2091+ 7 ,
2092+ "attributes" ,
2093+ Type :: Map ( MapType {
2094+ key_field: NestedField :: map_key_element(
2095+ 13 ,
2096+ Type :: Primitive ( PrimitiveType :: String ) ,
2097+ )
2098+ . into( ) ,
2099+ value_field: NestedField :: map_value_element(
2100+ 14 ,
2101+ Type :: Primitive ( PrimitiveType :: String ) ,
2102+ false ,
2103+ )
2104+ . into( ) ,
2105+ } ) ,
2106+ )
2107+ . into( ) ,
2108+ NestedField :: optional(
2109+ 8 ,
2110+ "orders" ,
2111+ Type :: List ( ListType {
2112+ element_field: NestedField :: list_element(
2113+ 15 ,
2114+ Type :: Struct ( StructType :: new( vec![
2115+ NestedField :: required(
2116+ 16 ,
2117+ "order_id" ,
2118+ Type :: Primitive ( PrimitiveType :: Long ) ,
2119+ )
2120+ . into( ) ,
2121+ NestedField :: required(
2122+ 17 ,
2123+ "amount" ,
2124+ Type :: Primitive ( PrimitiveType :: Double ) ,
2125+ )
2126+ . into( ) ,
2127+ ] ) ) ,
2128+ false ,
2129+ )
2130+ . into( ) ,
2131+ } ) ,
2132+ )
2133+ . into( ) ,
2134+ ] )
2135+ . build ( )
2136+ . unwrap ( ) ;
21602137
2161- // Verify we have the expected number of fields (8 top-level + nested)
2162- // Top-level: 8
2163- // tags list element: 1
2164- // address struct fields: 3
2165- // attributes map key + value: 2
2166- // orders list element: 1, order struct fields: 2
2167- // Total: 8 + 1 + 3 + 2 + 1 + 2 = 17
2168- assert_eq ! (
2169- all_field_ids. len( ) ,
2170- 17 ,
2171- "Expected 17 total fields, got {}" ,
2172- all_field_ids. len( )
2173- ) ;
// Added below: a single whole-schema equality check against `expected` replaces the removed
// per-field assertions, and the highest ID is checked against 17, the largest ID listed in
// the level-order comment.
2138+ pretty_assertions:: assert_eq!( schema, expected) ;
2139+ assert_eq ! ( schema. highest_field_id( ) , 17 ) ;
21742140 }
21752141}
0 commit comments