@@ -139,7 +139,7 @@ use arrow_array::cast::*;
139139use arrow_array:: types:: ArrowDictionaryKeyType ;
140140use arrow_array:: * ;
141141use arrow_buffer:: { ArrowNativeType , Buffer , OffsetBuffer , ScalarBuffer } ;
142- use arrow_data:: ArrayDataBuilder ;
142+ use arrow_data:: { ArrayData , ArrayDataBuilder } ;
143143use arrow_schema:: * ;
144144use variable:: { decode_binary_view, decode_string_view} ;
145145
@@ -1668,8 +1668,24 @@ unsafe fn decode_column(
16681668 rows. iter_mut ( ) . for_each ( |row| * row = & row[ 1 ..] ) ;
16691669 let children = converter. convert_raw ( rows, validate_utf8) ?;
16701670
1671- let child_data = children. iter ( ) . map ( |c| c. to_data ( ) ) . collect ( ) ;
1672- let builder = ArrayDataBuilder :: new ( field. data_type . clone ( ) )
1671+ let child_data: Vec < ArrayData > = children. iter ( ) . map ( |c| c. to_data ( ) ) . collect ( ) ;
1672+ // Since RowConverter flattens certain data types (e.g. Dictionary),
1673+ // we need to use each child array's actual data type instead of the original field's
1674+ let corrected_fields: Vec < Field > = match & field. data_type {
1675+ DataType :: Struct ( struct_fields) => struct_fields
1676+ . iter ( )
1677+ . zip ( child_data. iter ( ) )
1678+ . map ( |( orig_field, child_array) | {
1679+ orig_field
1680+ . as_ref ( )
1681+ . clone ( )
1682+ . with_data_type ( child_array. data_type ( ) . clone ( ) )
1683+ } )
1684+ . collect ( ) ,
1685+ _ => unreachable ! ( "Only Struct types should be corrected here" ) ,
1686+ } ;
1687+ let corrected_struct_type = DataType :: Struct ( corrected_fields. into ( ) ) ;
1688+ let builder = ArrayDataBuilder :: new ( corrected_struct_type)
16731689 . len ( rows. len ( ) )
16741690 . null_count ( null_count)
16751691 . null_bit_buffer ( Some ( nulls) )
@@ -2208,6 +2224,177 @@ mod tests {
22082224 back[ 0 ] . to_data ( ) . validate_full ( ) . unwrap ( ) ;
22092225 }
22102226
/// Round-trips a `Struct` column whose single child is a
/// `Dictionary(Int32, Utf8)` array through `RowConverter`, then checks that
/// the decoded struct validates and carries the same logical strings.
#[test]
fn test_dictionary_in_struct() {
    let dict_field = Field::new_dictionary("foo", DataType::Int32, DataType::Utf8, true);
    let mut struct_builder = StructBuilder::new(
        vec![dict_field],
        vec![Box::new(StringDictionaryBuilder::<Int32Type>::new())],
    );

    // Flattened child values: ["a", null, "a", "b"]
    {
        let child = struct_builder
            .field_builder::<StringDictionaryBuilder<Int32Type>>(0)
            .unwrap();
        for entry in [Some("a"), None, Some("a"), Some("b")] {
            if let Some(text) = entry {
                child.append_value(text);
            } else {
                child.append_null();
            }
        }
    }

    // One valid struct slot per child value appended above.
    for _slot in 0..4 {
        struct_builder.append(true);
    }

    let original = Arc::new(struct_builder.finish()) as ArrayRef;
    let converter =
        RowConverter::new(vec![SortField::new(original.data_type().clone())]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&original)]).unwrap();

    let decoded = converter.convert_rows(&rows).unwrap();
    let [roundtrip] = decoded.try_into().unwrap();

    // RowConverter flattens Dictionary:
    // original is Struct(foo Dictionary(Int32, Utf8)), roundtrip is Struct(foo Utf8)
    assert_ne!(&original.data_type(), &roundtrip.data_type());
    roundtrip.to_data().validate_full().unwrap();

    // The logical data must be unchanged.
    // Input keys: [0, null, 0, 1], input values: ["a", "b"]
    let dict = original.as_struct().column(0).as_dictionary::<Int32Type>();
    let dict_values = dict.values().as_string::<i32>();
    // Decoded (flattened) child: ["a", null, "a", "b"]
    let flat = roundtrip.as_struct().column(0).as_string::<i32>();

    for (row, key) in dict.keys().iter().enumerate() {
        match key {
            None => assert!(flat.is_null(row)),
            Some(key) => assert_eq!(dict_values.value(key as usize), flat.value(row)),
        }
    }
}
2289+
/// Round-trips an empty `Struct` column with a non-nullable
/// `Dictionary(Int32, Int32)` child through `RowConverter` and checks that
/// the decoded, flattened struct is valid and still empty.
#[test]
fn test_dictionary_in_struct_empty() {
    let dict_field = Field::new_dictionary("foo", DataType::Int32, DataType::Int32, false);
    let struct_type = DataType::Struct(vec![dict_field].into());
    let original = arrow_array::new_empty_array(&struct_type);

    let converter =
        RowConverter::new(vec![SortField::new(original.data_type().clone())]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&original)]).unwrap();

    let decoded = converter.convert_rows(&rows).unwrap();
    let [roundtrip] = decoded.try_into().unwrap();

    // RowConverter flattens Dictionary:
    // original is Struct(foo Dictionary(Int32, Int32)), roundtrip is Struct(foo Int32)
    assert_ne!(&original.data_type(), &roundtrip.data_type());
    roundtrip.to_data().validate_full().unwrap();
    assert_eq!(original.len(), 0);
    assert_eq!(roundtrip.len(), 0);
}
2317+
/// Round-trips a `List(Dictionary(Int32, Utf8))` column through
/// `RowConverter` and checks that the decoded list is valid and that each
/// non-null list entry holds the same logical strings as the input.
#[test]
fn test_list_of_string_dictionary() {
    /// Asserts that every slot of the dictionary-encoded `dict` array holds
    /// the same string (or null) as the same slot of the plain `flat` array.
    fn assert_same_strings(dict: &ArrayRef, flat: &ArrayRef) {
        let dict = dict.as_dictionary::<Int32Type>();
        let dict_values = dict.values().as_string::<i32>();
        let flat = flat.as_string::<i32>();

        for (pos, key) in dict.keys().iter().enumerate() {
            match key {
                None => assert!(flat.is_null(pos)),
                Some(key) => assert_eq!(dict_values.value(key as usize), flat.value(pos)),
            }
        }
    }

    let mut list_builder = ListBuilder::<StringDictionaryBuilder<Int32Type>>::default();
    // List[0] = ["a", "b", "zero", null, "c", "b", "d" (dict)]
    for item in [
        Some("a"),
        Some("b"),
        Some("zero"),
        None,
        Some("c"),
        Some("b"),
        Some("d"),
    ] {
        if let Some(text) = item {
            list_builder.values().append(text).unwrap();
        } else {
            list_builder.values().append_null();
        }
    }
    list_builder.append(true);
    // List[1] = null
    list_builder.append(false);
    // List[2] = ["e", "zero", "a" (dict)]
    for text in ["e", "zero", "a"] {
        list_builder.values().append(text).unwrap();
    }
    list_builder.append(true);

    let original = Arc::new(list_builder.finish()) as ArrayRef;

    let converter =
        RowConverter::new(vec![SortField::new(original.data_type().clone())]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&original)]).unwrap();

    let decoded = converter.convert_rows(&rows).unwrap();
    assert_eq!(decoded.len(), 1);
    let [roundtrip] = decoded.try_into().unwrap();

    // RowConverter flattens Dictionary:
    // original is List(Dictionary(Int32, Utf8)), roundtrip is List(Utf8)
    assert_ne!(&original.data_type(), &roundtrip.data_type());

    roundtrip.to_data().validate_full().unwrap();

    let decoded_list = roundtrip.as_list::<i32>();
    let original_list = original.as_list::<i32>();

    // The logical data must be unchanged.
    // List[0] = ["a", "b", "zero", null, "c", "b", "d" (dict)]
    assert_same_strings(&original_list.value(0), &decoded_list.value(0));

    // List[1] = null
    assert!(original_list.is_null(1));
    assert!(decoded_list.is_null(1));

    // List[2] = ["e", "zero", "a" (dict)]
    assert_same_strings(&original_list.value(2), &decoded_list.value(2));
}
2397+
22112398 #[ test]
22122399 fn test_primitive_dictionary ( ) {
22132400 let mut builder = PrimitiveDictionaryBuilder :: < Int32Type , Int32Type > :: new ( ) ;
@@ -2231,6 +2418,10 @@ mod tests {
22312418 assert ! ( rows. row( 3 ) < rows. row( 2 ) ) ;
22322419 assert ! ( rows. row( 6 ) < rows. row( 2 ) ) ;
22332420 assert ! ( rows. row( 3 ) < rows. row( 6 ) ) ;
2421+
2422+ let back = converter. convert_rows ( & rows) . unwrap ( ) ;
2423+ assert_eq ! ( back. len( ) , 1 ) ;
2424+ back[ 0 ] . to_data ( ) . validate_full ( ) . unwrap ( ) ;
22342425 }
22352426
22362427 #[ test]
0 commit comments