@@ -27,8 +27,9 @@ use arrow::{downcast_dictionary_array, downcast_primitive_array};
2727#[ cfg( not( feature = "force_hash_collisions" ) ) ]
2828use crate :: cast:: {
2929 as_binary_view_array, as_boolean_array, as_fixed_size_list_array,
30- as_generic_binary_array, as_large_list_array, as_list_array, as_map_array,
31- as_string_array, as_string_view_array, as_struct_array, as_union_array,
30+ as_generic_binary_array, as_large_list_array, as_large_list_view_array,
31+ as_list_array, as_list_view_array, as_map_array, as_string_array,
32+ as_string_view_array, as_struct_array, as_union_array,
3233} ;
3334use crate :: error:: Result ;
3435use crate :: error:: { _internal_datafusion_err, _internal_err} ;
@@ -538,6 +539,45 @@ where
538539 Ok ( ( ) )
539540}
540541
542+ #[ cfg( not( feature = "force_hash_collisions" ) ) ]
543+ fn hash_list_view_array < OffsetSize > (
544+ array : & GenericListViewArray < OffsetSize > ,
545+ random_state : & RandomState ,
546+ hashes_buffer : & mut [ u64 ] ,
547+ ) -> Result < ( ) >
548+ where
549+ OffsetSize : OffsetSizeTrait ,
550+ {
551+ let values = array. values ( ) ;
552+ let offsets = array. value_offsets ( ) ;
553+ let sizes = array. value_sizes ( ) ;
554+ let nulls = array. nulls ( ) ;
555+ let mut values_hashes = vec ! [ 0u64 ; values. len( ) ] ;
556+ create_hashes ( [ values] , random_state, & mut values_hashes) ?;
557+ if let Some ( nulls) = nulls {
558+ for ( i, ( offset, size) ) in offsets. iter ( ) . zip ( sizes. iter ( ) ) . enumerate ( ) {
559+ if nulls. is_valid ( i) {
560+ let hash = & mut hashes_buffer[ i] ;
561+ let start = offset. as_usize ( ) ;
562+ let end = start + size. as_usize ( ) ;
563+ for values_hash in & values_hashes[ start..end] {
564+ * hash = combine_hashes ( * hash, * values_hash) ;
565+ }
566+ }
567+ }
568+ } else {
569+ for ( i, ( offset, size) ) in offsets. iter ( ) . zip ( sizes. iter ( ) ) . enumerate ( ) {
570+ let hash = & mut hashes_buffer[ i] ;
571+ let start = offset. as_usize ( ) ;
572+ let end = start + size. as_usize ( ) ;
573+ for values_hash in & values_hashes[ start..end] {
574+ * hash = combine_hashes ( * hash, * values_hash) ;
575+ }
576+ }
577+ }
578+ Ok ( ( ) )
579+ }
580+
541581#[ cfg( not( feature = "force_hash_collisions" ) ) ]
542582fn hash_union_array (
543583 array : & UnionArray ,
@@ -714,6 +754,14 @@ fn hash_single_array(
714754 let array = as_large_list_array( array) ?;
715755 hash_list_array( array, random_state, hashes_buffer) ?;
716756 }
757+ DataType :: ListView ( _) => {
758+ let array = as_list_view_array( array) ?;
759+ hash_list_view_array( array, random_state, hashes_buffer) ?;
760+ }
761+ DataType :: LargeListView ( _) => {
762+ let array = as_large_list_view_array( array) ?;
763+ hash_list_view_array( array, random_state, hashes_buffer) ?;
764+ }
717765 DataType :: Map ( _, _) => {
718766 let array = as_map_array( array) ?;
719767 hash_map_array( array, random_state, hashes_buffer) ?;
@@ -1128,6 +1176,100 @@ mod tests {
11281176 assert_eq ! ( hashes[ 1 ] , hashes[ 6 ] ) ; // null vs empty list
11291177 }
11301178
#[test]
// Compares row hashes against each other; not built when forcing collisions
#[cfg(not(feature = "force_hash_collisions"))]
fn create_hashes_for_list_view_arrays() {
    use arrow::buffer::{NullBuffer, ScalarBuffer};

    // Child values shared by all rows: [0, 1, 2, 3, null, 5]
    let child: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(0),
        Some(1),
        Some(2),
        Some(3),
        None,
        Some(5),
    ]));
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // One entry per row: (offset, size, is_valid)
    let rows: [(i32, i32, bool); 7] = [
        (0, 3, true),  // row 0: [0, 1, 2]
        (0, 0, false), // row 1: null (validity bit cleared)
        (3, 3, true),  // row 2: [3, null, 5]
        (3, 3, true),  // row 3: [3, null, 5] - same as row 2
        (0, 0, false), // row 4: null (validity bit cleared)
        (0, 3, true),  // row 5: [0, 1, 2] - same as row 0
        (0, 0, true),  // row 6: [] - valid but empty list
    ];
    let offsets = ScalarBuffer::from(rows.iter().map(|r| r.0).collect::<Vec<_>>());
    let sizes = ScalarBuffer::from(rows.iter().map(|r| r.1).collect::<Vec<_>>());
    let validity = NullBuffer::from(rows.iter().map(|r| r.2).collect::<Vec<_>>());

    let list_views: ArrayRef = Arc::new(ListViewArray::new(
        item_field,
        offsets,
        sizes,
        child,
        Some(validity),
    ));

    let mut hashes = vec![0; list_views.len()];
    let random_state = RandomState::with_seeds(0, 0, 0, 0);
    create_hashes(&[list_views], &random_state, &mut hashes).unwrap();

    assert_eq!(hashes[0], hashes[5]); // both rows view [0, 1, 2]
    assert_eq!(hashes[1], hashes[4]); // both rows are null
    assert_eq!(hashes[2], hashes[3]); // both rows view [3, null, 5]
    assert_eq!(hashes[1], hashes[6]); // a null row and an empty list hash alike
}
1225+
#[test]
// Compares row hashes against each other; not built when forcing collisions
#[cfg(not(feature = "force_hash_collisions"))]
fn create_hashes_for_large_list_view_arrays() {
    use arrow::buffer::{NullBuffer, ScalarBuffer};

    // Child values shared by all rows: [0, 1, 2, 3, null, 5]
    let child: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(0),
        Some(1),
        Some(2),
        Some(3),
        None,
        Some(5),
    ]));
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // One entry per row: (offset, size, is_valid), using 64-bit offsets/sizes
    let rows: [(i64, i64, bool); 7] = [
        (0, 3, true),  // row 0: [0, 1, 2]
        (0, 0, false), // row 1: null (validity bit cleared)
        (3, 3, true),  // row 2: [3, null, 5]
        (3, 3, true),  // row 3: [3, null, 5] - same as row 2
        (0, 0, false), // row 4: null (validity bit cleared)
        (0, 3, true),  // row 5: [0, 1, 2] - same as row 0
        (0, 0, true),  // row 6: [] - valid but empty list
    ];
    let offsets = ScalarBuffer::from(rows.iter().map(|r| r.0).collect::<Vec<_>>());
    let sizes = ScalarBuffer::from(rows.iter().map(|r| r.1).collect::<Vec<_>>());
    let validity = NullBuffer::from(rows.iter().map(|r| r.2).collect::<Vec<_>>());

    let large_list_views: ArrayRef = Arc::new(LargeListViewArray::new(
        item_field,
        offsets,
        sizes,
        child,
        Some(validity),
    ));

    let mut hashes = vec![0; large_list_views.len()];
    let random_state = RandomState::with_seeds(0, 0, 0, 0);
    create_hashes(&[large_list_views], &random_state, &mut hashes).unwrap();

    assert_eq!(hashes[0], hashes[5]); // both rows view [0, 1, 2]
    assert_eq!(hashes[1], hashes[4]); // both rows are null
    assert_eq!(hashes[2], hashes[3]); // both rows view [3, null, 5]
    assert_eq!(hashes[1], hashes[6]); // a null row and an empty list hash alike
}
1272+
11311273 #[ test]
11321274 // Tests actual values of hashes, which are different if forcing collisions
11331275 #[ cfg( not( feature = "force_hash_collisions" ) ) ]
0 commit comments