@@ -586,7 +586,8 @@ mod tests {
586
586
587
587
use arrow:: array:: {
588
588
builder:: { BooleanBuilder , UInt64Builder } ,
589
- BooleanArray , Date32Array , Date64Array , Int32Array , RecordBatch , UInt64Array ,
589
+ BinaryArray , BooleanArray , Date32Array , Date64Array , FixedSizeBinaryArray ,
590
+ Int32Array , RecordBatch , UInt64Array ,
590
591
} ;
591
592
use arrow:: compute:: { concat_batches, filter_record_batch, SortOptions } ;
592
593
use arrow:: datatypes:: { DataType , Field , Schema } ;
@@ -685,6 +686,56 @@ mod tests {
685
686
TestMemoryExec :: try_new_exec ( & [ vec ! [ batch] ] , schema, None ) . unwrap ( )
686
687
}
687
688
689
+ fn build_binary_table (
690
+ a : ( & str , & Vec < & [ u8 ] > ) ,
691
+ b : ( & str , & Vec < i32 > ) ,
692
+ c : ( & str , & Vec < i32 > ) ,
693
+ ) -> Arc < dyn ExecutionPlan > {
694
+ let schema = Schema :: new ( vec ! [
695
+ Field :: new( a. 0 , DataType :: Binary , false ) ,
696
+ Field :: new( b. 0 , DataType :: Int32 , false ) ,
697
+ Field :: new( c. 0 , DataType :: Int32 , false ) ,
698
+ ] ) ;
699
+
700
+ let batch = RecordBatch :: try_new (
701
+ Arc :: new ( schema) ,
702
+ vec ! [
703
+ Arc :: new( BinaryArray :: from( a. 1 . clone( ) ) ) ,
704
+ Arc :: new( Int32Array :: from( b. 1 . clone( ) ) ) ,
705
+ Arc :: new( Int32Array :: from( c. 1 . clone( ) ) ) ,
706
+ ] ,
707
+ )
708
+ . unwrap ( ) ;
709
+
710
+ let schema = batch. schema ( ) ;
711
+ TestMemoryExec :: try_new_exec ( & [ vec ! [ batch] ] , schema, None ) . unwrap ( )
712
+ }
713
+
714
+ fn build_fixed_size_binary_table (
715
+ a : ( & str , & Vec < & [ u8 ] > ) ,
716
+ b : ( & str , & Vec < i32 > ) ,
717
+ c : ( & str , & Vec < i32 > ) ,
718
+ ) -> Arc < dyn ExecutionPlan > {
719
+ let schema = Schema :: new ( vec ! [
720
+ Field :: new( a. 0 , DataType :: FixedSizeBinary ( 3 ) , false ) ,
721
+ Field :: new( b. 0 , DataType :: Int32 , false ) ,
722
+ Field :: new( c. 0 , DataType :: Int32 , false ) ,
723
+ ] ) ;
724
+
725
+ let batch = RecordBatch :: try_new (
726
+ Arc :: new ( schema) ,
727
+ vec ! [
728
+ Arc :: new( FixedSizeBinaryArray :: from( a. 1 . clone( ) ) ) ,
729
+ Arc :: new( Int32Array :: from( b. 1 . clone( ) ) ) ,
730
+ Arc :: new( Int32Array :: from( c. 1 . clone( ) ) ) ,
731
+ ] ,
732
+ )
733
+ . unwrap ( ) ;
734
+
735
+ let schema = batch. schema ( ) ;
736
+ TestMemoryExec :: try_new_exec ( & [ vec ! [ batch] ] , schema, None ) . unwrap ( )
737
+ }
738
+
688
739
/// returns a table with 3 columns of i32 in memory
689
740
pub fn build_table_i32_nullable (
690
741
a : ( & str , & Vec < Option < i32 > > ) ,
@@ -1923,6 +1974,100 @@ mod tests {
1923
1974
Ok ( ( ) )
1924
1975
}
1925
1976
1977
+ #[ tokio:: test]
1978
+ async fn join_binary ( ) -> Result < ( ) > {
1979
+ let left = build_binary_table (
1980
+ (
1981
+ "a1" ,
1982
+ & vec ! [
1983
+ & [ 0xc0 , 0xff , 0xee ] ,
1984
+ & [ 0xde , 0xca , 0xde ] ,
1985
+ & [ 0xfa , 0xca , 0xde ] ,
1986
+ ] ,
1987
+ ) ,
1988
+ ( "b1" , & vec ! [ 5 , 10 , 15 ] ) , // this has a repetition
1989
+ ( "c1" , & vec ! [ 7 , 8 , 9 ] ) ,
1990
+ ) ;
1991
+ let right = build_binary_table (
1992
+ (
1993
+ "a1" ,
1994
+ & vec ! [
1995
+ & [ 0xc0 , 0xff , 0xee ] ,
1996
+ & [ 0xde , 0xca , 0xde ] ,
1997
+ & [ 0xfa , 0xca , 0xde ] ,
1998
+ ] ,
1999
+ ) ,
2000
+ ( "b2" , & vec ! [ 105 , 110 , 115 ] ) ,
2001
+ ( "c2" , & vec ! [ 70 , 80 , 90 ] ) ,
2002
+ ) ;
2003
+
2004
+ let on = vec ! [ (
2005
+ Arc :: new( Column :: new_with_schema( "a1" , & left. schema( ) ) ?) as _,
2006
+ Arc :: new( Column :: new_with_schema( "a1" , & right. schema( ) ) ?) as _,
2007
+ ) ] ;
2008
+
2009
+ let ( _, batches) = join_collect ( left, right, on, Inner ) . await ?;
2010
+
2011
+ // The output order is important as SMJ preserves sortedness
2012
+ assert_snapshot ! ( batches_to_string( & batches) , @r#"
2013
+ +--------+----+----+--------+-----+----+
2014
+ | a1 | b1 | c1 | a1 | b2 | c2 |
2015
+ +--------+----+----+--------+-----+----+
2016
+ | c0ffee | 5 | 7 | c0ffee | 105 | 70 |
2017
+ | decade | 10 | 8 | decade | 110 | 80 |
2018
+ | facade | 15 | 9 | facade | 115 | 90 |
2019
+ +--------+----+----+--------+-----+----+
2020
+ "# ) ;
2021
+ Ok ( ( ) )
2022
+ }
2023
+
2024
+ #[ tokio:: test]
2025
+ async fn join_fixed_size_binary ( ) -> Result < ( ) > {
2026
+ let left = build_fixed_size_binary_table (
2027
+ (
2028
+ "a1" ,
2029
+ & vec ! [
2030
+ & [ 0xc0 , 0xff , 0xee ] ,
2031
+ & [ 0xde , 0xca , 0xde ] ,
2032
+ & [ 0xfa , 0xca , 0xde ] ,
2033
+ ] ,
2034
+ ) ,
2035
+ ( "b1" , & vec ! [ 5 , 10 , 15 ] ) , // this has a repetition
2036
+ ( "c1" , & vec ! [ 7 , 8 , 9 ] ) ,
2037
+ ) ;
2038
+ let right = build_fixed_size_binary_table (
2039
+ (
2040
+ "a1" ,
2041
+ & vec ! [
2042
+ & [ 0xc0 , 0xff , 0xee ] ,
2043
+ & [ 0xde , 0xca , 0xde ] ,
2044
+ & [ 0xfa , 0xca , 0xde ] ,
2045
+ ] ,
2046
+ ) ,
2047
+ ( "b2" , & vec ! [ 105 , 110 , 115 ] ) ,
2048
+ ( "c2" , & vec ! [ 70 , 80 , 90 ] ) ,
2049
+ ) ;
2050
+
2051
+ let on = vec ! [ (
2052
+ Arc :: new( Column :: new_with_schema( "a1" , & left. schema( ) ) ?) as _,
2053
+ Arc :: new( Column :: new_with_schema( "a1" , & right. schema( ) ) ?) as _,
2054
+ ) ] ;
2055
+
2056
+ let ( _, batches) = join_collect ( left, right, on, Inner ) . await ?;
2057
+
2058
+ // The output order is important as SMJ preserves sortedness
2059
+ assert_snapshot ! ( batches_to_string( & batches) , @r#"
2060
+ +--------+----+----+--------+-----+----+
2061
+ | a1 | b1 | c1 | a1 | b2 | c2 |
2062
+ +--------+----+----+--------+-----+----+
2063
+ | c0ffee | 5 | 7 | c0ffee | 105 | 70 |
2064
+ | decade | 10 | 8 | decade | 110 | 80 |
2065
+ | facade | 15 | 9 | facade | 115 | 90 |
2066
+ +--------+----+----+--------+-----+----+
2067
+ "# ) ;
2068
+ Ok ( ( ) )
2069
+ }
2070
+
1926
2071
#[ tokio:: test]
1927
2072
async fn join_left_sort_order ( ) -> Result < ( ) > {
1928
2073
let left = build_table (
0 commit comments