1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use arrow:: array:: { ArrayRef , PrimitiveArray , UInt32Array } ;
19- use arrow:: datatypes:: {
20- Float32Type , Float64Type , Int16Type , Int32Type , Int64Type , Int8Type , UInt16Type ,
21- UInt32Type , UInt64Type , UInt8Type ,
22- } ;
18+ use arrow:: array:: { ArrayRef , ArrowPrimitiveType , PrimitiveArray , UInt32Array } ;
19+ use arrow:: datatypes:: DataType ;
20+ use rand:: distributions:: Standard ;
21+ use rand:: prelude:: Distribution ;
2322use rand:: rngs:: StdRng ;
2423use rand:: Rng ;
2524
/// Conversion from fixed-width integers into a native Rust value type.
///
/// Every conversion method has a default body that returns `None`; an
/// implementing type overrides only the conversions it supports.
pub trait FromNative: std::fmt::Debug + Send + Sync + Copy + Default {
    /// Converts an `i64` into `Self`.
    ///
    /// Returns `None` unless the implementing type overrides this method.
    fn from_i64(_: i64) -> Option<Self> {
        None
    }
}

// Implements `FromNative` for the type `$t`.
//
// Each optional identifier after the type names a trait method to override
// with an identity conversion, so it must name a method whose argument type
// is `$t` itself (e.g. `native_type!(i64, from_i64)`). Methods that are not
// listed keep the trait's default body and return `None`.
macro_rules! native_type {
    ($t:ty $(, $from:ident)*) => {
        impl FromNative for $t {
            $(
                #[inline]
                fn $from(v: $t) -> Option<Self> {
                    Some(v)
                }
            )*
        }
    };
}

native_type!(i8);
native_type!(i16);
native_type!(i32);
native_type!(i64, from_i64);
native_type!(u8);
native_type!(u16);
native_type!(u32);
native_type!(u64);
native_type!(f32);
native_type!(f64);
56+
2657/// Randomly generate primitive array
2758pub struct PrimitiveArrayGenerator {
2859 /// the total number of strings in the output
@@ -35,46 +66,61 @@ pub struct PrimitiveArrayGenerator {
3566 pub rng : StdRng ,
3667}
3768
38- macro_rules! impl_gen_data {
39- ( $NATIVE_TYPE: ty, $ARROW_TYPE: ident) => {
40- paste:: paste! {
41- pub fn [ < gen_data_ $NATIVE_TYPE >] ( & mut self ) -> ArrayRef {
42- // table of strings from which to draw
43- let distinct_primitives: PrimitiveArray <$ARROW_TYPE> = ( 0 ..self . num_distinct_primitives)
44- . map( |_| Some ( self . rng. gen :: <$NATIVE_TYPE>( ) ) )
45- . collect( ) ;
69+ // TODO: support generating more primitive arrays
70+ impl PrimitiveArrayGenerator {
71+ pub fn gen_data < A > ( & mut self ) -> ArrayRef
72+ where
73+ A : ArrowPrimitiveType ,
74+ A :: Native : FromNative ,
75+ Standard : Distribution < <A as ArrowPrimitiveType >:: Native > ,
76+ {
77+ // table of primitives from which to draw
78+ let distinct_primitives: PrimitiveArray < A > = ( 0 ..self . num_distinct_primitives )
79+ . map ( |_| {
80+ Some ( match A :: DATA_TYPE {
81+ DataType :: Int8
82+ | DataType :: Int16
83+ | DataType :: Int32
84+ | DataType :: Int64
85+ | DataType :: UInt8
86+ | DataType :: UInt16
87+ | DataType :: UInt32
88+ | DataType :: UInt64
89+ | DataType :: Float32
90+ | DataType :: Float64
91+ | DataType :: Date32 => self . rng . gen :: < A :: Native > ( ) ,
4692
47- // pick num_strings randomly from the distinct string table
48- let indicies: UInt32Array = ( 0 ..self . num_primitives)
49- . map( |_| {
50- if self . rng. gen :: <f64 >( ) < self . null_pct {
51- None
52- } else if self . num_distinct_primitives > 1 {
53- let range = 1 ..( self . num_distinct_primitives as u32 ) ;
54- Some ( self . rng. gen_range( range) )
55- } else {
56- Some ( 0 )
57- }
58- } )
59- . collect( ) ;
93+ DataType :: Date64 => {
94+ // TODO: constrain this range to valid dates if necessary
95+ let date_value = self . rng . gen_range ( i64:: MIN ..=i64:: MAX ) ;
96+ let millis_per_day = 86_400_000 ;
97+ let adjusted_value = date_value - ( date_value % millis_per_day) ;
98+ A :: Native :: from_i64 ( adjusted_value) . unwrap ( )
99+ }
60100
61- let options = None ;
62- arrow:: compute:: take( & distinct_primitives, & indicies, options) . unwrap( )
63- }
64- }
65- } ;
66- }
101+ _ => {
102+ let arrow_type = A :: DATA_TYPE ;
103+ panic ! ( "Unsupported arrow data type: {arrow_type}" )
104+ }
105+ } )
106+ } )
107+ . collect ( ) ;
67108
68- // TODO: support generating more primitive arrays
69- impl PrimitiveArrayGenerator {
70- impl_gen_data ! ( i8 , Int8Type ) ;
71- impl_gen_data ! ( i16 , Int16Type ) ;
72- impl_gen_data ! ( i32 , Int32Type ) ;
73- impl_gen_data ! ( i64 , Int64Type ) ;
74- impl_gen_data ! ( u8 , UInt8Type ) ;
75- impl_gen_data ! ( u16 , UInt16Type ) ;
76- impl_gen_data ! ( u32 , UInt32Type ) ;
77- impl_gen_data ! ( u64 , UInt64Type ) ;
78- impl_gen_data ! ( f32 , Float32Type ) ;
79- impl_gen_data ! ( f64 , Float64Type ) ;
109+ // pick num_primitves randomly from the distinct string table
110+ let indicies: UInt32Array = ( 0 ..self . num_primitives )
111+ . map ( |_| {
112+ if self . rng . gen :: < f64 > ( ) < self . null_pct {
113+ None
114+ } else if self . num_distinct_primitives > 1 {
115+ let range = 1 ..( self . num_distinct_primitives as u32 ) ;
116+ Some ( self . rng . gen_range ( range) )
117+ } else {
118+ Some ( 0 )
119+ }
120+ } )
121+ . collect ( ) ;
122+
123+ let options = None ;
124+ arrow:: compute:: take ( & distinct_primitives, & indicies, options) . unwrap ( )
125+ }
80126}
0 commit comments