1- use std:: ops:: { BitAnd , Deref , Range } ;
1+ use std:: collections:: BTreeSet ;
2+ use std:: ops:: { BitAnd , Range } ;
23use std:: sync:: { Arc , OnceLock } ;
34
45use async_trait:: async_trait;
56use dashmap:: DashMap ;
67use futures:: { FutureExt , join} ;
78use vortex_array:: arrays:: StructArray ;
89use vortex_array:: compute:: { MinMaxResult , filter, min_max} ;
10+ use vortex_array:: stats:: Precision ;
911use vortex_array:: { Array , ArrayContext , ArrayRef , ToCanonical } ;
1012use vortex_dict:: DictArray ;
13+ use vortex_dtype:: { DType , FieldMask } ;
1114use vortex_error:: { VortexExpect , VortexResult } ;
1215use vortex_expr:: { ExprRef , Identity } ;
1316use vortex_mask:: Mask ;
@@ -16,7 +19,7 @@ use super::DictLayout;
1619use crate :: layouts:: SharedArrayFuture ;
1720use crate :: segments:: SegmentSource ;
1821use crate :: {
19- ArrayEvaluation , Layout , LayoutReader , LayoutReaderRef , MaskEvaluation , NoOpPruningEvaluation ,
22+ ArrayEvaluation , LayoutReader , LayoutReaderRef , MaskEvaluation , NoOpPruningEvaluation ,
2023 PruningEvaluation ,
2124} ;
2225
@@ -25,6 +28,8 @@ pub struct DictReader {
2528 #[ allow( dead_code) ] // Typically used for logging
2629 name : Arc < str > ,
2730
31+ /// Length of the values array
32+ values_len : usize ,
2833 /// Cached dict values array
2934 values_array : OnceLock < SharedArrayFuture > ,
3035 /// Cache of expression evaluation results on the values array by expression
@@ -34,21 +39,14 @@ pub struct DictReader {
3439 codes : LayoutReaderRef ,
3540}
3641
37- impl Deref for DictReader {
38- type Target = dyn Layout ;
39-
40- fn deref ( & self ) -> & Self :: Target {
41- self . layout . deref ( )
42- }
43- }
44-
4542impl DictReader {
4643 pub ( super ) fn try_new (
4744 layout : DictLayout ,
4845 name : Arc < str > ,
4946 segment_source : & Arc < dyn SegmentSource > ,
5047 ctx : & ArrayContext ,
5148 ) -> VortexResult < Self > {
49+ let values_len = usize:: try_from ( layout. values . row_count ( ) ) ?;
5250 let values =
5351 layout
5452 . values
@@ -61,6 +59,7 @@ impl DictReader {
6159 Ok ( Self {
6260 layout,
6361 name,
62+ values_len,
6463 values_array : Default :: default ( ) ,
6564 values_evals : Default :: default ( ) ,
6665 values,
@@ -71,21 +70,18 @@ impl DictReader {
7170 fn values_array ( & self ) -> SharedArrayFuture {
7271 // We capture the name, so it may be wrong if we re-use the same reader within multiple
7372 // different parent readers. But that's rare...
73+ let values_len = self . values_len ;
7474 self . values_array
7575 . get_or_init ( move || {
76- let values_len = self . values . row_count ( ) ;
7776 let eval = self
7877 . values
79- . projection_evaluation ( & ( 0 ..values_len) , & Identity :: new_expr ( ) )
78+ . projection_evaluation ( & ( 0 ..values_len as u64 ) , & Identity :: new_expr ( ) )
8079 . vortex_expect ( "must construct dict values array evaluation" ) ;
8180
8281 async move {
83- eval. invoke ( Mask :: new_true (
84- usize:: try_from ( values_len)
85- . vortex_expect ( "dict values length must fit in u32" ) ,
86- ) )
87- . await
88- . map_err ( Arc :: new)
82+ eval. invoke ( Mask :: new_true ( values_len) )
83+ . await
84+ . map_err ( Arc :: new)
8985 }
9086 . boxed ( )
9187 . shared ( )
@@ -111,6 +107,23 @@ impl LayoutReader for DictReader {
111107 & self . name
112108 }
113109
110+ fn dtype ( & self ) -> & DType {
111+ self . layout . dtype ( )
112+ }
113+
114+ fn row_count ( & self ) -> Precision < u64 > {
115+ Precision :: Exact ( self . layout . row_count ( ) )
116+ }
117+
118+ fn register_splits (
119+ & self ,
120+ field_mask : & [ FieldMask ] ,
121+ row_offset : u64 ,
122+ splits : & mut BTreeSet < u64 > ,
123+ ) -> VortexResult < ( ) > {
124+ self . codes . register_splits ( field_mask, row_offset, splits)
125+ }
126+
114127 fn pruning_evaluation (
115128 & self ,
116129 _row_range : & Range < u64 > ,
0 commit comments