1- use fsst:: { Decompressor , Symbol } ;
1+ use std:: fmt:: { Debug , Formatter } ;
2+ use std:: sync:: { Arc , LazyLock } ;
3+
4+ use fsst:: { Compressor , Decompressor , Symbol } ;
25use vortex_array:: arrays:: VarBinArray ;
36use vortex_array:: stats:: { ArrayStats , StatsSetRef } ;
47use vortex_array:: vtable:: {
@@ -33,7 +36,7 @@ impl VTable for FSSTVTable {
3336 }
3437}
3538
36- #[ derive( Clone , Debug ) ]
39+ #[ derive( Clone ) ]
3740pub struct FSSTArray {
3841 dtype : DType ,
3942 symbols : Buffer < Symbol > ,
@@ -42,6 +45,21 @@ pub struct FSSTArray {
4245 /// Lengths of the original values before compression, can be compressed.
4346 uncompressed_lengths : ArrayRef ,
4447 stats_set : ArrayStats ,
48+
49+ /// Memoized compressor used for push-down of compute by compressing the RHS.
50+ compressor : Arc < LazyLock < Compressor , Box < dyn Fn ( ) -> Compressor + Send > > > ,
51+ }
52+
53+ impl Debug for FSSTArray {
54+ fn fmt ( & self , f : & mut Formatter < ' _ > ) -> std:: fmt:: Result {
55+ f. debug_struct ( "FSSTArray" )
56+ . field ( "dtype" , & self . dtype )
57+ . field ( "symbols" , & self . symbols )
58+ . field ( "symbol_lengths" , & self . symbol_lengths )
59+ . field ( "codes" , & self . codes )
60+ . field ( "uncompressed_lengths" , & self . uncompressed_lengths )
61+ . finish ( )
62+ }
4563}
4664
4765#[ derive( Clone , Debug ) ]
@@ -84,13 +102,21 @@ impl FSSTArray {
84102 vortex_bail ! ( InvalidArgument : "codes array must be DType::Binary type" ) ;
85103 }
86104
105+ let symbols2 = symbols. clone ( ) ;
106+ let symbol_lengths2 = symbol_lengths. clone ( ) ;
107+ let compressor = Arc :: new ( LazyLock :: new ( Box :: new ( move || {
108+ Compressor :: rebuild_from ( symbols2. as_slice ( ) , symbol_lengths2. as_slice ( ) )
109+ } )
110+ as Box < dyn Fn ( ) -> Compressor + Send > ) ) ;
111+
87112 Ok ( Self {
88113 dtype,
89114 symbols,
90115 symbol_lengths,
91116 codes,
92117 uncompressed_lengths,
93118 stats_set : Default :: default ( ) ,
119+ compressor,
94120 } )
95121 }
96122
@@ -133,6 +159,10 @@ impl FSSTArray {
133159 pub ( crate ) fn decompressor ( & self ) -> Decompressor {
134160 Decompressor :: new ( self . symbols ( ) . as_slice ( ) , self . symbol_lengths ( ) . as_slice ( ) )
135161 }
162+
163+ pub ( crate ) fn compressor ( & self ) -> & Compressor {
164+ self . compressor . as_ref ( )
165+ }
136166}
137167
138168impl ArrayVTable < FSSTVTable > for FSSTVTable {
0 commit comments