@@ -8,8 +8,7 @@ use anybytes::Bytes;
88use anyhow:: { anyhow, Result } ;
99
1010use crate :: bit_vector:: {
11- Access , BitVector , BitVectorBuilder , BitVectorData , BitVectorIndex , NumBits , Rank , Select ,
12- WORD_LEN ,
11+ Access , BitVector , BitVectorData , BitVectorIndex , NumBits , Rank , Select , WORD_LEN ,
1312} ;
1413use crate :: int_vectors:: { CompactVector , CompactVectorBuilder } ;
1514use crate :: utils;
@@ -56,10 +55,21 @@ use crate::utils;
5655/// # References
5756///
5857/// - F. Claude, and G. Navarro, "The Wavelet Matrix," In SPIRE 2012.
59- #[ derive( Default , Debug , Clone , PartialEq , Eq ) ]
58+ #[ derive( Debug , Clone , PartialEq , Eq ) ]
6059pub struct WaveletMatrix < I > {
6160 layers : Vec < BitVector < I > > ,
6261 alph_size : usize ,
62+ bytes : Bytes ,
63+ }
64+
65+ impl < I > Default for WaveletMatrix < I > {
66+ fn default ( ) -> Self {
67+ Self {
68+ layers : Vec :: new ( ) ,
69+ alph_size : 0 ,
70+ bytes : Bytes :: empty ( ) ,
71+ }
72+ }
6373}
6474
6575/// Metadata describing the serialized form of a [`WaveletMatrix`].
@@ -89,55 +99,79 @@ where
8999 return Err ( anyhow ! ( "seq must not be empty." ) ) ;
90100 }
91101
102+ let len = seq. len ( ) ;
92103 let alph_size = seq. iter ( ) . max ( ) . unwrap ( ) + 1 ;
93104 let alph_width = utils:: needed_bits ( alph_size) ;
105+ let num_words = ( len + WORD_LEN - 1 ) / WORD_LEN ;
106+
107+ let mut store = vec ! [ 0usize ; alph_width * num_words] ;
94108
95109 let mut zeros = seq;
96110 let mut ones = CompactVector :: new ( alph_width) ?. freeze ( ) ;
97- let mut layers = vec ! [ ] ;
98111
99112 for depth in 0 ..alph_width {
100113 let mut next_zeros = CompactVectorBuilder :: new ( alph_width) . unwrap ( ) ;
101114 let mut next_ones = CompactVectorBuilder :: new ( alph_width) . unwrap ( ) ;
102- let mut bv = BitVectorBuilder :: new ( ) ;
103- Self :: filter (
115+ let layer = & mut store[ depth * num_words..( depth + 1 ) * num_words] ;
116+ let mut pos = 0 ;
117+ Self :: filter_into (
104118 & zeros,
105119 alph_width - depth - 1 ,
106120 & mut next_zeros,
107121 & mut next_ones,
108- & mut bv,
122+ layer,
123+ & mut pos,
109124 ) ;
110- Self :: filter (
125+ Self :: filter_into (
111126 & ones,
112127 alph_width - depth - 1 ,
113128 & mut next_zeros,
114129 & mut next_ones,
115- & mut bv,
130+ layer,
131+ & mut pos,
116132 ) ;
117133 zeros = next_zeros. freeze ( ) ;
118134 ones = next_ones. freeze ( ) ;
119- let bits = bv. freeze :: < I > ( ) ;
120- layers. push ( bits) ;
121135 }
122136
123- Ok ( Self { layers, alph_size } )
137+ let bytes = Bytes :: from_source ( store) ;
138+ let mut layer_bytes = bytes. clone ( ) ;
139+ let mut layers = Vec :: with_capacity ( alph_width) ;
140+ for _ in 0 ..alph_width {
141+ let words = layer_bytes
142+ . view_prefix_with_elems :: < [ usize ] > ( num_words)
143+ . map_err ( |e| anyhow ! ( e) ) ?;
144+ let data = BitVectorData { words, len } ;
145+ let index = I :: build ( & data) ;
146+ layers. push ( BitVector :: new ( data, index) ) ;
147+ }
148+
149+ Ok ( Self {
150+ layers,
151+ alph_size,
152+ bytes,
153+ } )
124154 }
125155
126- fn filter (
156+ fn filter_into (
127157 seq : & CompactVector ,
128158 shift : usize ,
129159 next_zeros : & mut CompactVectorBuilder ,
130160 next_ones : & mut CompactVectorBuilder ,
131- bv : & mut BitVectorBuilder ,
161+ layer : & mut [ usize ] ,
162+ pos : & mut usize ,
132163 ) {
133164 for val in seq. iter ( ) {
134165 let bit = ( ( val >> shift) & 1 ) == 1 ;
135- bv. push_bit ( bit) ;
136166 if bit {
167+ let idx = * pos / WORD_LEN ;
168+ let sh = * pos % WORD_LEN ;
169+ layer[ idx] |= 1usize << sh;
137170 next_ones. push_int ( val) . unwrap ( ) ;
138171 } else {
139172 next_zeros. push_int ( val) . unwrap ( ) ;
140173 }
174+ * pos += 1 ;
141175 }
142176 }
143177
@@ -577,24 +611,21 @@ where
577611
578612 /// Serializes the sequence into a [`Bytes`] buffer along with its metadata.
579613 pub fn to_bytes ( & self ) -> ( WaveletMatrixMeta , Bytes ) {
580- let mut store: Vec < usize > = Vec :: new ( ) ;
581- for layer in & self . layers {
582- store. extend_from_slice ( layer. data . words ( ) ) ;
583- }
584614 let meta = WaveletMatrixMeta {
585615 alph_size : self . alph_size ,
586616 alph_width : self . alph_width ( ) ,
587617 len : self . len ( ) ,
588618 } ;
589- ( meta, Bytes :: from_source ( store ) )
619+ ( meta, self . bytes . clone ( ) )
590620 }
591621
592622 /// Reconstructs the sequence from metadata and a zero-copy [`Bytes`] buffer.
593- pub fn from_bytes ( meta : WaveletMatrixMeta , mut bytes : Bytes ) -> Result < Self > {
623+ pub fn from_bytes ( meta : WaveletMatrixMeta , bytes : Bytes ) -> Result < Self > {
594624 let mut layers = Vec :: with_capacity ( meta. alph_width ) ;
595625 let num_words = ( meta. len + WORD_LEN - 1 ) / WORD_LEN ;
626+ let mut slice = bytes. clone ( ) ;
596627 for _ in 0 ..meta. alph_width {
597- let words = bytes
628+ let words = slice
598629 . view_prefix_with_elems :: < [ usize ] > ( num_words)
599630 . map_err ( |e| anyhow ! ( e) ) ?;
600631 let data = BitVectorData {
@@ -607,6 +638,7 @@ where
607638 Ok ( Self {
608639 layers,
609640 alph_size : meta. alph_size ,
641+ bytes,
610642 } )
611643 }
612644}
0 commit comments