@@ -14,6 +14,8 @@ use crate::binaryview::vector::BinaryViewVector;
1414use crate :: binaryview:: view:: { BinaryView , validate_views} ;
1515use crate :: { VectorMutOps , VectorOps } ;
1616
17+ // Default capacity for new string data buffers of 2MiB.
18+ const BUFFER_CAPACITY : usize = 2 * 1024 * 1024 ;
1719/// Mutable variable-length binary vector.
1820#[ derive( Clone , Debug ) ]
1921
@@ -47,6 +49,17 @@ impl<T: BinaryViewType> BinaryViewVectorMut<T> {
4749 . vortex_expect ( "Failed to create `BinaryViewVectorMut`" )
4850 }
4951
52+ /// Create a new empty [`BinaryViewVectorMut`], pre-allocated to hold the specified number
53+ /// of items. This does not reserve any memory for string data itself, only for the binary views
54+ /// and the validity bits.
55+ pub fn with_capacity ( capacity : usize ) -> Self {
56+ Self :: new (
57+ BufferMut :: with_capacity ( capacity) ,
58+ Vec :: new ( ) ,
59+ MaskMut :: with_capacity ( capacity) ,
60+ )
61+ }
62+
5063 /// Tries to create a new [`BinaryViewVectorMut`] from its components.
5164 ///
5265 /// # Errors
@@ -99,6 +112,43 @@ impl<T: BinaryViewType> BinaryViewVectorMut<T> {
99112 }
100113 }
101114 }
115+
116+ /// Append a repeated sequence of binary data to a vector.
117+ ///
118+ /// ```
119+ /// # use vortex_vector::{StringVectorMut, VectorMutOps};
120+ /// let mut strings = StringVectorMut::with_capacity(4);
121+ /// strings.append_values("inlined", 2);
122+ /// strings.append_nulls(1);
123+ /// strings.append_values("large not inlined", 1);
124+ ///
125+ /// let strings = strings.freeze();
126+ ///
127+ /// assert_eq!(
128+ /// [strings.get(0), strings.get(1), strings.get(2), strings.get(3)],
129+ /// [Some("inlined"), Some("inlined"), None, Some("large not inlined")],
130+ /// );
131+ /// ```
132+ pub fn append_values ( & mut self , value : & T :: Slice , n : usize ) {
133+ let bytes = value. as_ref ( ) ;
134+ if bytes. len ( ) <= BinaryView :: MAX_INLINED_SIZE {
135+ self . views . push_n ( BinaryView :: new_inlined ( bytes) , n) ;
136+ } else {
137+ let buffer_index =
138+ u32:: try_from ( self . buffers . len ( ) ) . vortex_expect ( "buffer count exceeds u32::MAX" ) ;
139+
140+ let buf = self
141+ . open_buffer
142+ . get_or_insert_with ( || ByteBufferMut :: with_capacity ( BUFFER_CAPACITY ) ) ;
143+ let offset = u32:: try_from ( buf. len ( ) ) . vortex_expect ( "buffer length exceeds u32::MAX" ) ;
144+ buf. extend_from_slice ( value. as_ref ( ) ) ;
145+
146+ self . views
147+ . push_n ( BinaryView :: make_view ( bytes, buffer_index, offset) , n) ;
148+ }
149+
150+ self . validity . append_n ( true , n) ;
151+ }
102152}
103153
104154impl < T : BinaryViewType > VectorMutOps for BinaryViewVectorMut < T > {
@@ -123,33 +173,21 @@ impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
123173 self . buffers . push ( open. freeze ( ) ) ;
124174 }
125175
126- // We build a lookup table to map BinaryView's from the `other` to have
127- // valid buffer indices in the current array.
128- let mut buf_index_lookup: Vec < u32 > = Vec :: with_capacity ( other. buffers ( ) . len ( ) ) ;
129- let mut new_buffers = Vec :: new ( ) ;
130- for buffer in other. buffers ( ) . iter ( ) {
131- let ptr = buffer. as_ptr ( ) . addr ( ) ;
132- let new_index: u32 = self
133- . buffers
134- . iter ( )
135- . position ( |b| b. as_ptr ( ) . addr ( ) == ptr)
136- . unwrap_or_else ( || self . buffers . len ( ) + new_buffers. len ( ) )
137- . try_into ( )
138- . vortex_expect ( "buffer index must fit in u32" ) ;
139-
140- if new_index as usize == new_buffers. len ( ) {
141- // We need to append the buffer
142- new_buffers. push ( buffer. clone ( ) ) ;
143- }
144-
145- buf_index_lookup. push ( new_index) ;
146- }
176+ let offset =
177+ u32:: try_from ( self . buffers . len ( ) ) . vortex_expect ( "buffer count exceeds u32::MAX" ) ;
147178
148- // rewrite the views using our lookup table
149- let new_views_iter = rewrite_views ( other. views ( ) . iter ( ) . copied ( ) , & buf_index_lookup) ;
179+ self . buffers . extend ( other. buffers ( ) . iter ( ) . cloned ( ) ) ;
150180
151- self . buffers . extend ( new_buffers) ;
181+ let new_views_iter = other. views ( ) . iter ( ) . copied ( ) . map ( |mut v| {
182+ if v. is_inlined ( ) {
183+ v
184+ } else {
185+ v. as_view_mut ( ) . buffer_index += offset;
186+ v
187+ }
188+ } ) ;
152189 self . views . extend ( new_views_iter) ;
190+
153191 self . validity . append_mask ( other. validity ( ) )
154192 }
155193
@@ -182,28 +220,9 @@ impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
182220 }
183221}
184222
185- /// Create a new iterator that yields views rewritten with a new buffer index.
186- #[ inline]
187- fn rewrite_views (
188- views : impl Iterator < Item = BinaryView > ,
189- buf_index_lookup : & [ u32 ] ,
190- ) -> impl Iterator < Item = BinaryView > {
191- views. map ( |mut view| {
192- if view. is_inlined ( ) {
193- return view;
194- }
195- let view = view. as_view_mut ( ) ;
196- let old_index = view. buffer_index ;
197- let new_index = * buf_index_lookup
198- . get ( old_index as usize )
199- . unwrap_or ( & old_index) ;
200- view. buffer_index = new_index;
201- BinaryView { _ref : * view }
202- } )
203- }
204-
205223#[ cfg( test) ]
206224mod tests {
225+ use std:: ops:: Deref ;
207226 use std:: sync:: Arc ;
208227
209228 use vortex_buffer:: { ByteBuffer , buffer, buffer_mut} ;
@@ -256,7 +275,7 @@ mod tests {
256275 BinaryView :: make_view( b"a really very quite long string 2" , 0 , 33 ) ,
257276 BinaryView :: make_view( b"a really very quite long string 1" , 0 , 0 ) ,
258277 ] ,
259- vec ! [ buf0, buf1. clone( ) ] ,
278+ vec ! [ buf0. clone ( ) , buf1. clone( ) ] ,
260279 MaskMut :: new_true ( 6 ) ,
261280 ) ;
262281
@@ -267,7 +286,7 @@ mod tests {
267286 0 ,
268287 33
269288 ) ] ,
270- Arc :: new ( Box :: new ( [ buf1] ) ) ,
289+ Arc :: new ( Box :: new ( [ buf1. clone ( ) ] ) ) ,
271290 Mask :: new_true ( 1 ) ,
272291 ) ;
273292
@@ -299,7 +318,10 @@ mod tests {
299318 "a really very quite long string 4"
300319 ) ;
301320
302- assert_eq ! ( strings_finished. buffers( ) . len( ) , 2 ) ;
321+ assert_eq ! (
322+ strings_finished. buffers( ) . deref( ) . as_ref( ) ,
323+ & [ buf0, buf1. clone( ) , buf1]
324+ ) ;
303325 }
304326
305327 #[ test]
0 commit comments