Skip to content

Commit a2eb260

Browse files
committed
add tests, remove dedupe code path
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 2c7a641 commit a2eb260

File tree

3 files changed

+100
-55
lines changed

3 files changed

+100
-55
lines changed

vortex-vector/src/binaryview/vector.rs

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,9 @@ impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
183183

184184
let buffers_mut = match Arc::try_unwrap(self.buffers) {
185185
Ok(buffers) => buffers.into_vec(),
186-
Err(arc_buffers) => {
187-
return Err(Self {
188-
views: views_mut.freeze(),
189-
validity: validity_mut.freeze(),
190-
buffers: arc_buffers,
191-
_marker: std::marker::PhantomData,
192-
});
186+
Err(buffers) => {
187+
// Backup: collect a new Vec with clones of each buffer
188+
buffers.iter().cloned().collect()
193189
}
194190
};
195191

@@ -204,3 +200,30 @@ impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
204200
}
205201
}
206202
}
203+
204+
#[cfg(test)]
205+
mod tests {
206+
use crate::{StringVectorMut, VectorMutOps, VectorOps};
207+
208+
#[test]
209+
fn test_try_into_mut() {
210+
let mut shared_vec = StringVectorMut::with_capacity(5);
211+
shared_vec.append_nulls(2);
212+
shared_vec.append_values("an example value", 2);
213+
shared_vec.append_values("another example value", 1);
214+
215+
let shared_vec = shared_vec.freeze();
216+
217+
// Making a copy aliases the vector, preventing us from converting it back into mutable
218+
let shared_vec2 = shared_vec.clone();
219+
220+
// The Err variant is returned, because the aliasing borrow from shared_vec2 is blocking us
221+
// from taking unique ownership of the memory.
222+
let shared_vec = shared_vec.try_into_mut().unwrap_err();
223+
224+
// Dropping the aliasing borrow makes it possible to cast the unique reference to mut
225+
drop(shared_vec2);
226+
227+
assert!(shared_vec.try_into_mut().is_ok());
228+
}
229+
}

vortex-vector/src/binaryview/vector_mut.rs

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ use crate::binaryview::vector::BinaryViewVector;
1414
use crate::binaryview::view::{BinaryView, validate_views};
1515
use crate::{VectorMutOps, VectorOps};
1616

17+
// Default capacity for new string data buffers of 2MiB.
18+
const BUFFER_CAPACITY: usize = 2 * 1024 * 1024;
1719
/// Mutable variable-length binary vector.
1820
#[derive(Clone, Debug)]
1921

@@ -47,6 +49,17 @@ impl<T: BinaryViewType> BinaryViewVectorMut<T> {
4749
.vortex_expect("Failed to create `BinaryViewVectorMut`")
4850
}
4951

52+
/// Create a new empty [`BinaryViewVectorMut`], pre-allocated to hold the specified number
53+
/// of items. This does not reserve any memory for string data itself, only for the binary views
54+
/// and the validity bits.
55+
pub fn with_capacity(capacity: usize) -> Self {
56+
Self::new(
57+
BufferMut::with_capacity(capacity),
58+
Vec::new(),
59+
MaskMut::with_capacity(capacity),
60+
)
61+
}
62+
5063
/// Tries to create a new [`BinaryViewVectorMut`] from its components.
5164
///
5265
/// # Errors
@@ -99,6 +112,43 @@ impl<T: BinaryViewType> BinaryViewVectorMut<T> {
99112
}
100113
}
101114
}
115+
116+
/// Append a repeated sequence of binary data to a vector.
117+
///
118+
/// ```
119+
/// # use vortex_vector::{StringVectorMut, VectorMutOps};
120+
/// let mut strings = StringVectorMut::with_capacity(4);
121+
/// strings.append_values("inlined", 2);
122+
/// strings.append_nulls(1);
123+
/// strings.append_values("large not inlined", 1);
124+
///
125+
/// let strings = strings.freeze();
126+
///
127+
/// assert_eq!(
128+
/// [strings.get(0), strings.get(1), strings.get(2), strings.get(3)],
129+
/// [Some("inlined"), Some("inlined"), None, Some("large not inlined")],
130+
/// );
131+
/// ```
132+
pub fn append_values(&mut self, value: &T::Slice, n: usize) {
133+
let bytes = value.as_ref();
134+
if bytes.len() <= BinaryView::MAX_INLINED_SIZE {
135+
self.views.push_n(BinaryView::new_inlined(bytes), n);
136+
} else {
137+
let buffer_index =
138+
u32::try_from(self.buffers.len()).vortex_expect("buffer count exceeds u32::MAX");
139+
140+
let buf = self
141+
.open_buffer
142+
.get_or_insert_with(|| ByteBufferMut::with_capacity(BUFFER_CAPACITY));
143+
let offset = u32::try_from(buf.len()).vortex_expect("buffer length exceeds u32::MAX");
144+
buf.extend_from_slice(value.as_ref());
145+
146+
self.views
147+
.push_n(BinaryView::make_view(bytes, buffer_index, offset), n);
148+
}
149+
150+
self.validity.append_n(true, n);
151+
}
102152
}
103153

104154
impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
@@ -123,33 +173,21 @@ impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
123173
self.buffers.push(open.freeze());
124174
}
125175

126-
// We build a lookup table to map BinaryView's from the `other` to have
127-
// valid buffer indices in the current array.
128-
let mut buf_index_lookup: Vec<u32> = Vec::with_capacity(other.buffers().len());
129-
let mut new_buffers = Vec::new();
130-
for buffer in other.buffers().iter() {
131-
let ptr = buffer.as_ptr().addr();
132-
let new_index: u32 = self
133-
.buffers
134-
.iter()
135-
.position(|b| b.as_ptr().addr() == ptr)
136-
.unwrap_or_else(|| self.buffers.len() + new_buffers.len())
137-
.try_into()
138-
.vortex_expect("buffer index must fit in u32");
139-
140-
if new_index as usize == new_buffers.len() {
141-
// We need to append the buffer
142-
new_buffers.push(buffer.clone());
143-
}
144-
145-
buf_index_lookup.push(new_index);
146-
}
176+
let offset =
177+
u32::try_from(self.buffers.len()).vortex_expect("buffer count exceeds u32::MAX");
147178

148-
// rewrite the views using our lookup table
149-
let new_views_iter = rewrite_views(other.views().iter().copied(), &buf_index_lookup);
179+
self.buffers.extend(other.buffers().iter().cloned());
150180

151-
self.buffers.extend(new_buffers);
181+
let new_views_iter = other.views().iter().copied().map(|mut v| {
182+
if v.is_inlined() {
183+
v
184+
} else {
185+
v.as_view_mut().buffer_index += offset;
186+
v
187+
}
188+
});
152189
self.views.extend(new_views_iter);
190+
153191
self.validity.append_mask(other.validity())
154192
}
155193

@@ -182,28 +220,9 @@ impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
182220
}
183221
}
184222

185-
/// Create a new iterator that yields views rewritten with a new buffer index.
186-
#[inline]
187-
fn rewrite_views(
188-
views: impl Iterator<Item = BinaryView>,
189-
buf_index_lookup: &[u32],
190-
) -> impl Iterator<Item = BinaryView> {
191-
views.map(|mut view| {
192-
if view.is_inlined() {
193-
return view;
194-
}
195-
let view = view.as_view_mut();
196-
let old_index = view.buffer_index;
197-
let new_index = *buf_index_lookup
198-
.get(old_index as usize)
199-
.unwrap_or(&old_index);
200-
view.buffer_index = new_index;
201-
BinaryView { _ref: *view }
202-
})
203-
}
204-
205223
#[cfg(test)]
206224
mod tests {
225+
use std::ops::Deref;
207226
use std::sync::Arc;
208227

209228
use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
@@ -256,7 +275,7 @@ mod tests {
256275
BinaryView::make_view(b"a really very quite long string 2", 0, 33),
257276
BinaryView::make_view(b"a really very quite long string 1", 0, 0),
258277
],
259-
vec![buf0, buf1.clone()],
278+
vec![buf0.clone(), buf1.clone()],
260279
MaskMut::new_true(6),
261280
);
262281

@@ -267,7 +286,7 @@ mod tests {
267286
0,
268287
33
269288
)],
270-
Arc::new(Box::new([buf1])),
289+
Arc::new(Box::new([buf1.clone()])),
271290
Mask::new_true(1),
272291
);
273292

@@ -299,7 +318,10 @@ mod tests {
299318
"a really very quite long string 4"
300319
);
301320

302-
assert_eq!(strings_finished.buffers().len(), 2);
321+
assert_eq!(
322+
strings_finished.buffers().deref().as_ref(),
323+
&[buf0, buf1.clone(), buf1]
324+
);
303325
}
304326

305327
#[test]

vortex-vector/src/binaryview/view.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ impl Default for BinaryView {
119119

120120
impl BinaryView {
121121
/// Maximum size of an inlined binary value.
122-
const MAX_INLINED_SIZE: usize = 12;
122+
pub const MAX_INLINED_SIZE: usize = 12;
123123

124124
/// Create a view from a value, block and offset
125125
///

0 commit comments

Comments
 (0)