Skip to content

Commit 4d5fdaf

Browse files
authored
Add Buffer::slice_ref (#2072)
And fix PartialEq, PartialOrd, Hash for Buffer fix #2009
1 parent bae2e8e commit 4d5fdaf

File tree

3 files changed

+84
-19
lines changed

3 files changed

+84
-19
lines changed

vortex-array/src/array/constant/canonical.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,16 +136,12 @@ mod tests {
136136
let const_array = ConstantArray::new("four".to_string(), 4);
137137

138138
// Check all values correct.
139-
let canonical = const_array
140-
.into_canonical()
141-
.unwrap()
142-
.into_varbinview()
143-
.unwrap();
139+
let canonical = const_array.into_varbinview().unwrap();
144140

145141
assert_eq!(canonical.len(), 4);
146142

147143
for i in 0..=3 {
148-
assert_eq!(scalar_at(&canonical, i).unwrap(), "four".into(),);
144+
assert_eq!(scalar_at(&canonical, i).unwrap(), "four".into());
149145
}
150146
}
151147

vortex-array/src/array/varbinview/mod.rs

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,11 @@ use arrow_array::types::{BinaryViewType, ByteViewType, StringViewType};
88
use arrow_array::{ArrayRef, BinaryViewArray, GenericByteViewArray, StringViewArray};
99
use arrow_buffer::ScalarBuffer;
1010
use itertools::Itertools;
11-
use rkyv::from_bytes;
1211
use static_assertions::{assert_eq_align, assert_eq_size};
1312
use vortex_buffer::{Alignment, Buffer, ByteBuffer};
1413
use vortex_dtype::DType;
15-
use vortex_error::{
16-
vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect, VortexResult, VortexUnwrap,
17-
};
14+
use vortex_error::{vortex_bail, vortex_panic, VortexExpect, VortexResult, VortexUnwrap};
1815

19-
use crate::array::{StructArray, StructMetadata, VarBinMetadata};
2016
use crate::arrow::FromArrowArray;
2117
use crate::encoding::ids;
2218
use crate::stats::StatsSet;
@@ -275,20 +271,19 @@ impl VarBinViewArray {
275271
/// Returns a `ByteBuffer` pointing to the underlying data without performing a copy.
276272
#[inline]
277273
pub fn bytes_at(&self, index: usize) -> ByteBuffer {
278-
let view = self.views()[index];
274+
let views = self.views();
275+
let view = &views[index];
279276
// Expect this to be the common case: strings > 12 bytes.
280277
if !view.is_inlined() {
281278
let view_ref = view.as_view();
282279
self.buffer(view_ref.buffer_index() as usize)
283280
.slice(view_ref.to_range())
284281
} else {
285282
// Inlined view: return a zero-copy slice of the views buffer covering the view's inline bytes.
286-
let view_byte_start = index * size_of::<BinaryView>() + 4;
287-
let view_byte_end = view_byte_start + view.len() as usize;
288-
self.0
289-
.byte_buffer(0)
290-
.vortex_expect("Must have views buffer")
291-
.slice_with_alignment(view_byte_start..view_byte_end, Alignment::new(1))
283+
views
284+
.clone()
285+
.into_byte_buffer()
286+
.slice_ref(view.as_inlined().value())
292287
}
293288
}
294289

vortex-buffer/src/buffer.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::any::type_name;
2+
use std::cmp::Ordering;
23
use std::collections::Bound;
34
use std::fmt::{Debug, Formatter};
5+
use std::hash::{Hash, Hasher};
46
use std::ops::{Deref, RangeBounds};
57

68
use bytes::{Buf, Bytes};
@@ -10,14 +12,40 @@ use crate::debug::TruncatedDebug;
1012
use crate::{Alignment, BufferMut, ByteBuffer};
1113

1214
/// An immutable buffer of items of `T`.
13-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)]
15+
#[derive(Clone)]
1416
pub struct Buffer<T> {
1517
pub(crate) bytes: Bytes,
1618
pub(crate) length: usize,
1719
pub(crate) alignment: Alignment,
1820
pub(crate) _marker: std::marker::PhantomData<T>,
1921
}
2022

23+
impl<T> PartialEq for Buffer<T> {
24+
fn eq(&self, other: &Self) -> bool {
25+
self.bytes == other.bytes
26+
}
27+
}
28+
29+
impl<T> Eq for Buffer<T> {}
30+
31+
impl<T> Ord for Buffer<T> {
32+
fn cmp(&self, other: &Self) -> Ordering {
33+
self.bytes.cmp(&other.bytes)
34+
}
35+
}
36+
37+
impl<T> PartialOrd for Buffer<T> {
38+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
39+
Some(self.bytes.cmp(&other.bytes))
40+
}
41+
}
42+
43+
impl<T> Hash for Buffer<T> {
44+
fn hash<H: Hasher>(&self, state: &mut H) {
45+
self.bytes.as_ref().hash(state)
46+
}
47+
}
48+
2149
impl<T> Buffer<T> {
2250
/// Returns a new `Buffer<T>` copied from the provided `Vec<T>`, `&[T]`, etc.
2351
///
@@ -234,6 +262,52 @@ impl<T> Buffer<T> {
234262
}
235263
}
236264

265+
/// Returns a slice of self that is equivalent to the given subset.
266+
///
267+
/// When processing the buffer you will often end up with &\[T\] that is a subset
268+
/// of the underlying buffer. This function turns the slice into a slice of the buffer
269+
/// it has been taken from.
270+
///
271+
/// # Panics
272+
/// Panics if the given subslice is not contained within this buffer's underlying `Bytes`.
273+
#[inline(always)]
274+
pub fn slice_ref(&self, subset: &[T]) -> Self {
275+
self.slice_ref_with_alignment(subset, Alignment::of::<T>())
276+
}
277+
278+
/// Returns a slice of self that is equivalent to the given subset.
279+
///
280+
/// When processing the buffer you will often end up with &\[T\] that is a subset
281+
/// of the underlying buffer. This function turns the slice into a slice of the buffer
282+
/// it has been taken from.
283+
///
284+
/// # Panics
285+
/// Panics if the given subslice is not contained within this buffer's underlying `Bytes`.
286+
/// Also panics if the given alignment is not aligned to the type `T`, if the buffer's alignment is not aligned to the given alignment, or if the start of the subslice is not aligned to the given alignment.
287+
pub fn slice_ref_with_alignment(&self, subset: &[T], alignment: Alignment) -> Self {
288+
if !alignment.is_aligned_to(Alignment::of::<T>()) {
289+
vortex_panic!("slice_ref alignment must at least align to type T")
290+
}
291+
292+
if !self.alignment.is_aligned_to(alignment) {
293+
vortex_panic!("slice_ref subset alignment must at least align to the buffer alignment")
294+
}
295+
296+
if subset.as_ptr().align_offset(*alignment) != 0 {
297+
vortex_panic!("slice_ref subset must be aligned to {:?}", alignment);
298+
}
299+
300+
let subset_u8 =
301+
unsafe { std::slice::from_raw_parts(subset.as_ptr().cast(), size_of_val(subset)) };
302+
303+
Self {
304+
bytes: self.bytes.slice_ref(subset_u8),
305+
length: subset.len(),
306+
alignment,
307+
_marker: Default::default(),
308+
}
309+
}
310+
237311
/// Returns the underlying aligned buffer.
238312
pub fn into_inner(self) -> Bytes {
239313
self.bytes

0 commit comments

Comments
 (0)