Skip to content

Commit b6d3661

Browse files
authored
Add ArrayHash and ArrayEq traits (#5009)
This allows us to check for structural equality of array trees.

---------

Signed-off-by: Nicholas Gates <[email protected]>
1 parent 7f5ba56 commit b6d3661

File tree

41 files changed

+1006
-26
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1006
-26
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ enum-iterator = "2.0.0"
115115
erased-serde = "0.4"
116116
fastlanes = "0.5"
117117
flatbuffers = "25.2.10"
118-
fsst-rs = "0.5.2"
118+
fsst-rs = "0.5.5"
119119
futures = { version = "0.3.31", default-features = false }
120120
fuzzy-matcher = "0.3"
121121
glob = "0.3.2"

encodings/alp/src/alp/array.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Debug;
5+
use std::hash::Hash;
56

67
use vortex_array::patches::Patches;
78
use vortex_array::stats::{ArrayStats, StatsSetRef};
89
use vortex_array::vtable::{
910
ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityChild, ValidityVTableFromChild,
1011
};
11-
use vortex_array::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, vtable};
12+
use vortex_array::{
13+
Array, ArrayEq, ArrayHash, ArrayRef, Canonical, EncodingId, EncodingRef, Precision, vtable,
14+
};
1215
use vortex_dtype::{DType, PType};
1316
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
1417

@@ -261,6 +264,20 @@ impl ArrayVTable<ALPVTable> for ALPVTable {
261264
fn stats(array: &ALPArray) -> StatsSetRef<'_> {
262265
array.stats_set.to_ref(array.as_ref())
263266
}
267+
268+
fn array_hash<H: std::hash::Hasher>(array: &ALPArray, state: &mut H, precision: Precision) {
269+
array.dtype.hash(state);
270+
array.encoded.array_hash(state, precision);
271+
array.exponents.hash(state);
272+
array.patches.array_hash(state, precision);
273+
}
274+
275+
fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
276+
array.dtype == other.dtype
277+
&& array.encoded.array_eq(&other.encoded, precision)
278+
&& array.exponents == other.exponents
279+
&& array.patches.array_eq(&other.patches, precision)
280+
}
264281
}
265282

266283
impl CanonicalVTable<ALPVTable> for ALPVTable {

encodings/alp/src/alp/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use vortex_scalar::PValue;
2121

2222
const SAMPLE_SIZE: usize = 32;
2323

24-
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
24+
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
2525
pub struct Exponents {
2626
pub e: u8,
2727
pub f: u8,

encodings/alp/src/alp_rd/array.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Debug;
5+
use std::hash::Hash;
56

67
use vortex_array::arrays::PrimitiveArray;
78
use vortex_array::patches::Patches;
@@ -10,7 +11,10 @@ use vortex_array::validity::Validity;
1011
use vortex_array::vtable::{
1112
ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityChild, ValidityVTableFromChild,
1213
};
13-
use vortex_array::{Array, ArrayRef, Canonical, EncodingId, EncodingRef, ToCanonical, vtable};
14+
use vortex_array::{
15+
Array, ArrayEq, ArrayHash, ArrayRef, Canonical, EncodingId, EncodingRef, Precision,
16+
ToCanonical, vtable,
17+
};
1418
use vortex_buffer::Buffer;
1519
use vortex_dtype::{DType, PType};
1620
use vortex_error::{VortexResult, vortex_bail};
@@ -198,6 +202,28 @@ impl ArrayVTable<ALPRDVTable> for ALPRDVTable {
198202
fn stats(array: &ALPRDArray) -> StatsSetRef<'_> {
199203
array.stats_set.to_ref(array.as_ref())
200204
}
205+
206+
fn array_hash<H: std::hash::Hasher>(array: &ALPRDArray, state: &mut H, precision: Precision) {
207+
array.dtype.hash(state);
208+
array.left_parts.array_hash(state, precision);
209+
array.left_parts_dictionary.array_hash(state, precision);
210+
array.right_parts.array_hash(state, precision);
211+
array.right_bit_width.hash(state);
212+
array.left_parts_patches.array_hash(state, precision);
213+
}
214+
215+
fn array_eq(array: &ALPRDArray, other: &ALPRDArray, precision: Precision) -> bool {
216+
array.dtype == other.dtype
217+
&& array.left_parts.array_eq(&other.left_parts, precision)
218+
&& array
219+
.left_parts_dictionary
220+
.array_eq(&other.left_parts_dictionary, precision)
221+
&& array.right_parts.array_eq(&other.right_parts, precision)
222+
&& array.right_bit_width == other.right_bit_width
223+
&& array
224+
.left_parts_patches
225+
.array_eq(&other.left_parts_patches, precision)
226+
}
201227
}
202228

203229
impl CanonicalVTable<ALPRDVTable> for ALPRDVTable {

encodings/bytebool/src/array.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Debug;
5+
use std::hash::Hash;
56
use std::ops::Range;
67

78
use vortex_array::arrays::BoolArray;
@@ -11,7 +12,9 @@ use vortex_array::vtable::{
1112
ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
1213
ValidityVTableFromValidityHelper,
1314
};
14-
use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
15+
use vortex_array::{
16+
ArrayEq, ArrayHash, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, Precision, vtable,
17+
};
1518
use vortex_buffer::{BitBuffer, ByteBuffer};
1619
use vortex_dtype::DType;
1720
use vortex_error::vortex_panic;
@@ -109,6 +112,22 @@ impl ArrayVTable<ByteBoolVTable> for ByteBoolVTable {
109112
fn stats(array: &ByteBoolArray) -> StatsSetRef<'_> {
110113
array.stats_set.to_ref(array.as_ref())
111114
}
115+
116+
fn array_hash<H: std::hash::Hasher>(
117+
array: &ByteBoolArray,
118+
state: &mut H,
119+
precision: Precision,
120+
) {
121+
array.dtype.hash(state);
122+
array.buffer.array_hash(state, precision);
123+
array.validity.array_hash(state, precision);
124+
}
125+
126+
fn array_eq(array: &ByteBoolArray, other: &ByteBoolArray, precision: Precision) -> bool {
127+
array.dtype == other.dtype
128+
&& array.buffer.array_eq(&other.buffer, precision)
129+
&& array.validity.array_eq(&other.validity, precision)
130+
}
112131
}
113132

114133
impl CanonicalVTable<ByteBoolVTable> for ByteBoolVTable {

encodings/datetime-parts/src/array.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Debug;
5+
use std::hash::Hash;
56

67
use vortex_array::stats::{ArrayStats, StatsSetRef};
78
use vortex_array::vtable::{
89
ArrayVTable, NotSupported, VTable, ValidityChild, ValidityVTableFromChild,
910
};
10-
use vortex_array::{Array, ArrayRef, EncodingId, EncodingRef, vtable};
11+
use vortex_array::{
12+
Array, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef, Precision, vtable,
13+
};
1114
use vortex_dtype::DType;
1215
use vortex_error::{VortexResult, vortex_bail};
1316

@@ -128,6 +131,28 @@ impl ArrayVTable<DateTimePartsVTable> for DateTimePartsVTable {
128131
fn stats(array: &DateTimePartsArray) -> StatsSetRef<'_> {
129132
array.stats_set.to_ref(array.as_ref())
130133
}
134+
135+
fn array_hash<H: std::hash::Hasher>(
136+
array: &DateTimePartsArray,
137+
state: &mut H,
138+
precision: Precision,
139+
) {
140+
array.dtype.hash(state);
141+
array.days.array_hash(state, precision);
142+
array.seconds.array_hash(state, precision);
143+
array.subseconds.array_hash(state, precision);
144+
}
145+
146+
fn array_eq(
147+
array: &DateTimePartsArray,
148+
other: &DateTimePartsArray,
149+
precision: Precision,
150+
) -> bool {
151+
array.dtype == other.dtype
152+
&& array.days.array_eq(&other.days, precision)
153+
&& array.seconds.array_eq(&other.seconds, precision)
154+
&& array.subseconds.array_eq(&other.subseconds, precision)
155+
}
131156
}
132157

133158
impl ValidityChild<DateTimePartsVTable> for DateTimePartsVTable {

encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
mod compute;
55
mod serde;
66

7+
use std::hash::Hash;
78
use std::ops::Range;
89

910
use vortex_array::arrays::DecimalArray;
@@ -13,7 +14,8 @@ use vortex_array::vtable::{
1314
ValidityHelper, ValidityVTableFromChild,
1415
};
1516
use vortex_array::{
16-
Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable,
17+
Array, ArrayEq, ArrayHash, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, Precision,
18+
ToCanonical, vtable,
1719
};
1820
use vortex_dtype::{DType, DecimalDType, match_each_signed_integer_ptype};
1921
use vortex_error::{VortexExpect, VortexResult, vortex_bail};
@@ -111,6 +113,23 @@ impl ArrayVTable<DecimalBytePartsVTable> for DecimalBytePartsVTable {
111113
fn stats(array: &DecimalBytePartsArray) -> StatsSetRef<'_> {
112114
array.stats_set.to_ref(array.as_ref())
113115
}
116+
117+
fn array_hash<H: std::hash::Hasher>(
118+
array: &DecimalBytePartsArray,
119+
state: &mut H,
120+
precision: Precision,
121+
) {
122+
array.dtype.hash(state);
123+
array.msp.array_hash(state, precision);
124+
}
125+
126+
fn array_eq(
127+
array: &DecimalBytePartsArray,
128+
other: &DecimalBytePartsArray,
129+
precision: Precision,
130+
) -> bool {
131+
array.dtype == other.dtype && array.msp.array_eq(&other.msp, precision)
132+
}
114133
}
115134

116135
impl CanonicalVTable<DecimalBytePartsVTable> for DecimalBytePartsVTable {

encodings/dict/src/array.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Debug;
5+
use std::hash::Hash;
56

67
use vortex_array::stats::{ArrayStats, StatsSetRef};
78
use vortex_array::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable};
8-
use vortex_array::{Array, ArrayRef, EncodingId, EncodingRef, ToCanonical, vtable};
9+
use vortex_array::{
10+
Array, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef, Precision, ToCanonical, vtable,
11+
};
912
use vortex_buffer::BitBuffer;
1013
use vortex_dtype::{DType, match_each_integer_ptype};
1114
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail};
@@ -116,6 +119,18 @@ impl ArrayVTable<DictVTable> for DictVTable {
116119
fn stats(array: &DictArray) -> StatsSetRef<'_> {
117120
array.stats_set.to_ref(array.as_ref())
118121
}
122+
123+
fn array_hash<H: std::hash::Hasher>(array: &DictArray, state: &mut H, precision: Precision) {
124+
array.dtype.hash(state);
125+
array.codes.array_hash(state, precision);
126+
array.values.array_hash(state, precision);
127+
}
128+
129+
fn array_eq(array: &DictArray, other: &DictArray, precision: Precision) -> bool {
130+
array.dtype == other.dtype
131+
&& array.codes.array_eq(&other.codes, precision)
132+
&& array.values.array_eq(&other.values, precision)
133+
}
119134
}
120135

121136
impl ValidityVTable<DictVTable> for DictVTable {

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Debug;
5+
use std::hash::Hash;
56

67
pub use compress::*;
78
use fastlanes::BitPacking;
@@ -14,7 +15,9 @@ use vortex_array::vtable::{
1415
ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
1516
ValidityVTableFromValidityHelper,
1617
};
17-
use vortex_array::{Array, Canonical, EncodingId, EncodingRef, vtable};
18+
use vortex_array::{
19+
Array, ArrayEq, ArrayHash, Canonical, EncodingId, EncodingRef, Precision, vtable,
20+
};
1821
use vortex_buffer::ByteBuffer;
1922
use vortex_dtype::{DType, NativePType, PType, match_each_integer_ptype};
2023
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure};
@@ -330,6 +333,30 @@ impl ArrayVTable<BitPackedVTable> for BitPackedVTable {
330333
fn stats(array: &BitPackedArray) -> StatsSetRef<'_> {
331334
array.stats_set.to_ref(array.as_ref())
332335
}
336+
337+
fn array_hash<H: std::hash::Hasher>(
338+
array: &BitPackedArray,
339+
state: &mut H,
340+
precision: Precision,
341+
) {
342+
array.offset.hash(state);
343+
array.len.hash(state);
344+
array.dtype.hash(state);
345+
array.bit_width.hash(state);
346+
array.packed.array_hash(state, precision);
347+
array.patches.array_hash(state, precision);
348+
array.validity.array_hash(state, precision);
349+
}
350+
351+
fn array_eq(array: &BitPackedArray, other: &BitPackedArray, precision: Precision) -> bool {
352+
array.offset == other.offset
353+
&& array.len == other.len
354+
&& array.dtype == other.dtype
355+
&& array.bit_width == other.bit_width
356+
&& array.packed.array_eq(&other.packed, precision)
357+
&& array.patches.array_eq(&other.patches, precision)
358+
&& array.validity.array_eq(&other.validity, precision)
359+
}
333360
}
334361

335362
impl CanonicalVTable<BitPackedVTable> for BitPackedVTable {

0 commit comments

Comments
 (0)