Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions cpp/src/arrow/scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,25 @@ struct ScalarHashImpl {

// Hash the relevant child arrays for each type taking offset and length
// from the parent array into account if necessary.
// - STRUCT: children share parent's offset/length
// - FIXED_SIZE_LIST: children use offset*list_size, length*list_size
// - Others (LIST, MAP, UNION, LIST_VIEW): have their own offset mechanisms
switch (a.type->id()) {
case Type::STRUCT:
for (const auto& child : a.child_data) {
RETURN_NOT_OK(ArrayHash(child, offset, length));
}
break;
// TODO(GH-35830): Investigate what should be the correct behavior for
// each nested type.
case Type::FIXED_SIZE_LIST: {
const auto& list_type = checked_cast<const FixedSizeListType&>(*a.type);
const int32_t list_size = list_type.list_size();
for (const auto& child : a.child_data) {
RETURN_NOT_OK(ArrayHash(child, offset * list_size, length * list_size));
}
break;
}
default:
// By default, just hash the arrays without considering
// the offset and length of the parent.
// LIST, MAP, UNION, LIST_VIEW have their own offset mechanisms
for (const auto& child : a.child_data) {
RETURN_NOT_OK(ArrayHash(child));
}
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/scalar_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1419,6 +1419,18 @@ TEST(TestFixedSizeListScalar, Cast) {
ASSERT_EQ(casted_str->ToString(), scalar.ToString());
}

// GH-35830: checks that two equal nested fixed-size-list scalars produce the
// same hash when one is read from index 1 of a two-element array and the
// other from index 0 of a one-element array holding the same value.
TEST(TestFixedSizeListScalar, Hashing) {
  // Element type: fixed-size list of 3 items, each a fixed-size list of 2 int32.
  auto pair_type = fixed_size_list(int32(), 2);
  auto nested_type = fixed_size_list(pair_type, 3);

  auto two_element_array = ArrayFromJSON(
      nested_type, "[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]");
  auto one_element_array =
      ArrayFromJSON(nested_type, "[[[7, 8], [9, 10], [11, 12]]]");

  // Both scalars hold [[7, 8], [9, 10], [11, 12]].
  ASSERT_OK_AND_ASSIGN(auto from_index_one, two_element_array->GetScalar(1));
  ASSERT_OK_AND_ASSIGN(auto from_index_zero, one_element_array->GetScalar(0));

  // Equal scalars are expected to yield equal hashes.
  ASSERT_EQ(*from_index_one, *from_index_zero);
  ASSERT_EQ(from_index_one->hash(), from_index_zero->hash());
}

TEST(TestMapScalar, Basics) {
auto value =
ArrayFromJSON(struct_({field("key", utf8(), false), field("value", int8())}),
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/test_scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ def test_hashing_struct_scalar():
assert hash1 == hash2


def test_hashing_fixed_size_list_scalar():
    """Equal nested fixed-size-list scalars must compare equal and hash equal.

    GH-35830: one scalar is read from index 1 of a two-element array, the
    other from index 0 of a one-element array holding the same value.
    """
    # Passing a list size as the second argument to pa.list_ yields a
    # fixed-size list type.
    inner = pa.list_(pa.int32(), 2)
    outer = pa.list_(inner, 3)
    g = pa.array(
        [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]],
        type=outer,
    )
    h = pa.array([[[7, 8], [9, 10], [11, 12]]], type=outer)
    # Matching hashes only mean something for scalars that are equal, so
    # verify equality before comparing the hashes.
    assert g[1] == h[0]
    assert hash(g[1]) == hash(h[0])


@pytest.mark.timezone_data
def test_timestamp_scalar():
a = repr(pa.scalar("0000-01-01").cast(pa.timestamp("s")))
Expand Down
Loading