Skip to content

Commit a4565ef

Browse files
GH-48151: [C++][Parquet] Fix arrow-ipc-message-internal-test & arrow-acero-hash-join-node-test failures
1 parent 7cd2f2a commit a4565ef

File tree

2 files changed

+55
-8
lines changed

2 files changed

+55
-8
lines changed

cpp/src/arrow/acero/hash_join.cc

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "arrow/acero/task_util.h"
3030
#include "arrow/compute/row/encode_internal.h"
3131
#include "arrow/compute/row/row_encoder_internal.h"
32+
#include "arrow/util/endian.h"
3233
#include "arrow/util/logging_internal.h"
3334
#include "arrow/util/tracing_internal.h"
3435

@@ -306,19 +307,40 @@ class HashJoinBasicImpl : public HashJoinImpl {
306307

307308
size_t num_probed_rows = match.size() + no_match.size();
308309
if (mask.is_scalar()) {
310+
#if ARROW_LITTLE_ENDIAN
309311
const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
310312
if (mask_scalar.is_valid && mask_scalar.value) {
311313
// All rows passed, nothing left to do
312314
return Status::OK();
313-
} else {
314-
// Nothing passed, no_match becomes everything
315-
no_match.resize(num_probed_rows);
316-
std::iota(no_match.begin(), no_match.end(), 0);
317-
match_left.clear();
318-
match_right.clear();
319-
match.clear();
320-
return Status::OK();
321315
}
316+
#else
317+
// Check if the scalar is a BooleanScalar before casting
318+
if (mask.scalar()->type->id() == Type::BOOL) {
319+
const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
320+
if (mask_scalar.is_valid && mask_scalar.value) {
321+
// All rows passed, nothing left to do
322+
return Status::OK();
323+
} else {
324+
// Nothing passed, no_match becomes everything
325+
no_match.resize(num_probed_rows);
326+
std::iota(no_match.begin(), no_match.end(), 0);
327+
match_left.clear();
328+
match_right.clear();
329+
match.clear();
330+
return Status::OK();
331+
}
332+
}
333+
#endif
334+
// On Little-endian systems: the mask is not a valid `true`, so nothing passed and no_match becomes everything
335+
// On Big-endian systems:
336+
// If it's not a BooleanScalar (e.g., NullScalar), treat as false
337+
// This handles cases like literal(NullScalar()) in filter expressions
338+
no_match.resize(num_probed_rows);
339+
std::iota(no_match.begin(), no_match.end(), 0);
340+
match_left.clear();
341+
match_right.clear();
342+
match.clear();
343+
return Status::OK();
322344
}
323345
ARROW_DCHECK_EQ(mask.array()->offset, 0);
324346
ARROW_DCHECK_EQ(mask.array()->length, static_cast<int64_t>(match_left.size()));

cpp/src/arrow/ipc/message_internal_test.cc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "arrow/ipc/metadata_internal.h"
2525
#include "arrow/ipc/options.h"
2626
#include "arrow/testing/gtest_util.h"
27+
#include "arrow/util/endian.h"
2728
#include "arrow/util/key_value_metadata.h"
2829

2930
namespace arrow::ipc::internal {
@@ -55,6 +56,7 @@ TEST(TestMessageInternal, TestByteIdentical) {
5556
ASSERT_OK(
5657
WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(), &out_buffer));
5758

59+
#if ARROW_LITTLE_ENDIAN
5860
// This is example output from macOS+ARM+LLVM
5961
const uint8_t expected[] = {
6062
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00,
@@ -74,6 +76,29 @@ TEST(TestMessageInternal, TestByteIdentical) {
7476
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30, 0x00, 0x00, 0x08, 0x00,
7577
0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
7678
0x40, 0x00, 0x00, 0x00};
79+
#else
80+
// On Big-endian systems, FlatBuffer serialization can produce slightly different
81+
// output across different platforms and toolchains.
82+
// This is example output from Linux+s390x+GCC; the serialized message is 232 bytes long
83+
const uint8_t expected[] = {
84+
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00,
85+
0x08, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
86+
0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x06, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x0A, 0x00,
87+
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
88+
0x02, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xD8, 0xFF,
89+
0xFF, 0xFF, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00,
90+
0x6B, 0x65, 0x79, 0x5F, 0x32, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, 0x05, 0x00,
91+
0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x32, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0C, 0x00,
92+
0x04, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00,
93+
0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x5F, 0x76, 0x61,
94+
0x6C, 0x75, 0x65, 0x00, 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x00,
95+
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00,
96+
0x08, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00,
97+
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00,
98+
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30,
99+
0x00, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00,
100+
0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00};
101+
#endif
77102
Buffer expected_buffer(expected, sizeof(expected));
78103

79104
AssertBufferEqual(expected_buffer, *out_buffer);

0 commit comments

Comments
 (0)