Skip to content

Commit a2bb31d

Browse files
committed
Fix the issue that bitmap ops overriding partial leading byte
1 parent 1a2a800 commit a2bb31d

File tree

4 files changed

+91
-4
lines changed

4 files changed

+91
-4
lines changed

cpp/src/arrow/compute/kernels/scalar_if_else_test.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,16 @@ TYPED_TEST(TestIfElseDict, DifferentDictionaries) {
10951095
CheckDictionary("if_else", {MakeNullScalar(boolean()), values1, values2});
10961096
}
10971097

1098+
// GH-47825: BitmapOp overrides partial leading byte for unaligned chunked execution.
1099+
TEST(TestIfElse, ChunkedUnalignedBitmapOp) {
1100+
auto cond = ArrayFromJSON(boolean(), "[true, true]");
1101+
auto if_true = ChunkedArrayFromJSON(boolean(), {"[true]", "[true]"});
1102+
auto if_false = ArrayFromJSON(boolean(), "[true, true]");
1103+
ASSERT_OK_AND_ASSIGN(auto result, CallFunction("if_else", {cond, if_true, if_false}));
1104+
ASSERT_OK(result.chunked_array()->ValidateFull());
1105+
AssertDatumsEqual(ChunkedArrayFromJSON(boolean(), {"[true, true]"}), result);
1106+
}
1107+
10981108
Datum MakeStruct(const std::vector<Datum>& conds) {
10991109
if (conds.size() == 0) {
11001110
// The tests below want a struct scalar when no condition values passed,

cpp/src/arrow/util/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ add_arrow_test(utility-test
4949
SOURCES
5050
align_util_test.cc
5151
atfork_test.cc
52+
bitmap_test.cc
5253
byte_size_test.cc
5354
byte_stream_split_test.cc
5455
cache_test.cc

cpp/src/arrow/util/bitmap_ops.cc

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -370,13 +370,25 @@ void UnalignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t*
370370
}
371371
}
372372

373+
// XXX: The bits before left/right/out_offset, if unaligned, are untouched. But not for
374+
// the bits after length. Caller should ensure proper alignment for the tail bits if
375+
// necessary, or correct the tail bits by subsequent calls.
373376
template <template <typename> class BitOp>
374377
void BitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
375378
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* dest) {
376-
if ((out_offset % 8 == left_offset % 8) && (out_offset % 8 == right_offset % 8)) {
377-
// Fast case: can use bytewise AND
378-
AlignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,
379-
length);
379+
if (out_offset % 8 == left_offset % 8 && out_offset % 8 == right_offset % 8) {
380+
// Fast case: can use byte-wise BitOp after handling leading unaligned bits.
381+
int64_t leading_unaligned_bits = (8 - left_offset % 8) % 8;
382+
if (leading_unaligned_bits > 0) {
383+
UnalignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,
384+
leading_unaligned_bits);
385+
}
386+
if (length > leading_unaligned_bits) {
387+
AlignedBitmapOp<BitOp>(left, left_offset + leading_unaligned_bits, right,
388+
right_offset + leading_unaligned_bits, dest,
389+
out_offset + leading_unaligned_bits,
390+
length - leading_unaligned_bits);
391+
}
380392
} else {
381393
// Unaligned
382394
UnalignedBitmapOp<BitOp>(left, left_offset, right, right_offset, dest, out_offset,

cpp/src/arrow/util/bitmap_test.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <gtest/gtest.h>
19+
20+
#include "arrow/testing/gtest_util.h"
21+
#include "arrow/util/bitmap_ops.h"
22+
23+
namespace arrow::internal {
24+
25+
TEST(BitmapOpsTest, PartialLeadingByteSafety) {
26+
const int64_t num_bytes = 16;
27+
const std::vector<uint8_t> left(num_bytes, 0), right(num_bytes, 0xFF);
28+
std::vector<uint8_t> out = right;
29+
int64_t num_bits = num_bytes * 8;
30+
31+
auto left_data = left.data();
32+
auto right_data = right.data();
33+
auto out_data = out.data();
34+
35+
for (int64_t out_offset = num_bits - 1; out_offset >= 0; --out_offset) {
36+
ARROW_SCOPED_TRACE("out offset = " + std::to_string(out_offset));
37+
for (int64_t left_offset : {out_offset, out_offset + 1}) {
38+
ARROW_SCOPED_TRACE("left offset = " + std::to_string(left_offset));
39+
for (int64_t right_offset : {out_offset, out_offset + 2}) {
40+
ARROW_SCOPED_TRACE("right offset = " + std::to_string(right_offset));
41+
for (int64_t length = 1;
42+
length <= num_bits - out_offset && length <= num_bits - left_offset &&
43+
length <= num_bits - right_offset;
44+
++length) {
45+
ARROW_SCOPED_TRACE("length = " + std::to_string(length));
46+
BitmapAnd(left_data, out_offset, right_data, right_offset, length, out_offset,
47+
out_data);
48+
// Bytes before out_offset.
49+
for (int64_t i = 0; i < out_offset / 8; ++i) {
50+
EXPECT_EQ(out_data[i], 0xFF);
51+
}
52+
// The byte holding the out_offset bit.
53+
EXPECT_EQ(out_data[out_offset / 8], (1 << (out_offset % 8)) - 1);
54+
// Bytes after the last bit.
55+
for (int64_t i = (out_offset + length + 7) / 8; i < num_bytes; ++i) {
56+
EXPECT_EQ(out_data[i], 0);
57+
}
58+
}
59+
}
60+
}
61+
}
62+
}
63+
64+
} // namespace arrow::internal

0 commit comments

Comments
 (0)