Skip to content

Commit 7b8e3e8

Browse files
authored
GH-47807: [C++][Compute] Fix the issue that null count is not updated when setting slice on an array span (#47808)
### Rationale for this change The null count is not updated when setting slice on an array span, after a preceding set slice sees a 0 null count. An incorrectly null count will cause subsequent failures wrt. null processing like #47807. ### What changes are included in this PR? Narrowing the null count update condition when setting slice on an array span: as long as there is a valid buffer, we set null count to unknown. ### Are these changes tested? Test included. ### Are there any user-facing changes? None. * GitHub Issue: #47807 Authored-by: Rossi Sun <[email protected]> Signed-off-by: Rossi Sun <[email protected]>
1 parent 2a5ad38 commit 7b8e3e8

File tree

4 files changed

+59
-1
lines changed

4 files changed

+59
-1
lines changed

cpp/src/arrow/array/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# under the License.
1717

1818
add_arrow_test(concatenate_test)
19+
add_arrow_test(data_test)
1920
add_arrow_test(diff_test)
2021

2122
# Headers: top level

cpp/src/arrow/array/data.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ struct ARROW_EXPORT ArraySpan {
641641
this->length = length;
642642
if (this->type->id() == Type::NA) {
643643
this->null_count = this->length;
644-
} else if (this->MayHaveNulls()) {
644+
} else if (buffers[0].data != NULLPTR) {
645645
this->null_count = kUnknownNullCount;
646646
} else {
647647
this->null_count = 0;

cpp/src/arrow/array/data_test.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <gtest/gtest.h>
19+
20+
#include "arrow/array.h"
21+
#include "arrow/array/data.h"
22+
#include "arrow/testing/gtest_util.h"
23+
24+
namespace arrow {
25+
26+
TEST(ArraySpan, SetSlice) {
27+
auto arr = ArrayFromJSON(int32(), "[0, 1, 2, 3, 4, 5, 6, null, 7, 8, 9]");
28+
ArraySpan span(*arr->data());
29+
ASSERT_EQ(span.length, arr->length());
30+
ASSERT_EQ(span.null_count, 1);
31+
ASSERT_EQ(span.offset, 0);
32+
33+
span.SetSlice(0, 7);
34+
ASSERT_EQ(span.length, 7);
35+
ASSERT_EQ(span.null_count, kUnknownNullCount);
36+
ASSERT_EQ(span.offset, 0);
37+
ASSERT_EQ(span.GetNullCount(), 0);
38+
39+
span.SetSlice(7, 4);
40+
ASSERT_EQ(span.length, 4);
41+
ASSERT_EQ(span.null_count, kUnknownNullCount);
42+
ASSERT_EQ(span.offset, 7);
43+
ASSERT_EQ(span.GetNullCount(), 1);
44+
}
45+
46+
} // namespace arrow

cpp/src/arrow/compute/kernels/scalar_if_else_test.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3720,6 +3720,17 @@ TEST(TestChoose, FixedSizeBinary) {
37203720
*MakeArrayOfNull(type, 5));
37213721
}
37223722

3723+
// GH-47807: Null count in ArraySpan not updated correctly when executing chunked.
3724+
TEST(TestChoose, WrongNullCountForChunked) {
3725+
auto indices = ArrayFromJSON(int64(), "[0, 1, 0, 1, 0, null]");
3726+
auto values1 = ArrayFromJSON(int64(), "[10, 11, 12, 13, 14, 15]");
3727+
auto values2 = ChunkedArrayFromJSON(int64(), {"[100, 101]", "[102, 103, 104, 105]"});
3728+
ASSERT_OK_AND_ASSIGN(auto result, CallFunction("choose", {indices, values1, values2}));
3729+
ASSERT_OK(result.chunked_array()->ValidateFull());
3730+
AssertDatumsEqual(ChunkedArrayFromJSON(int64(), {"[10, 101]", "[12, 103, 14, null]"}),
3731+
result);
3732+
}
3733+
37233734
TEST(TestChooseKernel, DispatchBest) {
37243735
ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction("choose"));
37253736
auto Check = [&](std::vector<TypeHolder> original_values) {

0 commit comments

Comments
 (0)