Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions cpp/src/arrow/compute/kernels/scalar_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,9 @@ struct ListSlice {
const auto* list_type = checked_cast<const BaseListType*>(list_array.type);

// Pre-conditions
if (opts.start < 0 || (opts.stop.has_value() && opts.start >= opts.stop.value())) {
// TODO(ARROW-18281): support start == stop which should give empty lists
if (opts.start < 0 || (opts.stop.has_value() && opts.start > opts.stop.value())) {
return Status::Invalid("`start`(", opts.start,
") should be greater than 0 and smaller than `stop`(",
") should be >= 0 and not greater than `stop`(",
ToString(opts.stop), ")");
}
if (opts.step < 1) {
Expand Down
39 changes: 28 additions & 11 deletions cpp/src/arrow/compute/kernels/scalar_nested_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,32 @@ TEST(TestScalarNested, ListSliceOutputEqualsInputType) {
}
}

TEST(TestScalarNested, ListSliceEmptyLists) {
  // A slice with start == stop selects no elements: each valid list becomes an
  // empty list and null lists remain null.
  auto list_values = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4, 5], null]");
  auto list_empties = ArrayFromJSON(list(int32()), "[[], [], null]");
  ListSliceOptions args(/*start=*/0, /*stop=*/0, /*step=*/1);
  CheckScalarUnary("list_slice", list_values, list_empties, &args);

  // The result does not depend on where the empty window is positioned.
  args.start = 1;
  args.stop = 1;
  CheckScalarUnary("list_slice", list_values, list_empties, &args);

  // Same expectation for large lists, with the window back at offset 0.
  args.start = 0;
  args.stop = 0;
  auto large_values = ArrayFromJSON(large_list(int32()), "[[1, 2, 3], [4, 5]]");
  auto large_empties = ArrayFromJSON(large_list(int32()), "[[], []]");
  CheckScalarUnary("list_slice", large_values, large_empties, &args);

  // Fixed size list input with a fixed size list result requested: the output
  // type is a fixed size list of width 0.
  args.return_fixed_size_list = true;
  auto fixed_values =
      ArrayFromJSON(fixed_size_list(int32(), 3), "[[1, 2, 3], [4, 5, 6]]");
  auto fixed_empties = ArrayFromJSON(fixed_size_list(int32(), 0), "[[], []]");
  CheckScalarUnary("list_slice", fixed_values, fixed_empties, &args);
}

TEST(TestScalarNested, ListSliceBadParameters) {
auto input = ArrayFromJSON(list(int32()), "[[1]]");

Expand All @@ -314,23 +340,14 @@ TEST(TestScalarNested, ListSliceBadParameters) {
/*return_fixed_size_list=*/true);
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid,
::testing::HasSubstr(
"`start`(-1) should be greater than 0 and smaller than `stop`(1)"),
::testing::HasSubstr("`start`(-1) should be >= 0 and not greater than `stop`(1)"),
CallFunction("list_slice", {input}, &args));
// start greater than stop
args.start = 1;
args.stop = 0;
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid,
::testing::HasSubstr(
"`start`(1) should be greater than 0 and smaller than `stop`(0)"),
CallFunction("list_slice", {input}, &args));
// start same as stop
args.stop = args.start;
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid,
::testing::HasSubstr(
"`start`(1) should be greater than 0 and smaller than `stop`(1)"),
::testing::HasSubstr("`start`(1) should be >= 0 and not greater than `stop`(0)"),
CallFunction("list_slice", {input}, &args));
// stop not set and FixedSizeList requested with variable sized input
args.stop = std::nullopt;
Expand Down
33 changes: 28 additions & 5 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3946,16 +3946,12 @@ def test_list_slice_field_names_retained(return_fixed_size, type):

def test_list_slice_bad_parameters():
arr = pa.array([[1]], pa.list_(pa.int8(), 1))
msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)"
msg = r"`start`(.*) should be >= 0 and not greater than `stop`(.*)"
with pytest.raises(pa.ArrowInvalid, match=msg):
pc.list_slice(arr, -1, 1) # negative start?
with pytest.raises(pa.ArrowInvalid, match=msg):
pc.list_slice(arr, 2, 1) # start > stop?

# TODO(ARROW-18281): start==stop -> empty lists
with pytest.raises(pa.ArrowInvalid, match=msg):
pc.list_slice(arr, 0, 0) # start == stop?

# Step not >= 1
msg = "`step` must be >= 1, got: "
with pytest.raises(pa.ArrowInvalid, match=msg + "0"):
Expand All @@ -3964,6 +3960,33 @@ def test_list_slice_bad_parameters():
pc.list_slice(arr, 0, 1, step=-1)


def test_list_slice_empty_lists():
    # A slice whose start equals its stop selects nothing: every valid list
    # becomes an empty list, while null lists stay null.
    values = pa.array([[1, 2, 3], [4, 5, None], [6, None, None], None])
    empties = pa.array([[], [], [], None], type=pa.list_(pa.int64()))

    # The position of the empty window must not affect the result.
    for position in (0, 1, 2):
        assert pc.list_slice(values, position, position).equals(empties)

    # large_list input keeps its large_list type
    large_values = pa.array([[1, 2, 3], [4, 5, None]],
                            pa.large_list(pa.int64()))
    large_empties = pa.array([[], []], pa.large_list(pa.int64()))
    assert pc.list_slice(large_values, 0, 0).equals(large_empties)

    # fixed_size_list input -> output is fixed_size_list[0]
    fixed_values = pa.array([[1, 2, 3], [4, 5, 6]], pa.list_(pa.int64(), 3))
    fixed_empties = pa.array([[], []], pa.list_(pa.int64(), 0))
    assert pc.list_slice(fixed_values, 0, 0).equals(fixed_empties)


def check_run_end_encode_decode(value_type, run_end_encode_opts=None):
values = [1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3]
arr = pa.array(values, type=value_type)
Expand Down
Loading