diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc index 1fb0df56bb9..bc2a33185b9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc @@ -183,10 +183,9 @@ struct ListSlice { const auto* list_type = checked_cast(list_array.type); // Pre-conditions - if (opts.start < 0 || (opts.stop.has_value() && opts.start >= opts.stop.value())) { - // TODO(ARROW-18281): support start == stop which should give empty lists + if (opts.start < 0 || (opts.stop.has_value() && opts.start > opts.stop.value())) { return Status::Invalid("`start`(", opts.start, - ") should be greater than 0 and smaller than `stop`(", + ") should be >= 0 and not greater than `stop`(", ToString(opts.stop), ")"); } if (opts.step < 1) { diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc index f199f56aa2f..241c3e3b165 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc @@ -306,6 +306,32 @@ TEST(TestScalarNested, ListSliceOutputEqualsInputType) { } } +TEST(TestScalarNested, ListSliceEmptyLists) { + // start == stop should return empty lists + auto input = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4, 5], null]"); + ListSliceOptions args(/*start=*/0, /*stop=*/0, /*step=*/1); + auto expected = ArrayFromJSON(list(int32()), "[[], [], null]"); + CheckScalarUnary("list_slice", input, expected, &args); + + // Different start position + args.start = 1; + args.stop = 1; + CheckScalarUnary("list_slice", input, expected, &args); + + // Large list + auto input_large = ArrayFromJSON(large_list(int32()), "[[1, 2, 3], [4, 5]]"); + args.start = 0; + args.stop = 0; + auto expected_large = ArrayFromJSON(large_list(int32()), "[[], []]"); + CheckScalarUnary("list_slice", input_large, expected_large, &args); + + // Fixed size list -> fixed size list[0] + auto input_fixed = ArrayFromJSON(fixed_size_list(int32(), 3), "[[1, 2, 3], [4, 5, 6]]"); + args.return_fixed_size_list = true; + auto expected_fixed = ArrayFromJSON(fixed_size_list(int32(), 0), "[[], []]"); + CheckScalarUnary("list_slice", input_fixed, expected_fixed, &args); +} + TEST(TestScalarNested, ListSliceBadParameters) { auto input = ArrayFromJSON(list(int32()), "[[1]]"); @@ -314,23 +340,14 @@ TEST(TestScalarNested, ListSliceBadParameters) { /*return_fixed_size_list=*/true); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, - ::testing::HasSubstr( - "`start`(-1) should be greater than 0 and smaller than `stop`(1)"), + ::testing::HasSubstr("`start`(-1) should be >= 0 and not greater than `stop`(1)"), CallFunction("list_slice", {input}, &args)); // start greater than stop args.start = 1; args.stop = 0; EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, - ::testing::HasSubstr( - "`start`(1) should be greater than 0 and smaller than `stop`(0)"), - CallFunction("list_slice", {input}, &args)); - // start same as stop - args.stop = args.start; - EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, - ::testing::HasSubstr( - "`start`(1) should be greater than 0 and smaller than `stop`(1)"), + ::testing::HasSubstr("`start`(1) should be >= 0 and not greater than `stop`(0)"), CallFunction("list_slice", {input}, &args)); // stop not set and FixedSizeList requested with variable sized input args.stop = std::nullopt; diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c6b17e4791d..0e4aae1d904 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -3946,16 +3946,12 @@ def test_list_slice_field_names_retained(return_fixed_size, type): def test_list_slice_bad_parameters(): arr = pa.array([[1]], pa.list_(pa.int8(), 1)) - msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)" + msg = r"`start`(.*) should be >= 0 and not greater than `stop`(.*)" with pytest.raises(pa.ArrowInvalid, match=msg): pc.list_slice(arr, -1, 1) # negative start? with pytest.raises(pa.ArrowInvalid, match=msg): pc.list_slice(arr, 2, 1) # start > stop? - # TODO(ARROW-18281): start==stop -> empty lists - with pytest.raises(pa.ArrowInvalid, match=msg): - pc.list_slice(arr, 0, 0) # start == stop? - # Step not >= 1 msg = "`step` must be >= 1, got: " with pytest.raises(pa.ArrowInvalid, match=msg + "0"): @@ -3964,6 +3960,33 @@ def test_list_slice_bad_parameters(): pc.list_slice(arr, 0, 1, step=-1) +def test_list_slice_empty_lists(): + # Test start == stop should return empty lists + arr = pa.array([[1, 2, 3], [4, 5, None], [6, None, None], None]) + result = pc.list_slice(arr, 0, 0) + expected = pa.array([[], [], [], None], type=pa.list_(pa.int64())) + assert result.equals(expected) + + # Test with different start positions + result = pc.list_slice(arr, 1, 1) + assert result.equals(expected) + + result = pc.list_slice(arr, 2, 2) + assert result.equals(expected) + + # Test with large_list + arr_large = pa.array([[1, 2, 3], [4, 5, None]], pa.large_list(pa.int64())) + result = pc.list_slice(arr_large, 0, 0) + expected_large = pa.array([[], []], pa.large_list(pa.int64())) + assert result.equals(expected_large) + + # Test with fixed_size_list -> output is fixed_size_list[0] + arr_fixed = pa.array([[1, 2, 3], [4, 5, 6]], pa.list_(pa.int64(), 3)) + result = pc.list_slice(arr_fixed, 0, 0) + expected_fixed = pa.array([[], []], pa.list_(pa.int64(), 0)) + assert result.equals(expected_fixed) + + def check_run_end_encode_decode(value_type, run_end_encode_opts=None): values = [1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3] arr = pa.array(values, type=value_type)