Skip to content

Commit 8fc54a3

Browse files
stenlarsson and kou authored
GH-48507: [GLib][Ruby] Add SplitOptions (#48528)
### Rationale for this change

The `SplitOptions` class is not available in GLib/Ruby, and it is used together with the `ascii_split_whitespace` compute function.

### What changes are included in this PR?

This adds the `SplitOptions` class to GLib.

### Are these changes tested?

Yes, with Ruby unit tests.

### Are there any user-facing changes?

Yes, a new class.

* GitHub Issue: #48507

Lead-authored-by: Sten Larsson <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 8b27bc0 commit 8fc54a3

File tree

4 files changed

+209
-0
lines changed

4 files changed

+209
-0
lines changed

c_glib/arrow-glib/compute.cpp

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,9 @@ G_BEGIN_DECLS
339339
* #GArrowZeroFillOptions is a class to customize the `utf8_zero_fill`
340340
* function.
341341
*
342+
* #GArrowSplitOptions is a class to customize the `ascii_split_whitespace` and
343+
* `utf8_split_whitespace` functions.
344+
*
342345
* There are many functions to compute data on an array.
343346
*/
344347

@@ -10482,6 +10485,119 @@ garrow_zero_fill_options_new(void)
1048210485
return GARROW_ZERO_FILL_OPTIONS(g_object_new(GARROW_TYPE_ZERO_FILL_OPTIONS, nullptr));
1048310486
}
1048410487

10488+
enum {
10489+
PROP_SPLIT_OPTIONS_MAX_SPLITS = 1,
10490+
PROP_SPLIT_OPTIONS_REVERSE,
10491+
};
10492+
10493+
G_DEFINE_TYPE(GArrowSplitOptions, garrow_split_options, GARROW_TYPE_FUNCTION_OPTIONS)
10494+
10495+
static void
10496+
garrow_split_options_set_property(GObject *object,
10497+
guint prop_id,
10498+
const GValue *value,
10499+
GParamSpec *pspec)
10500+
{
10501+
auto options = garrow_split_options_get_raw(GARROW_SPLIT_OPTIONS(object));
10502+
10503+
switch (prop_id) {
10504+
case PROP_SPLIT_OPTIONS_MAX_SPLITS:
10505+
options->max_splits = g_value_get_int64(value);
10506+
break;
10507+
case PROP_SPLIT_OPTIONS_REVERSE:
10508+
options->reverse = g_value_get_boolean(value);
10509+
break;
10510+
default:
10511+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
10512+
break;
10513+
}
10514+
}
10515+
10516+
static void
10517+
garrow_split_options_get_property(GObject *object,
10518+
guint prop_id,
10519+
GValue *value,
10520+
GParamSpec *pspec)
10521+
{
10522+
auto options = garrow_split_options_get_raw(GARROW_SPLIT_OPTIONS(object));
10523+
10524+
switch (prop_id) {
10525+
case PROP_SPLIT_OPTIONS_MAX_SPLITS:
10526+
g_value_set_int64(value, options->max_splits);
10527+
break;
10528+
case PROP_SPLIT_OPTIONS_REVERSE:
10529+
g_value_set_boolean(value, options->reverse);
10530+
break;
10531+
default:
10532+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
10533+
break;
10534+
}
10535+
}
10536+
10537+
static void
10538+
garrow_split_options_init(GArrowSplitOptions *object)
10539+
{
10540+
auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
10541+
arrow_priv->options =
10542+
static_cast<arrow::compute::FunctionOptions *>(new arrow::compute::SplitOptions());
10543+
}
10544+
10545+
static void
10546+
garrow_split_options_class_init(GArrowSplitOptionsClass *klass)
10547+
{
10548+
auto gobject_class = G_OBJECT_CLASS(klass);
10549+
10550+
gobject_class->set_property = garrow_split_options_set_property;
10551+
gobject_class->get_property = garrow_split_options_get_property;
10552+
10553+
arrow::compute::SplitOptions options;
10554+
10555+
GParamSpec *spec;
10556+
/**
10557+
* GArrowSplitOptions:max-splits:
10558+
*
10559+
* Maximum number of splits allowed, or unlimited when -1.
10560+
*
10561+
* Since: 23.0.0
10562+
*/
10563+
spec = g_param_spec_int64("max-splits",
10564+
"Max splits",
10565+
"Maximum number of splits allowed, or unlimited when -1",
10566+
-1,
10567+
G_MAXINT64,
10568+
options.max_splits,
10569+
static_cast<GParamFlags>(G_PARAM_READWRITE));
10570+
g_object_class_install_property(gobject_class, PROP_SPLIT_OPTIONS_MAX_SPLITS, spec);
10571+
10572+
/**
10573+
* GArrowSplitOptions:reverse:
10574+
*
10575+
* Start splitting from the end of the string (only relevant when max_splits != -1).
10576+
*
10577+
* Since: 23.0.0
10578+
*/
10579+
spec = g_param_spec_boolean(
10580+
"reverse",
10581+
"Reverse",
10582+
"Start splitting from the end of the string (only relevant when max_splits != -1)",
10583+
options.reverse,
10584+
static_cast<GParamFlags>(G_PARAM_READWRITE));
10585+
g_object_class_install_property(gobject_class, PROP_SPLIT_OPTIONS_REVERSE, spec);
10586+
}
10587+
10588+
/**
10589+
* garrow_split_options_new:
10590+
*
10591+
* Returns: A newly created #GArrowSplitOptions.
10592+
*
10593+
* Since: 23.0.0
10594+
*/
10595+
GArrowSplitOptions *
10596+
garrow_split_options_new(void)
10597+
{
10598+
return GARROW_SPLIT_OPTIONS(g_object_new(GARROW_TYPE_SPLIT_OPTIONS, nullptr));
10599+
}
10600+
1048510601
G_END_DECLS
1048610602

1048710603
arrow::Result<arrow::FieldRef>
@@ -10762,6 +10878,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
1076210878
static_cast<const arrow::compute::ZeroFillOptions *>(arrow_options);
1076310879
auto options = garrow_zero_fill_options_new_raw(arrow_zero_fill_options);
1076410880
return GARROW_FUNCTION_OPTIONS(options);
10881+
} else if (arrow_type_name == "SplitOptions") {
10882+
const auto arrow_split_options =
10883+
static_cast<const arrow::compute::SplitOptions *>(arrow_options);
10884+
auto options = garrow_split_options_new_raw(arrow_split_options);
10885+
return GARROW_FUNCTION_OPTIONS(options);
1076510886
} else {
1076610887
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
1076710888
return GARROW_FUNCTION_OPTIONS(options);
@@ -11897,3 +12018,22 @@ garrow_zero_fill_options_get_raw(GArrowZeroFillOptions *options)
1189712018
return static_cast<arrow::compute::ZeroFillOptions *>(
1189812019
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
1189912020
}
12021+
12022+
/* Builds a GArrowSplitOptions whose "max-splits" and "reverse" properties
 * are copied from the given arrow::compute::SplitOptions. */
GArrowSplitOptions *
garrow_split_options_new_raw(const arrow::compute::SplitOptions *arrow_options)
{
  const gint64 max_splits = arrow_options->max_splits;
  const gboolean reverse = arrow_options->reverse;
  auto split_options = g_object_new(GARROW_TYPE_SPLIT_OPTIONS,
                                    "max-splits",
                                    max_splits,
                                    "reverse",
                                    reverse,
                                    nullptr);
  return GARROW_SPLIT_OPTIONS(split_options);
}
12033+
12034+
/* Returns the arrow::compute::SplitOptions wrapped by @options, obtained by
 * downcasting the pointer returned by garrow_function_options_get_raw(). */
arrow::compute::SplitOptions *
garrow_split_options_get_raw(GArrowSplitOptions *options)
{
  auto function_options = GARROW_FUNCTION_OPTIONS(options);
  auto arrow_function_options = garrow_function_options_get_raw(function_options);
  return static_cast<arrow::compute::SplitOptions *>(arrow_function_options);
}

c_glib/arrow-glib/compute.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,4 +1787,17 @@ GARROW_AVAILABLE_IN_23_0
17871787
GArrowZeroFillOptions *
17881788
garrow_zero_fill_options_new(void);
17891789

1790+
#define GARROW_TYPE_SPLIT_OPTIONS (garrow_split_options_get_type())
1791+
GARROW_AVAILABLE_IN_23_0
1792+
G_DECLARE_DERIVABLE_TYPE(
1793+
GArrowSplitOptions, garrow_split_options, GARROW, SPLIT_OPTIONS, GArrowFunctionOptions)
1794+
/* Class structure for GArrowSplitOptions. It holds only the parent
 * GArrowFunctionOptionsClass and adds no members of its own. */
struct _GArrowSplitOptionsClass
1795+
{
1796+
GArrowFunctionOptionsClass parent_class;
1797+
};
1798+
1799+
GARROW_AVAILABLE_IN_23_0
1800+
GArrowSplitOptions *
1801+
garrow_split_options_new(void);
1802+
17901803
G_END_DECLS

c_glib/arrow-glib/compute.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,3 +338,8 @@ GArrowZeroFillOptions *
338338
garrow_zero_fill_options_new_raw(const arrow::compute::ZeroFillOptions *arrow_options);
339339
arrow::compute::ZeroFillOptions *
340340
garrow_zero_fill_options_get_raw(GArrowZeroFillOptions *options);
341+
342+
GArrowSplitOptions *
343+
garrow_split_options_new_raw(const arrow::compute::SplitOptions *arrow_options);
344+
arrow::compute::SplitOptions *
345+
garrow_split_options_get_raw(GArrowSplitOptions *options);

c_glib/test/test-split-options.rb

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Tests for Arrow::SplitOptions: its two properties and its use with the
# "utf8_split_whitespace" compute function.
class TestSplitOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::SplitOptions.new
  end

  # max_splits starts at -1 and reflects an assigned value.
  def test_max_splits_property
    assert_equal(-1, @options.max_splits)
    @options.max_splits = 1
    assert_equal(1, @options.max_splits)
  end

  # reverse starts out false and reflects an assigned value.
  def test_reverse_property
    assert { !@options.reverse? }
    @options.reverse = true
    assert { @options.reverse? }
  end

  # With max_splits = 1 only the first run of whitespace splits the string.
  def test_utf8_split_whitespace_function
    @options.max_splits = 1
    input = build_string_array(["hello world test"])
    function = Arrow::Function.find("utf8_split_whitespace")
    actual = function.execute([Arrow::ArrayDatum.new(input)], @options).value
    assert_equal(build_list_array(Arrow::StringDataType.new,
                                  [["hello", "world test"]]),
                 actual)
  end
end

0 commit comments

Comments
 (0)