Skip to content

Commit b14e109

Browse files
authored
GH-48366: [GLib][Ruby] Add DictionaryEncodeOptions (#48373)
### Rationale for this change

The `DictionaryEncodeOptions` class is not available in GLib/Ruby, and it is used together with the `dictionary_encode` compute function.

### What changes are included in this PR?

This adds the `DictionaryEncodeOptions` class to GLib.

### Are these changes tested?

Yes, with Ruby unit tests.

### Are there any user-facing changes?

Yes, a new class.

* GitHub Issue: #48366

Authored-by: Sten Larsson <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 6d65569 commit b14e109

File tree

4 files changed

+216
-0
lines changed

4 files changed

+216
-0
lines changed

c_glib/arrow-glib/compute.cpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ G_BEGIN_DECLS
258258
* such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and
259259
* `cumulative_min`.
260260
*
261+
* #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode`
262+
* function.
263+
*
261264
* There are many functions to compute data on an array.
262265
*/
263266

@@ -6662,6 +6665,108 @@ garrow_cumulative_options_new(void)
66626665
return GARROW_CUMULATIVE_OPTIONS(options);
66636666
}
66646667

6668+
/* Property ID of "null-encoding-behavior": it is the ID handed to
 * g_object_class_install_property() and the one the set_property /
 * get_property handlers of GArrowDictionaryEncodeOptions dispatch on. */
enum { PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR = 1 };
6671+
6672+
/* Defines the GArrowDictionaryEncodeOptions type, with
 * GARROW_TYPE_FUNCTION_OPTIONS as its parent type. The
 * garrow_dictionary_encode_options_init() and
 * garrow_dictionary_encode_options_class_init() functions in this file
 * carry the name prefix given here. */
G_DEFINE_TYPE(GArrowDictionaryEncodeOptions,
              garrow_dictionary_encode_options,
              GARROW_TYPE_FUNCTION_OPTIONS)
6675+
6676+
static void
6677+
garrow_dictionary_encode_options_set_property(GObject *object,
6678+
guint prop_id,
6679+
const GValue *value,
6680+
GParamSpec *pspec)
6681+
{
6682+
auto options =
6683+
garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object));
6684+
6685+
switch (prop_id) {
6686+
case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR:
6687+
options->null_encoding_behavior =
6688+
static_cast<arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>(
6689+
g_value_get_enum(value));
6690+
break;
6691+
default:
6692+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
6693+
break;
6694+
}
6695+
}
6696+
6697+
static void
6698+
garrow_dictionary_encode_options_get_property(GObject *object,
6699+
guint prop_id,
6700+
GValue *value,
6701+
GParamSpec *pspec)
6702+
{
6703+
auto options =
6704+
garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object));
6705+
6706+
switch (prop_id) {
6707+
case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR:
6708+
g_value_set_enum(value,
6709+
static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
6710+
options->null_encoding_behavior));
6711+
break;
6712+
default:
6713+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
6714+
break;
6715+
}
6716+
}
6717+
6718+
static void
6719+
garrow_dictionary_encode_options_init(GArrowDictionaryEncodeOptions *object)
6720+
{
6721+
auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
6722+
priv->options = static_cast<arrow::compute::FunctionOptions *>(
6723+
new arrow::compute::DictionaryEncodeOptions());
6724+
}
6725+
6726+
static void
6727+
garrow_dictionary_encode_options_class_init(GArrowDictionaryEncodeOptionsClass *klass)
6728+
{
6729+
auto gobject_class = G_OBJECT_CLASS(klass);
6730+
6731+
gobject_class->set_property = garrow_dictionary_encode_options_set_property;
6732+
gobject_class->get_property = garrow_dictionary_encode_options_get_property;
6733+
6734+
arrow::compute::DictionaryEncodeOptions options;
6735+
6736+
GParamSpec *spec;
6737+
/**
6738+
* GArrowDictionaryEncodeOptions:null-encoding-behavior:
6739+
*
6740+
* How null values will be encoded.
6741+
*
6742+
* Since: 23.0.0
6743+
*/
6744+
spec = g_param_spec_enum("null-encoding-behavior",
6745+
"Null encoding behavior",
6746+
"How null values will be encoded",
6747+
GARROW_TYPE_DICTIONARY_ENCODE_NULL_ENCODING_BEHAVIOR,
6748+
static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
6749+
options.null_encoding_behavior),
6750+
static_cast<GParamFlags>(G_PARAM_READWRITE));
6751+
g_object_class_install_property(gobject_class,
6752+
PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR,
6753+
spec);
6754+
}
6755+
6756+
/**
6757+
* garrow_dictionary_encode_options_new:
6758+
*
6759+
* Returns: A newly created #GArrowDictionaryEncodeOptions.
6760+
*
6761+
* Since: 23.0.0
6762+
*/
6763+
GArrowDictionaryEncodeOptions *
6764+
garrow_dictionary_encode_options_new(void)
6765+
{
6766+
auto options = g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS, NULL);
6767+
return GARROW_DICTIONARY_ENCODE_OPTIONS(options);
6768+
}
6769+
66656770
G_END_DECLS
66666771

66676772
arrow::Result<arrow::FieldRef>
@@ -6803,6 +6908,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
68036908
static_cast<const arrow::compute::CumulativeOptions *>(arrow_options);
68046909
auto options = garrow_cumulative_options_new_raw(arrow_cumulative_options);
68056910
return GARROW_FUNCTION_OPTIONS(options);
6911+
} else if (arrow_type_name == "DictionaryEncodeOptions") {
6912+
const auto arrow_dictionary_encode_options =
6913+
static_cast<const arrow::compute::DictionaryEncodeOptions *>(arrow_options);
6914+
auto options =
6915+
garrow_dictionary_encode_options_new_raw(arrow_dictionary_encode_options);
6916+
return GARROW_FUNCTION_OPTIONS(options);
68066917
} else {
68076918
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
68086919
return GARROW_FUNCTION_OPTIONS(options);
@@ -7370,3 +7481,22 @@ garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options)
73707481
return static_cast<arrow::compute::CumulativeOptions *>(
73717482
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
73727483
}
7484+
7485+
/*
 * Builds a GArrowDictionaryEncodeOptions from an Arrow C++ options object.
 *
 * Only the null_encoding_behavior field is read from @arrow_options; it is
 * passed to g_object_new() as the "null-encoding-behavior" property, so the
 * returned object does not reference @arrow_options afterwards.
 */
GArrowDictionaryEncodeOptions *
garrow_dictionary_encode_options_new_raw(
  const arrow::compute::DictionaryEncodeOptions *arrow_options)
{
  const auto null_encoding_behavior =
    static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
      arrow_options->null_encoding_behavior);
  auto options = g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS,
                              "null-encoding-behavior",
                              null_encoding_behavior,
                              NULL);
  return GARROW_DICTIONARY_ENCODE_OPTIONS(options);
}
7496+
7497+
/*
 * Returns the Arrow C++ options object behind @options: the pointer obtained
 * from garrow_function_options_get_raw(), downcast to
 * arrow::compute::DictionaryEncodeOptions.
 */
arrow::compute::DictionaryEncodeOptions *
garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions *options)
{
  auto function_options = GARROW_FUNCTION_OPTIONS(options);
  auto arrow_function_options = garrow_function_options_get_raw(function_options);
  return static_cast<arrow::compute::DictionaryEncodeOptions *>(arrow_function_options);
}

c_glib/arrow-glib/compute.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,4 +1189,38 @@ GARROW_AVAILABLE_IN_23_0
11891189
GArrowCumulativeOptions *
11901190
garrow_cumulative_options_new(void);
11911191

1192+
/**
 * GArrowDictionaryEncodeNullEncodingBehavior:
 * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE: The null value is added to
 *   the dictionary and gets a proper index.
 * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK: The null value is masked in
 *   the indices array.
 *
 * How `dictionary_encode` treats null values. The values correspond to
 * `arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior`.
 *
 * Since: 23.0.0
 */
typedef enum {
  GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE,
  GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK,
} GArrowDictionaryEncodeNullEncodingBehavior;
1208+
1209+
#define GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS \
1210+
(garrow_dictionary_encode_options_get_type())
1211+
GARROW_AVAILABLE_IN_23_0
1212+
G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryEncodeOptions,
1213+
garrow_dictionary_encode_options,
1214+
GARROW,
1215+
DICTIONARY_ENCODE_OPTIONS,
1216+
GArrowFunctionOptions)
1217+
struct _GArrowDictionaryEncodeOptionsClass
1218+
{
1219+
GArrowFunctionOptionsClass parent_class;
1220+
};
1221+
1222+
GARROW_AVAILABLE_IN_23_0
1223+
GArrowDictionaryEncodeOptions *
1224+
garrow_dictionary_encode_options_new(void);
1225+
11921226
G_END_DECLS

c_glib/arrow-glib/compute.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,9 @@ GArrowCumulativeOptions *
186186
garrow_cumulative_options_new_raw(const arrow::compute::CumulativeOptions *arrow_options);
187187
arrow::compute::CumulativeOptions *
188188
garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options);
189+
190+
GArrowDictionaryEncodeOptions *
191+
garrow_dictionary_encode_options_new_raw(
192+
const arrow::compute::DictionaryEncodeOptions *arrow_options);
193+
arrow::compute::DictionaryEncodeOptions *
194+
garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions *options);
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
class TestDictionaryEncodeOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::DictionaryEncodeOptions.new
  end

  # A fresh options object reports MASK; assigning :encode switches it to
  # ENCODE.
  def test_null_encoding_behavior_property
    behavior = Arrow::DictionaryEncodeNullEncodingBehavior
    assert_equal(behavior::MASK, @options.null_encoding_behavior)
    @options.null_encoding_behavior = :encode
    assert_equal(behavior::ENCODE, @options.null_encoding_behavior)
  end

  # With :encode, the nil element gets its own dictionary index (2 here)
  # in the indices of the result.
  def test_dictionary_encode_function_with_encode
    @options.null_encoding_behavior = :encode
    input = build_string_array(["a", "b", nil, "a", "b"])
    function = Arrow::Function.find("dictionary_encode")
    encoded = function.execute([Arrow::ArrayDatum.new(input)], @options).value
    expected_data_type = Arrow::DictionaryDataType.new(Arrow::Int32DataType.new,
                                                       Arrow::StringDataType.new,
                                                       false)
    assert_equal(expected_data_type, encoded.value_data_type)
    assert_equal(build_int32_array([0, 1, 2, 0, 1]), encoded.indices)
  end
end
46+

0 commit comments

Comments
 (0)