Skip to content

Commit 5e7dcad

Browse files
committed
Add GArrowExtractRegexSpanOptions
1 parent 4127ca2 commit 5e7dcad

File tree

4 files changed

+203
-0
lines changed

4 files changed

+203
-0
lines changed

c_glib/arrow-glib/compute.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,9 @@ G_BEGIN_DECLS
269269
* #GArrowExtractRegexOptions is a class to customize the `extract_regex`
270270
* function.
271271
*
272+
* #GArrowExtractRegexSpanOptions is a class to customize the `extract_regex_span`
273+
* function.
274+
*
272275
* There are many functions to compute data on an array.
273276
*/
274277

@@ -7091,6 +7094,102 @@ garrow_extract_regex_options_new(void)
70917094
return GARROW_EXTRACT_REGEX_OPTIONS(options);
70927095
}
70937096

7097+
// Property IDs for GArrowExtractRegexSpanOptions. The single "pattern"
// property is installed under this ID in class_init and dispatched on in
// the set_property/get_property handlers.
enum {
7098+
PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN = 1,
7099+
};
7100+
7101+
// Defines the GArrowExtractRegexSpanOptions GObject type with
// GARROW_TYPE_FUNCTION_OPTIONS as its parent type.
G_DEFINE_TYPE(GArrowExtractRegexSpanOptions,
7102+
garrow_extract_regex_span_options,
7103+
GARROW_TYPE_FUNCTION_OPTIONS)
7104+
7105+
static void
7106+
garrow_extract_regex_span_options_set_property(GObject *object,
7107+
guint prop_id,
7108+
const GValue *value,
7109+
GParamSpec *pspec)
7110+
{
7111+
auto options =
7112+
garrow_extract_regex_span_options_get_raw(GARROW_EXTRACT_REGEX_SPAN_OPTIONS(object));
7113+
7114+
switch (prop_id) {
7115+
case PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN:
7116+
options->pattern = g_value_get_string(value);
7117+
break;
7118+
default:
7119+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
7120+
break;
7121+
}
7122+
}
7123+
7124+
static void
7125+
garrow_extract_regex_span_options_get_property(GObject *object,
7126+
guint prop_id,
7127+
GValue *value,
7128+
GParamSpec *pspec)
7129+
{
7130+
auto options =
7131+
garrow_extract_regex_span_options_get_raw(GARROW_EXTRACT_REGEX_SPAN_OPTIONS(object));
7132+
7133+
switch (prop_id) {
7134+
case PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN:
7135+
g_value_set_string(value, options->pattern.c_str());
7136+
break;
7137+
default:
7138+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
7139+
break;
7140+
}
7141+
}
7142+
7143+
static void
7144+
garrow_extract_regex_span_options_init(GArrowExtractRegexSpanOptions *object)
7145+
{
7146+
auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
7147+
priv->options = static_cast<arrow::compute::FunctionOptions *>(
7148+
new arrow::compute::ExtractRegexSpanOptions());
7149+
}
7150+
7151+
static void
7152+
garrow_extract_regex_span_options_class_init(GArrowExtractRegexSpanOptionsClass *klass)
7153+
{
7154+
auto gobject_class = G_OBJECT_CLASS(klass);
7155+
7156+
gobject_class->set_property = garrow_extract_regex_span_options_set_property;
7157+
gobject_class->get_property = garrow_extract_regex_span_options_get_property;
7158+
7159+
arrow::compute::ExtractRegexSpanOptions options;
7160+
7161+
GParamSpec *spec;
7162+
/**
7163+
* GArrowExtractRegexSpanOptions:pattern:
7164+
*
7165+
* Regular expression with named capture fields.
7166+
*
7167+
* Since: 23.0.0
7168+
*/
7169+
spec = g_param_spec_string("pattern",
7170+
"Pattern",
7171+
"Regular expression with named capture fields",
7172+
options.pattern.c_str(),
7173+
static_cast<GParamFlags>(G_PARAM_READWRITE));
7174+
g_object_class_install_property(gobject_class,
7175+
PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN,
7176+
spec);
7177+
}
7178+
7179+
/**
7180+
* garrow_extract_regex_span_options_new:
7181+
*
7182+
* Returns: A newly created #GArrowExtractRegexSpanOptions.
7183+
*
7184+
* Since: 23.0.0
7185+
*/
7186+
GArrowExtractRegexSpanOptions *
7187+
garrow_extract_regex_span_options_new(void)
7188+
{
7189+
auto options = g_object_new(GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS, NULL);
7190+
return GARROW_EXTRACT_REGEX_SPAN_OPTIONS(options);
7191+
}
7192+
70947193
G_END_DECLS
70957194

70967195
arrow::Result<arrow::FieldRef>
@@ -7254,6 +7353,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
72547353
static_cast<const arrow::compute::ExtractRegexOptions *>(arrow_options);
72557354
auto options = garrow_extract_regex_options_new_raw(arrow_extract_regex_options);
72567355
return GARROW_FUNCTION_OPTIONS(options);
7356+
} else if (arrow_type_name == "ExtractRegexSpanOptions") {
7357+
const auto arrow_extract_regex_span_options =
7358+
static_cast<const arrow::compute::ExtractRegexSpanOptions *>(arrow_options);
7359+
auto options =
7360+
garrow_extract_regex_span_options_new_raw(arrow_extract_regex_span_options);
7361+
return GARROW_FUNCTION_OPTIONS(options);
72577362
} else {
72587363
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
72597364
return GARROW_FUNCTION_OPTIONS(options);
@@ -7893,3 +7998,21 @@ garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options)
78937998
return static_cast<arrow::compute::ExtractRegexOptions *>(
78947999
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
78958000
}
8001+
8002+
/* Builds a GArrowExtractRegexSpanOptions whose "pattern" property is set
 * from the pattern of the given C++ options object.
 */
GArrowExtractRegexSpanOptions *
garrow_extract_regex_span_options_new_raw(
  const arrow::compute::ExtractRegexSpanOptions *arrow_options)
{
  auto options = g_object_new(GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS,
                              "pattern",
                              arrow_options->pattern.c_str(),
                              NULL);
  return GARROW_EXTRACT_REGEX_SPAN_OPTIONS(options);
}
8012+
8013+
/* Returns the C++ options object held by @options, downcast from the
 * generic function-options pointer to ExtractRegexSpanOptions.
 */
arrow::compute::ExtractRegexSpanOptions *
garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options)
{
  auto arrow_function_options =
    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options));
  return static_cast<arrow::compute::ExtractRegexSpanOptions *>(arrow_function_options);
}

c_glib/arrow-glib/compute.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,4 +1272,21 @@ GARROW_AVAILABLE_IN_23_0
12721272
GArrowExtractRegexOptions *
12731273
garrow_extract_regex_options_new(void);
12741274

1275+
/* GType accessor macro and derivable-type declaration for
 * GArrowExtractRegexSpanOptions, a GArrowFunctionOptions subclass. The class
 * struct declares no members beyond the parent class. */
#define GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS \
1276+
(garrow_extract_regex_span_options_get_type())
1277+
GARROW_AVAILABLE_IN_23_0
1278+
G_DECLARE_DERIVABLE_TYPE(GArrowExtractRegexSpanOptions,
1279+
garrow_extract_regex_span_options,
1280+
GARROW,
1281+
EXTRACT_REGEX_SPAN_OPTIONS,
1282+
GArrowFunctionOptions)
1283+
struct _GArrowExtractRegexSpanOptionsClass
1284+
{
1285+
GArrowFunctionOptionsClass parent_class;
1286+
};
1287+
1288+
/* Public constructor; takes no arguments and returns a
 * GArrowExtractRegexSpanOptions pointer. */
GARROW_AVAILABLE_IN_23_0
1289+
GArrowExtractRegexSpanOptions *
1290+
garrow_extract_regex_span_options_new(void);
1291+
12751292
G_END_DECLS

c_glib/arrow-glib/compute.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,9 @@ garrow_extract_regex_options_new_raw(
209209
const arrow::compute::ExtractRegexOptions *arrow_options);
210210
arrow::compute::ExtractRegexOptions *
211211
garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options);
212+
213+
// Conversions between the GLib wrapper and the C++ options type:
// new_raw takes a const arrow::compute::ExtractRegexSpanOptions pointer and
// returns a wrapper; get_raw takes a wrapper and returns the C++ pointer.
GArrowExtractRegexSpanOptions *
214+
garrow_extract_regex_span_options_new_raw(
215+
const arrow::compute::ExtractRegexSpanOptions *arrow_options);
216+
arrow::compute::ExtractRegexSpanOptions *
217+
garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options);
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Exercises Arrow::ExtractRegexSpanOptions: the "pattern" property and its
# use with the "extract_regex_span" compute function.
class TestExtractRegexSpanOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::ExtractRegexSpanOptions.new
  end

  # The pattern is empty by default and reads back whatever was assigned.
  def test_pattern_property
    pattern = "(?P<year>\\d{4})-(?P<month>\\d{2})"
    assert_equal("", @options.pattern)
    @options.pattern = pattern
    assert_equal(pattern, @options.pattern)
  end

  # Each named capture becomes a struct field holding a two-element Int32
  # list; for these inputs the pairs read as [start offset, length].
  def test_extract_regex_span_function
    input = build_string_array(["2023-01-15", "2024-12-31"])
    args = [Arrow::ArrayDatum.new(input)]
    @options.pattern = "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
    function = Arrow::Function.find("extract_regex_span")
    result = function.execute(args, @options).value

    fields = ["year", "month", "day"].collect do |name|
      span_type = Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)
      Arrow::Field.new(name, span_type)
    end
    # Both dates share the same layout, so both rows expect the same spans.
    expected_spans = Array.new(2) do
      {
        "year" => [0, 4],
        "month" => [5, 2],
        "day" => [8, 2],
      }
    end
    assert_equal(build_struct_array(fields, expected_spans), result)
  end
end

0 commit comments

Comments
 (0)