Skip to content

Commit 0890149

Browse files
xxlaykxxraulcdkou
authored
apacheGH-40968: [C++][Gandiva] add RE2::Options set_dot_nl(true) for Like function (dremio#80)
* apacheGH-40968: [C++][Gandiva] add RE2::Options set_dot_nl(true) for Like function (apache#40970) (dremio#68) The Gandiva function "LIKE" does not always work correctly when the string contains \n. String value: `[function_name: "Space1.protect"\nargs: "passenger_count"\ncolumn_name: "passenger_count" ]` Neither pattern '%Space1%' nor '%Space1.%' matches. Added the flag set_dot_nl(true) to LikeHolder. Added unit tests. User-facing changes: Yes. **This PR includes breaking changes to public APIs.** * GitHub Issue: apache#40968 Lead-authored-by: Ivan Chesnov <ivan.chesnov@dremio.com> Signed-off-by: Sutou Kouhei <kou@clear-code.com> * apacheGH-43119: [CI][Packaging] Update manylinux 2014 CentOS repos that have been deprecated (apache#43121) Jobs are failing to find mirrorlist.centos.org. Updated the repos based on the solution from: apache#43119 (comment). Tested via archery. User-facing changes: No. * GitHub Issue: apache#43119 Lead-authored-by: Raúl Cumplido <raulcumplido@gmail.com> Co-authored-by: Sutou Kouhei <kou@clear-code.com> Co-authored-by: Sutou Kouhei <kou@cozmixng.org> Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com> --------- Signed-off-by: Sutou Kouhei <kou@clear-code.com> Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com> Co-authored-by: Raúl Cumplido <raulcumplido@gmail.com> Co-authored-by: Sutou Kouhei <kou@clear-code.com> Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
1 parent d2e16bf commit 0890149

File tree

5 files changed

+64
-14
lines changed

5 files changed

+64
-14
lines changed

ci/docker/centos-7-cpp.dockerfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,25 @@
1717

1818
FROM centos:centos7
1919

20+
# Update mirrors to use vault.centos.org as CentOS 7
21+
# has been EOL since 2024-06-30
22+
RUN sed -i \
23+
-e 's/^mirrorlist/#mirrorlist/' \
24+
-e 's/^#baseurl/baseurl/' \
25+
-e 's/mirror\.centos\.org/vault.centos.org/' \
26+
/etc/yum.repos.d/*.repo
27+
2028
# devtoolset is required for C++17
2129
RUN \
2230
yum install -y \
2331
centos-release-scl \
2432
epel-release && \
33+
sed -i \
34+
-e 's/^mirrorlist/#mirrorlist/' \
35+
-e 's/^#baseurl/baseurl/' \
36+
-e 's/^# baseurl/baseurl/' \
37+
-e 's/mirror\.centos\.org/vault.centos.org/' \
38+
/etc/yum.repos.d/CentOS-SCLo-scl*.repo && \
2539
yum install -y \
2640
cmake3 \
2741
curl \

ci/docker/python-wheel-manylinux-201x.dockerfile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,21 @@ ARG manylinux
2424

2525
ENV MANYLINUX_VERSION=${manylinux}
2626

27+
# For the manylinux2014 base, point the yum repos at vault.centos.org first, then ensure dnf is installed
28+
RUN if [ "${MANYLINUX_VERSION}" = "2014" ]; then \
29+
sed -i \
30+
-e 's/^mirrorlist/#mirrorlist/' \
31+
-e 's/^#baseurl/baseurl/' \
32+
-e 's/mirror\.centos\.org/vault.centos.org/' \
33+
/etc/yum.repos.d/*.repo; \
34+
if [ "${arch}" != "amd64" ]; then \
35+
sed -i \
36+
-e 's,vault\.centos\.org/centos,vault.centos.org/altarch,' \
37+
/etc/yum.repos.d/CentOS-SCLo-scl-rh.repo; \
38+
fi; \
39+
fi
40+
RUN yum install -y dnf
41+
2742
# Install basic dependencies
2843
RUN yum install -y git flex curl autoconf zip perl-IPC-Cmd wget
2944

cpp/src/gandiva/regex_functions_holder.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,14 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
9999
"'like' function requires a string literal as the second parameter"));
100100

101101
RE2::Options regex_op;
102+
regex_op.set_dot_nl(true);
102103
if (node.descriptor()->name() == "ilike") {
103104
regex_op.set_case_sensitive(false); // set case-insensitive for ilike function.
104105

105106
return Make(std::get<std::string>(literal->holder()), holder, regex_op);
106107
}
107108
if (node.children().size() == 2) {
108-
return Make(std::get<std::string>(literal->holder()), holder);
109+
return Make(std::get<std::string>(literal->holder()), holder, regex_op);
109110
} else {
110111
auto escape_char = dynamic_cast<LiteralNode*>(node.children().at(2).get());
111112
ARROW_RETURN_IF(
@@ -118,7 +119,7 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
118119
Status::Invalid(
119120
"'like' function requires a string literal as the third parameter"));
120121
return Make(std::get<std::string>(literal->holder()),
121-
std::get<std::string>(escape_char->holder()), holder);
122+
std::get<std::string>(escape_char->holder()), holder, regex_op);
122123
}
123124
}
124125

@@ -127,7 +128,9 @@ Status LikeHolder::Make(const std::string& sql_pattern,
127128
std::string pcre_pattern;
128129
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
129130

130-
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
131+
RE2::Options regex_op;
132+
regex_op.set_dot_nl(true);
133+
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
131134
ARROW_RETURN_IF(!lholder->regex_.ok(),
132135
Status::Invalid("Building RE2 pattern '", pcre_pattern,
133136
"' failed with: ", lholder->regex_.error()));
@@ -137,7 +140,7 @@ Status LikeHolder::Make(const std::string& sql_pattern,
137140
}
138141

139142
Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escape_char,
140-
std::shared_ptr<LikeHolder>* holder) {
143+
std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op) {
141144
ARROW_RETURN_IF(escape_char.length() > 1,
142145
Status::Invalid("The length of escape char ", escape_char,
143146
" in 'like' function is greater than 1"));
@@ -149,7 +152,7 @@ Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escap
149152
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
150153
}
151154

152-
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
155+
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
153156
ARROW_RETURN_IF(!lholder->regex_.ok(),
154157
Status::Invalid("Building RE2 pattern '", pcre_pattern,
155158
"' failed with: ", lholder->regex_.error()));

cpp/src/gandiva/regex_functions_holder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
4040
static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder);
4141

4242
static Status Make(const std::string& sql_pattern, const std::string& escape_char,
43-
std::shared_ptr<LikeHolder>* holder);
43+
std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op);
4444

4545
static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder,
4646
RE2::Options regex_op);

cpp/src/gandiva/regex_functions_holder_test.cc

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ namespace gandiva {
2727
class TestLikeHolder : public ::testing::Test {
2828
public:
2929
RE2::Options regex_op;
30+
void SetUp() { regex_op.set_dot_nl(true); }
31+
3032
FunctionNode BuildLike(std::string pattern) {
3133
auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
3234
auto pattern_node =
@@ -75,6 +77,15 @@ TEST_F(TestLikeHolder, TestMatchOne) {
7577
EXPECT_FALSE(like("dabc"));
7678
}
7779

80+
TEST_F(TestLikeHolder, TestPcreSpecialWithNewLine) {
81+
std::shared_ptr<LikeHolder> like_holder;
82+
auto status = LikeHolder::Make("%Space1.%", &like_holder, regex_op);
83+
84+
auto& like = *like_holder;
85+
EXPECT_TRUE(
86+
like("[name: \"Space1.protect\"\nargs: \"count\"\ncolumn_name: \"pass_count\"]"));
87+
}
88+
7889
TEST_F(TestLikeHolder, TestPcreSpecial) {
7990
std::shared_ptr<LikeHolder> like_holder;
8091

@@ -104,17 +115,25 @@ TEST_F(TestLikeHolder, TestDot) {
104115
EXPECT_FALSE(like("abcd"));
105116
}
106117

118+
TEST_F(TestLikeHolder, TestMatchWithNewLine) {
119+
std::shared_ptr<LikeHolder> like_holder;
120+
auto status = LikeHolder::Make("%abc%", "\\", &like_holder, regex_op);
121+
122+
auto& like = *like_holder;
123+
EXPECT_TRUE(like("abc\nd"));
124+
}
125+
107126
TEST_F(TestLikeHolder, TestMatchSubString) {
108127
std::shared_ptr<LikeHolder> like_holder;
109128

110-
auto status = LikeHolder::Make("%abc%", "\\", &like_holder);
129+
auto status = LikeHolder::Make("%abc%", "\\", &like_holder, regex_op);
111130
EXPECT_EQ(status.ok(), true) << status.message();
112131

113132
auto& like = *like_holder;
114133
EXPECT_TRUE(like("abc"));
115134
EXPECT_FALSE(like("xxabdc"));
116135

117-
status = LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", &like_holder);
136+
status = LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", &like_holder, regex_op);
118137
EXPECT_EQ(status.ok(), true) << status.message();
119138

120139
auto& like_reserved_char = *like_holder;
@@ -192,7 +211,7 @@ TEST_F(TestLikeHolder, TestOptimise) {
192211
TEST_F(TestLikeHolder, TestMatchOneEscape) {
193212
std::shared_ptr<LikeHolder> like_holder;
194213

195-
auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
214+
auto status = LikeHolder::Make("ab\\_", "\\", &like_holder, regex_op);
196215
EXPECT_EQ(status.ok(), true) << status.message();
197216

198217
auto& like = *like_holder;
@@ -209,8 +228,7 @@ TEST_F(TestLikeHolder, TestMatchOneEscape) {
209228
TEST_F(TestLikeHolder, TestMatchManyEscape) {
210229
std::shared_ptr<LikeHolder> like_holder;
211230

212-
auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
213-
EXPECT_EQ(status.ok(), true) << status.message();
231+
auto status = LikeHolder::Make("ab\\%", "\\", &like_holder, regex_op);
214232

215233
auto& like = *like_holder;
216234

@@ -226,7 +244,7 @@ TEST_F(TestLikeHolder, TestMatchManyEscape) {
226244
TEST_F(TestLikeHolder, TestMatchEscape) {
227245
std::shared_ptr<LikeHolder> like_holder;
228246

229-
auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
247+
auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder, regex_op);
230248
EXPECT_EQ(status.ok(), true) << status.message();
231249

232250
auto& like = *like_holder;
@@ -239,7 +257,7 @@ TEST_F(TestLikeHolder, TestMatchEscape) {
239257
TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
240258
std::shared_ptr<LikeHolder> like_holder;
241259

242-
auto status = LikeHolder::Make("ab\\_", "", &like_holder);
260+
auto status = LikeHolder::Make("ab\\_", "", &like_holder, regex_op);
243261
EXPECT_EQ(status.ok(), true) << status.message();
244262

245263
auto& like = *like_holder;
@@ -254,7 +272,7 @@ TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
254272
TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
255273
std::shared_ptr<LikeHolder> like_holder;
256274

257-
auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
275+
auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder, regex_op);
258276
EXPECT_EQ(status.ok(), false) << status.message();
259277
}
260278

0 commit comments

Comments
 (0)