Skip to content

Commit b0044c5

Browse files
zhouyuanPHILO-HE
andauthored
backport recent patches to 1.3 (#71)
* Add translate expression support (#68) * Initial commit * Introduce TranslateHolder * Remove unused header * Return 1 if empty string is given as substring (#69) * Add two math operations: floor & ceil (#72) * Initial commit * Add ceil function Co-authored-by: PHILO-HE <feilong.he@intel.com>
1 parent 58ad5d9 commit b0044c5

12 files changed

+231
-4
lines changed

cpp/src/gandiva/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ set(SRC_FILES
8484
llvm_types.cc
8585
like_holder.cc
8686
json_holder.cc
87+
translate_holder.cc
8788
literal_holder.cc
8889
projector.cc
8990
regex_util.cc
@@ -231,6 +232,7 @@ add_gandiva_test(internals-test
231232
to_date_holder_test.cc
232233
simple_arena_test.cc
233234
json_holder_test.cc
235+
translate_holder_test.cc
234236
like_holder_test.cc
235237
replace_holder_test.cc
236238
decimal_type_util_test.cc

cpp/src/gandiva/function_holder_registry.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "gandiva/random_generator_holder.h"
3232
#include "gandiva/replace_holder.h"
3333
#include "gandiva/to_date_holder.h"
34+
#include "gandiva/translate_holder.h"
3435

3536
namespace gandiva {
3637

@@ -70,6 +71,7 @@ class FunctionHolderRegistry {
7071
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
7172
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
7273
{"regexp_replace", LAMBDA_MAKER(ReplaceHolder)},
74+
{"translate", LAMBDA_MAKER(TranslateHolder)}
7375
};
7476
return maker_map;
7577
}

cpp/src/gandiva/function_registry_arithmetic.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
115115
// normalize for nan and zero
116116
UNARY_SAFE_NULL_IF_NULL(normalize, {}, float32, float32),
117117
UNARY_SAFE_NULL_IF_NULL(normalize, {}, float64, float64),
118+
// floor
119+
UNARY_SAFE_NULL_IF_NULL(floor, {}, float64, int64),
120+
UNARY_SAFE_NULL_IF_NULL(floor, {}, int64, int64),
121+
// ceil
122+
UNARY_SAFE_NULL_IF_NULL(ceil, {}, float64, int64),
123+
UNARY_SAFE_NULL_IF_NULL(ceil, {}, int64, int64),
118124
// bitwise functions
119125
BINARY_GENERIC_SAFE_NULL_IF_NULL(shift_left, {}, int32, int32, int32),
120126
BINARY_GENERIC_SAFE_NULL_IF_NULL(shift_left, {}, int64, int32, int64),

cpp/src/gandiva/function_registry_string.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
172172
kResultNullInternal, "gdv_fn_get_json_object_utf8_utf8",
173173
NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder |
174174
NativeFunction::kCanReturnErrors),
175+
176+
NativeFunction("translate", {}, DataTypeVector{utf8(), utf8(), utf8()}, utf8(),
177+
kResultNullIfNull, "gdv_fn_translate_utf8_utf8_utf8",
178+
NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder |
179+
NativeFunction::kCanReturnErrors),
175180

176181
NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
177182
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),

cpp/src/gandiva/gdv_function_stubs.cc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "gandiva/random_generator_holder.h"
3434
#include "gandiva/replace_holder.h"
3535
#include "gandiva/to_date_holder.h"
36+
#include "gandiva/translate_holder.h"
3637

3738
/// Stub functions that can be accessed from LLVM or the pre-compiled library.
3839

@@ -57,6 +58,17 @@ const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, int64_t holder_ptr,
5758
return res;
5859
}
5960

61+
const uint8_t* gdv_fn_translate_utf8_utf8_utf8(int64_t ptr, int64_t holder_ptr, const char* text,
62+
int text_len, const char* matching_str,
63+
int matching_str_len, const char* replace_str,
64+
int replace_str_len, int32_t* out_len) {
65+
gandiva::ExecutionContext* context = reinterpret_cast<gandiva::ExecutionContext*>(ptr);
66+
gandiva::TranslateHolder* holder = reinterpret_cast<gandiva::TranslateHolder*>(holder_ptr);
67+
auto res = (*holder)(context, std::string(text, text_len), std::string(matching_str, matching_str_len),
68+
std::string(replace_str, replace_str_len), out_len);
69+
return res;
70+
}
71+
6072
bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
6173
const char* pattern, int pattern_len) {
6274
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
@@ -517,6 +529,20 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
517529
engine->AddGlobalMappingForFunc("gdv_fn_get_json_object_utf8_utf8",
518530
types->i8_ptr_type() /*return_type*/, args,
519531
reinterpret_cast<void*>(gdv_fn_get_json_object_utf8_utf8));
532+
533+
// gdv_fn_translate_utf8_utf8_utf8
534+
args = {types->i64_type(), // int64_t ptr
535+
types->i64_type(), // int64_t holder_ptr
536+
types->i8_ptr_type(), // const char* text
537+
types->i32_type(), // int text_len
538+
types->i8_ptr_type(), // const char* matching_str
539+
types->i32_type(), // int matching_str_len
540+
types->i8_ptr_type(), // const char* replace_str
541+
types->i32_type(), // int replace_str_len
542+
types->i32_ptr_type()}; // int* out_len
543+
engine->AddGlobalMappingForFunc("gdv_fn_translate_utf8_utf8_utf8",
544+
types->i8_ptr_type() /*return types*/, args,
545+
reinterpret_cast<void*>(gdv_fn_translate_utf8_utf8_utf8));
520546

521547
// gdv_fn_like_utf8_utf8
522548
args = {types->i64_type(), // int64_t ptr

cpp/src/gandiva/gdv_function_stubs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
5757
const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, int64_t holder_ptr, const char* data, int data_len, bool in1_valid,
5858
const char* pattern, int pattern_len, bool in2_valid, bool* out_valid, int32_t* out_len);
5959

60+
/// Stub backing the "translate" function.
///
/// \param ptr              address of the gandiva::ExecutionContext, passed as an integer
/// \param holder_ptr       address of the gandiva::TranslateHolder, passed as an integer
/// \param text             input bytes to translate (`text_len` bytes)
/// \param matching_str     bytes to look for in `text` (`matching_str_len` bytes)
/// \param replace_str      positional replacements for `matching_str`
///                         (`replace_str_len` bytes)
/// \param[out] out_len     receives the length of the returned buffer
/// \return the buffer produced by TranslateHolder::operator()
const uint8_t* gdv_fn_translate_utf8_utf8_utf8(int64_t ptr, int64_t holder_ptr, const char* text,
61+
int text_len, const char* matching_str,
62+
int matching_str_len, const char* replace_str,
63+
int replace_str_len, int32_t* out_len);
64+
6065
int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
6166
int data_len, bool in1_validity,
6267
const char* pattern, int pattern_len,

cpp/src/gandiva/precompiled/arithmetic_ops.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,30 @@ NORMALIZE(float32, float32)
158158

159159
#undef NORMALIZE
160160

161+
// floor
162+
// floor_float64: largest integral value not greater than `in`, returned as int64.
// NOTE(review): converting NaN, +/-inf or a value outside the int64 range to an
// integer is undefined behaviour in C++; callers are assumed to pass values
// whose floor fits in int64 - confirm.
FORCE_INLINE
gdv_int64 floor_float64(gdv_float64 in) {
  return static_cast<gdv_int64>(std::floor(in));
}

// floor_int64: an integer is its own floor.
// The value is returned directly instead of going through std::floor, because
// the integer overload of std::floor converts to double first: that silently
// rounds magnitudes above 2^53 and overflows on the cast back for values
// close to INT64_MAX.
FORCE_INLINE
gdv_int64 floor_int64(gdv_int64 in) { return in; }
172+
173+
// ceil
174+
// ceil_float64: smallest integral value not less than `in`, returned as int64.
// NOTE(review): converting NaN, +/-inf or a value outside the int64 range to an
// integer is undefined behaviour in C++; callers are assumed to pass values
// whose ceiling fits in int64 - confirm.
FORCE_INLINE
gdv_int64 ceil_float64(gdv_float64 in) {
  return static_cast<gdv_int64>(std::ceil(in));
}

// ceil_int64: an integer is its own ceiling.
// The value is returned directly instead of going through std::ceil, because
// the integer overload of std::ceil converts to double first: that silently
// rounds magnitudes above 2^53 and overflows on the cast back for values
// close to INT64_MAX.
FORCE_INLINE
gdv_int64 ceil_int64(gdv_int64 in) { return in; }
184+
161185
// cast fns : takes one param type, returns another type.
162186
#define CAST_UNARY(NAME, IN_TYPE, OUT_TYPE) \
163187
FORCE_INLINE \

cpp/src/gandiva/precompiled/string_ops.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
// String functions
1919
#include "arrow/util/value_parsing.h"
20+
2021
extern "C" {
2122

2223
#include <limits.h>
@@ -1326,9 +1327,9 @@ gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
13261327
gdv_fn_context_set_error_msg(context, "Start position must be greater than 0");
13271328
return 0;
13281329
}
1329-
1330-
if (str_len == 0 || sub_str_len == 0) {
1331-
return 0;
1330+
// To align with vanilla Spark: an empty substring is always found at position 1.
1331+
if (sub_str_len == 0) {
1332+
return 1;
13321333
}
13331334

13341335
gdv_int32 byte_pos = utf8_byte_pos(context, str, str_len, start_pos - 1);

cpp/src/gandiva/precompiled/string_ops_test.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,11 @@ TEST(TestStringOps, TestLocate) {
921921
EXPECT_FALSE(ctx.has_error());
922922

923923
pos = locate_utf8_utf8_int32(ctx_ptr, "", 0, "str", 3, 1);
924-
EXPECT_EQ(pos, 0);
924+
EXPECT_EQ(pos, 1);
925+
EXPECT_FALSE(ctx.has_error());
926+
927+
pos = locate_utf8_utf8_int32(ctx_ptr, "", 0, "", 0, 1);
928+
EXPECT_EQ(pos, 1);
925929
EXPECT_FALSE(ctx.has_error());
926930

927931
pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 0);
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "gandiva/translate_holder.h"
19+
20+
#include <unordered_map>
21+
#include "gandiva/node.h"
22+
23+
namespace gandiva {
24+
25+
// Factory overload taking the expression node. The node is not inspected;
// construction is delegated to the single-argument overload.
Status TranslateHolder::Make(const FunctionNode& /*node*/,
                             std::shared_ptr<TranslateHolder>* holder) {
  return TranslateHolder::Make(holder);
}
28+
29+
// Creates a fresh TranslateHolder, stores it in `*holder` and reports success.
Status TranslateHolder::Make(std::shared_ptr<TranslateHolder>* holder) {
  holder->reset(new TranslateHolder());
  return Status::OK();
}
33+
34+
/// Translates `text` byte by byte.
///
/// Every byte of `text` that occurs in `matching_str` is replaced by the byte
/// at the same position in `replace_str`. Bytes of `matching_str` that have no
/// counterpart (because `replace_str` is shorter) are removed from the output.
/// When a byte occurs more than once in `matching_str`, its first occurrence
/// decides the mapping, as in SQL/Spark `translate`.
///
/// The mapping works on single bytes, so multi-byte UTF-8 sequences are not
/// treated as one character. A NUL byte in `replace_str` is indistinguishable
/// from "delete" because '\0' is used as the deletion marker.
///
/// \param ctx           execution context; its arena owns the returned buffer
/// \param text          input to translate
/// \param matching_str  bytes to look for
/// \param replace_str   positional replacements for `matching_str`
/// \param[out] out_len  number of bytes in the returned buffer
/// \return the arena buffer holding the translated bytes (not NUL-terminated).
///         If the arena cannot provide memory for a non-empty result, an error
///         is set on `ctx`, `*out_len` is 0 and nullptr is returned.
const uint8_t* TranslateHolder::operator()(gandiva::ExecutionContext* ctx, std::string text,
                                           std::string matching_str,
                                           std::string replace_str, int32_t* out_len) {
  // Byte -> replacement table; '\0' marks "drop this byte".
  std::unordered_map<char, char> replace_map;
  replace_map.reserve(matching_str.length());
  for (size_t i = 0; i < matching_str.length(); ++i) {
    const char replacement = i < replace_str.length() ? replace_str[i] : '\0';
    // emplace never overwrites, so the first occurrence of a byte wins.
    replace_map.emplace(matching_str[i], replacement);
  }

  // The output can never be longer than the input; a heap-backed string
  // replaces the former variable-length stack array.
  std::string translated;
  translated.reserve(text.length());
  for (const char current : text) {
    const auto hit = replace_map.find(current);
    if (hit == replace_map.end()) {
      translated.push_back(current);
    } else if (hit->second != '\0') {
      translated.push_back(hit->second);
    }
  }

  *out_len = static_cast<int32_t>(translated.length());
  auto* result_buffer = reinterpret_cast<uint8_t*>(ctx->arena()->Allocate(*out_len));
  if (*out_len > 0) {
    // Only a non-empty request must yield usable memory; a zero-byte request
    // is allowed to come back without a buffer.
    if (result_buffer == nullptr) {
      ctx->set_error_msg("Could not allocate memory for output string");
      *out_len = 0;
      return nullptr;
    }
    memcpy(result_buffer, translated.data(), translated.length());
  }
  return result_buffer;
}
61+
62+
} // namespace gandiva

0 commit comments

Comments
 (0)