Skip to content

Commit c68c453

Browse files
committed
[Feature](function) Support function TIME_FORMAT
1 parent eb93602 commit c68c453

File tree

10 files changed

+868
-1
lines changed

10 files changed

+868
-1
lines changed

be/src/vec/functions/date_time_transforms.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "runtime/primitive_type.h"
2828
#include "udf/udf.h"
2929
#include "util/binary_cast.hpp"
30+
#include "vec/columns/column.h"
3031
#include "vec/columns/column_nullable.h"
3132
#include "vec/columns/column_string.h"
3233
#include "vec/columns/column_vector.h"
@@ -39,6 +40,7 @@
3940
#include "vec/data_types/data_type_decimal.h"
4041
#include "vec/data_types/data_type_string.h"
4142
#include "vec/functions/date_format_type.h"
43+
#include "vec/runtime/time_value.h"
4244
#include "vec/runtime/vdatetime_value.h"
4345
#include "vec/utils/util.hpp"
4446

@@ -409,6 +411,86 @@ struct FromUnixTimeDecimalImpl {
409411
}
410412
};
411413

414+
template <PrimitiveType ArgPType>
415+
class FunctionTimeFormat : public IFunction {
416+
public:
417+
using ArgColType = typename PrimitiveTypeTraits<ArgPType>::ColumnType;
418+
using ArgCppType = typename PrimitiveTypeTraits<ArgPType>::CppType;
419+
420+
static constexpr auto name = "time_format";
421+
String get_name() const override { return name; }
422+
static FunctionPtr create() { return std::make_shared<FunctionTimeFormat>(); }
423+
DataTypes get_variadic_argument_types_impl() const override {
424+
return {std::make_shared<typename PrimitiveTypeTraits<ArgPType>::DataType>(),
425+
std::make_shared<vectorized::DataTypeString>()};
426+
}
427+
DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
428+
return make_nullable(std::make_shared<DataTypeString>());
429+
}
430+
size_t get_number_of_arguments() const override { return 2; }
431+
432+
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
433+
uint32_t result, size_t input_rows_count) const override {
434+
auto res_col = ColumnString::create();
435+
ColumnString::Chars& res_chars = res_col->get_chars();
436+
ColumnString::Offsets& res_offsets = res_col->get_offsets();
437+
438+
auto null_map = ColumnUInt8::create();
439+
auto& null_map_data = null_map->get_data();
440+
null_map_data.resize_fill(input_rows_count, 0);
441+
442+
res_offsets.reserve(input_rows_count);
443+
444+
ColumnPtr arg_col[2];
445+
bool is_const[2];
446+
for (size_t i = 0; i < 2; ++i) {
447+
const ColumnPtr& col = block.get_by_position(arguments[i]).column;
448+
std::tie(arg_col[i], is_const[i]) = unpack_if_const(col);
449+
const NullMap* arg_null_map = VectorizedUtils::get_null_map(col);
450+
if (arg_null_map) {
451+
VectorizedUtils::update_null_map(null_map_data, *arg_null_map, is_const[i]);
452+
}
453+
arg_col[i] = remove_nullable(arg_col[i]);
454+
}
455+
456+
const auto* datetime_col = assert_cast<const ArgColType*>(arg_col[0].get());
457+
const auto* format_col = assert_cast<const ColumnString*>(arg_col[1].get());
458+
for (size_t i = 0; i < input_rows_count; ++i) {
459+
if (null_map_data[i]) {
460+
res_offsets.push_back(res_chars.size());
461+
continue;
462+
}
463+
464+
const auto& datetime_val = datetime_col->get_element(index_check_const(i, is_const[0]));
465+
StringRef format = format_col->get_data_at(index_check_const(i, is_const[1]));
466+
TimeValue::TimeType time = get_time_value(datetime_val);
467+
468+
char buf[100 + SAFE_FORMAT_STRING_MARGIN];
469+
if (!TimeValue::to_format_string_conservative(format.data, format.size, buf,
470+
100 + SAFE_FORMAT_STRING_MARGIN, time)) {
471+
null_map_data[i] = 1;
472+
res_offsets.push_back(res_chars.size());
473+
continue;
474+
}
475+
res_chars.insert(buf, buf + strlen(buf));
476+
res_offsets.push_back(res_chars.size());
477+
}
478+
block.replace_by_position(result,
479+
ColumnNullable::create(std::move(res_col), std::move(null_map)));
480+
return Status::OK();
481+
}
482+
483+
private:
484+
TimeValue::TimeType get_time_value(const ArgCppType& datetime_val) const {
485+
if constexpr (ArgPType == PrimitiveType::TYPE_TIMEV2) {
486+
return static_cast<TimeValue::TimeType>(datetime_val);
487+
} else {
488+
return TimeValue::make_time(datetime_val.hour(), datetime_val.minute(),
489+
datetime_val.second(), datetime_val.microsecond());
490+
}
491+
}
492+
};
493+
412494
#include "common/compile_check_end.h"
413495
} // namespace doris::vectorized
414496

be/src/vec/functions/function_datetime_string_to_string.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ using FunctionFromUnixTimeNewDecimalOneArg =
3737
FunctionDateTimeStringToString<FromUnixTimeDecimalImpl<false>>;
3838
using FunctionFromUnixTimeNewDecimalTwoArg =
3939
FunctionDateTimeStringToString<FromUnixTimeDecimalImpl<true>>;
40+
using FunctionTimeFormatDate = FunctionTimeFormat<TYPE_DATEV2>;
41+
using FunctionTimeFormatDateTime = FunctionTimeFormat<TYPE_DATETIMEV2>;
42+
using FunctionTimeFormatTime = FunctionTimeFormat<TYPE_TIMEV2>;
4043

4144
void register_function_date_time_string_to_string(SimpleFunctionFactory& factory) {
4245
factory.register_function<FunctionDateFormatV2>();
@@ -47,6 +50,9 @@ void register_function_date_time_string_to_string(SimpleFunctionFactory& factory
4750
factory.register_function<FunctionFromUnixTimeNewDecimalOneArg>();
4851
factory.register_function<FunctionFromUnixTimeNewDecimalTwoArg>();
4952
factory.register_function<FunctionDateTimeV2DateFormat>();
53+
factory.register_function<FunctionTimeFormatDate>();
54+
factory.register_function<FunctionTimeFormatDateTime>();
55+
factory.register_function<FunctionTimeFormatTime>();
5056
}
5157

5258
} // namespace doris::vectorized

be/src/vec/runtime/time_value.h

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "runtime/define_primitive_type.h"
2929
#include "runtime/primitive_type.h"
3030
#include "util/date_func.h"
31+
#include "vec/runtime/vdatetime_value.h"
3132

3233
namespace doris {
3334
#include "common/compile_check_begin.h"
@@ -150,6 +151,234 @@ class TimeValue {
150151
}
151152

152153
// Returns true when `time` lies within the closed range [-MAX_TIME, MAX_TIME].
static bool valid(double time) {
    const bool within_upper = time <= MAX_TIME;
    const bool within_lower = time >= -MAX_TIME;
    return within_upper && within_lower;
}
154+
155+
static bool to_format_string_conservative(const char* format, size_t len, char* to,
156+
size_t max_valid_length, TimeType time) {
157+
// If time is negative, we here only add a '-' to the begining of res
158+
// This behavior is consistent with MySQL
159+
if (time < 0) {
160+
memcpy(to, "-", 1);
161+
++to;
162+
time = -time;
163+
}
164+
165+
int32_t hour = TimeValue::hour(time);
166+
int32_t minute = TimeValue::minute(time);
167+
int32_t second = TimeValue::second(time);
168+
int32_t microsecond = TimeValue::microsecond(time);
169+
170+
char* const begin = to;
171+
char buf[64];
172+
char* pos = nullptr;
173+
char* cursor = buf;
174+
const char* ptr = format;
175+
const char* end = format + len;
176+
char ch = '\0';
177+
178+
while (ptr < end) {
179+
if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] {
180+
return false;
181+
}
182+
if (*ptr != '%' || (ptr + 1) == end) {
183+
*to++ = *ptr++;
184+
continue;
185+
}
186+
ptr++;
187+
switch (ch = *ptr++) {
188+
case 'H':
189+
// Hour (00..838 for TIME type, with at least 2 digits)
190+
if (hour < 100) {
191+
to = write_two_digits_to_string(hour, to);
192+
} else {
193+
pos = int_to_str(hour, cursor);
194+
to = append_with_prefix(cursor, static_cast<int>(pos - cursor), '0', 2, to);
195+
}
196+
break;
197+
case 'h':
198+
case 'I':
199+
// Hour (01..12)
200+
to = write_two_digits_to_string((hour % 24 + 11) % 12 + 1, to);
201+
break;
202+
case 'i':
203+
// Minutes, numeric (00..59)
204+
to = write_two_digits_to_string(minute, to);
205+
break;
206+
case 'k':
207+
// Hour (0..23) without leading zero
208+
pos = int_to_str(hour, cursor);
209+
to = append_with_prefix(cursor, static_cast<int>(pos - cursor), '0', 1, to);
210+
break;
211+
case 'l':
212+
// Hour (1..12) without leading zero
213+
pos = int_to_str((hour % 24 + 11) % 12 + 1, cursor);
214+
to = append_with_prefix(cursor, static_cast<int>(pos - cursor), '0', 1, to);
215+
break;
216+
case 's':
217+
case 'S':
218+
// Seconds (00..59)
219+
to = write_two_digits_to_string(second, to);
220+
break;
221+
case 'f':
222+
// Microseconds (000000..999999)
223+
pos = int_to_str(microsecond, cursor);
224+
to = append_with_prefix(cursor, static_cast<int>(pos - cursor), '0', 6, to);
225+
break;
226+
case 'p': {
227+
// AM or PM
228+
if (hour % 24 >= 12) {
229+
to = append_string("PM", to);
230+
} else {
231+
to = append_string("AM", to);
232+
}
233+
break;
234+
}
235+
case 'r': {
236+
// Time, 12-hour (hh:mm:ss followed by AM or PM)
237+
int32_t hour_12 = (hour + 11) % 12 + 1;
238+
*to++ = (char)('0' + (hour_12 / 10));
239+
*to++ = (char)('0' + (hour_12 % 10));
240+
*to++ = ':';
241+
*to++ = (char)('0' + (minute / 10));
242+
*to++ = (char)('0' + (minute % 10));
243+
*to++ = ':';
244+
*to++ = (char)('0' + (second / 10));
245+
*to++ = (char)('0' + (second % 10));
246+
if (hour % 24 >= 12) {
247+
to = append_string(" PM", to);
248+
} else {
249+
to = append_string(" AM", to);
250+
}
251+
break;
252+
}
253+
case 'T': {
254+
// Time, 24-hour (hh:mm:ss or hhh:mm:ss for TIME type)
255+
if (hour < 100) {
256+
*to++ = (char)('0' + (hour / 10));
257+
*to++ = (char)('0' + (hour % 10));
258+
} else {
259+
// For hours >= 100, convert to string with at least 2 digits
260+
pos = int_to_str(hour, cursor);
261+
to = append_with_prefix(cursor, static_cast<int>(pos - cursor), '0', 2, to);
262+
}
263+
*to++ = ':';
264+
*to++ = (char)('0' + (minute / 10));
265+
*to++ = (char)('0' + (minute % 10));
266+
*to++ = ':';
267+
*to++ = (char)('0' + (second / 10));
268+
*to++ = (char)('0' + (second % 10));
269+
break;
270+
}
271+
case '%':
272+
*to++ = '%';
273+
break;
274+
case 'Y':
275+
// Year, 4 digits - 4 zeros
276+
to = append_string("0000", to);
277+
break;
278+
case 'y':
279+
case 'm':
280+
case 'd':
281+
// Year (2 digits), Month, Day - insert 2 zeros
282+
to = write_two_digits_to_string(0, to);
283+
break;
284+
case 'c':
285+
case 'e':
286+
// Month (0..12) or Day without leading zero - insert 1 zero
287+
to = append_string("0", to);
288+
break;
289+
case 'M':
290+
case 'W':
291+
case 'j':
292+
case 'D':
293+
case 'U':
294+
case 'u':
295+
case 'V':
296+
case 'v':
297+
case 'x':
298+
case 'X':
299+
case 'w':
300+
// These specifiers are not supported for TIME type
301+
return false;
302+
default:
303+
*to++ = ch;
304+
break;
305+
}
306+
}
307+
*to++ = '\0';
308+
return true;
309+
}
310+
311+
private:
312+
// Lookup table holding the two-character decimal renderings "00".."99" back to back:
// the pair for value v occupies bytes [2 * v] and [2 * v + 1]. The 201st byte is the
// string literal's terminating NUL.
static constexpr char digits100[201] =
        "00010203040506070809"  // 00..09
        "10111213141516171819"  // 10..19
        "20212223242526272829"  // 20..29
        "30313233343536373839"  // 30..39
        "40414243444546474849"  // 40..49
        "50515253545556575859"  // 50..59
        "60616263646566676869"  // 60..69
        "70717273747576777879"  // 70..79
        "80818283848586878889"  // 80..89
        "90919293949596979899"; // 90..99
323+
324+
// Writes the decimal digits of `val` to `to` (no NUL terminator, no padding) and returns
// the position one past the last digit written. Zero is written as a single '0'.
static char* int_to_str(uint64_t val, char* to) {
    // Determine the width first so the digits can be stored directly in their final place.
    int width = 1;
    for (uint64_t rest = val / 10; rest != 0; rest /= 10) {
        ++width;
    }

    char* const finish = to + width;
    char* slot = finish;
    // Fill from the least significant digit backwards; do/while handles val == 0.
    do {
        *--slot = static_cast<char>('0' + (val % 10));
        val /= 10;
    } while (val != 0);
    return finish;
}
338+
339+
// Copies the NUL-terminated string `from` to `to`, excluding the terminator, and returns
// the position one past the last byte written.
static char* append_string(const char* from, char* to) {
    for (; *from != '\0'; ++from, ++to) {
        *to = *from;
    }
    return to;
}
345+
346+
// Appends `str_len` bytes of `str` to `to`, left-padded with `prefix` so that at least
// `target_len` bytes are written. `target_len` is only a lower bound: a longer `str` is
// written in full without padding. Returns the position one past the last byte written.
static char* append_with_prefix(const char* str, int str_len, char prefix, int target_len,
                                char* to) {
    // One padding byte for every position by which `str` falls short of `target_len`.
    for (int written = str_len; written < target_len; ++written) {
        *to++ = prefix;
    }

    memcpy(to, str, str_len);
    return to + str_len;
}
358+
359+
// Writes the two characters stored at digits100[number * 2] and digits100[number * 2 + 1]
// to `dst` and returns `dst + 2`. No range check is performed on `number`.
static char* write_two_digits_to_string(int number, char* dst) {
    const char* const pair = digits100 + number * 2;
    dst[0] = pair[0];
    dst[1] = pair[1];
    return dst + 2;
}
363+
364+
// Returns true when `spec` is one of the format specifier letters
// Y, y, M, m, b, c, d, D, e, j or U; false for any other character.
static bool is_date_related_specifier(char spec) {
    constexpr char kDateSpecifiers[] = "YyMmbcdDejU";
    // Stop at the literal's terminating NUL so that spec == '\0' is never reported as a match.
    for (const char* candidate = kDateSpecifiers; *candidate != '\0'; ++candidate) {
        if (*candidate == spec) {
            return true;
        }
    }
    return false;
}
153382
};
154383
} // namespace doris
155384
#include "common/compile_check_end.h"

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@
481481
import org.apache.doris.nereids.trees.expressions.functions.scalar.Tanh;
482482
import org.apache.doris.nereids.trees.expressions.functions.scalar.Time;
483483
import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeDiff;
484+
import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeFormat;
484485
import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeToSec;
485486
import org.apache.doris.nereids.trees.expressions.functions.scalar.Timestamp;
486487
import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64;
@@ -1028,6 +1029,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
10281029
scalar(Tanh.class, "tanh"),
10291030
scalar(Time.class, "time"),
10301031
scalar(TimeDiff.class, "timediff"),
1032+
scalar(TimeFormat.class, "time_format"),
10311033
scalar(TimeToSec.class, "time_to_sec"),
10321034
scalar(Timestamp.class, "timestamp"),
10331035
scalar(ToBase64.class, "to_base64"),

0 commit comments

Comments
 (0)