Skip to content

Commit b4db4c3

Browse files
authored
[opt](function) better signature match for time type (#58193)
### What problem does this PR solve? Problem Summary: This PR introduces the following changes: - Modifies the function signature matching logic by treating string literal coercion as an identical match. - Adds string literal coercion support for the TIME type. - Restricts the allowed format for string literal coercion of the DATETIME type to only standard formats. With this PR, when invoking functions (such as second) that have overloads involving the TIME type, a string literal argument in a valid TIME format will now preferentially match the TIME-type signature.
1 parent a13241b commit b4db4c3

File tree

10 files changed

+339
-102
lines changed

10 files changed

+339
-102
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/SearchSignature.java

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,15 @@ public Optional<FunctionSignature> result() {
7979
// search every round
8080
for (BiFunction<DataType, DataType, Boolean> typePredicate : typePredicatePerRound) {
8181
int candidateNonStrictMatched = Integer.MAX_VALUE;
82+
int candidateNonStrictMatchedWithoutStringLiteralCoercion = Integer.MAX_VALUE;
8283
int candidateDateToDateV2Count = Integer.MIN_VALUE;
8384
FunctionSignature candidate = null;
8485
for (FunctionSignature signature : signatures) {
85-
if (doMatchArity(signature, arguments) && doMatchTypes(signature, arguments, typePredicate)) {
86+
if (doMatchArity(signature, arguments)) {
87+
Pair<Boolean, Integer> matchTypesResult = doMatchTypes(signature, arguments, typePredicate);
88+
if (!matchTypesResult.first) {
89+
continue;
90+
}
8691
// first we need to check decimal v3 precision promotion
8792
if (computeSignature instanceof ComputePrecision) {
8893
if (!((ComputePrecision) computeSignature).checkPrecision(signature)) {
@@ -94,17 +99,34 @@ public Optional<FunctionSignature> result() {
9499
continue;
95100
}
96101
}
97-
// has most identical matched signature has the highest priority
102+
// compare identical-match counts: the signature with more identical matches has higher priority
103+
// first, compare identical matched + string like literal coercion
104+
// if equals, compare identical matched itself
105+
// if equals, compare identical matched + date to datev2
98106
Pair<Integer, Integer> currentNonStrictMatched = nonStrictMatchedCount(signature, arguments);
99-
if (currentNonStrictMatched.first < candidateNonStrictMatched) {
100-
candidateNonStrictMatched = currentNonStrictMatched.first;
107+
int currentNonStrictMatchedCount = currentNonStrictMatched.first;
108+
int currentNonStrictMatchedWithoutStringLiteralCoercion
109+
= currentNonStrictMatchedCount - matchTypesResult.second;
110+
if (currentNonStrictMatchedWithoutStringLiteralCoercion
111+
< candidateNonStrictMatchedWithoutStringLiteralCoercion) {
112+
candidateNonStrictMatchedWithoutStringLiteralCoercion
113+
= currentNonStrictMatchedWithoutStringLiteralCoercion;
114+
candidateNonStrictMatched = currentNonStrictMatchedCount;
101115
candidateDateToDateV2Count = currentNonStrictMatched.second;
102116
candidate = signature;
103-
} else if (currentNonStrictMatched.first == candidateNonStrictMatched) {
104-
// if we need to do same count cast, then we choose the signature need to do more v1 to v2 cast
105-
if (candidateDateToDateV2Count < currentNonStrictMatched.second) {
117+
} else if (currentNonStrictMatchedWithoutStringLiteralCoercion
118+
== candidateNonStrictMatchedWithoutStringLiteralCoercion) {
119+
if (currentNonStrictMatchedCount < candidateNonStrictMatched) {
120+
candidateNonStrictMatched = currentNonStrictMatchedCount;
106121
candidateDateToDateV2Count = currentNonStrictMatched.second;
107122
candidate = signature;
123+
} else if (currentNonStrictMatchedCount == candidateNonStrictMatched) {
124+
// if we need to do same count cast,
125+
// then we choose the signature need to do more v1 to v2 cast
126+
if (candidateDateToDateV2Count < currentNonStrictMatched.second) {
127+
candidateDateToDateV2Count = currentNonStrictMatched.second;
128+
candidate = signature;
129+
}
108130
}
109131
}
110132
}
@@ -204,8 +226,9 @@ private Pair<Integer, Integer> nonStrictMatchedCount(FunctionSignature sig, List
204226
return Pair.of(nonStrictMatched, dateToDateV2Count);
205227
}
206228

207-
private boolean doMatchTypes(FunctionSignature sig, List<Expression> arguments,
229+
private Pair<Boolean, Integer> doMatchTypes(FunctionSignature sig, List<Expression> arguments,
208230
BiFunction<DataType, DataType, Boolean> typePredicate) {
231+
int stringLiteralCoersionCount = 0;
209232
int arity = arguments.size();
210233
for (int i = 0; i < arity; i++) {
211234
DataType sigArgType = sig.getArgType(i);
@@ -217,12 +240,15 @@ private boolean doMatchTypes(FunctionSignature sig, List<Expression> arguments,
217240
if (!argument.isNullLiteral() && argument.isLiteral() && realType.isStringLikeType()) {
218241
realType = TypeCoercionUtils.characterLiteralTypeCoercion(((Literal) argument).getStringValue(),
219242
sigArgType).orElse(argument).getDataType();
243+
if (!realType.isStringLikeType()) {
244+
stringLiteralCoersionCount++;
245+
}
220246
}
221247
if (!typePredicate.apply(sigArgType, realType)) {
222-
return false;
248+
return Pair.of(false, stringLiteralCoersionCount);
223249
}
224250
}
225-
return true;
251+
return Pair.of(true, stringLiteralCoersionCount);
226252
}
227253

228254
public static void throwCanNotFoundFunctionException(String name, List<Expression> arguments) {

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/TimeV2Literal.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,67 @@ protected static String normalize(String s) {
141141
return s;
142142
}
143143

144+
/**
145+
* parse time string and avoid throw exception directly for better performance.
146+
*/
147+
public static Result<TimeV2Literal, AnalysisException> parseTimeLiteral(String s) {
148+
int hour;
149+
int minute;
150+
int second;
151+
int microsecond;
152+
boolean negative = false;
153+
String normalized = normalize(s);
154+
if (normalized.charAt(0) == '-') {
155+
negative = true;
156+
normalized = normalized.substring(1);
157+
} else if (normalized.charAt(0) == '+') {
158+
normalized = normalized.substring(1);
159+
}
160+
// start parse string
161+
String[] parts = normalized.split(":");
162+
if (parts.length != 3) {
163+
return Result.err(() -> new AnalysisException("Invalid format, must have 3 parts separated by ':'"));
164+
}
165+
try {
166+
hour = Integer.parseInt(parts[0]);
167+
} catch (NumberFormatException e) {
168+
return Result.err(() -> new AnalysisException("Invalid hour format"));
169+
}
170+
171+
try {
172+
minute = Integer.parseInt(parts[1]);
173+
} catch (NumberFormatException e) {
174+
return Result.err(() -> new AnalysisException("Invalid minute format"));
175+
}
176+
// if parts[2] is 60.000 it will cause judge feed execute error
177+
if (parts[2].startsWith("60")) {
178+
return Result.err(() -> new AnalysisException("second out of range"));
179+
}
180+
double secPart;
181+
try {
182+
secPart = Double.parseDouble(parts[2]);
183+
} catch (NumberFormatException e) {
184+
return Result.err(() -> new AnalysisException("Invalid second format"));
185+
}
186+
secPart = secPart * (int) Math.pow(10, 6);
187+
secPart = Math.round(secPart);
188+
second = (int) (secPart / 1000000);
189+
microsecond = (int) (secPart % 1000000);
190+
if (second == 60) {
191+
minute += 1;
192+
second -= 60;
193+
if (minute == 60) {
194+
hour += 1;
195+
minute -= 60;
196+
}
197+
}
198+
199+
if (checkRange(hour, minute, second, microsecond)) {
200+
return Result.err(() -> new AnalysisException("time literal [" + s + "] is out of range"));
201+
}
202+
return Result.ok(new TimeV2Literal(hour, minute, second, microsecond, 6, negative));
203+
}
204+
144205
// should behave like be/src/vec/runtime/time_value.h timev2_to_double_from_str
145206
protected void init(String s) throws AnalysisException {
146207
s = normalize(s);

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/format/DateTimeChecker.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ private DateTimeChecker() {
3838
// 2024-10-12
3939
and(
4040
digit(1, 4), // year
41-
chars(DateLiteral.punctuations::contains),
41+
ch('-'),
4242
digit(1, 2), // month
43-
chars(DateLiteral.punctuations::contains),
43+
ch('-'),
4444
digit(1, 2) // day
4545
)
4646
)
@@ -65,19 +65,19 @@ private DateTimeChecker() {
6565
// 2024-01-01 01:02:03
6666
and("NormalDateTime",
6767
digit(1, 4), // year
68-
chars(DateLiteral.punctuations::contains),
68+
ch('-'),
6969
digit(1, 2), // month
70-
chars(DateLiteral.punctuations::contains),
70+
ch('-'),
7171
digit(1, 2), // day
7272
atLeast(1, c -> c == 'T' || c == ' ' || DateLiteral.punctuations.contains(c)),
7373
digit(1, 2), // hour
7474
option(
7575
and(
76-
chars(DateLiteral.punctuations::contains),
76+
ch(':'),
7777
digit(1, 2), // minute
7878
option(
7979
and(
80-
chars(DateLiteral.punctuations::contains),
80+
ch(':'),
8181
digit(1, 2) // second
8282
)
8383
)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.literal.format;
19+
20+
/**
21+
* Time literal format checker, support two types of time string:
22+
* colon format: ([+-])?\d+:\d{1,2}(:\d{1,2}(.\d+)?)?
23+
* NOTICE: only process colon format, because we do not treat numeric format as a time type
24+
* when do string literal corecion
25+
*/
26+
public class TimeChecker extends FormatChecker {
27+
private static final TimeChecker INSTANCE = new TimeChecker();
28+
29+
private final FormatChecker checker;
30+
31+
private TimeChecker() {
32+
super("TimeChecker");
33+
34+
this.checker =
35+
// time
36+
and("time format",
37+
option("sign", or(ch('-'), ch('+'))),
38+
// colon-format
39+
and("colon format",
40+
digit(1), // hour
41+
ch(':'),
42+
digit(1, 2), // minute
43+
option("second and micro second",
44+
and(
45+
ch(':'),
46+
digit(1, 2),
47+
option("micro second", nanoSecond())
48+
)
49+
) // second
50+
)
51+
);
52+
}
53+
54+
public static boolean isValidTime(String str) {
55+
str = str.trim();
56+
StringInspect stringInspect = new StringInspect(str.trim());
57+
return INSTANCE.check(stringInspect).matched && stringInspect.eos();
58+
}
59+
60+
@Override
61+
protected boolean doCheck(StringInspect stringInspect) {
62+
return checker.check(stringInspect).matched;
63+
}
64+
65+
private FormatChecker nanoSecond() {
66+
return and(
67+
ch('.'),
68+
digit(1)
69+
);
70+
}
71+
}

fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,13 @@
6767
import org.apache.doris.nereids.trees.expressions.literal.SmallIntLiteral;
6868
import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
6969
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
70+
import org.apache.doris.nereids.trees.expressions.literal.TimeV2Literal;
7071
import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
7172
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
7273
import org.apache.doris.nereids.trees.expressions.literal.format.DateTimeChecker;
7374
import org.apache.doris.nereids.trees.expressions.literal.format.FloatChecker;
7475
import org.apache.doris.nereids.trees.expressions.literal.format.IntegerChecker;
76+
import org.apache.doris.nereids.trees.expressions.literal.format.TimeChecker;
7577
import org.apache.doris.nereids.types.ArrayType;
7678
import org.apache.doris.nereids.types.BigIntType;
7779
import org.apache.doris.nereids.types.BooleanType;
@@ -637,6 +639,11 @@ public static Optional<Expression> characterLiteralTypeCoercion(String value, Da
637639
ret = parseResult2.get();
638640
}
639641
}
642+
} else if (dataType instanceof TimeV2Type && TimeChecker.isValidTime(value)) {
643+
Result<TimeV2Literal, AnalysisException> parseResult = TimeV2Literal.parseTimeLiteral(value);
644+
if (parseResult.isOk()) {
645+
ret = new TimeV2Literal(value);
646+
}
640647
}
641648
} catch (Exception e) {
642649
if (LOG.isDebugEnabled()) {

fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteralTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ void mysqlStrangeCase() {
5252
check("0-08-01 13:21:03", DateTimeV2Literal::new);
5353
check("0-08-01 13:21:03", DateTimeV2Literal::new);
5454
check("0001-01-01: 00:01:01.001", DateTimeV2Literal::new);
55-
check("2021?01?01 00.00.00", DateTimeV2Literal::new);
5655
}
5756

5857
@Test
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.literal.format;
19+
20+
import org.junit.jupiter.api.Assertions;
21+
import org.junit.jupiter.api.Test;
22+
23+
public class TimeCheckerTest {
24+
25+
@Test
26+
public void testColon() {
27+
String literal;
28+
literal = "0:0";
29+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
30+
literal = "00:0";
31+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
32+
literal = "000:0";
33+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
34+
literal = "+0:0";
35+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
36+
literal = "-0:0";
37+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
38+
literal = "0:00";
39+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
40+
literal = "0:00:0";
41+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
42+
literal = "0:0:00";
43+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
44+
literal = "0:0:0.0";
45+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
46+
literal = "0:0:0.00";
47+
Assertions.assertTrue(TimeChecker.isValidTime(literal));
48+
49+
literal = ":0";
50+
Assertions.assertFalse(TimeChecker.isValidTime(literal));
51+
literal = "0:0:0:0";
52+
Assertions.assertFalse(TimeChecker.isValidTime(literal));
53+
literal = "0:0:0.";
54+
Assertions.assertFalse(TimeChecker.isValidTime(literal));
55+
literal = "0:a:0";
56+
Assertions.assertFalse(TimeChecker.isValidTime(literal));
57+
literal = "!0:0:0";
58+
Assertions.assertFalse(TimeChecker.isValidTime(literal));
59+
60+
}
61+
}

regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ gdaskpdp
831831
5
832832

833833
-- !minute_3 --
834-
\N
834+
25
835835

836836
-- !minute_4 --
837837
0
@@ -1200,10 +1200,10 @@ da fanadur
12001200
-2.032258064516129
12011201

12021202
-- !months_between_4 --
1203-
1.0
1203+
1
12041204

12051205
-- !months_between_5 --
1206-
2.0
1206+
2
12071207

12081208
-- !months_between_6 --
12091209
0.96774194

0 commit comments

Comments
 (0)