Skip to content

Commit 34989b0

Browse files
zabetakrubenada
authored andcommitted
[CALCITE-7194] Simplify comparisons between function calls and literals to SEARCH
1. Generalize SargCollector in RexSimplify to handle comparisons with deterministic expressions. 2. Add Javadoc for accept variants in SargCollector 3. Prevent invalid SEARCH to interval/range transformations in DruidDateTimeUtils Some plan changes in DruidAdapterIT/DruidAdapter2IT are due to the added restrictions in DruidDateTimeUtils. When the SEARCH operand is not a plain column reference (RexInputRef) its generally unsafe to convert it to an interval; the entire DruidDateTimeUtils.createInterval was not meant to handle arbitrary complex expressions.
1 parent f26bd43 commit 34989b0

File tree

7 files changed

+103
-32
lines changed

7 files changed

+103
-32
lines changed

core/src/main/java/org/apache/calcite/rex/RexSimplify.java

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3175,13 +3175,28 @@ static class SargCollector {
31753175
this.negate = negate;
31763176
}
31773177

3178+
/**
3179+
* Accepts an expression and converts it to a Sarg if possible.
3180+
*
3181+
* @param term the expression to convert
3182+
* @param newTerms the list holding the result of the conversion or the
3183+
* original term if it cannot be converted
3184+
*/
31783185
private void accept(RexNode term, List<RexNode> newTerms) {
31793186
if (!accept_(term, newTerms)) {
31803187
newTerms.add(term);
31813188
}
31823189
newTermsCount = newTerms.size();
31833190
}
31843191

3192+
/**
3193+
* Accepts an expression and converts it to a Sarg if possible.
3194+
* Only certain kinds of expressions can be converted to Sargs.
3195+
*
3196+
* @param e the expression to convert
3197+
* @param newTerms the list to which the Sarg will be added if the expression is accepted
3198+
* @return true if the expression can be converted to a Sarg, false otherwise
3199+
*/
31853200
private boolean accept_(RexNode e, List<RexNode> newTerms) {
31863201
switch (e.getKind()) {
31873202
case LESS_THAN:
@@ -3204,31 +3219,25 @@ private boolean accept_(RexNode e, List<RexNode> newTerms) {
32043219
}
32053220
}
32063221

3222+
/**
3223+
* Accepts two operands from a binary comparison operator and converts them to a Sarg if
3224+
* possible. Only comparisons between a literal and a deterministic expression can be converted.
3225+
* Simplifications with non-deterministic expressions are generally avoided to ensure consistent
3226+
* results.
3227+
*
3228+
* @param left the left operand of the comparison
3229+
* @param right the right operand of the comparison
3230+
* @param kind the kind of comparison operator
3231+
* @param newTerms the list to which the Sarg will be added if accepted
3232+
* @return true if the operands can be converted to a Sarg, false otherwise
3233+
*/
32073234
private boolean accept2(RexNode left, RexNode right, SqlKind kind,
32083235
List<RexNode> newTerms) {
3209-
switch (left.getKind()) {
3210-
case INPUT_REF:
3211-
case FIELD_ACCESS:
3212-
case CAST:
3213-
switch (right.getKind()) {
3214-
case LITERAL:
3215-
return accept2b(left, kind, (RexLiteral) right, newTerms);
3216-
default:
3217-
break;
3218-
}
3219-
return false;
3220-
case LITERAL:
3221-
switch (right.getKind()) {
3222-
case INPUT_REF:
3223-
case FIELD_ACCESS:
3224-
case CAST:
3225-
return accept2b(right, kind.reverse(), (RexLiteral) left, newTerms);
3226-
default:
3227-
break;
3228-
}
3229-
return false;
3230-
default:
3231-
break;
3236+
if (right.isA(SqlKind.LITERAL) && RexUtil.isDeterministic(left)) {
3237+
return accept2b(left, kind, (RexLiteral) right, newTerms);
3238+
}
3239+
if (left.isA(SqlKind.LITERAL) && RexUtil.isDeterministic(right)) {
3240+
return accept2b(right, kind.reverse(), (RexLiteral) left, newTerms);
32323241
}
32333242
return false;
32343243
}
@@ -3238,7 +3247,14 @@ private static <E> E addFluent(List<? super E> list, E e) {
32383247
return e;
32393248
}
32403249

3241-
// always returns true
3250+
/**
3251+
* Accepts an operand from a null predicate and converts it to a Sarg.
3252+
*
3253+
* @param e the operand of the null predicate
3254+
* @param kind the kind of the null predicate
3255+
* @param newTerms the list to which the Sarg is added
3256+
* @return true since the operand is always converted to a Sarg
3257+
*/
32423258
private boolean accept1(RexNode e, SqlKind kind, List<RexNode> newTerms) {
32433259
final RexSargBuilder b =
32443260
map.computeIfAbsent(e, e2 ->
@@ -3256,6 +3272,17 @@ private boolean accept1(RexNode e, SqlKind kind, List<RexNode> newTerms) {
32563272
}
32573273
}
32583274

3275+
/**
3276+
* Accepts two operands from a binary comparison operator and converts them to a Sarg when the
3277+
* literal is not null. The conversion depends on the kind of comparison operator and only
3278+
* certain operators are supported.
3279+
*
3280+
* @param e any kind of deterministic expression
3281+
* @param kind the kind of comparison operator
3282+
* @param literal the literal operand of the comparison
3283+
* @param newTerms the list to which the Sarg is added if accepted
3284+
* @return false if the literal operand is null, true otherwise
3285+
*/
32593286
private boolean accept2b(RexNode e, SqlKind kind,
32603287
RexLiteral literal, List<RexNode> newTerms) {
32613288
if (literal.getValue() == null) {

core/src/test/java/org/apache/calcite/rex/RexProgramTest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2040,6 +2040,22 @@ private void checkExponentialCnf(int n) {
20402040
checkSimplify(expr, simplified);
20412041
}
20422042

2043+
/** Test case for
2044+
* <a href="https://issues.apache.org/jira/browse/CALCITE-7194">[CALCITE-7194]
2045+
* Simplify comparisons between function calls and literals to SEARCH</a>. */
2046+
@Test void testSimplifyComparisonsOfCallsWithLiteralsToSearch() {
2047+
RexNode v1 = input(tInt(), 1);
2048+
RexNode v2 = input(tInt(), 2);
2049+
RexNode plus = plus(v1, v2);
2050+
checkSimplify(and(gt(plus, literal(100000)), lt(plus, literal(200000))),
2051+
"SEARCH(+($1, $2), Sarg[(100000..200000)])");
2052+
checkSimplify(or(eq(plus, literal(100000)), eq(plus, literal(200000))),
2053+
"SEARCH(+($1, $2), Sarg[100000, 200000])");
2054+
RexNode ndc = rexBuilder.makeCall(getNoDeterministicOperator(), v1, v2);
2055+
checkSimplifyUnchanged(or(eq(ndc, literal(100000)), eq(ndc, literal(200000))));
2056+
checkSimplifyUnchanged(and(gt(ndc, literal(100000)), lt(ndc, literal(200000))));
2057+
}
2058+
20432059
@Test void testSimplifySearchWithSinglePointSargToEquals() {
20442060
Range<BigDecimal> r100 = Range.singleton(BigDecimal.valueOf(100));
20452061
RangeSet<BigDecimal> rangeSet = ImmutableRangeSet.<BigDecimal>builder().add(r100).build();

core/src/test/java/org/apache/calcite/test/RelMetadataTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4097,8 +4097,8 @@ private void assertExpressionLineage(
40974097
@Test void testExpressionLineageBetweenExpressionWithJoin() {
40984098
String sql = "select dept.deptno + empno between 1 and 2"
40994099
+ " from emp join dept on emp.deptno = dept.deptno";
4100-
String expected = "[AND(>=(+([CATALOG, SALES, DEPT].#0.$0, [CATALOG, SALES, EMP].#0.$0), 1),"
4101-
+ " <=(+([CATALOG, SALES, DEPT].#0.$0, [CATALOG, SALES, EMP].#0.$0), 2))]";
4100+
String expected =
4101+
"[SEARCH(+([CATALOG, SALES, DEPT].#0.$0, [CATALOG, SALES, EMP].#0.$0), Sarg[[1..2]])]";
41024102
String comment = "'empno' is column 0 in 'catalog.sales.emp', "
41034103
+ "'deptno' is column 0 in 'catalog.sales.dept', and "
41044104
+ "'dept.deptno + empno between 1 and 2' is translated into "

druid/src/main/java/org/apache/calcite/adapter/druid/DruidDateTimeUtils.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ && literalValue(call.getOperands().get(3)) != null) {
237237
return ranges.build();
238238

239239
case SEARCH:
240+
if (!canTransformSearchToRange(call)) {
241+
return null;
242+
}
240243
final RexLiteral right = (RexLiteral) call.operands.get(1);
241244
final Sarg<?> sarg = requireNonNull(right.getValueAs(Sarg.class));
242245
ranges = ImmutableList.builder();
@@ -255,6 +258,28 @@ && literalValue(call.getOperands().get(3)) != null) {
255258
}
256259
}
257260

261+
/**
262+
* Returns whether the given SEARCH call can be transformed to a Druid range.
263+
*
264+
* @param call a SEARCH call
265+
* @return whether the given SEARCH call can be transformed to a Druid range.
266+
*/
267+
private static boolean canTransformSearchToRange(RexCall call) {
268+
assert call.getKind() == SqlKind.SEARCH;
269+
if (call.getOperands().get(0) instanceof RexInputRef) {
270+
SqlTypeName literalType = call.operands.get(1).getType().getSqlTypeName();
271+
switch (literalType) {
272+
case TIMESTAMP:
273+
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
274+
case DATE:
275+
return true;
276+
default:
277+
return false;
278+
}
279+
}
280+
return false;
281+
}
282+
258283
private static Long toLong(Comparable comparable) {
259284
if (comparable instanceof TimestampString) {
260285
TimestampString timestampString = (TimestampString) comparable;

druid/src/test/java/org/apache/calcite/test/DruidAdapter2IT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,7 +1882,7 @@ private void checkGroupBySingleSortLimit(boolean approx) {
18821882
final String plan = "PLAN=EnumerableInterpreter\n"
18831883
+ " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000Z/"
18841884
+ "2992-01-10T00:00:00.000Z]], filter=[AND(=($2, 'Bird Call'), "
1885-
+ "OR(=(EXTRACT(FLAG(WEEK), $0), 10), =(EXTRACT(FLAG(WEEK), $0), 11)))], "
1885+
+ "SEARCH(EXTRACT(FLAG(WEEK), $0), Sarg[10L:BIGINT, 11L:BIGINT]:BIGINT))], "
18861886
+ "projects=[[$63, $90, $91]], "
18871887
+ "groups=[{0}], aggs=[[SUM($1), SUM($2)]], "
18881888
+ "post_projects=[[$0, 'Bird Call', -($1, $2)]])";
@@ -3054,7 +3054,8 @@ private void testCountWithApproxDistinct(boolean approx, String sql,
30543054
+ " CAST('1997-01-01' as DATE) GROUP BY floor(\"timestamp\" to DAY) order by d limit 3";
30553055
final String plan = "PLAN=EnumerableInterpreter\n"
30563056
+ " DruidQuery(table=[[foodmart, foodmart]], "
3057-
+ "intervals=[[1997-01-01T00:00:00.000Z/1997-02-01T00:00:00.000Z]], "
3057+
+ "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
3058+
+ "filter=[=(FLOOR(CAST($0):DATE NOT NULL, FLAG(MONTH)), 1997-01-01)], "
30583059
+ "projects=[[FLOOR($0, FLAG(DAY))]], groups=[{0}], aggs=[[]], sort0=[0], "
30593060
+ "dir0=[ASC], fetch=[3])";
30603061
sql(sql)

druid/src/test/java/org/apache/calcite/test/DruidAdapterIT.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2188,8 +2188,9 @@ private void checkGroupBySingleSortLimit(boolean approx) {
21882188
final String plan = "PLAN=EnumerableInterpreter\n"
21892189
+ " DruidQuery(table=[[foodmart, foodmart]], "
21902190
+ "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
2191-
+ "filter=[AND(=($2, 'Bird Call'), OR(=(EXTRACT(FLAG(WEEK), $0), 10), "
2192-
+ "=(EXTRACT(FLAG(WEEK), $0), 11)))], projects=[[$63, $90, $91]], "
2191+
+ "filter=[AND(=($2, 'Bird Call'), "
2192+
+ "SEARCH(EXTRACT(FLAG(WEEK), $0), Sarg[10L:BIGINT, 11L:BIGINT]:BIGINT))], "
2193+
+ "projects=[[$63, $90, $91]], "
21932194
+ "groups=[{0}], aggs=[[SUM($1), SUM($2)]], post_projects=[[$0, 'Bird Call', -($1, $2)]])";
21942195
sql(sql, FOODMART)
21952196
.returnsOrdered("store_state=CA; brand_name=Bird Call; A=34.3646",
@@ -3696,7 +3697,8 @@ private void testCountWithApproxDistinct(boolean approx, String sql, String expe
36963697
+ " CAST('1997-01-01' as DATE) GROUP BY floor(\"timestamp\" to DAY) order by d limit 3";
36973698
final String plan = "PLAN=EnumerableInterpreter\n"
36983699
+ " DruidQuery(table=[[foodmart, foodmart]], "
3699-
+ "intervals=[[1997-01-01T00:00:00.000Z/1997-02-01T00:00:00.000Z]], "
3700+
+ "intervals=[[1900-01-09T00:00:00.000Z/2992-01-10T00:00:00.000Z]], "
3701+
+ "filter=[=(FLOOR(CAST($0):DATE NOT NULL, FLAG(MONTH)), 1997-01-01)], "
37003702
+ "projects=[[FLOOR($0, FLAG(DAY))]], groups=[{0}], aggs=[[]], "
37013703
+ "post_projects=[[CAST($0):TIMESTAMP(0) NOT NULL]], sort0=[0], dir0=[ASC], fetch=[3])";
37023704
sql(sql, FOODMART)

geode/src/test/java/org/apache/calcite/adapter/geode/rel/GeodeBookstoreTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ private CalciteAssert.AssertThat calciteAssert() {
486486

487487
@Test void testInSetFilterWithNestedStringField() {
488488
String expectedQuery = "SELECT primaryAddress.city AS city FROM /BookCustomer "
489-
+ "WHERE primaryAddress.city IN SET('Topeka', 'San Francisco')";
489+
+ "WHERE primaryAddress.city IN SET('San Francisco', 'Topeka')";
490490

491491
calciteAssert()
492492
.query("SELECT primaryAddress['city'] AS city\n"

0 commit comments

Comments
 (0)