Skip to content

Commit d521f89

Browse files
authored
ESQL: Rewrite TO_UPPER/TO_LOWER comparisons (#118870)
This adds an optimization rule that rewrites TO_UPPER/TO_LOWER comparisons against a string into an InsensitiveEquals comparison. The rewrite can also fold right away to TRUE/FALSE when the string's case doesn't match the case produced by the function (e.g. TO_UPPER(first_name) == "Georgi" can never match). This also allows the predicate to be pushed down later to Lucene as a case-insensitive term query. Fixes #118304.
1 parent 22990df commit d521f89

File tree

14 files changed

+735
-264
lines changed

14 files changed

+735
-264
lines changed

docs/changelog/118870.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 118870
2+
summary: Rewrite TO_UPPER/TO_LOWER comparisons
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 118304

x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import org.elasticsearch.xpack.esql.core.type.DataType;
1616
import org.elasticsearch.xpack.esql.core.type.EsField;
1717

18+
import java.util.regex.Pattern;
19+
1820
import static java.util.Collections.emptyMap;
1921
import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
2022
import static org.elasticsearch.test.ESTestCase.randomBoolean;
@@ -26,6 +28,8 @@
2628
public final class TestUtils {
2729
private TestUtils() {}
2830

31+
private static final Pattern WS_PATTERN = Pattern.compile("\\s");
32+
2933
public static Literal of(Object value) {
3034
return of(Source.EMPTY, value);
3135
}
@@ -59,4 +63,9 @@ public static FieldAttribute getFieldAttribute(String name) {
5963
public static FieldAttribute getFieldAttribute(String name, DataType dataType) {
6064
return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true));
6165
}
66+
67+
/** Similar to {@link String#strip()}, but removes every whitespace character (as matched by the regex {@code \s}) throughout the entire string, not only at its ends. */
68+
public static String stripThrough(String input) {
69+
return WS_PATTERN.matcher(input).replaceAll(StringUtils.EMPTY);
70+
}
6271
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,189 @@ a:keyword | upper:keyword | lower:keyword
12311231
π/2 + a + B + Λ ºC | Π/2 + A + B + Λ ºC | π/2 + a + b + λ ºc
12321232
;
12331233

1234+
equalsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1235+
from employees
1236+
| where to_upper(first_name) == "GEORGI"
1237+
| keep emp_no, first_name
1238+
;
1239+
1240+
emp_no:integer | first_name:keyword
1241+
10001 | Georgi
1242+
;
1243+
1244+
equalsToUpperNestedPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1245+
from employees
1246+
| where to_upper(to_upper(to_lower(first_name))) == "GEORGI"
1247+
| keep emp_no, first_name
1248+
;
1249+
1250+
emp_no:integer | first_name:keyword
1251+
10001 | Georgi
1252+
;
1253+
1254+
negatedEqualsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1255+
from employees
1256+
| sort emp_no
1257+
| where not(to_upper(first_name) == "GEORGI")
1258+
| keep emp_no, first_name
1259+
| limit 1
1260+
;
1261+
1262+
emp_no:integer | first_name:keyword
1263+
10002 | Bezalel
1264+
;
1265+
1266+
notEqualsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1267+
from employees
1268+
| sort emp_no
1269+
| where to_upper(first_name) != "GEORGI"
1270+
| keep emp_no, first_name
1271+
| limit 1
1272+
;
1273+
1274+
emp_no:integer | first_name:keyword
1275+
10002 | Bezalel
1276+
;
1277+
1278+
negatedNotEqualsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1279+
from employees
1280+
| sort emp_no
1281+
| where not(to_upper(first_name) != "GEORGI")
1282+
| keep emp_no, first_name
1283+
| limit 1
1284+
;
1285+
1286+
emp_no:integer | first_name:keyword
1287+
10001 | Georgi
1288+
;
1289+
1290+
equalsToUpperFolded
1291+
from employees
1292+
| where to_upper(first_name) == "Georgi"
1293+
| keep emp_no, first_name
1294+
;
1295+
1296+
emp_no:integer | first_name:keyword
1297+
;
1298+
1299+
negatedEqualsToUpperFolded
1300+
from employees
1301+
| where not(to_upper(first_name) == "Georgi")
1302+
| stats c = count()
1303+
;
1304+
1305+
c:long
1306+
90
1307+
;
1308+
1309+
equalsToUpperNullFolded
1310+
from employees
1311+
| where to_upper(null) == "Georgi"
1312+
| keep emp_no, first_name
1313+
;
1314+
1315+
emp_no:integer | first_name:keyword
1316+
;
1317+
1318+
equalsNullToUpperFolded
1319+
from employees
1320+
| where to_upper(first_name) == null::keyword
1321+
| keep emp_no, first_name
1322+
;
1323+
1324+
emp_no:integer | first_name:keyword
1325+
;
1326+
1327+
notEqualsToUpperNullFolded
1328+
from employees
1329+
| where to_upper(null) != "Georgi"
1330+
| keep emp_no, first_name
1331+
;
1332+
1333+
emp_no:integer | first_name:keyword
1334+
;
1335+
1336+
notEqualsNullToUpperFolded
1337+
from employees
1338+
| where to_upper(first_name) != null::keyword
1339+
| keep emp_no, first_name
1340+
;
1341+
1342+
emp_no:integer | first_name:keyword
1343+
;
1344+
1345+
notEqualsToUpperFolded
1346+
from employees
1347+
| where to_upper(first_name) != "Georgi"
1348+
| stats c = count()
1349+
;
1350+
1351+
c:long
1352+
90
1353+
;
1354+
1355+
negatedNotEqualsToUpperFolded
1356+
from employees
1357+
| where not(to_upper(first_name) != "Georgi")
1358+
| stats c = count()
1359+
;
1360+
1361+
c:long
1362+
0
1363+
;
1364+
1365+
equalsToLowerPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1366+
from employees
1367+
| where to_lower(first_name) == "georgi"
1368+
| keep emp_no, first_name
1369+
;
1370+
1371+
emp_no:integer | first_name:keyword
1372+
10001 | Georgi
1373+
;
1374+
1375+
notEqualsToLowerPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1376+
from employees
1377+
| sort emp_no
1378+
| where to_lower(first_name) != "georgi"
1379+
| keep emp_no, first_name
1380+
| limit 1
1381+
;
1382+
1383+
emp_no:integer | first_name:keyword
1384+
10002 | Bezalel
1385+
;
1386+
1387+
equalsToLowerFolded
1388+
from employees
1389+
| where to_lower(first_name) == "Georgi"
1390+
| keep emp_no, first_name
1391+
;
1392+
1393+
emp_no:integer | first_name:keyword
1394+
;
1395+
1396+
notEqualsToLowerFolded
1397+
from employees
1398+
| where to_lower(first_name) != "Georgi"
1399+
| stats c = count()
1400+
;
1401+
1402+
c:long
1403+
90
1404+
;
1405+
1406+
equalsToLowerWithUnico(rn|d)s
1407+
from employees
1408+
| where to_lower(concat(first_name, "🦄🦄")) != "georgi🦄🦄"
1409+
| stats c = count()
1410+
;
1411+
1412+
// the 10 null first names are filtered out, as is the single row matching "georgi🦄🦄"
1413+
c:long
1414+
89
1415+
;
1416+
12341417
reverse
12351418
required_capability: fn_reverse
12361419
from employees | sort emp_no | eval name_reversed = REVERSE(first_name) | keep emp_no, first_name, name_reversed | limit 1;
Lines changed: 18 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)