Skip to content

Commit aeda3ed

Browse files
authored
[8.x] ESQL: Rewrite TO_UPPER/TO_LOWER comparisons (#118870) (#119207)
* ESQL: Rewrite TO_UPPER/TO_LOWER comparisons (#118870) This adds an optimization rule that rewrites TO_UPPER/TO_LOWER comparisons against a string into an InsensitiveEquals comparison. The rewrite can also fold directly to TRUE/FALSE when the case of the string doesn't match the case produced by the function. This also allows the predicate to be pushed down to Lucene later as a case-insensitive term query. Fixes #118304. * Disable `TO_UPPER(null)` tests prior to 8.17 (#119213) TO_UPPER/TO_LOWER type resolution incorrectly returned the child's type (which could also be `null`, type `NULL`) instead of KEYWORD/TEXT, so a test like `TO_UPPER(null) == "..."` fails with a type mismatch. This was fixed collaterally by #114334 (8.17.0). Also, correct the skip ranges of some of the tests (this had no impact, however, due to the tested version range). (cherry picked from commit edb3818)
1 parent 364c80e commit aeda3ed

File tree

14 files changed

+735
-264
lines changed

14 files changed

+735
-264
lines changed

docs/changelog/118870.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 118870
2+
summary: Rewrite TO_UPPER/TO_LOWER comparisons
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 118304

x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import org.elasticsearch.xpack.esql.core.type.DataType;
1616
import org.elasticsearch.xpack.esql.core.type.EsField;
1717

18+
import java.util.regex.Pattern;
19+
1820
import static java.util.Collections.emptyMap;
1921
import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
2022
import static org.elasticsearch.test.ESTestCase.randomBoolean;
@@ -26,6 +28,8 @@
2628
public final class TestUtils {
2729
private TestUtils() {}
2830

31+
private static final Pattern WS_PATTERN = Pattern.compile("\\s");
32+
2933
/** Convenience overload: delegates to the two-argument {@code of}, passing {@code Source.EMPTY} as the source. */
public static Literal of(Object value) {
3034
return of(Source.EMPTY, value);
3135
}
@@ -59,4 +63,9 @@ public static FieldAttribute getFieldAttribute(String name) {
5963
/** Creates a {@link FieldAttribute} named {@code name}, backed by a new {@link EsField} named {@code name + "f"} of type {@code dataType}; the {@code EsField} is constructed with an empty map and {@code true} as its remaining arguments. */
public static FieldAttribute getFieldAttribute(String name, DataType dataType) {
6064
return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true));
6165
}
66+
67+
/** Similar to {@link String#strip()}, but removes whitespace throughout the entire string rather than only at its ends: every match of the regex {@code \s} is replaced with {@code StringUtils.EMPTY}. */
68+
public static String stripThrough(String input) {
69+
return WS_PATTERN.matcher(input).replaceAll(StringUtils.EMPTY);
70+
}
6271
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,189 @@ a:keyword | upper:keyword | lower:keyword
12441244
π/2 + a + B + Λ ºC | Π/2 + A + B + Λ ºC | π/2 + a + b + λ ºc
12451245
;
12461246

1247+
equalsToUpperPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1248+
from employees
1249+
| where to_upper(first_name) == "GEORGI"
1250+
| keep emp_no, first_name
1251+
;
1252+
1253+
emp_no:integer | first_name:keyword
1254+
10001 | Georgi
1255+
;
1256+
1257+
equalsToUpperNestedPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1258+
from employees
1259+
| where to_upper(to_upper(to_lower(first_name))) == "GEORGI"
1260+
| keep emp_no, first_name
1261+
;
1262+
1263+
emp_no:integer | first_name:keyword
1264+
10001 | Georgi
1265+
;
1266+
1267+
negatedEqualsToUpperPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1268+
from employees
1269+
| sort emp_no
1270+
| where not(to_upper(first_name) == "GEORGI")
1271+
| keep emp_no, first_name
1272+
| limit 1
1273+
;
1274+
1275+
emp_no:integer | first_name:keyword
1276+
10002 | Bezalel
1277+
;
1278+
1279+
notEqualsToUpperPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1280+
from employees
1281+
| sort emp_no
1282+
| where to_upper(first_name) != "GEORGI"
1283+
| keep emp_no, first_name
1284+
| limit 1
1285+
;
1286+
1287+
emp_no:integer | first_name:keyword
1288+
10002 | Bezalel
1289+
;
1290+
1291+
negatedNotEqualsToUpperPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1292+
from employees
1293+
| sort emp_no
1294+
| where not(to_upper(first_name) != "GEORGI")
1295+
| keep emp_no, first_name
1296+
| limit 1
1297+
;
1298+
1299+
emp_no:integer | first_name:keyword
1300+
10001 | Georgi
1301+
;
1302+
1303+
equalsToUpperFolded
1304+
from employees
1305+
| where to_upper(first_name) == "Georgi"
1306+
| keep emp_no, first_name
1307+
;
1308+
1309+
emp_no:integer | first_name:keyword
1310+
;
1311+
1312+
negatedEqualsToUpperFolded
1313+
from employees
1314+
| where not(to_upper(first_name) == "Georgi")
1315+
| stats c = count()
1316+
;
1317+
1318+
c:long
1319+
90
1320+
;
1321+
1322+
equalsToUpperNullFolded#[skip:-8.16.99, reason:function's type corrected in #114334]
1323+
from employees
1324+
| where to_upper(null) == "Georgi"
1325+
| keep emp_no, first_name
1326+
;
1327+
1328+
emp_no:integer | first_name:keyword
1329+
;
1330+
1331+
equalsNullToUpperFolded
1332+
from employees
1333+
| where to_upper(first_name) == null::keyword
1334+
| keep emp_no, first_name
1335+
;
1336+
1337+
emp_no:integer | first_name:keyword
1338+
;
1339+
1340+
notEqualsToUpperNullFolded#[skip:-8.16.99, reason:function's type corrected in #114334]
1341+
from employees
1342+
| where to_upper(null) != "Georgi"
1343+
| keep emp_no, first_name
1344+
;
1345+
1346+
emp_no:integer | first_name:keyword
1347+
;
1348+
1349+
notEqualsNullToUpperFolded
1350+
from employees
1351+
| where to_upper(first_name) != null::keyword
1352+
| keep emp_no, first_name
1353+
;
1354+
1355+
emp_no:integer | first_name:keyword
1356+
;
1357+
1358+
notEqualsToUpperFolded
1359+
from employees
1360+
| where to_upper(first_name) != "Georgi"
1361+
| stats c = count()
1362+
;
1363+
1364+
c:long
1365+
90
1366+
;
1367+
1368+
negatedNotEqualsToUpperFolded
1369+
from employees
1370+
| where not(to_upper(first_name) != "Georgi")
1371+
| stats c = count()
1372+
;
1373+
1374+
c:long
1375+
0
1376+
;
1377+
1378+
equalsToLowerPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1379+
from employees
1380+
| where to_lower(first_name) == "georgi"
1381+
| keep emp_no, first_name
1382+
;
1383+
1384+
emp_no:integer | first_name:keyword
1385+
10001 | Georgi
1386+
;
1387+
1388+
notEqualsToLowerPushedDown#[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
1389+
from employees
1390+
| sort emp_no
1391+
| where to_lower(first_name) != "georgi"
1392+
| keep emp_no, first_name
1393+
| limit 1
1394+
;
1395+
1396+
emp_no:integer | first_name:keyword
1397+
10002 | Bezalel
1398+
;
1399+
1400+
equalsToLowerFolded
1401+
from employees
1402+
| where to_lower(first_name) == "Georgi"
1403+
| keep emp_no, first_name
1404+
;
1405+
1406+
emp_no:integer | first_name:keyword
1407+
;
1408+
1409+
notEqualsToLowerFolded
1410+
from employees
1411+
| where to_lower(first_name) != "Georgi"
1412+
| stats c = count()
1413+
;
1414+
1415+
c:long
1416+
90
1417+
;
1418+
1419+
equalsToLowerWithUnico(rn|d)s
1420+
from employees
1421+
| where to_lower(concat(first_name, "🦄🦄")) != "georgi🦄🦄"
1422+
| stats c = count()
1423+
;
1424+
1425+
// 10 null first names
1426+
c:long
1427+
89
1428+
;
1429+
12471430
reverse
12481431
required_capability: fn_reverse
12491432
from employees | sort emp_no | eval name_reversed = REVERSE(first_name) | keep emp_no, first_name, name_reversed | limit 1;
Lines changed: 18 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)