Skip to content

Commit 54eeb62

Browse files
authored
Add ES|QL Locate function (#106899)
* Add ES|QL Locate function
1 parent ee667c4 commit 54eeb62

File tree

13 files changed

+666
-2
lines changed

13 files changed

+666
-2
lines changed

docs/changelog/106899.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 106899
2+
summary: Add ES|QL Locate function
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 106818
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
2+
3+
*Description*
4+
5+
Returns an integer that indicates the position of a keyword substring within another string
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
2+
3+
[discrete]
4+
[[esql-locate]]
5+
=== `LOCATE`
6+
7+
*Syntax*
8+
9+
[.text-center]
10+
image::esql/functions/signature/locate.svg[Embedded,opts=inline]
11+
12+
include::../parameters/locate.asciidoc[]
13+
include::../description/locate.asciidoc[]
14+
include::../types/locate.asciidoc[]
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
2+
3+
*Parameters*
4+
5+
`string`::
6+
An input string
7+
8+
`substring`::
9+
A substring to locate in the input string
10+
11+
`start`::
12+
The start index
Lines changed: 1 addition & 0 deletions
Loading
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
2+
3+
*Supported types*
4+
5+
[%header.monospaced.styled,format=dsv,separator=|]
6+
|===
7+
string | substring | start | result
8+
keyword | keyword | integer | integer
9+
keyword | text | integer | integer
10+
text | keyword | integer | integer
11+
text | text | integer | integer
12+
|===

x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ double e()
3030
"integer|long|double|boolean|keyword|text|ip|version least(first:integer|long|double|boolean|keyword|text|ip|version, ?rest...:integer|long|double|boolean|keyword|text|ip|version)"
3131
"keyword left(string:keyword|text, length:integer)"
3232
"integer length(string:keyword|text)"
33+
"integer locate(string:keyword|text, substring:keyword|text, ?start:integer)"
3334
"double log(?base:integer|unsigned_long|long|double, number:integer|unsigned_long|long|double)"
3435
"double log10(number:double|integer|long|unsigned_long)"
3536
"keyword|text ltrim(string:keyword|text)"
@@ -138,6 +139,7 @@ greatest |first |"integer|long|double|boolean
138139
least |first |"integer|long|double|boolean|keyword|text|ip|version" |[""]
139140
left |[string, length] |["keyword|text", integer] |[The string from which to return a substring., The number of characters to return.]
140141
length |string |"keyword|text" |[""]
142+
locate |[string, substring, start] |["keyword|text", "keyword|text", "integer"] |[An input string, A substring to locate in the input string, The start index]
141143
log |[base, number] |["integer|unsigned_long|long|double", "integer|unsigned_long|long|double"] |["Base of logarithm. If `null`\, the function returns `null`. If not provided\, this function returns the natural logarithm (base e) of a value.", "Numeric expression. If `null`\, the function returns `null`."]
142144
log10 |number |"double|integer|long|unsigned_long" |Numeric expression. If `null`, the function returns `null`.
143145
ltrim |string |"keyword|text" |[""]
@@ -247,6 +249,7 @@ greatest |Returns the maximum value from many columns.
247249
least |Returns the minimum value from many columns.
248250
left |Returns the substring that extracts 'length' chars from 'string' starting from the left.
249251
length |Returns the character length of a string.
252+
locate |Returns an integer that indicates the position of a keyword substring within another string
250253
log |Returns the logarithm of a value to a base. The input can be any numeric value, the return value is always a double. Logs of zero, negative numbers, and base of one return `null` as well as a warning.
251254
log10 |Returns the logarithm of a value to base 10. The input can be any numeric value, the return value is always a double. Logs of 0 and negative numbers return `null` as well as a warning.
252255
ltrim |Removes leading whitespaces from a string.
@@ -357,6 +360,7 @@ greatest |"integer|long|double|boolean|keyword|text|ip|version"
357360
least |"integer|long|double|boolean|keyword|text|ip|version" |false |true |false
358361
left |keyword |[false, false] |false |false
359362
length |integer |false |false |false
363+
locate |integer |[false, false, true] |false |false
360364
log |double |[true, false] |false |false
361365
log10 |double |false |false |false
362366
ltrim |"keyword|text" |false |false |false
@@ -447,5 +451,5 @@ countFunctions#[skip:-8.13.99]
447451
meta functions | stats a = count(*), b = count(*), c = count(*) | mv_expand c;
448452

449453
a:long | b:long | c:long
450-
100 | 100 | 100
454+
101 | 101 | 101
451455
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,3 +1168,115 @@ from employees | where emp_no == 10001 | eval split = split("fooMbar", gender) |
11681168
gender:keyword | split:keyword
11691169
M | [foo, bar]
11701170
;
1171+
1172+
locate#[skip:-8.13.99,reason:new string function added in 8.14]
1173+
row a = "hello" | eval a_ll = locate(a, "ll");
1174+
1175+
a:keyword | a_ll:integer
1176+
hello | 3
1177+
;
1178+
1179+
locateFail#[skip:-8.13.99,reason:new string function added in 8.14]
1180+
row a = "hello" | eval a_ll = locate(a, "int");
1181+
1182+
a:keyword | a_ll:integer
1183+
hello | 0
1184+
;
1185+
1186+
locateZeroStart#[skip:-8.13.99,reason:new string function added in 8.14]
1187+
row a = "hello" | eval a_ll = locate(a, "ll", 0);
1188+
1189+
a:keyword | a_ll:integer
1190+
hello | 3
1191+
;
1192+
1193+
locateExactStart#[skip:-8.13.99,reason:new string function added in 8.14]
1194+
row a = "hello" | eval a_ll = locate(a, "ll", 3);
1195+
1196+
a:keyword | a_ll:integer
1197+
hello | 3
1198+
;
1199+
1200+
locateLongerStart#[skip:-8.13.99,reason:new string function added in 8.14]
1201+
row a = "hello" | eval a_ll = locate(a, "ll", 10);
1202+
1203+
a:keyword | a_ll:integer
1204+
hello | 0
1205+
;
1206+
1207+
locateLongerSubstr#[skip:-8.13.99,reason:new string function added in 8.14]
1208+
row a = "hello" | eval a_ll = locate(a, "farewell");
1209+
1210+
a:keyword | a_ll:integer
1211+
hello | 0
1212+
;
1213+
1214+
locateSame#[skip:-8.13.99,reason:new string function added in 8.14]
1215+
row a = "hello" | eval a_ll = locate(a, "hello");
1216+
1217+
a:keyword | a_ll:integer
1218+
hello | 1
1219+
;
1220+
1221+
locateWithSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
1222+
from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = locate(last_name, f_s) | keep emp_no, last_name, f_s, f_l;
1223+
ignoreOrder:true
1224+
1225+
emp_no:integer | last_name:keyword | f_s:keyword | f_l:integer
1226+
10001 | Facello | acello | 2
1227+
10002 | Simmel | immel | 2
1228+
10003 | Bamford | amford | 2
1229+
10004 | Koblick | oblick | 2
1230+
10005 | Maliniak | aliniak | 2
1231+
10006 | Preusig | reusig | 2
1232+
10007 | Zielinski | ielinski | 2
1233+
10008 | Kalloufi | alloufi | 2
1234+
10009 | Peac | eac | 2
1235+
10010 | Piveteau | iveteau | 2
1236+
;
1237+
1238+
locateUtf16Emoji#[skip:-8.13.99,reason:new string function added in 8.14]
1239+
row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 3) | eval f_l = locate(a, f_s);
1240+
1241+
a:keyword | f_s:keyword | f_l:integer
1242+
🐱Meow!🐶Woof! | Meow!🐶Woof! | 3
1243+
;
1244+
1245+
locateNestedSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
1246+
row a = "hello" | eval a_ll = substring(a, locate(a, "ll"));
1247+
1248+
a:keyword | a_ll:keyword
1249+
hello | llo
1250+
;
1251+
1252+
locateNestSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
1253+
row a = "hello" | eval a_ll = locate(substring(a, 2), "ll");
1254+
1255+
a:keyword | a_ll:integer
1256+
hello | 2
1257+
;
1258+
1259+
locateStats#[skip:-8.13.99,reason:new string function added in 8.14]
1260+
from employees | where emp_no <= 10010 | eval f_l = locate(last_name, "ll") | stats min(f_l), max(f_l) by job_positions | sort job_positions | limit 5;
1261+
1262+
min(f_l):integer | max(f_l):integer | job_positions:keyword
1263+
5 | 5 | Accountant
1264+
0 | 0 | Architect
1265+
0 | 0 | Head Human Resources
1266+
0 | 3 | Internship
1267+
3 | 3 | Junior Developer
1268+
;
1269+
1270+
locateWarnings#[skip:-8.13.99,reason:new string function added in 8.14]
1271+
required_feature: esql.mv_warn
1272+
1273+
from hosts | where host=="epsilon" | eval l1 = locate(host_group, "ate"), l2 = locate(description, "ate") | keep l1, l2;
1274+
ignoreOrder:true
1275+
warning:Line 1:80: evaluation of [locate(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded.
1276+
warning:Line 1:80: java.lang.IllegalArgumentException: single-value function encountered multi-value
1277+
1278+
l1:integer | l2:integer
1279+
2 | null
1280+
2 | null
1281+
null | 0
1282+
;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License
3+
// 2.0; you may not use this file except in compliance with the Elastic License
4+
// 2.0.
5+
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
6+
7+
import java.lang.IllegalArgumentException;
8+
import java.lang.Override;
9+
import java.lang.String;
10+
import org.apache.lucene.util.BytesRef;
11+
import org.elasticsearch.compute.data.Block;
12+
import org.elasticsearch.compute.data.BytesRefBlock;
13+
import org.elasticsearch.compute.data.BytesRefVector;
14+
import org.elasticsearch.compute.data.IntBlock;
15+
import org.elasticsearch.compute.data.IntVector;
16+
import org.elasticsearch.compute.data.Page;
17+
import org.elasticsearch.compute.operator.DriverContext;
18+
import org.elasticsearch.compute.operator.EvalOperator;
19+
import org.elasticsearch.core.Releasables;
20+
import org.elasticsearch.xpack.esql.expression.function.Warnings;
21+
import org.elasticsearch.xpack.ql.tree.Source;
22+
23+
/**
24+
* {@link EvalOperator.ExpressionEvaluator} implementation for {@link Locate}.
25+
* This class is generated. Do not edit it.
26+
*/
27+
public final class LocateEvaluator implements EvalOperator.ExpressionEvaluator {
28+
// Sink for per-row evaluation problems; built from the Source passed to the constructor.
private final Warnings warnings;
29+
30+
// Child evaluator for the input string argument; its result is cast to BytesRefBlock in eval(Page).
private final EvalOperator.ExpressionEvaluator str;
31+
32+
// Child evaluator for the substring argument; its result is cast to BytesRefBlock in eval(Page).
private final EvalOperator.ExpressionEvaluator substr;
33+
34+
// Child evaluator for the start argument; its result is cast to IntBlock in eval(Page).
private final EvalOperator.ExpressionEvaluator start;
35+
36+
// Supplies the block factory used to allocate the result builders.
private final DriverContext driverContext;
37+
38+
/**
 * Creates an evaluator over three child evaluators.
 *
 * @param source used only to construct the {@link Warnings} instance
 * @param str evaluator producing the input string
 * @param substr evaluator producing the substring to locate
 * @param start evaluator producing the start argument
 * @param driverContext context whose block factory allocates result builders
 */
public LocateEvaluator(Source source, EvalOperator.ExpressionEvaluator str,
39+
EvalOperator.ExpressionEvaluator substr, EvalOperator.ExpressionEvaluator start,
40+
DriverContext driverContext) {
41+
this.warnings = new Warnings(source);
42+
this.str = str;
43+
this.substr = substr;
44+
this.start = start;
45+
this.driverContext = driverContext;
46+
}
47+
48+
/**
 * Evaluates the three child evaluators against {@code page} and computes one result per position.
 * The three intermediate blocks are closed by the nested try-with-resources statements.
 * The vector overload is used only when all three intermediate blocks expose a vector;
 * otherwise the block overload is used.
 *
 * @param page the page handed to each child evaluator; its position count sizes the result
 * @return the block produced by one of the two overloads below
 */
@Override
49+
public Block eval(Page page) {
50+
try (BytesRefBlock strBlock = (BytesRefBlock) str.eval(page)) {
51+
try (BytesRefBlock substrBlock = (BytesRefBlock) substr.eval(page)) {
52+
try (IntBlock startBlock = (IntBlock) start.eval(page)) {
53+
// Any argument whose asVector() is null forces the block path for all three arguments.
BytesRefVector strVector = strBlock.asVector();
54+
if (strVector == null) {
55+
return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
56+
}
57+
BytesRefVector substrVector = substrBlock.asVector();
58+
if (substrVector == null) {
59+
return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
60+
}
61+
IntVector startVector = startBlock.asVector();
62+
if (startVector == null) {
63+
return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
64+
}
65+
// All three are vectors: use the vector overload and wrap its result as a block.
return eval(page.getPositionCount(), strVector, substrVector, startVector).asBlock();
66+
}
67+
}
68+
}
69+
}
70+
71+
/**
 * Block path. For each position, appends null when any argument is null or does not hold
 * exactly one value; a warning is registered only when the value count is greater than one.
 * Otherwise appends the result of {@code Locate.process} on the single values.
 *
 * @param positionCount number of positions to evaluate
 * @param strBlock input string values
 * @param substrBlock substring values
 * @param startBlock start values
 * @return the built int block
 */
public IntBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock substrBlock,
72+
IntBlock startBlock) {
73+
try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) {
74+
// Scratch objects handed to getBytesRef; presumably reused to avoid a new BytesRef per position.
BytesRef strScratch = new BytesRef();
75+
BytesRef substrScratch = new BytesRef();
76+
position: for (int p = 0; p < positionCount; p++) {
77+
// The same null / value-count check is applied to str, then substr, then start.
if (strBlock.isNull(p)) {
78+
result.appendNull();
79+
continue position;
80+
}
81+
if (strBlock.getValueCount(p) != 1) {
82+
if (strBlock.getValueCount(p) > 1) {
83+
warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
84+
}
85+
result.appendNull();
86+
continue position;
87+
}
88+
if (substrBlock.isNull(p)) {
89+
result.appendNull();
90+
continue position;
91+
}
92+
if (substrBlock.getValueCount(p) != 1) {
93+
if (substrBlock.getValueCount(p) > 1) {
94+
warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
95+
}
96+
result.appendNull();
97+
continue position;
98+
}
99+
if (startBlock.isNull(p)) {
100+
result.appendNull();
101+
continue position;
102+
}
103+
if (startBlock.getValueCount(p) != 1) {
104+
if (startBlock.getValueCount(p) > 1) {
105+
warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
106+
}
107+
result.appendNull();
108+
continue position;
109+
}
110+
// Every argument holds exactly one value here; read each at its first value index.
result.appendInt(Locate.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), substrBlock.getBytesRef(substrBlock.getFirstValueIndex(p), substrScratch), startBlock.getInt(startBlock.getFirstValueIndex(p))));
111+
}
112+
return result.build();
113+
}
114+
}
115+
116+
/**
 * Vector path. Calls {@code Locate.process} for every position with no null or
 * value-count checks and no warnings.
 *
 * @param positionCount number of positions to evaluate
 * @param strVector input string values
 * @param substrVector substring values
 * @param startVector start values
 * @return the built int vector
 */
public IntVector eval(int positionCount, BytesRefVector strVector, BytesRefVector substrVector,
117+
IntVector startVector) {
118+
try(IntVector.Builder result = driverContext.blockFactory().newIntVectorBuilder(positionCount)) {
119+
BytesRef strScratch = new BytesRef();
120+
BytesRef substrScratch = new BytesRef();
121+
// The position label is not referenced inside this loop.
position: for (int p = 0; p < positionCount; p++) {
122+
result.appendInt(Locate.process(strVector.getBytesRef(p, strScratch), substrVector.getBytesRef(p, substrScratch), startVector.getInt(p)));
123+
}
124+
return result.build();
125+
}
126+
}
127+
128+
// Renders the evaluator name followed by the three child evaluators.
@Override
129+
public String toString() {
130+
return "LocateEvaluator[" + "str=" + str + ", substr=" + substr + ", start=" + start + "]";
131+
}
132+
133+
// Closes the three child evaluators; warnings and driverContext are not closed here.
@Override
134+
public void close() {
135+
Releasables.closeExpectNoException(str, substr, start);
136+
}
137+
138+
// Factory holding the Source and one child factory per argument; get() builds a LocateEvaluator from them.
static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
139+
private final Source source;
140+
141+
private final EvalOperator.ExpressionEvaluator.Factory str;
142+
143+
private final EvalOperator.ExpressionEvaluator.Factory substr;
144+
145+
private final EvalOperator.ExpressionEvaluator.Factory start;
146+
147+
public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str,
148+
EvalOperator.ExpressionEvaluator.Factory substr,
149+
EvalOperator.ExpressionEvaluator.Factory start) {
150+
this.source = source;
151+
this.str = str;
152+
this.substr = substr;
153+
this.start = start;
154+
}
155+
156+
// Instantiates each child evaluator with the given context, then passes the same context to the new evaluator.
@Override
157+
public LocateEvaluator get(DriverContext context) {
158+
return new LocateEvaluator(source, str.get(context), substr.get(context), start.get(context), context);
159+
}
160+
161+
// Uses the same "LocateEvaluator[...]" format as the evaluator's own toString, printing the child factories.
@Override
162+
public String toString() {
163+
return "LocateEvaluator[" + "str=" + str + ", substr=" + substr + ", start=" + start + "]";
164+
}
165+
}
166+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
9191
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
9292
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
93+
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate;
9394
import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
9495
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace;
9596
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Right;
@@ -174,7 +175,8 @@ private FunctionDefinition[][] functions() {
174175
def(StartsWith.class, StartsWith::new, "starts_with"),
175176
def(EndsWith.class, EndsWith::new, "ends_with"),
176177
def(ToLower.class, ToLower::new, "to_lower"),
177-
def(ToUpper.class, ToUpper::new, "to_upper") },
178+
def(ToUpper.class, ToUpper::new, "to_upper"),
179+
def(Locate.class, Locate::new, "locate") },
178180
// date
179181
new FunctionDefinition[] {
180182
def(DateDiff.class, DateDiff::new, "date_diff"),

0 commit comments

Comments
 (0)