Skip to content

Commit ccc0108

Browse files
authored
EQL: Deal with internally created IN in a different way for EQL (#132167) (#132392)
1 parent b8e9b95 commit ccc0108

File tree

8 files changed

+409
-63
lines changed

8 files changed

+409
-63
lines changed

docs/changelog/132167.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 132167
2+
summary: Deal with internally created IN in a different way for EQL
3+
area: EQL
4+
type: bug
5+
issues:
6+
- 118621

x-pack/plugin/eql/qa/common/build.gradle

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,10 @@ dependencies {
88
// TOML parser for EqlActionIT tests
99
api 'io.ous:jtoml:2.0.0'
1010
}
11+
12+
tasks.register("loadTestData", JavaExec) {
13+
group = "Execution"
14+
description = "Loads EQL Spec Tests data on a running stand-alone instance"
15+
classpath = sourceSets.main.runtimeClasspath
16+
mainClass = "org.elasticsearch.test.eql.DataLoader"
17+
}

x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/DataLoader.java

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -76,47 +76,69 @@ private static Map<String, String[]> getReplacementPatterns() {
7676
public static void main(String[] args) throws IOException {
7777
main = true;
7878
try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200)).build()) {
79-
loadDatasetIntoEs(client, DataLoader::createParser);
79+
loadDatasetIntoEsWithIndexCreator(client, DataLoader::createParser, (restClient, indexName, indexMapping) -> {
80+
// don't use ESRestTestCase methods here or, if you do, test running the main method before making the change
81+
StringBuilder jsonBody = new StringBuilder("{");
82+
jsonBody.append("\"settings\":{\"number_of_shards\":1},");
83+
jsonBody.append("\"mappings\":");
84+
jsonBody.append(indexMapping);
85+
jsonBody.append("}");
86+
87+
Request request = new Request("PUT", "/" + indexName);
88+
request.setJsonEntity(jsonBody.toString());
89+
restClient.performRequest(request);
90+
});
8091
}
8192
}
8293

8394
public static void loadDatasetIntoEs(RestClient client, CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p)
8495
throws IOException {
96+
loadDatasetIntoEsWithIndexCreator(client, p, (restClient, indexName, indexMapping) -> {
97+
ESRestTestCase.createIndex(restClient, indexName, Settings.builder().put("number_of_shards", 1).build(), indexMapping, null);
98+
});
99+
}
100+
101+
private static void loadDatasetIntoEsWithIndexCreator(
102+
RestClient client,
103+
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p,
104+
IndexCreator indexCreator
105+
) throws IOException {
85106

86107
//
87108
// Main Index
88109
//
89-
load(client, TEST_INDEX, null, DataLoader::timestampToUnixMillis, p);
110+
load(client, TEST_INDEX, null, DataLoader::timestampToUnixMillis, p, indexCreator);
90111
//
91112
// Aux Index
92113
//
93-
load(client, TEST_EXTRA_INDEX, null, null, p);
114+
load(client, TEST_EXTRA_INDEX, null, null, p, indexCreator);
94115
//
95116
// Date_Nanos index
96117
//
97118
// The data for this index is loaded from the same endgame-140.data sample, only having the mapping for @timestamp changed: the
98119
// chosen Windows filetime timestamps (2017+) can coincidentally also be readily used as nano-resolution unix timestamps (1973+).
99120
// There are mixed values with and without nanos precision so that the filtering is properly tested for both cases.
100-
load(client, TEST_NANOS_INDEX, TEST_INDEX, DataLoader::timestampToUnixNanos, p);
101-
load(client, TEST_SAMPLE, null, null, p);
121+
load(client, TEST_NANOS_INDEX, TEST_INDEX, DataLoader::timestampToUnixNanos, p, indexCreator);
122+
load(client, TEST_SAMPLE, null, null, p, indexCreator);
102123
//
103124
// missing_events index
104125
//
105-
load(client, TEST_MISSING_EVENTS_INDEX, null, null, p);
106-
load(client, TEST_SAMPLE_MULTI, null, null, p);
126+
load(client, TEST_MISSING_EVENTS_INDEX, null, null, p, indexCreator);
127+
load(client, TEST_SAMPLE_MULTI, null, null, p, indexCreator);
107128
//
108129
// index with a runtime field ("broken", type long) that causes shard failures.
109130
// the rest of the mapping is the same as TEST_INDEX
110131
//
111-
load(client, TEST_SHARD_FAILURES_INDEX, null, DataLoader::timestampToUnixMillis, p);
132+
load(client, TEST_SHARD_FAILURES_INDEX, null, DataLoader::timestampToUnixMillis, p, indexCreator);
112133
}
113134

114135
private static void load(
115136
RestClient client,
116137
String indexNames,
117138
String dataName,
118139
Consumer<Map<String, Object>> datasetTransform,
119-
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p
140+
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p,
141+
IndexCreator indexCreator
120142
) throws IOException {
121143
String[] splitNames = indexNames.split(",");
122144
for (String indexName : splitNames) {
@@ -130,15 +152,11 @@ private static void load(
130152
if (data == null) {
131153
throw new IllegalArgumentException("Cannot find resource " + name);
132154
}
133-
createTestIndex(client, indexName, readMapping(mapping));
155+
indexCreator.createIndex(client, indexName, readMapping(mapping));
134156
loadData(client, indexName, datasetTransform, data, p);
135157
}
136158
}
137159

138-
private static void createTestIndex(RestClient client, String indexName, String mapping) throws IOException {
139-
ESRestTestCase.createIndex(client, indexName, Settings.builder().put("number_of_shards", 1).build(), mapping, null);
140-
}
141-
142160
/**
143161
* Reads the mapping file, ignoring comments and replacing placeholders for random types.
144162
*/
@@ -236,4 +254,8 @@ private static XContentParser createParser(XContent xContent, InputStream data)
236254
NamedXContentRegistry contentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables());
237255
return xContent.createParser(contentRegistry, LoggingDeprecationHandler.INSTANCE, data);
238256
}
257+
258+
private interface IndexCreator {
259+
void createIndex(RestClient client, String indexName, String mapping) throws IOException;
260+
}
239261
}

x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/optimizer/Optimizer.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BinaryComparisonSimplification;
4545
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanFunctionEqualsElimination;
4646
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanSimplification;
47-
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.CombineDisjunctionsToIn;
4847
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ConstantFolding;
4948
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.LiteralsOnTheRight;
5049
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.OptimizerRule;
@@ -252,6 +251,14 @@ protected Expression maybeSimplifyNegatable(Expression e) {
252251

253252
}
254253

254+
static class CombineDisjunctionsToIn extends org.elasticsearch.xpack.ql.optimizer.OptimizerRules.CombineDisjunctionsToIn {
255+
256+
@Override
257+
protected boolean shouldValidateIn() {
258+
return true;
259+
}
260+
}
261+
255262
static class PruneFilters extends org.elasticsearch.xpack.ql.optimizer.OptimizerRules.PruneFilters {
256263

257264
@Override

x-pack/plugin/eql/src/test/resources/querytranslator_tests.txt

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,90 @@ process where process_name in ("python.exe", "SMSS.exe", "explorer.exe")
123123
"terms":{"process_name":["python.exe","SMSS.exe","explorer.exe"],
124124
;
125125

126+
mutipleOrEquals_As_InTranslation1
127+
process where process_name == "python.exe" or process_name == "SMSS.exe" or process_name == "explorer.exe"
128+
;
129+
"terms":{"process_name":["python.exe","SMSS.exe","explorer.exe"],
130+
;
131+
132+
multipleOrAndEquals_As_InTranslation
133+
process where process_name == "python.exe" and process_name == "SMSS.exe" or process_name == "explorer.exe" or process_name == "test.exe"
134+
;
135+
{"bool":{"should":[{"bool":{"must":[{"term":{"process_name":{"value":"python.exe"}}},{"term":{"process_name":{"value":"SMSS.exe"}}}],"boost":1.0}},{"terms":{"process_name":["explorer.exe","test.exe"],"boost":1.0}}],"boost":1.0}}
136+
;
137+
138+
mutipleOrEquals_As_InTranslation2
139+
process where source_address == "123.12.1.1" or (opcode == 123 or opcode == 127)
140+
;
141+
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"terms":{"opcode":[123,127],"boost":1.0}}],"boost":1.0}}
142+
;
143+
144+
mutipleOrEquals_As_InTranslation3
145+
process where (source_address == "123.12.1.1" or source_address == "127.0.0.1") and (opcode == 123 or opcode == 127)
146+
;
147+
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"127.0.0.1"}}}],"boost":1.0}},{"terms":{"opcode":[123,127],"boost":1.0}}
148+
;
149+
150+
mutipleOrEquals_As_InTranslation4
151+
process where (source_address == "123.12.1.1" or source_address == "127.0.0.1") and (opcode == 123 or opcode == 127)
152+
;
153+
"must":[{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"127.0.0.1"}}}],"boost":1.0}},{"terms":{"opcode":[123,127],"boost":1.0}},{"term":{"event.category":{"value":"process"}}}]
154+
;
155+
156+
multipleOrIncompatibleTypes1
157+
process where process_name == "python.exe" or process_name == 2 or process_name == "3"
158+
;
159+
{"bool":{"should":[{"term":{"process_name":{"value":"python.exe"}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":"3"}}}],"boost":1.0}}
160+
;
161+
162+
multipleOrIncompatibleTypes2
163+
process where process_name == "1" or process_name == 2 or process_name == "3"
164+
;
165+
{"bool":{"should":[{"term":{"process_name":{"value":"1"}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":"3"}}}],"boost":1.0}}
166+
;
167+
168+
multipleOrIncompatibleTypes3
169+
process where process_name == 1.2 or process_name == 2 or process_name == "3"
170+
;
171+
{"bool":{"should":[{"term":{"process_name":{"value":1.2}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":"3"}}}],"boost":1.0}}
172+
;
173+
174+
// this query as an equivalent with
175+
// process where process_name in (1.2, 2, 3)
176+
// will result in a user error: 1st argument of [process_name in (1.2, 2, 3)] must be [keyword], found value [1.2] type [double]
177+
multipleOrIncompatibleTypes4
178+
process where process_name == 1.2 or process_name == 2 or process_name == 3
179+
;
180+
{"bool":{"should":[{"term":{"process_name":{"value":1.2}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":3}}}],"boost":1.0}}
181+
;
182+
183+
// this query as an equivalent with
184+
// process where source_address in ("123.12.1.1", "123.12.1.2")
185+
// will result in a user error: 1st argument of [source_address in ("123.12.1.1", "123.12.1.2")] must be [ip], found value ["123.12.1.1"] type [keyword]
186+
multipleOrIncompatibleTypes5
187+
process where source_address == "123.12.1.1" or source_address == "123.12.1.2"
188+
;
189+
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"123.12.1.2"}}}],"boost":1.0}}
190+
;
191+
192+
multipleOrIncompatibleTypes6
193+
process where source_address == "123.12.1.1" or source_address == concat("123.12.","1.2")
194+
;
195+
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"123.12.1.2"}}}],"boost":1.0}}
196+
;
197+
198+
multipleOrIncompatibleTypes7
199+
process where source_address == "123.12.1.1" and (source_address == "123.12.1.2" or source_address >= "127.0.0.1")
200+
;
201+
"must":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.2"}}},{"range":{"source_address":{"gte":"127.0.0.1","boost":1.0}}}],"boost":1.0}},{"term":{"event.category":{"value":"process"}}}]
202+
;
203+
204+
multipleOrIncompatibleTypes8
205+
process where source_address == "123.12.1.1" and (source_address == "123.12.1.2" or source_address == "127.0.0.1")
206+
;
207+
"must":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.2"}}},{"term":{"source_address":{"value":"127.0.0.1"}}}],"boost":1.0}},{"term":{"event.category":{"value":"process"}}}]
208+
;
209+
126210
inFilterWithScripting
127211
process where substring(command_line, 5) in ("test*","best")
128212
;

x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/predicate/operator/comparison/In.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ protected TypeResolution resolveType() {
177177
return super.resolveType();
178178
}
179179

180+
public TypeResolution validateInTypes() {
181+
return resolveType();
182+
}
183+
180184
@Override
181185
public int hashCode() {
182186
return Objects.hash(value, list);

x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/optimizer/OptimizerRules.java

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,8 +1203,8 @@ private static boolean notEqualsIsRemovableFromConjunction(NotEquals notEquals,
12031203
* 2. a == 1 OR a IN (2) becomes a IN (1, 2)
12041204
* 3. a IN (1) OR a IN (2) becomes a IN (1, 2)
12051205
*
1206-
* This rule does NOT check for type compatibility as that phase has been
1207-
* already be verified in the analyzer.
1206+
* By default (see {@link #shouldValidateIn()}), this rule does NOT check for type compatibility as that phase has
1207+
* already been verified in the analyzer, but this behavior can be changed by subclasses.
12081208
*/
12091209
public static class CombineDisjunctionsToIn extends OptimizerExpressionRule<Or> {
12101210
public CombineDisjunctionsToIn() {
@@ -1214,18 +1214,24 @@ public CombineDisjunctionsToIn() {
12141214
@Override
12151215
protected Expression rule(Or or) {
12161216
Expression e = or;
1217-
// look only at equals and In
1217+
// look only at Equals and In
12181218
List<Expression> exps = splitOr(e);
12191219

12201220
Map<Expression, Set<Expression>> found = new LinkedHashMap<>();
1221+
Map<Expression, List<Expression>> originalOrs = new LinkedHashMap<>();
12211222
ZoneId zoneId = null;
12221223
List<Expression> ors = new LinkedList<>();
12231224

12241225
for (Expression exp : exps) {
12251226
if (exp instanceof Equals eq) {
1226-
// consider only equals against foldables
1227+
// consider only Equals against foldables
12271228
if (eq.right().foldable()) {
12281229
found.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(eq.right());
1230+
if (shouldValidateIn()) {
1231+
// in case there is an optimized In being built and its validation fails, rebuild the original ORs
1232+
// so, keep around the original Expressions
1233+
originalOrs.computeIfAbsent(eq.left(), k -> new ArrayList<>()).add(eq);
1234+
}
12291235
} else {
12301236
ors.add(exp);
12311237
}
@@ -1234,6 +1240,11 @@ protected Expression rule(Or or) {
12341240
}
12351241
} else if (exp instanceof In in) {
12361242
found.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(in.list());
1243+
if (shouldValidateIn()) {
1244+
// in case there is an optimized In being built and its validation fails, rebuild the original ORs
1245+
// so, keep around the original Expressions
1246+
originalOrs.computeIfAbsent(in.value(), k -> new ArrayList<>()).add(in);
1247+
}
12371248
if (zoneId == null) {
12381249
zoneId = in.zoneId();
12391250
}
@@ -1243,11 +1254,31 @@ protected Expression rule(Or or) {
12431254
}
12441255

12451256
if (found.isEmpty() == false) {
1246-
// combine equals alongside the existing ors
1257+
// combine Equals alongside the existing ORs
12471258
final ZoneId finalZoneId = zoneId;
1248-
found.forEach(
1249-
(k, v) -> { ors.add(v.size() == 1 ? createEquals(k, v, finalZoneId) : createIn(k, new ArrayList<>(v), finalZoneId)); }
1250-
);
1259+
found.forEach((k, v) -> {
1260+
if (v.size() == 1) {
1261+
ors.add(createEquals(k, v.iterator().next(), finalZoneId));
1262+
} else {
1263+
In in = createIn(k, new ArrayList<>(v), finalZoneId);
1264+
// IN has its own particularities when it comes to type resolution and not all implementations
1265+
// double check the validity of an internally created IN (like the one created here). EQL is one where the IN
1266+
// implementation is like this mechanism here has been specifically created for it
1267+
if (shouldValidateIn()) {
1268+
Expression.TypeResolution resolution = in.validateInTypes();
1269+
if (resolution.unresolved()) {
1270+
// if the internally created In is not valid, fall back to the original ORs
1271+
assert originalOrs.containsKey(k);
1272+
assert originalOrs.get(k).isEmpty() == false;
1273+
ors.add(combineOr(originalOrs.get(k)));
1274+
} else {
1275+
ors.add(in);
1276+
}
1277+
} else {
1278+
ors.add(in);
1279+
}
1280+
}
1281+
});
12511282

12521283
Expression combineOr = combineOr(ors);
12531284
// check the result semantically since the result might different in order
@@ -1261,13 +1292,17 @@ protected Expression rule(Or or) {
12611292
return e;
12621293
}
12631294

1264-
protected Equals createEquals(Expression k, Set<Expression> v, ZoneId finalZoneId) {
1265-
return new Equals(k.source(), k, v.iterator().next(), finalZoneId);
1266-
}
1267-
12681295
protected In createIn(Expression key, List<Expression> values, ZoneId zoneId) {
12691296
return new In(key.source(), key, values, zoneId);
12701297
}
1298+
1299+
protected boolean shouldValidateIn() {
1300+
return false;
1301+
}
1302+
1303+
private Equals createEquals(Expression key, Expression value, ZoneId finalZoneId) {
1304+
return new Equals(key.source(), key, value, finalZoneId);
1305+
}
12711306
}
12721307

12731308
public static class PushDownAndCombineFilters extends OptimizerRule<Filter> {

0 commit comments

Comments
 (0)