Skip to content

Commit 88b61e3

Browse files
kanoshiouastefan
andauthored
ESQL: Avoid unintended attribute removal (#127563)
--------- Co-authored-by: Andrei Stefan <[email protected]>
1 parent 288f47e commit 88b61e3

File tree

7 files changed

+157
-6
lines changed

7 files changed

+157
-6
lines changed

docs/changelog/127563.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127563
2+
summary: "ESQL: Avoid unintended attribute removal"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 127468

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
5151
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
5252
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
5353
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
54-
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
5554
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
5655
);
5756

x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
331331
a:keyword | b:keyword | d:keyword
332332
b c d x | b | d
333333
;
334+
335+
avoidAttributesRemoval
336+
// https://github.com/elastic/elasticsearch/issues/127468
337+
required_capability: keep_regex_extract_attributes
338+
required_capability: join_lookup_v12
339+
from message_types
340+
| eval type = 1
341+
| lookup join message_types_lookup on message
342+
| drop message
343+
| dissect type "%{b}"
344+
| stats x = max(b)
345+
| keep x
346+
;
347+
348+
x:keyword
349+
Success
350+
;
351+
352+
avoidAttributesRemoval2
353+
// https://github.com/elastic/elasticsearch/issues/127468
354+
required_capability: keep_regex_extract_attributes
355+
required_capability: join_lookup_v12
356+
FROM sample_data, employees
357+
| EVAL client_ip = client_ip::keyword
358+
| RENAME languages AS language_code
359+
| LOOKUP JOIN clientips_lookup ON client_ip
360+
| EVAL type = 1::keyword
361+
| EVAL type = 2
362+
| LOOKUP JOIN message_types_lookup ON message
363+
| LOOKUP JOIN languages_lookup ON language_code
364+
| DISSECT type "%{type_as_text}"
365+
| KEEP message
366+
| WHERE message IS NOT NULL
367+
| SORT message DESC
368+
| LIMIT 1
369+
;
370+
371+
message:keyword
372+
Disconnected
373+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
297297
text:integer | int:integer | description:keyword
298298
123 | 5 | abc
299299
;
300+
301+
avoidAttributesRemoval
302+
// https://github.com/elastic/elasticsearch/issues/127468
303+
required_capability: union_types
304+
required_capability: join_lookup_v12
305+
required_capability: keep_regex_extract_attributes
306+
from multivalue_points,h*,messa*
307+
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
308+
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
309+
| rename message as message
310+
| lookup join message_types_lookup on message
311+
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
312+
| limit 5845
313+
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
314+
| grok type "%{WORD:GknCxQFo}"
315+
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
316+
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
317+
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
318+
| drop HvTQdOqFajpH, `location`, centroid
319+
| mv_expand GknCxQFo
320+
| limit 410
321+
| limit 3815
322+
| rename `id` AS `GknCxQFo`
323+
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
324+
| stats BkQXJRMeAM = min(GknCxQFo)
325+
| keep `BkQXJRMeAM`
326+
;
327+
328+
BkQXJRMeAM:long
329+
4325287503714500302
330+
;
331+

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1091,7 +1091,13 @@ public enum Cap {
10911091
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
10921092
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467 </a>
10931093
*/
1094-
FIX_JOIN_MASKING_REGEX_EXTRACT;
1094+
FIX_JOIN_MASKING_REGEX_EXTRACT,
1095+
1096+
/**
1097+
* Avid GROK and DISSECT attributes being removed when resolving fields.
1098+
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
1099+
*/
1100+
KEEP_REGEX_EXTRACT_ATTRIBUTES;
10951101

10961102
private final boolean enabled;
10971103

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
656656
//
657657
// and ips_policy enriches the results with the same name ip field),
658658
// these aliases should be kept in the list of fields.
659-
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
659+
if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) {
660660
canRemoveAliases[0] = false;
661661
}
662662
if (canRemoveAliases[0]) {
@@ -726,7 +726,8 @@ private static boolean couldOverrideAliases(LogicalPlan p) {
726726
|| p instanceof Project
727727
|| p instanceof RegexExtract
728728
|| p instanceof Rename
729-
|| p instanceof TopN) == false;
729+
|| p instanceof TopN
730+
|| p instanceof UnresolvedRelation) == false;
730731
}
731732

732733
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,20 @@ public void testEnrichEval() {
604604
| eval language = concat(x, "-", lang)
605605
| keep emp_no, x, lang, language
606606
| sort emp_no desc | limit 3""",
607-
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
607+
Set.of(
608+
"emp_no",
609+
"x",
610+
"lang",
611+
"language",
612+
"language_name",
613+
"languages",
614+
"x.*",
615+
"language_name.*",
616+
"languages.*",
617+
"emp_no.*",
618+
"lang.*",
619+
"language.*"
620+
)
608621
);
609622
}
610623

@@ -1355,7 +1368,7 @@ public void testAvoidGrokAttributesRemoval() {
13551368
| grok type "%{WORD:b}"
13561369
| stats x = max(b)
13571370
| keep x""", Set.of());
1358-
assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*")));
1371+
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
13591372
}
13601373

13611374
public void testAvoidGrokAttributesRemoval2() {
@@ -1388,6 +1401,60 @@ public void testAvoidGrokAttributesRemoval3() {
13881401

13891402
}
13901403

1404+
/**
1405+
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
1406+
*/
1407+
public void testAvoidGrokAttributesRemoval4() {
1408+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1409+
Set<String> fieldNames = fieldNames("""
1410+
from message_types
1411+
| eval type = 1
1412+
| lookup join message_types_lookup on message
1413+
| drop message
1414+
| grok type "%{WORD:b}"
1415+
| stats x = max(b)
1416+
| keep x""", Set.of());
1417+
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
1418+
}
1419+
1420+
/**
1421+
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
1422+
*/
1423+
public void testAvoidGrokAttributesRemoval5() {
1424+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1425+
Set<String> fieldNames = fieldNames("""
1426+
FROM sample_data, employees
1427+
| EVAL client_ip = client_ip::keyword
1428+
| RENAME languages AS language_code
1429+
| LOOKUP JOIN clientips_lookup ON client_ip
1430+
| EVAL type = 1::keyword
1431+
| EVAL type = 2
1432+
| LOOKUP JOIN message_types_lookup ON message
1433+
| LOOKUP JOIN languages_lookup ON language_code
1434+
| DISSECT type "%{type_as_text}"
1435+
| KEEP message
1436+
| WHERE message IS NOT NULL
1437+
| SORT message DESC
1438+
| LIMIT 1""", Set.of());
1439+
assertThat(
1440+
fieldNames,
1441+
equalTo(
1442+
Set.of(
1443+
"message",
1444+
"type",
1445+
"languages",
1446+
"client_ip",
1447+
"language_code",
1448+
"language_code.*",
1449+
"client_ip.*",
1450+
"message.*",
1451+
"type.*",
1452+
"languages.*"
1453+
)
1454+
)
1455+
);
1456+
}
1457+
13911458
public void testEnrichOnDefaultField() {
13921459
Set<String> fieldNames = fieldNames("""
13931460
from employees

0 commit comments

Comments
 (0)