Skip to content

Commit 6030456

Browse files
astefan and kanoshiou authored
[8.19] ESQL: Avoid unintended attribute removal (elastic#127563) (elastic#128231)
* ESQL: Avoid unintended attribute removal (elastic#127563) --------- Co-authored-by: Andrei Stefan <[email protected]> * Checkstyle * Checkstyle again * Slightly change the test because 8.19 has fewer indices in the index pattern used (9.x also has host_inventory index). --------- Co-authored-by: kanoshiou <[email protected]>
1 parent f1cd6d7 commit 6030456

File tree

6 files changed

+160
-5
lines changed

6 files changed

+160
-5
lines changed

docs/changelog/127563.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127563
2+
summary: "ESQL: Avoid unintended attribute removal"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 127468

x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
331331
a:keyword | b:keyword | d:keyword
332332
b c d x | b | d
333333
;
334+
335+
avoidAttributesRemoval
336+
// https://github.com/elastic/elasticsearch/issues/127468
337+
required_capability: keep_regex_extract_attributes
338+
required_capability: join_lookup_v12
339+
from message_types
340+
| eval type = 1
341+
| lookup join message_types_lookup on message
342+
| drop message
343+
| dissect type "%{b}"
344+
| stats x = max(b)
345+
| keep x
346+
;
347+
348+
x:keyword
349+
Success
350+
;
351+
352+
avoidAttributesRemoval2
353+
// https://github.com/elastic/elasticsearch/issues/127468
354+
required_capability: keep_regex_extract_attributes
355+
required_capability: join_lookup_v12
356+
FROM sample_data, employees
357+
| EVAL client_ip = client_ip::keyword
358+
| RENAME languages AS language_code
359+
| LOOKUP JOIN clientips_lookup ON client_ip
360+
| EVAL type = 1::keyword
361+
| EVAL type = 2
362+
| LOOKUP JOIN message_types_lookup ON message
363+
| LOOKUP JOIN languages_lookup ON language_code
364+
| DISSECT type "%{type_as_text}"
365+
| KEEP message
366+
| WHERE message IS NOT NULL
367+
| SORT message DESC
368+
| LIMIT 1
369+
;
370+
371+
message:keyword
372+
Disconnected
373+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
297297
text:integer | int:integer | description:keyword
298298
123 | 5 | abc
299299
;
300+
301+
avoidAttributesRemoval
302+
// https://github.com/elastic/elasticsearch/issues/127468
303+
required_capability: union_types
304+
required_capability: join_lookup_v12
305+
required_capability: keep_regex_extract_attributes
306+
from multivalue_points,h*,messa*
307+
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
308+
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
309+
| rename message as message
310+
| lookup join message_types_lookup on message
311+
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
312+
| limit 5845
313+
| drop `subset`, ip*, `card`, `within`, description, `aecuvjTkgZza`, `ip0`, height_range, DsxMWtGL, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
314+
| grok type "%{WORD:GknCxQFo}"
315+
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
316+
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
317+
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
318+
| drop HvTQdOqFajpH, `location`, centroid
319+
| mv_expand GknCxQFo
320+
| limit 410
321+
| limit 3815
322+
| rename `id` AS `GknCxQFo`
323+
| grok host_group "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
324+
| stats BkQXJRMeAM = min(GknCxQFo)
325+
| keep `BkQXJRMeAM`
326+
;
327+
328+
BkQXJRMeAM:long
329+
4325287503714500302
330+
;
331+

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,14 @@ public enum Cap {
860860
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
861861
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467</a>
862862
*/
863-
FIX_JOIN_MASKING_REGEX_EXTRACT;
863+
FIX_JOIN_MASKING_REGEX_EXTRACT,
864+
865+
/**
866+
* Avoid GROK and DISSECT attributes being removed when resolving fields.
867+
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values,
868+
* found expression [type] type [INTEGER] #127468 </a>
869+
*/
870+
KEEP_REGEX_EXTRACT_ATTRIBUTES;
864871

865872
private final boolean enabled;
866873

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
620620
//
621621
// and ips_policy enriches the results with the same name ip field),
622622
// these aliases should be kept in the list of fields.
623-
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
623+
if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) {
624624
canRemoveAliases[0] = false;
625625
}
626626
if (canRemoveAliases[0]) {
@@ -687,7 +687,8 @@ private static boolean couldOverrideAliases(LogicalPlan p) {
687687
|| p instanceof Project
688688
|| p instanceof RegexExtract
689689
|| p instanceof Rename
690-
|| p instanceof TopN) == false;
690+
|| p instanceof TopN
691+
|| p instanceof UnresolvedRelation) == false;
691692
}
692693

693694
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,20 @@ public void testEnrichEval() {
604604
| eval language = concat(x, "-", lang)
605605
| keep emp_no, x, lang, language
606606
| sort emp_no desc | limit 3""",
607-
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
607+
Set.of(
608+
"emp_no",
609+
"x",
610+
"lang",
611+
"language",
612+
"language_name",
613+
"languages",
614+
"x.*",
615+
"language_name.*",
616+
"languages.*",
617+
"emp_no.*",
618+
"lang.*",
619+
"language.*"
620+
)
608621
);
609622
}
610623

@@ -1355,7 +1368,7 @@ public void testAvoidGrokAttributesRemoval() {
13551368
| grok type "%{WORD:b}"
13561369
| stats x = max(b)
13571370
| keep x""", Set.of());
1358-
assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*")));
1371+
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
13591372
}
13601373

13611374
public void testAvoidGrokAttributesRemoval2() {
@@ -1388,6 +1401,62 @@ public void testAvoidGrokAttributesRemoval3() {
13881401

13891402
}
13901403

1404+
/**
1405+
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values,
1406+
* found expression [type] type [INTEGER]</a>
1407+
*/
1408+
public void testAvoidGrokAttributesRemoval4() {
1409+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1410+
Set<String> fieldNames = fieldNames("""
1411+
from message_types
1412+
| eval type = 1
1413+
| lookup join message_types_lookup on message
1414+
| drop message
1415+
| grok type "%{WORD:b}"
1416+
| stats x = max(b)
1417+
| keep x""", Set.of());
1418+
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
1419+
}
1420+
1421+
/**
1422+
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values,
1423+
* found expression [type] type [INTEGER]</a>
1424+
*/
1425+
public void testAvoidGrokAttributesRemoval5() {
1426+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1427+
Set<String> fieldNames = fieldNames("""
1428+
FROM sample_data, employees
1429+
| EVAL client_ip = client_ip::keyword
1430+
| RENAME languages AS language_code
1431+
| LOOKUP JOIN clientips_lookup ON client_ip
1432+
| EVAL type = 1::keyword
1433+
| EVAL type = 2
1434+
| LOOKUP JOIN message_types_lookup ON message
1435+
| LOOKUP JOIN languages_lookup ON language_code
1436+
| DISSECT type "%{type_as_text}"
1437+
| KEEP message
1438+
| WHERE message IS NOT NULL
1439+
| SORT message DESC
1440+
| LIMIT 1""", Set.of());
1441+
assertThat(
1442+
fieldNames,
1443+
equalTo(
1444+
Set.of(
1445+
"message",
1446+
"type",
1447+
"languages",
1448+
"client_ip",
1449+
"language_code",
1450+
"language_code.*",
1451+
"client_ip.*",
1452+
"message.*",
1453+
"type.*",
1454+
"languages.*"
1455+
)
1456+
)
1457+
);
1458+
}
1459+
13911460
public void testEnrichOnDefaultField() {
13921461
Set<String> fieldNames = fieldNames("""
13931462
from employees

0 commit comments

Comments
 (0)