Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127563.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127563
summary: "ESQL: Avoid unintended attribute removal"
area: ES|QL
type: bug
issues:
- 127468
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
a:keyword | b:keyword | d:keyword
b c d x | b | d
;

avoidAttributesRemoval
// https://github.com/elastic/elasticsearch/issues/127468
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| dissect type "%{b}"
| stats x = max(b)
| keep x
;

x:keyword
Success
;

avoidAttributesRemoval2
// https://github.com/elastic/elasticsearch/issues/127468
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
FROM sample_data, employees
| EVAL client_ip = client_ip::keyword
| RENAME languages AS language_code
| LOOKUP JOIN clientips_lookup ON client_ip
| EVAL type = 1::keyword
| EVAL type = 2
| LOOKUP JOIN message_types_lookup ON message
| LOOKUP JOIN languages_lookup ON language_code
| DISSECT type "%{type_as_text}"
| KEEP message
| WHERE message IS NOT NULL
| SORT message DESC
| LIMIT 1
;

message:keyword
Disconnected
;
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
text:integer | int:integer | description:keyword
123 | 5 | abc
;

avoidAttributesRemoval
// https://github.com/elastic/elasticsearch/issues/127468
required_capability: union_types
required_capability: join_lookup_v12
required_capability: keep_regex_extract_attributes
from multivalue_points,h*,messa*
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
| rename message as message
| lookup join message_types_lookup on message
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
| limit 5845
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
| grok type "%{WORD:GknCxQFo}"
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
| drop HvTQdOqFajpH, `location`, centroid
| mv_expand GknCxQFo
| limit 410
| limit 3815
| rename `id` AS `GknCxQFo`
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
| stats BkQXJRMeAM = min(GknCxQFo)
| keep `BkQXJRMeAM`
;

BkQXJRMeAM:long
4325287503714500302
;

Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,13 @@ public enum Cap {
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467 </a>
*/
FIX_JOIN_MASKING_REGEX_EXTRACT;
FIX_JOIN_MASKING_REGEX_EXTRACT,

/**
* Avid GROK and DISSECT attributes being removed when resolving fields.
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
*/
KEEP_REGEX_EXTRACT_ATTRIBUTES;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
//
// and ips_policy enriches the results with the same name ip field),
// these aliases should be kept in the list of fields.
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) {
canRemoveAliases[0] = false;
}
if (canRemoveAliases[0]) {
Expand Down Expand Up @@ -712,7 +712,8 @@ private static boolean couldOverrideAliases(LogicalPlan p) {
|| p instanceof Project
|| p instanceof RegexExtract
|| p instanceof Rename
|| p instanceof TopN) == false;
|| p instanceof TopN
|| p instanceof UnresolvedRelation) == false;
}

private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,20 @@ public void testEnrichEval() {
| eval language = concat(x, "-", lang)
| keep emp_no, x, lang, language
| sort emp_no desc | limit 3""",
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
Set.of(
"emp_no",
"x",
"lang",
"language",
"language_name",
"languages",
"x.*",
"language_name.*",
"languages.*",
"emp_no.*",
"lang.*",
"language.*"
)
);
}

Expand Down Expand Up @@ -1355,7 +1368,7 @@ public void testAvoidGrokAttributesRemoval() {
| grok type "%{WORD:b}"
| stats x = max(b)
| keep x""", Set.of());
assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*")));
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
}

public void testAvoidGrokAttributesRemoval2() {
Expand Down Expand Up @@ -1388,6 +1401,60 @@ public void testAvoidGrokAttributesRemoval3() {

}

/**
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
*/
public void testAvoidGrokAttributesRemoval4() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
Set<String> fieldNames = fieldNames("""
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| grok type "%{WORD:b}"
| stats x = max(b)
| keep x""", Set.of());
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
}

/**
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
*/
public void testAvoidGrokAttributesRemoval5() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
Set<String> fieldNames = fieldNames("""
FROM sample_data, employees
| EVAL client_ip = client_ip::keyword
| RENAME languages AS language_code
| LOOKUP JOIN clientips_lookup ON client_ip
| EVAL type = 1::keyword
| EVAL type = 2
| LOOKUP JOIN message_types_lookup ON message
| LOOKUP JOIN languages_lookup ON language_code
| DISSECT type "%{type_as_text}"
| KEEP message
| WHERE message IS NOT NULL
| SORT message DESC
| LIMIT 1""", Set.of());
assertThat(
fieldNames,
equalTo(
Set.of(
"message",
"type",
"languages",
"client_ip",
"language_code",
"language_code.*",
"client_ip.*",
"message.*",
"type.*",
"languages.*"
)
)
);
}

public void testEnrichOnDefaultField() {
Set<String> fieldNames = fieldNames("""
from employees
Expand Down