Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127563.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127563
summary: "ESQL: Avoid unintended attribute removal"
area: ES|QL
type: bug
issues:
- 127468
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
"Unknown column", // https://github.com/elastic/elasticsearch/issues/127467
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
a:keyword | b:keyword | d:keyword
b c d x | b | d
;

avoidAttributesRemoval
// https://github.com/elastic/elasticsearch/issues/127468
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| dissect type "%{b}"
| stats x = max(b)
| keep x
;

x:keyword
Success
;

avoidAttributesRemoval2
// https://github.com/elastic/elasticsearch/issues/127468
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
FROM sample_data, employees
| EVAL client_ip = client_ip::keyword
| RENAME languages AS language_code
| LOOKUP JOIN clientips_lookup ON client_ip
| EVAL type = 1::keyword
| EVAL type = 2
| LOOKUP JOIN message_types_lookup ON message
| LOOKUP JOIN languages_lookup ON language_code
| DISSECT type "%{type_as_text}"
| KEEP message
| WHERE message IS NOT NULL
| SORT message DESC
| LIMIT 1
;

message:keyword
Disconnected
;
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
text:integer | int:integer | description:keyword
123 | 5 | abc
;

avoidAttributesRemoval
// https://github.com/elastic/elasticsearch/issues/127468
required_capability: union_types
required_capability: join_lookup_v12
required_capability: keep_regex_extract_attributes
from multivalue_points,h*,messa*
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
| rename message as message
| lookup join message_types_lookup on message
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
| limit 5845
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
| grok type "%{WORD:GknCxQFo}"
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
| drop HvTQdOqFajpH, `location`, centroid
| mv_expand GknCxQFo
| limit 410
| limit 3815
| rename `id` AS `GknCxQFo`
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
| stats BkQXJRMeAM = min(GknCxQFo)
| keep `BkQXJRMeAM`
;

BkQXJRMeAM:long
4325287503714500302
;

Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,11 @@ public enum Cap {
*/
GROK_DISSECT_MASKING,

/**
* Avid GROK and DISSECT attributes being removed when resolving fields.
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
*/
KEEP_REGEX_EXTRACT_ATTRIBUTES,
/**
* Support for quoting index sources in double quotes.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
var keepJoinRefsBuilder = AttributeSet.builder();
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };
boolean[] canRemoveAliases = new boolean[] { true, true };

parsed.forEachDown(p -> {// go over each plan top-down
if (p instanceof RegexExtract re) { // for Grok and Dissect
Expand Down Expand Up @@ -667,13 +667,22 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
AttributeSet planRefs = p.references();
Set<String> fieldNames = planRefs.names();
p.forEachExpressionDown(Alias.class, alias -> {
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
if (fieldNames.contains(alias.name())) {
return;
canRemoveAliases[1] = true;
// go down each plan and remove aliases until we meet a plan that can override aliases
p.forEachDown(c -> {
if (canRemoveAliases[1] && couldOverrideAliases(c)) {
canRemoveAliases[1] = false;
}
if (canRemoveAliases[1]) {
c.forEachExpression(Alias.class, alias -> {
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
if (fieldNames.contains(alias.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), shadowingRefsBuilder.contains(attr)));
});
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), shadowingRefsBuilder.contains(attr)));
});
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,60 @@ public void testDissectOverwriteName() {
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
}

/**
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
*/
public void testAvoidGrokAttributesRemoval() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
Set<String> fieldNames = fieldNames("""
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| grok type "%{WORD:b}"
| stats x = max(b)
| keep x""", Set.of());
assertThat(fieldNames, equalTo(Set.of("message", "x", "type", "x.*", "message.*", "type.*")));
}

/**
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
*/
public void testAvoidGrokAttributesRemoval2() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
Set<String> fieldNames = fieldNames("""
FROM sample_data, employees
| EVAL client_ip = client_ip::keyword
| RENAME languages AS language_code
| LOOKUP JOIN clientips_lookup ON client_ip
| EVAL type = 1::keyword
| EVAL type = 2
| LOOKUP JOIN message_types_lookup ON message
| LOOKUP JOIN languages_lookup ON language_code
| DISSECT type "%{type_as_text}"
| KEEP message
| WHERE message IS NOT NULL
| SORT message DESC
| LIMIT 1""", Set.of());
assertThat(
fieldNames,
equalTo(
Set.of(
"message",
"type",
"languages",
"client_ip",
"language_code",
"language_code.*",
"client_ip.*",
"message.*",
"type.*",
"languages.*"
)
)
);
}

public void testEnrichOnDefaultField() {
Set<String> fieldNames = fieldNames("""
from employees
Expand Down
Loading