Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127563.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127563
summary: "ESQL: Avoid regex extract attributes removal"
area: ES|QL
type: bug
issues:
- 127468
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
"Unknown column", // https://github.com/elastic/elasticsearch/issues/127467
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,19 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
a:keyword | b:keyword | d:keyword
b c d x | b | d
;

avoidAttributesRemoval
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| dissect type "%{b}"
| stats x = max(b)
| keep x
;

x:keyword
Success
;
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,34 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
text:integer | int:integer | description:keyword
123 | 5 | abc
;

avoidAttributesRemoval
required_capability: union_types
required_capability: join_lookup_v12
required_capability: keep_regex_extract_attributes
from multivalue_points,h*,messa*
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
| rename message as message
| lookup join message_types_lookup on message
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
| limit 5845
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
| grok type "%{WORD:GknCxQFo}"
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
| drop HvTQdOqFajpH, `location`, centroid
| mv_expand GknCxQFo
| limit 410
| limit 3815
| rename `id` AS `GknCxQFo`
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
| stats BkQXJRMeAM = min(GknCxQFo)
| keep `BkQXJRMeAM`
;

BkQXJRMeAM:long
4325287503714500302
;

Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,11 @@ public enum Cap {
*/
GROK_DISSECT_MASKING,

/**
* Avid GROK and DISSECT attributes being removed when resolving fields.
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
*/
KEEP_REGEX_EXTRACT_ATTRIBUTES,
/**
* Support for quoting index sources in double quotes.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
var keepJoinRefsBuilder = AttributeSet.builder();
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };
boolean[] canRemoveAliases = new boolean[] { true, true };

parsed.forEachDown(p -> {// go over each plan top-down
if (p instanceof RegexExtract re) { // for Grok and Dissect
Expand Down Expand Up @@ -663,13 +663,21 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
AttributeSet planRefs = p.references();
Set<String> fieldNames = planRefs.names();
p.forEachExpressionDown(Alias.class, alias -> {
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
if (fieldNames.contains(alias.name())) {
return;
canRemoveAliases[1] = true;
p.forEachDown(plan -> {
if (canRemoveAliases[1] && couldOverrideAliases(plan)) {
canRemoveAliases[1] = false;
}
if (canRemoveAliases[1]) {
plan.forEachExpression(Alias.class, alias -> {
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
if (fieldNames.contains(alias.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
});
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
});
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,18 @@ public void testDissectOverwriteName() {
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
}

public void testAvoidGrokAttributesRemoval() {
Set<String> fieldNames = fieldNames("""
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| grok type "%{WORD:b}"
| stats x = max(b)
| keep x""", Set.of());
assertThat(fieldNames, equalTo(Set.of("message", "x", "type", "x.*", "message.*", "type.*")));
}

public void testEnrichOnDefaultField() {
Set<String> fieldNames = fieldNames("""
from employees
Expand Down
Loading