docs/changelog/127563.yaml (6 additions, 0 deletions)
@@ -0,0 +1,6 @@
pr: 127563
summary: "ESQL: Avoid regex extract attributes removal"
area: ES|QL
type: bug
issues:
- 127468
@@ -53,7 +53,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
"Unknown column", // https://github.com/elastic/elasticsearch/issues/127467
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
);

@@ -331,3 +331,19 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
a:keyword | b:keyword | d:keyword
b c d x | b | d
;

avoidAttributesRemoval
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| dissect type "%{b}"
| stats x = max(b)
| keep x
;

x:keyword
Success
;
@@ -297,3 +297,34 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
text:integer | int:integer | description:keyword
123 | 5 | abc
;

avoidAttributesRemoval
required_capability: union_types
required_capability: join_lookup_v12
required_capability: keep_regex_extract_attributes
from multivalue_points,h*,messa*
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
| rename message as message
| lookup join message_types_lookup on message
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
| limit 5845
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
| grok type "%{WORD:GknCxQFo}"
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
| drop HvTQdOqFajpH, `location`, centroid
| mv_expand GknCxQFo
| limit 410
| limit 3815
| rename `id` AS `GknCxQFo`
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
| stats BkQXJRMeAM = min(GknCxQFo)
| keep `BkQXJRMeAM`
;

BkQXJRMeAM:long
4325287503714500302
;

@@ -369,6 +369,11 @@ public enum Cap {
*/
GROK_DISSECT_MASKING,

/**
 * Avoid GROK and DISSECT attributes being removed when resolving fields.
 * See <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468</a>
 */
KEEP_REGEX_EXTRACT_ATTRIBUTES,
/**
* Support for quoting index sources in double quotes.
*/
@@ -594,6 +594,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
var keepCommandRefsBuilder = AttributeSet.builder();
var keepJoinRefsBuilder = AttributeSet.builder();
var regexExtractRefsBuilder = AttributeSet.builder();
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };
@@ -605,7 +606,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
referencesBuilder.removeIf(attr -> matchByName(attr, extracted.name(), false));
}
// but keep the inputs needed by Grok/Dissect
referencesBuilder.addAll(re.input().references());
regexExtractRefsBuilder.addAll(re.input().references());
} else if (p instanceof Enrich enrich) {
AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields());
AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder();
@@ -676,6 +677,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy

// Add JOIN ON column references afterward to avoid Alias removal
referencesBuilder.addAll(keepJoinRefsBuilder);
// Add the inputs needed by Grok/Dissect afterward to avoid Alias removal
referencesBuilder.addAll(regexExtractRefsBuilder);
// If any JOIN commands need wildcard field-caps calls, persist the index names
if (wildcardJoinIndices.isEmpty() == false) {
result = result.withWildcardJoinIndices(wildcardJoinIndices);
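
The two referencesBuilder.addAll(...) calls above apply the same deferred-merge pattern to GROK/DISSECT inputs that was already used for JOIN ON columns: the inputs are parked in regexExtractRefsBuilder while the plan is walked, and only merged into the main reference set after the alias-removal logic has run, so they can no longer be dropped. Below is a minimal, self-contained sketch of that pattern; the types and names (FieldNameCollector, onRegexExtract, resolvedFieldNames) are simplified stand-ins for illustration, not the actual ES|QL classes.

import java.util.HashSet;
import java.util.Set;

// Sketch only: "references" models the field names requested from field-caps,
// "regexExtractRefs" models the new regexExtractRefsBuilder in the change above.
final class FieldNameCollector {
    private final Set<String> references = new HashSet<>();
    private final Set<String> regexExtractRefs = new HashSet<>();

    // Called when a GROK/DISSECT-like node is visited while walking the plan.
    void onRegexExtract(String inputField, Set<String> extractedNames) {
        // Attributes produced by the extraction shadow real fields, so drop them
        // from the request...
        references.removeAll(extractedNames);
        // ...but park the input column in a separate set instead of adding it to
        // "references" directly, so a later alias-removal step cannot strip it.
        regexExtractRefs.add(inputField);
    }

    // Called once the whole plan has been visited.
    Set<String> resolvedFieldNames() {
        references.addAll(regexExtractRefs); // merge after alias removal is done
        return references;
    }
}

The important property is the ordering: as in the real change, the deferred set is merged only after every removal pass has completed.
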
@@ -261,13 +261,16 @@ public void testDateFields() {
}

public void testEvalDissect() {
assertFieldNames("""
from employees
| eval full_name = concat(first_name, " ", last_name)
| dissect full_name "%{a} %{b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""", Set.of("first_name", "first_name.*", "last_name", "last_name.*", "emp_no", "emp_no.*"));
assertFieldNames(
"""
from employees
| eval full_name = concat(first_name, " ", last_name)
| dissect full_name "%{a} %{b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""",
Set.of("emp_no", "first_name", "last_name", "full_name", "last_name.*", "first_name.*", "full_name.*", "emp_no.*")
);
}

public void testDissectExpression() {
Expand Down Expand Up @@ -685,13 +688,16 @@ public void testBucket() {
}

public void testEvalGrok() {
assertFieldNames("""
from employees
| eval full_name = concat(first_name, " ", last_name)
| grok full_name "%{WORD:a} %{WORD:b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""", Set.of("first_name", "first_name.*", "last_name", "last_name.*", "emp_no", "emp_no.*"));
assertFieldNames(
"""
from employees
| eval full_name = concat(first_name, " ", last_name)
| grok full_name "%{WORD:a} %{WORD:b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""",
Set.of("emp_no", "first_name", "last_name", "full_name", "last_name.*", "first_name.*", "full_name.*", "emp_no.*")
);
}

public void testGrokExpression() {
Expand All @@ -710,7 +716,7 @@ public void testEvalGrokSort() {
| grok full_name "%{WORD:a} %{WORD:b}"
| sort a asc
| keep full_name, a, b
| limit 3""", Set.of("first_name", "first_name.*", "last_name", "last_name.*"));
| limit 3""", Set.of("first_name", "last_name", "full_name", "last_name.*", "first_name.*", "full_name.*"));
}

public void testGrokStats() {
Expand All @@ -720,7 +726,7 @@ public void testGrokStats() {
| grok x "%{WORD:a} %{WORD:b}"
| stats n = max(emp_no) by a
| keep a, n
| sort a asc""", Set.of("gender", "gender.*", "emp_no", "emp_no.*"));
| sort a asc""", Set.of("emp_no", "gender", "x", "x.*", "gender.*", "emp_no.*"));
}

public void testNullOnePattern() {
Expand Down Expand Up @@ -1341,6 +1347,18 @@ public void testDissectOverwriteName() {
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
}

public void testAvoidGrokAttributesRemoval() {
Set<String> fieldNames = fieldNames("""
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| grok type "%{WORD:b}"
| stats x = max(b)
| keep x""", Set.of());
assertThat(fieldNames, equalTo(Set.of("message", "x", "type", "x.*", "message.*", "type.*")));
}

public void testEnrichOnDefaultField() {
Set<String> fieldNames = fieldNames("""
from employees