Skip to content

Commit c5941bc

Browse files
authored
[ES|QL] Permit text becoming keyword in TS aggs grouping (#136242)
When running aggregations under the TS command and grouping by a text field, the FieldAttribute representing the text field gets replaced by a ReferenceAttribute containing an Alias to `Values(text_field)`, which is an aggregation. Aggregations always return KEYWORD rather than TEXT, so the data type of the grouping column changes from TEXT to KEYWORD, which the post-optimization plan verifier previously rejected with an "Output has changed from [...] to [...]" failure. We now permit this type change in this specific circumstance. Closes #134794
1 parent 7403ca9 commit c5941bc

File tree

4 files changed

+118
-5
lines changed

4 files changed

+118
-5
lines changed

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query
7878
"time_series aggregate.* can only be used with the TS command",
7979
"Invalid call to dataType on an unresolved object \\?LASTOVERTIME", // https://github.com/elastic/elasticsearch/issues/134791
8080
// https://github.com/elastic/elasticsearch/issues/134793
81-
"class org.elasticsearch.compute.data..*Block cannot be cast to class org.elasticsearch.compute.data..*Block",
82-
"Output has changed from \\[.*\\] to \\[.*\\]" // https://github.com/elastic/elasticsearch/issues/134794
81+
"class org.elasticsearch.compute.data..*Block cannot be cast to class org.elasticsearch.compute.data..*Block"
8382
);
8483

8584
public static final Set<Pattern> ALLOWED_ERROR_PATTERNS = ALLOWED_ERRORS.stream()

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,12 @@ public enum Cap {
14841484
*/
14851485
METADATA_TSID_FIELD,
14861486

1487+
/**
1488+
* Permit the data type of a field changing from TEXT to KEYWORD
1489+
* when being grouped on in aggregations on the TS command.
1490+
*/
1491+
TS_PERMIT_TEXT_BECOMING_KEYWORD_WHEN_GROUPED_ON,
1492+
14871493
/**
14881494
* Fix management of plans with no columns
14891495
* https://github.com/elastic/elasticsearch/issues/120272

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PostOptimizationPhasePlanVerifier.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88
package org.elasticsearch.xpack.esql.optimizer;
99

1010
import org.elasticsearch.xpack.esql.common.Failures;
11+
import org.elasticsearch.xpack.esql.core.expression.Alias;
1112
import org.elasticsearch.xpack.esql.core.expression.Attribute;
13+
import org.elasticsearch.xpack.esql.core.type.DataType;
14+
import org.elasticsearch.xpack.esql.expression.function.aggregate.Values;
1215
import org.elasticsearch.xpack.esql.optimizer.rules.physical.ProjectAwayColumns;
1316
import org.elasticsearch.xpack.esql.plan.QueryPlan;
17+
import org.elasticsearch.xpack.esql.plan.logical.TimeSeriesAggregate;
1418
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
1519

1620
import java.util.List;
@@ -72,7 +76,14 @@ private static void verifyOutputNotChanged(QueryPlan<?> optimizedPlan, List<Attr
7276
// We perform an optimizer run on every fragment. LookupJoinExec also contains such a fragment,
7377
// and currently it only contains an EsQueryExec after optimization.
7478
boolean hasLookupJoinExec = optimizedPlan instanceof EsQueryExec esQueryExec && esQueryExec.indexMode() == LOOKUP;
75-
boolean ignoreError = hasProjectAwayColumns || hasLookupJoinExec;
79+
// If we group on a text field when using the TS command, we create an Alias that wraps the text field
80+
in a Values aggregation. Aggregations return KEYWORD rather than TEXT types, so we want to
81+
// permit the output type changing here.
82+
boolean hasTextGroupingInTimeSeries = optimizedPlan.anyMatch(
83+
a -> a instanceof TimeSeriesAggregate ts
84+
&& ts.aggregates().stream().anyMatch(g -> Alias.unwrap(g) instanceof Values v && v.field().dataType() == DataType.TEXT)
85+
);
86+
boolean ignoreError = hasProjectAwayColumns || hasLookupJoinExec || hasTextGroupingInTimeSeries;
7687
if (ignoreError == false) {
7788
failures.add(
7889
fail(

x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ cast counter then filter:
244244
- method: POST
245245
path: /_query
246246
parameters: [ ]
247-
capabilities: [ aggregate_metric_double_convert_to ]
247+
capabilities: [ aggregate_metric_double_v0 ]
248248
reason: "Uses TO_AGGREGATE_METRIC_DOUBLE"
249249
- do:
250250
esql.query:
@@ -268,7 +268,7 @@ sort on counter without cast:
268268
- method: POST
269269
path: /_query
270270
parameters: []
271-
capabilities: [sorting_on_source_and_counters_forbidden, aggregate_metric_double_convert_to]
271+
capabilities: [sorting_on_source_and_counters_forbidden, aggregate_metric_double_v0]
272272
reason: "Sorting on counters shouldn't have been possible"
273273
- do:
274274
catch: /cannot sort on counter_long/
@@ -788,3 +788,100 @@ avg of aggregate_metric_double:
788788
- match: {columns.0.name: "avg"}
789789
- match: {columns.0.type: "double"}
790790
- match: {values.0.0: 4.904761904761905}
791+
792+
---
793+
TS Command grouping on text field:
794+
- requires:
795+
test_runner_features: [ capabilities ]
796+
capabilities:
797+
- method: POST
798+
path: /_query
799+
parameters: [ ]
800+
capabilities: [ ts_permit_text_becoming_keyword_when_grouped_on, ts_command_v0 ]
801+
reason: "fix grouping on text fields with TS command"
802+
803+
- do:
804+
indices.create:
805+
index: test-text-field
806+
body:
807+
settings:
808+
index:
809+
mode: time_series
810+
routing_path: [metricset, k8s.pod.uid]
811+
time_series:
812+
start_time: 2021-04-28T00:00:00Z
813+
end_time: 2021-04-29T00:00:00Z
814+
mappings:
815+
properties:
816+
"@timestamp":
817+
type: date
818+
metricset:
819+
type: keyword
820+
time_series_dimension: true
821+
k8s:
822+
properties:
823+
pod:
824+
properties:
825+
uid:
826+
type: keyword
827+
time_series_dimension: true
828+
name:
829+
type: keyword
830+
ip:
831+
type: ip
832+
network:
833+
properties:
834+
tx:
835+
type: integer
836+
rx:
837+
type: integer
838+
text_field:
839+
type: text
840+
- do:
841+
bulk:
842+
refresh: true
843+
index: test-text-field
844+
body:
845+
- '{"index": {}}'
846+
- '{"@timestamp": "2021-04-28T16:50:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2001818691, "rx": 802133794}}}, "text_field": "some text"}'
847+
- '{"index": {}}'
848+
- '{"@timestamp": "2021-04-28T17:50:24.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2005177954, "rx": 801479970}}}, "text_field": "even more text"}'
849+
- '{"index": {}}'
850+
- '{"@timestamp": "2021-04-28T18:50:44.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2006223737, "rx": 802337279}}}, "text_field": "wow i cant believe theres still text"}'
851+
- '{"index": {}}'
852+
- '{"@timestamp": "2021-04-28T19:51:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.2", "network": {"tx": 2012916202, "rx": 803685721}}}, "text_field": "who knew there could be so much text"}'
853+
- '{"index": {}}'
854+
- '{"@timestamp": "2021-04-28T16:50:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434521831, "rx": 530575198}}}, "text_field": "we are just overflowing with text"}'
855+
- '{"index": {}}'
856+
- '{"@timestamp": "2021-04-28T17:50:23.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434577921, "rx": 530600088}}}, "text_field": "is this text?"}'
857+
- '{"index": {}}'
858+
- '{"@timestamp": "2021-04-28T18:50:53.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434587694, "rx": 530604797}}}, "text_field": "why cant i hold all these text"}'
859+
- '{"index": {}}'
860+
- '{"@timestamp": "2021-04-28T19:51:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434595272, "rx": 530605511}}}, "text_field": "peak text"}'
861+
862+
- do:
863+
esql.query:
864+
body:
865+
query: |
866+
TS test-text-field
867+
| STATS max = max(k8s.pod.network.rx) BY text_field, bucket = bucket(@timestamp,1hour)
868+
| SORT max, text_field
869+
| LIMIT 3
870+
871+
- length: {values: 3}
872+
- length: {values.0: 3}
873+
- match: {columns.0.name: "max"}
874+
- match: {columns.0.type: "integer"}
875+
- match: {columns.1.name: "text_field"}
876+
- match: {columns.1.type: "keyword"}
877+
- match: {columns.2.name: "bucket"}
878+
- match: {columns.2.type: "date"}
879+
- match: {values.0.0: 530575198}
880+
- match: {values.0.1: "we are just overflowing with text"}
881+
- match: {values.0.2: "2021-04-28T16:00:00.000Z"}
882+
- match: {values.1.0: 530600088}
883+
- match: {values.1.1: "is this text?"}
884+
- match: {values.1.2: "2021-04-28T17:00:00.000Z"}
885+
- match: {values.2.0: 530604797}
886+
- match: {values.2.1: "why cant i hold all these text"}
887+
- match: {values.2.2: "2021-04-28T18:00:00.000Z"}

0 commit comments

Comments
 (0)