diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/tbucket.md b/docs/reference/query-languages/esql/_snippets/functions/description/tbucket.md new file mode 100644 index 0000000000000..dfe74f67beda8 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/tbucket.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Creates groups of values - buckets - out of a @timestamp attribute. The size of the buckets must be provided directly. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/tbucket.md b/docs/reference/query-languages/esql/_snippets/functions/examples/tbucket.md new file mode 100644 index 0000000000000..0b4a724dc6493 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/tbucket.md @@ -0,0 +1,43 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Examples** + +Provide a bucket size as an argument. + +```esql +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 hour) +| SORT min +``` + +| min:datetime | max:datetime | bucket:datetime | +| --- | --- | --- | +| 2023-10-23T12:15:03.360Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:00:00.000Z | +| 2023-10-23T13:33:34.937Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:00:00.000Z | + + +::::{note} +When providing the bucket size, it must be a time duration or date period. +Also the reference is epoch, which starts at `0001-01-01T00:00:00Z`. +:::: + +Provide a string representation of bucket size as an argument. 
+ +```esql +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 hour") +| SORT min +``` + +| min:datetime | max:datetime | bucket:datetime | +| --- | --- | --- | +| 2023-10-23T12:15:03.360Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:00:00.000Z | +| 2023-10-23T13:33:34.937Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:00:00.000Z | + + +::::{note} +When providing the bucket size, it can be a string representation of time duration or date period. +For example, "1 hour". Also the reference is epoch, which starts at `0001-01-01T00:00:00Z`. +:::: + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/tbucket.md b/docs/reference/query-languages/esql/_snippets/functions/layout/tbucket.md new file mode 100644 index 0000000000000..9c7071a06d686 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/tbucket.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `TBUCKET` [esql-tbucket] + +**Syntax** + +:::{image} ../../../images/functions/tbucket.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/tbucket.md +::: + +:::{include} ../description/tbucket.md +::: + +:::{include} ../types/tbucket.md +::: + +:::{include} ../examples/tbucket.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/tbucket.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/tbucket.md new file mode 100644 index 0000000000000..a811005a41bef --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/tbucket.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`buckets` +: Desired bucket size. 
+ diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/tbucket.md b/docs/reference/query-languages/esql/_snippets/functions/types/tbucket.md new file mode 100644 index 0000000000000..5c85899ebfb8a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/tbucket.md @@ -0,0 +1,11 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Supported types** + +| buckets | result | +| --- | --- | +| date_period | date | +| date_period | date_nanos | +| time_duration | date | +| time_duration | date_nanos | + diff --git a/docs/reference/query-languages/esql/_snippets/lists/grouping-functions.md b/docs/reference/query-languages/esql/_snippets/lists/grouping-functions.md index bb0c752c63047..dbddb8e362266 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/grouping-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/grouping-functions.md @@ -1,2 +1,3 @@ * [`BUCKET`](../../functions-operators/grouping-functions.md#esql-bucket) +* [`TBUCKET`](../../functions-operators/grouping-functions.md#esql-tbucket) * [`CATEGORIZE`](../../functions-operators/grouping-functions.md#esql-categorize) diff --git a/docs/reference/query-languages/esql/functions-operators/grouping-functions.md b/docs/reference/query-languages/esql/functions-operators/grouping-functions.md index 7cd02febec968..e44617d5962d8 100644 --- a/docs/reference/query-languages/esql/functions-operators/grouping-functions.md +++ b/docs/reference/query-languages/esql/functions-operators/grouping-functions.md @@ -16,7 +16,10 @@ The [`STATS`](/reference/query-languages/esql/commands/stats-by.md) command supp :::{include} ../_snippets/functions/layout/bucket.md ::: -:::{note} +:::{include} ../_snippets/functions/layout/tbucket.md +::: + +:::{note} The `CATEGORIZE` function requires a [platinum license](https://www.elastic.co/subscriptions). 
::: diff --git a/docs/reference/query-languages/esql/images/functions/tbucket.svg b/docs/reference/query-languages/esql/images/functions/tbucket.svg new file mode 100644 index 0000000000000..095d6411c2c69 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/tbucket.svg @@ -0,0 +1 @@ +TBUCKET(buckets) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/tbucket.json b/docs/reference/query-languages/esql/kibana/definition/functions/tbucket.json new file mode 100644 index 0000000000000..c95f75bbfa2c6 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/tbucket.json @@ -0,0 +1,62 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "grouping", + "name" : "tbucket", + "description" : "Creates groups of values - buckets - out of a @timestamp attribute. The size of the buckets must be provided directly.", + "signatures" : [ + { + "params" : [ + { + "name" : "buckets", + "type" : "date_period", + "optional" : false, + "description" : "Desired bucket size." + } + ], + "variadic" : false, + "returnType" : "date" + }, + { + "params" : [ + { + "name" : "buckets", + "type" : "date_period", + "optional" : false, + "description" : "Desired bucket size." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "buckets", + "type" : "time_duration", + "optional" : false, + "description" : "Desired bucket size." + } + ], + "variadic" : false, + "returnType" : "date" + }, + { + "params" : [ + { + "name" : "buckets", + "type" : "time_duration", + "optional" : false, + "description" : "Desired bucket size." 
+ } + ], + "variadic" : false, + "returnType" : "date_nanos" + } + ], + "examples" : [ + "FROM sample_data\n| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 hour)\n| SORT min", + "FROM sample_data\n| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(\"1 hour\")\n| SORT min" + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/tbucket.md b/docs/reference/query-languages/esql/kibana/docs/functions/tbucket.md new file mode 100644 index 0000000000000..bc0ad1ee27af9 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/tbucket.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### TBUCKET +Creates groups of values - buckets - out of a @timestamp attribute. The size of the buckets must be provided directly. + +```esql +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 hour) +| SORT min +``` diff --git a/x-pack/plugin/esql-core/src/test/resources/mapping-odd-timestamp.json b/x-pack/plugin/esql-core/src/test/resources/mapping-odd-timestamp.json new file mode 100644 index 0000000000000..0c3d34486bd11 --- /dev/null +++ b/x-pack/plugin/esql-core/src/test/resources/mapping-odd-timestamp.json @@ -0,0 +1,16 @@ +{ + "properties": { + "@timestamp": { + "type": "boolean" + }, + "client_ip": { + "type": "ip" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/k8s-timeseries.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/k8s-timeseries.csv-spec index 5b226d647a606..a903503e147e0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/k8s-timeseries.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/k8s-timeseries.csv-spec @@ -96,6 +96,18 @@ 
max(rate(network.total_bytes_in)): double | time_bucket:date // end::rate-result[] ; +oneRateWithTBucket +required_capability: metrics_command +required_capability: tbucket +TS k8s +| STATS max(rate(network.total_bytes_in)) BY time_bucket = tbucket(5minute) +| SORT time_bucket DESC | LIMIT 2; + +max(rate(network.total_bytes_in)): double | time_bucket:date +6.980660660660663 | 2024-05-10T00:20:00.000Z +23.702205882352942 | 2024-05-10T00:15:00.000Z +; + twoRatesWithBucket required_capability: metrics_command TS k8s | STATS max(rate(network.total_bytes_in)), sum(rate(network.total_bytes_in)) BY time_bucket = bucket(@timestamp,5minute) | SORT time_bucket DESC | LIMIT 3; @@ -106,6 +118,16 @@ max(rate(network.total_bytes_in)):double | sum(rate(network.total_bytes_in)):dou 14.97039381153305 | 63.00652190262822 | 2024-05-10T00:10:00.000Z ; +twoRatesWithTBucket +required_capability: metrics_command +required_capability: tbucket +TS k8s | STATS max(rate(network.total_bytes_in)), sum(rate(network.total_bytes_in)) BY time_bucket = tbucket(5minute) | SORT time_bucket DESC | LIMIT 3; + +max(rate(network.total_bytes_in)):double | sum(rate(network.total_bytes_in)):double | time_bucket:datetime +6.980660660660663 | 23.959973363184154 | 2024-05-10T00:20:00.000Z +23.702205882352942 | 94.9517511187984 | 2024-05-10T00:15:00.000Z +14.97039381153305 | 63.00652190262822 | 2024-05-10T00:10:00.000Z +; oneRateWithBucketAndCluster required_capability: metrics_command diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tbucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tbucket.csv-spec new file mode 100644 index 0000000000000..cb0c699ae451e --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tbucket.csv-spec @@ -0,0 +1,347 @@ +// TBUCKET-specific tests + +tbucketByTenSecondsDuration +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(10 seconds) +| SORT min +; 
+ignoreOrder:true + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T12:15:03.360Z | 2023-10-23T12:15:00.000Z +2023-10-23T12:27:28.948Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:27:20.000Z +2023-10-23T13:33:34.937Z | 2023-10-23T13:33:34.937Z | 2023-10-23T13:33:30.000Z +2023-10-23T13:51:54.732Z | 2023-10-23T13:51:54.732Z | 2023-10-23T13:51:50.000Z +2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:50.000Z +2023-10-23T13:53:55.832Z | 2023-10-23T13:53:55.832Z | 2023-10-23T13:53:50.000Z +2023-10-23T13:55:01.543Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:55:00.000Z +; + +tbucketByTenSecondsDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("10 seconds") +| SORT min +; +ignoreOrder:true + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T12:15:03.360Z | 2023-10-23T12:15:00.000Z +2023-10-23T12:27:28.948Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:27:20.000Z +2023-10-23T13:33:34.937Z | 2023-10-23T13:33:34.937Z | 2023-10-23T13:33:30.000Z +2023-10-23T13:51:54.732Z | 2023-10-23T13:51:54.732Z | 2023-10-23T13:51:50.000Z +2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:50.000Z +2023-10-23T13:53:55.832Z | 2023-10-23T13:53:55.832Z | 2023-10-23T13:53:50.000Z +2023-10-23T13:55:01.543Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:55:00.000Z +; + +tbucketByTenMinutesDuration +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(10 minutes) +| SORT min +; +ignoreOrder:true + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T12:15:03.360Z | 2023-10-23T12:10:00.000Z +2023-10-23T12:27:28.948Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:20:00.000Z +2023-10-23T13:33:34.937Z | 2023-10-23T13:33:34.937Z | 
2023-10-23T13:30:00.000Z +2023-10-23T13:51:54.732Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:50:00.000Z +; + +tbucketByTenMinutesDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("10 minutes") +| SORT min +; +ignoreOrder:true + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T12:15:03.360Z | 2023-10-23T12:10:00.000Z +2023-10-23T12:27:28.948Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:20:00.000Z +2023-10-23T13:33:34.937Z | 2023-10-23T13:33:34.937Z | 2023-10-23T13:30:00.000Z +2023-10-23T13:51:54.732Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:50:00.000Z +; + +docsTBucketByOneHourDuration +required_capability: tbucket + +// tag::docsTBucketByOneHourDuration[] +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 hour) +| SORT min +// end::docsTBucketByOneHourDuration[] +; + +// tag::docsTBucketByOneHourDuration-result[] +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:00:00.000Z +2023-10-23T13:33:34.937Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:00:00.000Z +// end::docsTBucketByOneHourDuration-result[] +; + +docsTBucketByOneHourDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +// tag::docsTBucketByOneHourDurationAsString[] +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 hour") +| SORT min +// end::docsTBucketByOneHourDurationAsString[] +; + +// tag::docsTBucketByOneHourDurationAsString-result[] +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T12:27:28.948Z | 2023-10-23T12:00:00.000Z +2023-10-23T13:33:34.937Z | 2023-10-23T13:55:01.543Z | 2023-10-23T13:00:00.000Z +// 
end::docsTBucketByOneHourDurationAsString-result[] +; + +tbucketByOneDayDuration +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 day) +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-10-23T00:00:00.000Z +; + +tbucketByOneDayDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 day") +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-10-23T00:00:00.000Z +; + +tbucketByOneWeekDuration +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 week) +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-10-23T00:00:00.000Z +; + +tbucketByOneWeekDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 week") +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-10-23T00:00:00.000Z +; + +tbucketByOneMonthDuration +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 month) +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-10-01T00:00:00.000Z +; + +tbucketByOneMonthDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 month") 
+| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-10-01T00:00:00.000Z +; + +tbucketByOneYearDuration +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 year) +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-01-01T00:00:00.000Z +; + +tbucketByOneYearDurationAsString +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 year") +| SORT min +; + +min:datetime | max:datetime | bucket:datetime +2023-10-23T12:15:03.360Z | 2023-10-23T13:55:01.543Z | 2023-01-01T00:00:00.000Z +; + +reuseGroupingFunction +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +from sample_data +| stats x = 1 year + tbucket(1 day) by b1d = tbucket(1 day) +; + +x:datetime | b1d:datetime +2024-10-23T00:00:00.000Z | 2023-10-23T00:00:00.000Z +; + +keepTimestampBeforeStats +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| WHERE event_duration > 0 +| KEEP @timestamp, client_ip, event_duration +| STATS count = COUNT(*), avg_dur = AVG(event_duration) BY hour = TBUCKET(1h), client_ip +; +ignoreOrder:true + +count:long | avg_dur:double | hour:datetime | client_ip:ip +4 | 3945955.75 | 2023-10-23T13:00:00.000Z | 172.21.3.15 +1 | 3450233.0 | 2023-10-23T12:00:00.000Z | 172.21.2.162 +1 | 2764889.0 | 2023-10-23T12:00:00.000Z | 172.21.2.113 +1 | 1232382.0 | 2023-10-23T13:00:00.000Z | 172.21.0.5 +; + +keepAtWildcardBeforeStats +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| WHERE message == "Connection error" +| KEEP @*, message +| STATS errors = 
COUNT() BY day = TBUCKET(1d), message +; +ignoreOrder:true + +errors:long | day:datetime | message:keyword +3 | 2023-10-23T00:00:00.000Z | Connection error +; + +keepWildcardBeforeStats +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| WHERE client_ip IS NOT NULL +| KEEP *stamp*, client_ip, event_duration +| STATS p95 = PERCENTILE(event_duration, 95) BY ten_min = TBUCKET(10min), client_ip +; +ignoreOrder:true + +p95:double | ten_min:datetime | client_ip:ip +3450233.0 | 2023-10-23T12:10:00.000Z | 172.21.2.162 +2764889.0 | 2023-10-23T12:20:00.000Z | 172.21.2.113 +1232382.0 | 2023-10-23T13:30:00.000Z | 172.21.0.5 +7782993.299999999 | 2023-10-23T13:50:00.000Z | 172.21.3.15 +; + +statsChainingWithTimestampCarriedForward +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| KEEP @timestamp, event_duration +| STATS day_count = COUNT(), day_p95 = PERCENTILE(event_duration, 95) BY day = TBUCKET(1d), @timestamp +| WHERE day_count > 0 +| STATS hour_count = COUNT(), hour_p95 = PERCENTILE(day_p95, 95) BY hour = TBUCKET(1h), day +; +ignoreOrder:true + +hour_count:long | hour_p95:double | hour:datetime | day:datetime +2 | 3415965.8 | 2023-10-23T12:00:00.000Z | 2023-10-23T00:00:00.000Z +5 | 7621273.399999999 | 2023-10-23T13:00:00.000Z | 2023-10-23T00:00:00.000Z +; + +statsChainingWithTimestampCarriedForwardAsByKey +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| KEEP @timestamp, client_ip, event_duration +| STATS reqs = COUNT(), max_dur = MAX(event_duration) BY day = TBUCKET(1d), client_ip, @timestamp +| WHERE max_dur > 1000 +| STATS spikes = COUNT() BY hour = TBUCKET(1h), client_ip, day +; +ignoreOrder:true + +spikes:long | hour:datetime | client_ip:ip | day:datetime +4 | 2023-10-23T13:00:00.000Z | 172.21.3.15 | 2023-10-23T00:00:00.000Z +1 | 
2023-10-23T12:00:00.000Z | 172.21.2.113 | 2023-10-23T00:00:00.000Z +1 | 2023-10-23T12:00:00.000Z | 172.21.2.162 | 2023-10-23T00:00:00.000Z +1 | 2023-10-23T13:00:00.000Z | 172.21.0.5 | 2023-10-23T00:00:00.000Z +; + +statsWithTimestampEval +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| KEEP @timestamp, event_duration, message +| EVAL t = @timestamp +| STATS total = COUNT(*), med = MEDIAN(event_duration) BY d = TBUCKET(1d), message +; +ignoreOrder:true + +total:long | med:double | d:datetime | message:keyword +1 | 1232382.0 | 2023-10-23T00:00:00.000Z | Disconnected +1 | 1756467.0 | 2023-10-23T00:00:00.000Z | Connected to 10.1.0.1 +1 | 2764889.0 | 2023-10-23T00:00:00.000Z | Connected to 10.1.0.2 +1 | 3450233.0 | 2023-10-23T00:00:00.000Z | Connected to 10.1.0.3 +3 | 5033755.0 | 2023-10-23T00:00:00.000Z | Connection error +; + +statsChainingWithTimestampEval +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data +| KEEP @timestamp, event_duration, message +| EVAL t = @timestamp +| STATS total = COUNT(*), med = MEDIAN(event_duration) BY d = TBUCKET(1d), message, @timestamp +| WHERE total > 0 +| STATS day_total = SUM(total), hour_med = MEDIAN(med) BY h = TBUCKET(1h), message +; +ignoreOrder:true + +day_total:long | hour_med:double | h:datetime | message:keyword +3 | 5033755.0 | 2023-10-23T13:00:00.000Z | Connection error +1 | 3450233.0 | 2023-10-23T12:00:00.000Z | Connected to 10.1.0.3 +1 | 2764889.0 | 2023-10-23T12:00:00.000Z | Connected to 10.1.0.2 +1 | 1756467.0 | 2023-10-23T13:00:00.000Z | Connected to 10.1.0.1 +1 | 1232382.0 | 2023-10-23T13:00:00.000Z | Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 7c89864989b08..2788236bb4d9a 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -2524,3 +2524,18 @@ c:long | b:date_nanos | yr:date_nanos 22 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z 22 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z ; + +multiIndexTBucketGroupingAndAggregation +required_capability: date_nanos_type +required_capability: implicit_casting_date_and_date_nanos +required_capability: implicit_casting_string_literal_to_temporal_amount +required_capability: tbucket + +FROM sample_data_ts_nanos, sample_data +| stats x = 1 day + tbucket(1 hour) by b1d = tbucket(1 hour) +; + +x:date_nanos | b1d:date_nanos +2023-10-24T13:00:00.000Z | 2023-10-23T13:00:00.000Z +2023-10-24T12:00:00.000Z | 2023-10-23T12:00:00.000Z +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java index 06645ca3af517..bc4053584fc77 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java @@ -33,6 +33,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -270,16 +271,21 @@ private static boolean existingIndex(Collection existing, DataType /** This test generates documentation for the supported output types of the lookup join. 
*/ public void testOutputSupportedTypes() throws Exception { - Map, DataType> signatures = new LinkedHashMap<>(); + Set signatures = new LinkedHashSet<>(); for (TestConfigs configs : testConfigurations.values()) { if (configs.group.equals("unsupported") || configs.group.equals("union-types")) { continue; } for (TestConfig config : configs.configs.values()) { if (config instanceof TestConfigPasses) { - signatures.put( - List.of(new DocsV3Support.Param(config.mainType(), List.of()), new DocsV3Support.Param(config.lookupType(), null)), - null + signatures.add( + new DocsV3Support.TypeSignature( + List.of( + new DocsV3Support.Param(config.mainType(), List.of()), + new DocsV3Support.Param(config.lookupType(), null) + ), + null + ) ); } } @@ -770,7 +776,7 @@ private boolean isValidDataType(DataType dataType) { return UNDER_CONSTRUCTION.get(dataType) == null || UNDER_CONSTRUCTION.get(dataType).isEnabled(); } - private static void saveJoinTypes(Supplier, DataType>> signatures) throws Exception { + private static void saveJoinTypes(Supplier> signatures) throws Exception { if (System.getProperty("generateDocs") == null) { return; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 871e24f69470b..4672dc4500922 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1389,7 +1389,11 @@ public enum Cap { /** * Support for vector Hamming distance. 
*/ - HAMMING_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()); + HAMMING_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()), + /** + * Support for tbucket function + */ + TBUCKET; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index ccb3f94c1e311..b47ca9fa8d4d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -55,6 +55,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; +import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least; @@ -314,7 +315,8 @@ private static FunctionDefinition[][] functions() { // grouping functions new FunctionDefinition[] { def(Bucket.class, Bucket::new, "bucket", "bin"), - def(Categorize.class, Categorize::new, "categorize") }, + def(Categorize.class, Categorize::new, "categorize"), + def(TBucket.class, uni(TBucket::new), "tbucket") }, // aggregate functions // since they declare two public constructors - one with filter (for nested where) and one without // use casting to disambiguate between the two diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucket.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucket.java new file mode 100644 index 0000000000000..f20e3420b9055 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucket.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.grouping; + +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.SurrogateExpression; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.FunctionType; +import org.elasticsearch.xpack.esql.expression.function.Param; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; + +/** + * Groups the {@code @timestamp} attribute into buckets of a given size. The size is a date period or time duration provided directly. 
+ */ +public class TBucket extends GroupingFunction.EvaluatableGroupingFunction implements SurrogateExpression { + public static final String NAME = "TBucket"; + + private final Expression buckets; + private final Expression timestamp; + + @FunctionInfo( + returnType = { "date", "date_nanos" }, + description = """ + Creates groups of values - buckets - out of a @timestamp attribute. The size of the buckets must be provided directly.""", + examples = { + @Example(description = """ + Provide a bucket size as an argument.""", file = "tbucket", tag = "docsTBucketByOneHourDuration", explanation = """ + ::::{note} + When providing the bucket size, it must be a time duration or date period. + Also the reference is epoch, which starts at `0001-01-01T00:00:00Z`. + ::::"""), + @Example( + description = """ + Provide a string representation of bucket size as an argument.""", + file = "tbucket", + tag = "docsTBucketByOneHourDurationAsString", + explanation = """ + ::::{note} + When providing the bucket size, it can be a string representation of time duration or date period. + For example, "1 hour". Also the reference is epoch, which starts at `0001-01-01T00:00:00Z`. 
+ ::::""" + ) }, + type = FunctionType.GROUPING + ) + public TBucket( + Source source, + @Param(name = "buckets", type = { "date_period", "time_duration" }, description = "Desired bucket size.") Expression buckets + ) { + this(source, buckets, new UnresolvedAttribute(source, MetadataAttribute.TIMESTAMP_FIELD)); + } + + public TBucket(Source source, Expression buckets, Expression timestamp) { + super(source, List.of(buckets, timestamp)); + this.buckets = buckets; + this.timestamp = timestamp; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + throw new UnsupportedOperationException("not serialized"); + } + + @Override + public String getWriteableName() { + throw new UnsupportedOperationException("not serialized"); + } + + @Override + public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + throw new UnsupportedOperationException("should be rewritten"); + } + + @Override + public Expression surrogate() { + return new Bucket(source(), timestamp, buckets, null, null); + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + return isType(buckets, DataType::isTemporalAmount, sourceText(), DEFAULT, "date_period", "time_duration").and( + isType(timestamp, dt -> dt == DataType.DATETIME || dt == DataType.DATE_NANOS, sourceText(), SECOND, "date_nanos or datetime") + ); + } + + @Override + public DataType dataType() { + return timestamp.dataType(); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new TBucket(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, TBucket::new, buckets, timestamp); + } + + public Expression field() { + return timestamp; + } + + public Expression buckets() { + return buckets; + } + + @Override + public String toString() { + return "TBucket{buckets=" + buckets + "}"; + } +} diff 
--git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TranslateTimeSeriesAggregate.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TranslateTimeSeriesAggregate.java index 022ed8d372635..71ef06c9432fe 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TranslateTimeSeriesAggregate.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TranslateTimeSeriesAggregate.java @@ -25,6 +25,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.ToPartial; import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -225,6 +226,12 @@ LogicalPlan translate(TimeSeriesAggregate aggregate) { throw new IllegalArgumentException("expected at most one time bucket"); } timeBucketRef.set(e); + } else if (child instanceof TBucket tbucket && tbucket.field().equals(timestamp.get())) { + if (timeBucketRef.get() != null) { + throw new IllegalArgumentException("expected at most one time tbucket"); + } + Bucket bucket = (Bucket) tbucket.surrogate(); + timeBucketRef.set(new Alias(e.source(), bucket.functionName(), bucket, e.id())); } } }); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 844f7cb1989df..80bc7c67b90d4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ 
-23,6 +23,8 @@ import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy; import org.elasticsearch.xpack.esql.expression.UnresolvedNamePattern; +import org.elasticsearch.xpack.esql.expression.function.UnresolvedFunction; +import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -48,6 +50,7 @@ import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Set; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -56,6 +59,8 @@ public class FieldNameUtils { + private static final Set FUNCTIONS_REQUIRING_TIMESTAMP = Set.of(TBucket.NAME.toLowerCase(Locale.ROOT)); + public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichResolution enrichResolution) { // we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API @@ -166,6 +171,13 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso // METRICS aggs generally rely on @timestamp without the user having to mention it. 
referencesBuilder.get().add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); } + + p.forEachExpression(UnresolvedFunction.class, uf -> { + if (FUNCTIONS_REQUIRING_TIMESTAMP.contains(uf.name().toLowerCase(Locale.ROOT))) { + referencesBuilder.get().add(new UnresolvedAttribute(uf.source(), MetadataAttribute.TIMESTAMP_FIELD)); + } + }); + // special handling for UnresolvedPattern (which is not an UnresolvedAttribute) p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index a90ca7c99cc9a..768b1974dec21 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -51,6 +51,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; @@ -4243,6 +4244,43 @@ public void testGroupingOverridesInInlinestats() { """, "Found 1 problem\n" + "line 2:49: Unknown column [x]", "mapping-default.json"); } + public void testTBucketWithDatePeriodInBothAggregationAndGrouping() { + LogicalPlan plan = analyze(""" + FROM sample_data + | STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 week) + | SORT min + """, "mapping-sample_data.json"); + + Limit limit = as(plan, 
Limit.class); + OrderBy orderBy = as(limit.child(), OrderBy.class); + Aggregate agg = as(orderBy.child(), Aggregate.class); + + List aggregates = agg.aggregates(); + assertThat(aggregates, hasSize(3)); + Alias a = as(aggregates.get(0), Alias.class); + assertEquals("min", a.name()); + Min min = as(a.child(), Min.class); + FieldAttribute fa = as(min.field(), FieldAttribute.class); + assertEquals("@timestamp", fa.name()); + a = as(aggregates.get(1), Alias.class); + assertEquals("max", a.name()); + Max max = as(a.child(), Max.class); + fa = as(max.field(), FieldAttribute.class); + assertEquals("@timestamp", fa.name()); + ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); + assertEquals("bucket", ra.name()); + + List groupings = agg.groupings(); + assertEquals(1, groupings.size()); + a = as(groupings.get(0), Alias.class); // reference in groupings is resolved + TBucket tbucket = as(a.child(), TBucket.class); + fa = as(tbucket.field(), FieldAttribute.class); + assertEquals("@timestamp", fa.name()); + Literal literal = as(tbucket.buckets(), Literal.class); + Literal oneWeek = new Literal(EMPTY, Period.ofWeeks(1), DATE_PERIOD); + assertEquals(oneWeek, literal); + } + private void verifyNameAndType(String actualName, DataType actualType, String expectedName, DataType expectedType) { assertEquals(expectedName, actualName); assertEquals(expectedType, actualType); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 5a111c759e2dd..98bc969a7c451 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -71,6 +71,8 @@ public class VerifierTests extends ESTestCase { private static final EsqlParser parser = new EsqlParser(); private final Analyzer defaultAnalyzer = 
AnalyzerTestUtils.expandedDefaultAnalyzer(); private final Analyzer fullTextAnalyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-full_text_search.json", "test")); + private final Analyzer sampleDataAnalyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-sample_data.json", "test")); + private final Analyzer oddSampleDataAnalyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-odd-timestamp.json", "test")); private final Analyzer tsdb = AnalyzerTestUtils.analyzer(AnalyzerTestUtils.tsdbIndexResolution()); private final List TIME_DURATIONS = List.of("millisecond", "second", "minute", "hour"); @@ -2350,6 +2352,43 @@ public void testToIPInvalidOptions() { assertThat(error(query), containsString("Illegal leading_zeros [abcdef]")); } + public void testInvalidTBucketCalls() { + assertThat(error("from test | stats max(emp_no) by tbucket(1 hour)"), equalTo("1:34: Unknown column [@timestamp]")); + assertThat( + error("from test | stats max(event_duration) by tbucket()", sampleDataAnalyzer, ParsingException.class), + equalTo("1:42: error building [tbucket]: expects exactly one argument") + ); + assertThat( + error("from test | stats max(event_duration) by tbucket(\"@tbucket\", 1 hour)", sampleDataAnalyzer, ParsingException.class), + equalTo("1:42: error building [tbucket]: expects exactly one argument") + ); + assertThat( + error("from test | stats max(event_duration) by tbucket(1 hr)", sampleDataAnalyzer, ParsingException.class), + equalTo("1:50: Unexpected temporal unit: 'hr'") + ); + assertThat( + error("from test | stats max(event_duration) by tbucket(\"1\")", sampleDataAnalyzer), + equalTo("1:42: argument of [tbucket(\"1\")] must be [date_period or time_duration], found value [\"1\"] type [keyword]") + ); + + /* + To test unsupported @timestamp data type. In this case, we use a boolean as a type for the @timestamp field which is not supported + by TBUCKET. 
+ */ + assertThat( + error("from test | stats max(event_duration) by tbucket(\"1 hour\")", oddSampleDataAnalyzer), + equalTo( + "1:42: second argument of [tbucket(\"1 hour\")] must be [date_nanos or datetime], found value [@timestamp] type [boolean]" + ) + ); + for (String interval : List.of("1 minu", "1 dy", "1.5 minutes", "0.5 days", "minutes 1", "day 5")) { + assertThat( + error("from test | stats max(event_duration) by tbucket(\"" + interval + "\")", sampleDataAnalyzer), + containsString("1:50: Cannot convert string [" + interval + "] to [DATE_PERIOD or TIME_DURATION]") + ); + } + } + private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { query("from test | eval similarity = " + functionInvocation, fullTextAnalyzer); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index a2795f308a6f7..0efc9fa60d851 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -66,7 +66,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; @@ -758,13 +757,13 @@ public static void testFunctionInfo() { for (int i = 0; i < args.size(); i++) { typesFromSignature.add(new HashSet<>()); } - for (Map.Entry, DataType> entry : signatures(testClass).entrySet()) { - List types = entry.getKey(); + for (DocsV3Support.TypeSignature entry : signatures(testClass)) { + List types = entry.argTypes(); for (int i = 0; i < args.size() && i < types.size(); i++) { typesFromSignature.get(i).add(types.get(i).dataType().esNameIfPossible()); } - if 
(DataType.UNDER_CONSTRUCTION.containsKey(entry.getValue()) == false) { - returnFromSignature.add(entry.getValue().esNameIfPossible()); + if (DataType.UNDER_CONSTRUCTION.containsKey(entry.returnType()) == false) { + returnFromSignature.add(entry.returnType().esNameIfPossible()); } } @@ -838,8 +837,9 @@ public static void testFunctionLicenseChecks() throws Exception { TestCheckLicense checkLicense = new TestCheckLicense(); // Go through all signatures and assert that the license is as expected - signatures(testClass).forEach((signature, returnType) -> { + signatures(testClass).forEach((signatureItem) -> { try { + List signature = signatureItem.argTypes(); License.OperationMode license = licenseChecker.invoke(signature.stream().map(DocsV3Support.Param::dataType).toList()); assertNotNull("License should not be null", license); @@ -933,14 +933,14 @@ protected final void assertTypeResolutionFailure(Expression expression) { /** * Unique signatures in this test’s parameters. */ - private static Map, DataType> signatures; + private static Set signatures; - public static Map, DataType> signatures(Class testClass) { + public static Set signatures(Class testClass) { if (signatures != null && classGeneratingSignatures == testClass) { return signatures; } classGeneratingSignatures = testClass; - signatures = new HashMap<>(); + signatures = new HashSet<>(); Set paramsFactories = new ClassModel(testClass).getAnnotatedLeafMethods(ParametersFactory.class).keySet(); assertThat(paramsFactories, hasSize(1)); Method paramsFactory = paramsFactories.iterator().next(); @@ -960,7 +960,7 @@ public static Map, DataType> signatures(Class testC continue; } List sig = tc.getData().stream().map(d -> new DocsV3Support.Param(d.type(), d.appliesTo())).toList(); - signatures.putIfAbsent(signatureTypes(testClass, sig), tc.expectedType()); + signatures.add(new DocsV3Support.TypeSignature(signatureTypes(testClass, sig), tc.expectedType())); } return signatures; } diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3Support.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3Support.java index 9d7b1720acfb5..5e3d3b2bb032b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3Support.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3Support.java @@ -64,6 +64,7 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.TreeMap; import java.util.function.Function; import java.util.function.Supplier; @@ -101,6 +102,8 @@ public abstract class DocsV3Support { public record Param(DataType dataType, List appliesTo) {} + public record TypeSignature(List argTypes, DataType returnType) {} + private static final Logger logger = LogManager.getLogger(DocsV3Support.class); private static final String DOCS_WARNING_JSON = @@ -374,7 +377,7 @@ public License.OperationMode invoke(List fieldTypes) throws Exception protected final String category; protected final String name; protected final FunctionDefinition definition; - protected final Supplier, DataType>> signatures; + protected final Supplier> signatures; protected final Callbacks callbacks; private final LicenseRequirementChecker licenseChecker; @@ -382,7 +385,7 @@ protected DocsV3Support( String category, String name, Class testClass, - Supplier, DataType>> signatures, + Supplier> signatures, Callbacks callbacks ) { this(category, name, null, testClass, signatures, callbacks); @@ -393,7 +396,7 @@ private DocsV3Support( String name, FunctionDefinition definition, Class testClass, - Supplier, DataType>> signatures, + Supplier> signatures, Callbacks callbacks ) { this.category = category; @@ -573,7 +576,7 @@ private FunctionDocsSupport(String name, Class testClass, Callbacks callbacks String name, Class testClass, FunctionDefinition definition, - Supplier, 
DataType>> signatures, + Supplier> signatures, Callbacks callbacks ) { super("functions", name, definition, testClass, signatures, callbacks); @@ -780,7 +783,7 @@ public OperatorsDocsSupport( String name, Class testClass, OperatorConfig op, - Supplier, DataType>> signatures, + Supplier> signatures, Callbacks callbacks ) { super("operators", name, testClass, signatures, callbacks); @@ -971,7 +974,7 @@ public CommandsDocsSupport( ObservabilityTier observabilityTier, Callbacks callbacks ) { - super("commands", name, testClass, Map::of, callbacks); + super("commands", name, testClass, Set::of, callbacks); this.command = command; this.licenseState = licenseState; this.observabilityTier = observabilityTier; @@ -983,7 +986,7 @@ public CommandsDocsSupport( Class testClass, LogicalPlan command, List args, - Supplier, DataType>> signatures, + Supplier> signatures, Callbacks callbacks ) { super("commands", name, testClass, signatures, callbacks); @@ -1043,12 +1046,12 @@ void renderTypes(String name, List args) thro } Map> compactedTable = new TreeMap<>(); - for (Map.Entry, DataType> sig : this.signatures.get().entrySet()) { - if (shouldHideSignature(sig.getKey(), sig.getValue())) { + for (TypeSignature sig : this.signatures.get()) { + if (shouldHideSignature(sig.argTypes(), sig.returnType())) { continue; } - String mainType = sig.getKey().getFirst().dataType().esNameIfPossible(); - String secondaryType = sig.getKey().get(1).dataType().esNameIfPossible(); + String mainType = sig.argTypes().getFirst().dataType().esNameIfPossible(); + String secondaryType = sig.argTypes().get(1).dataType().esNameIfPossible(); List secondaryTypes = compactedTable.computeIfAbsent(mainType, (k) -> new ArrayList<>()); secondaryTypes.add(secondaryType); } @@ -1092,7 +1095,7 @@ void renderParametersList(List argNames, List argDescriptions) t } void renderTypes(String name, List args) throws IOException { - boolean showResultColumn = signatures.get().values().stream().anyMatch(Objects::nonNull); + 
boolean showResultColumn = signatures.get().stream().map(TypeSignature::returnType).anyMatch(Objects::nonNull); StringBuilder header = new StringBuilder("| "); StringBuilder separator = new StringBuilder("| "); List argNames = args.stream().map(EsqlFunctionRegistry.ArgSignature::name).toList(); @@ -1106,11 +1109,11 @@ void renderTypes(String name, List args) thro } List table = new ArrayList<>(); - for (Map.Entry, DataType> sig : this.signatures.get().entrySet()) { // TODO flip to using sortedSignatures - if (shouldHideSignature(sig.getKey(), sig.getValue())) { + for (TypeSignature sig : this.signatures.get()) { // TODO flip to using sortedSignatures + if (shouldHideSignature(sig.argTypes(), sig.returnType())) { continue; } - if (sig.getKey().size() > argNames.size()) { // skip variadic [test] cases (but not those with optional parameters) + if (sig.argTypes().size() > argNames.size()) { // skip variadic [test] cases (but not those with optional parameters) continue; } table.add(getTypeRow(args, sig, argNames, showResultColumn)); @@ -1131,13 +1134,13 @@ void renderTypes(String name, List args) thro private static String getTypeRow( List args, - Map.Entry, DataType> sig, + TypeSignature sig, List argNames, boolean showResultColumn ) { StringBuilder b = new StringBuilder("| "); - for (int i = 0; i < sig.getKey().size(); i++) { - Param param = sig.getKey().get(i); + for (int i = 0; i < sig.argTypes().size(); i++) { + Param param = sig.argTypes().get(i); EsqlFunctionRegistry.ArgSignature argSignature = args.get(i); if (argSignature.mapArg()) { b.append("named parameters"); @@ -1149,9 +1152,9 @@ private static String getTypeRow( } b.append(" | "); } - b.append("| ".repeat(argNames.size() - sig.getKey().size())); + b.append("| ".repeat(argNames.size() - sig.argTypes().size())); if (showResultColumn) { - b.append(sig.getValue().esNameIfPossible()); + b.append(sig.returnType().esNameIfPossible()); b.append(" |"); } return b.toString(); @@ -1300,24 +1303,24 @@ void 
renderKibanaFunctionDefinition( builder.startArray("params"); builder.endArray(); // There should only be one return type so just use that as the example - builder.field("returnType", signatures.get().values().iterator().next().esNameIfPossible()); + builder.field("returnType", signatures.get().iterator().next().returnType().esNameIfPossible()); builder.endObject(); } else { int minArgCount = (int) args.stream().filter(a -> false == a.optional()).count(); - for (Map.Entry, DataType> sig : sortedSignatures()) { - if (variadic && sig.getKey().size() > args.size()) { + for (TypeSignature sig : sortedSignatures()) { + if (variadic && sig.argTypes().size() > args.size()) { // For variadic functions we test much longer signatures, let’s just stop at the last one continue; } - if (sig.getKey().size() < minArgCount) { - throw new IllegalArgumentException("signature " + sig.getKey() + " is missing non-optional arg for " + args); + if (sig.argTypes().size() < minArgCount) { + throw new IllegalArgumentException("signature " + sig.argTypes() + " is missing non-optional arg for " + args); } - if (shouldHideSignature(sig.getKey(), sig.getValue())) { + if (shouldHideSignature(sig.argTypes(), sig.returnType())) { continue; } builder.startObject(); builder.startArray("params"); - for (int i = 0; i < sig.getKey().size(); i++) { + for (int i = 0; i < sig.argTypes().size(); i++) { EsqlFunctionRegistry.ArgSignature arg = args.get(i); builder.startObject(); builder.field("name", arg.name()); @@ -1332,7 +1335,7 @@ void renderKibanaFunctionDefinition( .collect(Collectors.joining(", ")) ); } else { - builder.field("type", sig.getKey().get(i).dataType().esNameIfPossible()); + builder.field("type", sig.argTypes().get(i).dataType().esNameIfPossible()); } builder.field("optional", arg.optional()); String cleanedParamDesc = removeAppliesToBlocks(arg.description()); @@ -1340,12 +1343,12 @@ void renderKibanaFunctionDefinition( builder.endObject(); } builder.endArray(); - license = 
licenseChecker.invoke(sig.getKey().stream().map(Param::dataType).toList()); + license = licenseChecker.invoke(sig.argTypes().stream().map(Param::dataType).toList()); if (license != null && license != License.OperationMode.BASIC) { builder.field("license", license.toString()); } builder.field("variadic", variadic); - builder.field("returnType", sig.getValue().esNameIfPossible()); + builder.field("returnType", sig.returnType().esNameIfPossible()); builder.endObject(); } } @@ -1388,23 +1391,23 @@ private static String removeAppliesToBlocks(String content) { return content.replaceAll("\\s*\\{applies_to\\}`[^`]*`\\s*", ""); } - private List, DataType>> sortedSignatures() { - List, DataType>> sortedSignatures = new ArrayList<>(signatures.get().entrySet()); + private List sortedSignatures() { + List sortedSignatures = new ArrayList<>(signatures.get()); sortedSignatures.sort((lhs, rhs) -> { - int maxlen = Math.max(lhs.getKey().size(), rhs.getKey().size()); + int maxlen = Math.max(lhs.argTypes().size(), rhs.argTypes().size()); for (int i = 0; i < maxlen; i++) { - if (lhs.getKey().size() <= i) { + if (lhs.argTypes().size() <= i) { return -1; } - if (rhs.getKey().size() <= i) { + if (rhs.argTypes().size() <= i) { return 1; } - int c = lhs.getKey().get(i).dataType().esNameIfPossible().compareTo(rhs.getKey().get(i).dataType().esNameIfPossible()); + int c = lhs.argTypes().get(i).dataType().esNameIfPossible().compareTo(rhs.argTypes().get(i).dataType().esNameIfPossible()); if (c != 0) { return c; } } - return lhs.getValue().esNameIfPossible().compareTo(rhs.getValue().esNameIfPossible()); + return lhs.returnType().esNameIfPossible().compareTo(rhs.returnType().esNameIfPossible()); }); return sortedSignatures; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3SupportTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3SupportTests.java index 4ad83f3495294..39f48fa68521f 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3SupportTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/DocsV3SupportTests.java @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import static org.hamcrest.Matchers.equalTo; @@ -422,8 +423,8 @@ public TestClass(Source source, @Param(name = "str", type = { "keyword", "text" super(source, List.of(field)); } - public static Map, DataType> signatures() { - return Map.of(List.of(new DocsV3Support.Param(DataType.KEYWORD, List.of())), DataType.LONG); + public static Set signatures() { + return Set.of(new DocsV3Support.TypeSignature(List.of(new DocsV3Support.Param(DataType.KEYWORD, List.of())), DataType.LONG)); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucketTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucketTests.java new file mode 100644 index 0000000000000..d44dc58436adc --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucketTests.java @@ -0,0 +1,155 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.grouping; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.time.DateUtils; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.DocsV3Support; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; + +import java.time.Duration; +import java.time.Instant; +import java.time.Period; +import java.util.ArrayList; +import java.util.List; +import java.util.function.LongSupplier; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +public class TBucketTests extends AbstractScalarFunctionTestCase { + public TBucketTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + dateCasesWithSpan( + suppliers, + "fixed date with period", + () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00.00Z"), + DataType.DATE_PERIOD, + Period.ofYears(1), + "[YEAR_OF_CENTURY in Z][fixed to midnight]" + ); + dateCasesWithSpan( + suppliers, + "fixed date with duration", + () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z"), + DataType.TIME_DURATION, + Duration.ofDays(1L), + "[86400000 in 
Z][fixed]" + ); + dateNanosCasesWithSpan( + suppliers, + "fixed date nanos with period", + () -> DateUtils.toLong(Instant.parse("2023-01-01T00:00:00.00Z")), + DataType.DATE_PERIOD, + Period.ofYears(1) + ); + dateNanosCasesWithSpan( + suppliers, + "fixed date nanos with duration", + () -> DateUtils.toLong(Instant.parse("2023-02-17T09:00:00.00Z")), + DataType.TIME_DURATION, + Duration.ofDays(1L) + ); + return parameterSuppliersFromTypedData(suppliers); + } + + private static void dateCasesWithSpan( + List suppliers, + String name, + LongSupplier date, + DataType spanType, + Object span, + String spanStr + ) { + suppliers.add(new TestCaseSupplier(name, List.of(spanType, DataType.DATETIME), () -> { + List args = new ArrayList<>(); + args.add(new TestCaseSupplier.TypedData(span, spanType, "buckets").forceLiteral()); + args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATETIME, "@timestamp")); + + return new TestCaseSupplier.TestCase( + args, + "DateTruncDatetimeEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding" + spanStr + "]", + DataType.DATETIME, + resultsMatcher(args) + ); + })); + } + + private static void dateNanosCasesWithSpan( + List suppliers, + String name, + LongSupplier date, + DataType spanType, + Object span + ) { + suppliers.add(new TestCaseSupplier(name, List.of(spanType, DataType.DATE_NANOS), () -> { + List args = new ArrayList<>(); + args.add(new TestCaseSupplier.TypedData(span, spanType, "buckets").forceLiteral()); + args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATE_NANOS, "@timestamp")); + return new TestCaseSupplier.TestCase( + args, + Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["), + DataType.DATE_NANOS, + resultsMatcher(args) + ); + })); + } + + private static Matcher resultsMatcher(List typedData) { + if (typedData.get(1).type() == DataType.DATE_NANOS) { + long nanos = ((Number) typedData.get(1).data()).longValue(); + long expected = 
DateUtils.toNanoSeconds( + Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(DateUtils.toMilliSeconds(nanos)) + ); + LogManager.getLogger(getTestClass()).info("Expected: " + DateUtils.toInstant(expected)); + LogManager.getLogger(getTestClass()).info("Input: " + DateUtils.toInstant(nanos)); + return equalTo(expected); + } + + // For DATETIME, we use the millis value directly + long millis = ((Number) typedData.get(1).data()).longValue(); + long expected = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis); + LogManager.getLogger(getTestClass()).info("Expected: " + Instant.ofEpochMilli(expected)); + LogManager.getLogger(getTestClass()).info("Input: " + Instant.ofEpochMilli(millis)); + return equalTo(expected); + } + + @Override + protected Expression build(Source source, List args) { + return new TBucket(source, args.get(0), args.get(1)); + } + + @Override + protected boolean canSerialize() { + return false; + } + + public static List signatureTypes(List params) { + assertThat(params, hasSize(2)); + assertThat(params.get(1).dataType(), anyOf(equalTo(DataType.DATE_NANOS), equalTo(DataType.DATETIME))); + return List.of(params.get(0)); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/CastOperatorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/CastOperatorTests.java index af53a1c666859..40cb11ea511f0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/CastOperatorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/CastOperatorTests.java @@ -9,15 +9,13 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.type.DataType; import 
org.elasticsearch.xpack.esql.expression.function.DocsV3Support; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.junit.AfterClass; -import java.util.List; -import java.util.Map; +import java.util.Set; public class CastOperatorTests extends ESTestCase { public void testDummy() { @@ -46,9 +44,9 @@ public static void renderDocs() throws Exception { docs.renderDocs(); } - public static Map, DataType> signatures() { + public static Set signatures() { // The cast operator cannot produce sensible signatures unless we consider the type as an extra parameter - return Map.of(); + return Set.of(); } /** diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java index ee3ea8112e738..0a42b1962bfe1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java @@ -51,6 +51,7 @@ public abstract class AbstractLogicalPlanOptimizerTests extends ESTestCase { protected static Map metricMapping; protected static Analyzer metricsAnalyzer; protected static Analyzer multiIndexAnalyzer; + protected static Analyzer sampleDataIndexAnalyzer; protected static EnrichResolution enrichResolution; @@ -173,6 +174,21 @@ public static void init() { ), TEST_VERIFIER ); + + var sampleDataMapping = loadMapping("mapping-sample_data.json"); + var sampleDataIndex = IndexResolution.valid( + new EsIndex("sample_data", sampleDataMapping, Map.of("sample_data", IndexMode.STANDARD)) + ); + sampleDataIndexAnalyzer = new Analyzer( + new AnalyzerContext( + EsqlTestUtils.TEST_CFG, + new EsqlFunctionRegistry(), 
+ sampleDataIndex, + enrichResolution, + emptyInferenceResolution() + ), + TEST_VERIFIER + ); } protected LogicalPlan optimizedPlan(String query) { @@ -215,6 +231,11 @@ protected LogicalPlan planMultiIndex(String query) { return logicalOptimizer.optimize(multiIndexAnalyzer.analyze(parser.createStatement(query, EsqlTestUtils.TEST_CFG))); } + protected LogicalPlan planSample(String query) { + var analyzed = sampleDataIndexAnalyzer.analyze(parser.createStatement(query, EsqlTestUtils.TEST_CFG)); + return logicalOptimizer.optimize(analyzed); + } + @Override protected List filteredWarnings() { return withDefaultLimitWarning(super.filteredWarnings()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index fff9abe0ca2f0..a780d483c74ae 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8396,4 +8396,81 @@ public List output() { assertThat(e.getMessage(), containsString("Output has changed from")); } + public void testTranslateDataGroupedByTBucket() { + assumeTrue("requires TBUCKET capability enabled", EsqlCapabilities.Cap.TBUCKET.isEnabled()); + var query = """ + FROM sample_data + | STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 hour) + | SORT min + """; + + var plan = planSample(query); + var topN = as(plan, TopN.class); + + Aggregate aggregate = as(topN.child(), Aggregate.class); + assertThat(aggregate, not(instanceOf(TimeSeriesAggregate.class))); + + assertThat(aggregate.groupings(), hasSize(1)); + assertThat(aggregate.groupings().get(0), instanceOf(ReferenceAttribute.class)); + assertThat(as(aggregate.groupings().getFirst(), ReferenceAttribute.class).name(), equalTo("bucket")); + + 
assertThat(aggregate.aggregates(), hasSize(3)); + List aggregates = aggregate.aggregates(); + assertThat(aggregates, hasSize(3)); + Alias a = as(aggregates.get(0), Alias.class); + assertEquals("min", a.name()); + Min min = as(a.child(), Min.class); + FieldAttribute fa = as(min.field(), FieldAttribute.class); + assertEquals("@timestamp", fa.name()); + a = as(aggregates.get(1), Alias.class); + assertEquals("max", a.name()); + Max max = as(a.child(), Max.class); + fa = as(max.field(), FieldAttribute.class); + assertEquals("@timestamp", fa.name()); + ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); + assertEquals("bucket", ra.name()); + assertThat(Expressions.attribute(aggregate.groupings().get(0)).id(), equalTo(aggregate.aggregates().get(2).id())); + Eval eval = as(aggregate.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + Alias bucketAlias = eval.fields().get(0); + assertThat(bucketAlias.child(), instanceOf(Bucket.class)); + assertThat(Expressions.attribute(bucketAlias).id(), equalTo(aggregate.aggregates().get(2).id())); + Bucket bucket = as(Alias.unwrap(bucketAlias), Bucket.class); + assertThat(Expressions.attribute(bucket.field()).name(), equalTo("@timestamp")); + assertThat(bucket.children().get(0), instanceOf(FieldAttribute.class)); + assertThat(((FieldAttribute) bucket.children().get(0)).name(), equalTo("@timestamp")); + assertThat(bucket.children().get(1), instanceOf(Literal.class)); + assertThat(((Literal) bucket.children().get(1)).value(), equalTo(Duration.ofHours(1))); + } + + public void testTranslateMetricsGroupedByTBucketInTSMode() { + assumeTrue("requires METRICS_COMMAND capability enabled", EsqlCapabilities.Cap.METRICS_COMMAND.isEnabled()); + assumeTrue("requires TBUCKET capability enabled", EsqlCapabilities.Cap.TBUCKET.isEnabled()); + + var query = "TS k8s | STATS sum(rate(network.total_bytes_in)) BY tbucket(1h)"; + var plan = logicalOptimizer.optimize(metricsAnalyzer.analyze(parser.createStatement(query, 
EsqlTestUtils.TEST_CFG))); + Limit limit = as(plan, Limit.class); + Aggregate finalAgg = as(limit.child(), Aggregate.class); + assertThat(finalAgg, not(instanceOf(TimeSeriesAggregate.class))); + assertThat(finalAgg.aggregates(), hasSize(2)); + TimeSeriesAggregate aggsByTsid = as(finalAgg.child(), TimeSeriesAggregate.class); + assertThat(aggsByTsid.aggregates(), hasSize(2)); // _tsid is dropped + assertNotNull(aggsByTsid.timeBucket()); + assertThat(aggsByTsid.timeBucket().buckets().fold(FoldContext.small()), equalTo(Duration.ofHours(1))); + Eval eval = as(aggsByTsid.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + EsRelation relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexMode(), equalTo(IndexMode.TIME_SERIES)); + + Sum sum = as(Alias.unwrap(finalAgg.aggregates().get(0)), Sum.class); + assertThat(Expressions.attribute(sum.field()).id(), equalTo(aggsByTsid.aggregates().get(0).id())); + assertThat(finalAgg.groupings(), hasSize(1)); + assertThat(Expressions.attribute(finalAgg.groupings().get(0)).id(), equalTo(aggsByTsid.aggregates().get(1).id())); + + Rate rate = as(Alias.unwrap(aggsByTsid.aggregates().get(0)), Rate.class); + assertThat(Expressions.attribute(rate.field()).name(), equalTo("network.total_bytes_in")); + assertThat(Expressions.attribute(aggsByTsid.groupings().get(1)).id(), equalTo(eval.fields().get(0).id())); + Bucket bucket = as(Alias.unwrap(eval.fields().get(0)), Bucket.class); + assertThat(Expressions.attribute(bucket.field()).name(), equalTo("@timestamp")); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 916b14c8de9e9..734d14337175a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ 
-2985,6 +2985,74 @@ public void testForkBeforeGrok() { | KEEP emp_no, gender, x, a, b, _fork""", Set.of("emp_no", "gender", "gender.*", "emp_no.*")); } + public void testImplicitFieldNames() { + assertFieldNames(""" + FROM sample_data + | STATS x = 1 year + TBUCKET(1 day) BY b1d = TBUCKET(1 day)""", Set.of("@timestamp", "@timestamp.*")); + } + + public void testKeepTimestampBeforeStats() { + assertFieldNames(""" + FROM sample_data + | WHERE event_duration > 0 + | KEEP @timestamp, client_ip + | STATS count = COUNT(*), avg_dur = AVG(event_duration) BY hour = TBUCKET(1h), client_ip + | SORT hour ASC + """, Set.of("@timestamp", "@timestamp.*", "client_ip", "client_ip.*", "event_duration", "event_duration.*")); + } + + public void testKeepAtWildcardBeforeStats() { + assertFieldNames(""" + FROM sample_data + | WHERE message LIKE "error%" + | KEEP @*, message + | STATS errors = COUNT() BY day = TBUCKET(1d), message + | SORT day ASC + """, Set.of("@timestamp", "@timestamp.*", "@*", "message", "message.*")); + } + + public void testKeepWildcardBeforeStats() { + assertFieldNames(""" + FROM sample_data + | WHERE client_ip IS NOT NULL + | KEEP *stamp*, client_ip + | STATS p95 = PERCENTILE(event_duration, 95) BY ten_min = TBUCKET(10min), client_ip + | SORT ten_min ASC + """, Set.of("@timestamp", "@timestamp.*", "client_ip", "client_ip.*", "event_duration", "event_duration.*", "*stamp*")); + } + + public void testStatsChainingWithTimestampCarriedForward() { + assertFieldNames(""" + FROM sample_data + | KEEP @timestamp, event_duration + | STATS day_count = COUNT(), day_p95 = PERCENTILE(event_duration, 95) BY day = TBUCKET(1d), @timestamp + | WHERE day_count > 0 + | STATS hour_count = COUNT(), hour_p95 = PERCENTILE(day_p95, 95) BY hour = TBUCKET(1h), day + | SORT day ASC, hour ASC + """, Set.of("@timestamp", "@timestamp.*", "event_duration", "event_duration.*")); + } + + public void testStatsChainingWithTimestampEval() { + assertFieldNames(""" + FROM sample_data + | KEEP 
@timestamp, event_duration, message + | EVAL t = @timestamp + | STATS total = COUNT(*), med = MEDIAN(event_duration) BY d = TBUCKET(1d), message, t + | WHERE total > 5 + | STATS day_total = SUM(total), hour_med = MEDIAN(med) BY h = TBUCKET(1h), message + """, Set.of("@timestamp", "@timestamp.*", "event_duration", "event_duration.*", "message", "message.*")); + } + + public void testStatsChainingWithTimestampCarriedForwardAsByKey() { + assertFieldNames(""" + FROM sample_data + | KEEP @timestamp, client_ip, event_duration + | STATS reqs = COUNT(), max_dur = MAX(event_duration) BY day = TBUCKET(1d), client_ip, @timestamp + | WHERE max_dur > 1000 + | STATS spikes = COUNT() BY hour = TBUCKET(1h), client_ip, day + """, Set.of("@timestamp", "@timestamp.*", "event_duration", "event_duration.*", "client_ip", "client_ip.*")); + } + private void assertFieldNames(String query, Set expected) { assertFieldNames(query, new EnrichResolution(), expected, Set.of()); }