diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index c39bc0dcd2878..575a6457804a6 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -1307,6 +1307,142 @@ Each repository type may also include other statistics about the repositories of ==== +`ccs`:: +(object) Contains information relating to <> settings and activity in the cluster. ++ +.Properties of `ccs` +[%collapsible%open] +===== + + +`_search`::: +(object) Contains the telemetry information about the <> usage in the cluster. ++ +.Properties of `_search` +[%collapsible%open] +====== +`total`::: +(integer) The total number of {ccs} requests that have been executed by the cluster. + +`success`::: +(integer) The total number of {ccs} requests that have been successfully executed by the cluster. + +`skipped`::: +(integer) The total number of {ccs} requests (successful or failed) that had at least one remote cluster skipped. + +`took`::: +(object) Contains statistics about the time taken to execute {ccs} requests. ++ +.Properties of `took` +[%collapsible%open] +======= +`max`::: +(integer) The maximum time taken to execute a {ccs} request, in milliseconds. + +`avg`::: +(integer) The average time taken to execute a {ccs} request, in milliseconds. + +`p90`::: +(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. +======= + +`took_mrt_true`:: +(object) Contains statistics about the time taken to execute {ccs} requests for which the +<> setting was set to `true`. ++ +.Properties of `took_mrt_true` +[%collapsible%open] +======= +`max`::: +(integer) The maximum time taken to execute a {ccs} request, in milliseconds. + +`avg`::: +(integer) The average time taken to execute a {ccs} request, in milliseconds. + +`p90`::: +(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. 
+======= + +`took_mrt_false`:: +(object) Contains statistics about the time taken to execute {ccs} requests for which the +<> setting was set to `false`. ++ +.Properties of `took_mrt_false` +[%collapsible%open] +======= +`max`::: +(integer) The maximum time taken to execute a {ccs} request, in milliseconds. + +`avg`::: +(integer) The average time taken to execute a {ccs} request, in milliseconds. + +`p90`::: +(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. +======= + +`remotes_per_search_max`:: +(integer) The maximum number of remote clusters that were queried in a single {ccs} request. + +`remotes_per_search_avg`:: +(float) The average number of remote clusters that were queried in a single {ccs} request. + +`failure_reasons`:: +(object) Contains statistics about the reasons for {ccs} request failures. +The keys are the failure reason names and the values are the number of requests that failed for that reason. + +`features`:: +(object) Contains statistics about the features used in {ccs} requests. The keys are the names of the search features, +and the values are the number of requests that used that feature. A single request can use more than one feature +(e.g. both `async` and `wildcard`). Known features are: + +* `async` - <> + +* `mrt` - <> setting was set to `true`. + +* `wildcard` - <> for indices with wildcards was used in the search request. + +`clients`:: +(object) Contains statistics about the clients that executed {ccs} requests. +The keys are the names of the clients, and the values are the number of requests that were executed by that client. +Only known clients (such as `kibana` or `elasticsearch`) are counted. + +`clusters`:: +(object) Contains statistics about the clusters that were queried in {ccs} requests. +The keys are cluster names, and the values are per-cluster telemetry data. +This also includes the local cluster itself, which uses the name `(local)`. 
++ +.Properties of per-cluster data: +[%collapsible%open] +======= +`total`::: +(integer) The total number of successful (not skipped) {ccs} requests that were executed against this cluster. +This may include requests where partial results were returned, but not requests in which the cluster has been skipped entirely. + +`skipped`::: +(integer) The total number of {ccs} requests for which this cluster was skipped. + +`took`::: +(object) Contains statistics about the time taken to execute requests against this cluster. ++ +.Properties of `took` +[%collapsible%open] +======== +`max`::: +(integer) The maximum time taken to execute a {ccs} request, in milliseconds. + +`avg`::: +(integer) The average time taken to execute a {ccs} request, in milliseconds. + +`p90`::: +(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. +======== + +======= + +====== + +===== + [[cluster-stats-api-example]] ==== {api-examples-title} @@ -1607,7 +1743,35 @@ The API returns the following response: }, "repositories": { ... - } + }, + "ccs": { + "_search": { + "total": 7, + "success": 7, + "skipped": 0, + "took": { + "max": 36, + "avg": 20, + "p90": 33 + }, + "took_mrt_true": { + "max": 33, + "avg": 15, + "p90": 33 + }, + "took_mrt_false": { + "max": 36, + "avg": 26, + "p90": 36 + }, + "remotes_per_search_max": 3, + "remotes_per_search_avg": 2.0, + "failure_reasons": { ... }, + "features": { ... }, + "clients": { ... }, + "clusters": { ... 
} + } + } } -------------------------------------------------- // TESTRESPONSE[s/"plugins": \[[^\]]*\]/"plugins": $body.$_path/] @@ -1618,10 +1782,15 @@ The API returns the following response: // TESTRESPONSE[s/"packaging_types": \[[^\]]*\]/"packaging_types": $body.$_path/] // TESTRESPONSE[s/"snapshots": \{[^\}]*\}/"snapshots": $body.$_path/] // TESTRESPONSE[s/"repositories": \{[^\}]*\}/"repositories": $body.$_path/] +// TESTRESPONSE[s/"clusters": \{[^\}]*\}/"clusters": $body.$_path/] +// TESTRESPONSE[s/"features": \{[^\}]*\}/"features": $body.$_path/] +// TESTRESPONSE[s/"clients": \{[^\}]*\}/"clients": $body.$_path/] +// TESTRESPONSE[s/"failure_reasons": \{[^\}]*\}/"failure_reasons": $body.$_path/] // TESTRESPONSE[s/"field_types": \[[^\]]*\]/"field_types": $body.$_path/] // TESTRESPONSE[s/"runtime_field_types": \[[^\]]*\]/"runtime_field_types": $body.$_path/] // TESTRESPONSE[s/"search": \{[^\}]*\}/"search": $body.$_path/] -// TESTRESPONSE[s/: true|false/: $body.$_path/] +// TESTRESPONSE[s/"remotes_per_search_avg": [.0-9]+/"remotes_per_search_avg": $body.$_path/] +// TESTRESPONSE[s/: (true|false)/: $body.$_path/] // TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/] // TESTRESPONSE[s/: "[^"]*"/: $body.$_path/] // These replacements do a few things: diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 6640b8b5eac8f..2bd1d79afd52d 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -207,6 +207,7 @@ static TransportVersion def(int id) { public static final TransportVersion UNASSIGNED_PRIMARY_COUNT_ON_CLUSTER_HEALTH = def(8_737_00_0); public static final TransportVersion ESQL_AGGREGATE_EXEC_TRACKS_INTERMEDIATE_ATTRS = def(8_738_00_0); + public static final TransportVersion CCS_TELEMETRY_STATS = def(8_739_00_0); /* * STOP! READ THIS FIRST! 
No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java index fe1da86dd54c7..68fd4c2a1529a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java @@ -277,7 +277,7 @@ public int hashCode() { */ public void add(CCSTelemetrySnapshot stats) { // This should be called in ClusterStatsResponse ctor only, so we don't need to worry about concurrency - if (stats.totalCount == 0) { + if (stats == null || stats.totalCount == 0) { // Just ignore the empty stats. // This could happen if the node is brand new or if the stats are not available, e.g. because it runs an old version. return; @@ -315,7 +315,7 @@ public void add(CCSTelemetrySnapshot stats) { * "p90": 2570 * } */ - public static void publishLatency(XContentBuilder builder, String name, LongMetricValue took) throws IOException { + private static void publishLatency(XContentBuilder builder, String name, LongMetricValue took) throws IOException { builder.startObject(name); { builder.field("max", took.max()); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java index 60766bd4068e3..6016378aa8867 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java @@ -175,7 +175,7 @@ public static class PerClusterCCSTelemetry { // The number of successful (not skipped) requests to this cluster. 
private final LongAdder count; private final LongAdder skippedCount; - // This is only over the successful requetss, skipped ones do not count here. + // This is only over the successful requests, skipped ones do not count here. private final LongMetric took; PerClusterCCSTelemetry(String clusterAlias) { diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsNodeResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsNodeResponse.java index b48295dc8b3eb..732eb2ec2dcc2 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsNodeResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsNodeResponse.java @@ -30,6 +30,7 @@ public class ClusterStatsNodeResponse extends BaseNodeResponse { private final ClusterHealthStatus clusterStatus; private final SearchUsageStats searchUsageStats; private final RepositoryUsageStats repositoryUsageStats; + private final CCSTelemetrySnapshot ccsMetrics; public ClusterStatsNodeResponse(StreamInput in) throws IOException { super(in); @@ -47,6 +48,11 @@ public ClusterStatsNodeResponse(StreamInput in) throws IOException { } else { repositoryUsageStats = RepositoryUsageStats.EMPTY; } + if (in.getTransportVersion().onOrAfter(TransportVersions.CCS_TELEMETRY_STATS)) { + ccsMetrics = new CCSTelemetrySnapshot(in); + } else { + ccsMetrics = new CCSTelemetrySnapshot(); + } } public ClusterStatsNodeResponse( @@ -56,7 +62,8 @@ public ClusterStatsNodeResponse( NodeStats nodeStats, ShardStats[] shardsStats, SearchUsageStats searchUsageStats, - RepositoryUsageStats repositoryUsageStats + RepositoryUsageStats repositoryUsageStats, + CCSTelemetrySnapshot ccsTelemetrySnapshot ) { super(node); this.nodeInfo = nodeInfo; @@ -65,6 +72,7 @@ public ClusterStatsNodeResponse( this.clusterStatus = clusterStatus; this.searchUsageStats = Objects.requireNonNull(searchUsageStats); this.repositoryUsageStats = 
Objects.requireNonNull(repositoryUsageStats); + this.ccsMetrics = ccsTelemetrySnapshot; } public NodeInfo nodeInfo() { @@ -95,6 +103,10 @@ public RepositoryUsageStats repositoryUsageStats() { return repositoryUsageStats; } + public CCSTelemetrySnapshot getCcsMetrics() { + return ccsMetrics; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -108,5 +120,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.REPOSITORIES_TELEMETRY)) { repositoryUsageStats.writeTo(out); } // else just drop these stats, ok for bwc + if (out.getTransportVersion().onOrAfter(TransportVersions.CCS_TELEMETRY_STATS)) { + ccsMetrics.writeTo(out); + } } + } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index b6dd40e8c8b79..267db92496f76 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -24,6 +24,8 @@ import java.util.List; import java.util.Locale; +import static org.elasticsearch.action.search.TransportSearchAction.CCS_TELEMETRY_FEATURE_FLAG; + public class ClusterStatsResponse extends BaseNodesResponse implements ToXContentFragment { final ClusterStatsNodes nodesStats; @@ -31,6 +33,8 @@ public class ClusterStatsResponse extends BaseNodesResponse ccsMetrics.add(node.getCcsMetrics())); this.status = status; this.clusterSnapshotStats = clusterSnapshotStats; @@ -90,6 +96,10 @@ public ClusterStatsIndices getIndicesStats() { return indicesStats; } + public CCSTelemetrySnapshot getCcsMetrics() { + return ccsMetrics; + } + @Override public void writeTo(StreamOutput out) throws IOException { TransportAction.localOnly(); @@ -125,6 +135,12 @@ public XContentBuilder 
toXContent(XContentBuilder builder, Params params) throws builder.field("repositories"); repositoryUsageStats.toXContent(builder, params); + if (CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { + builder.startObject("ccs"); + ccsMetrics.toXContent(builder, params); + builder.endObject(); + } + return builder; } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 1912de3cfa4d2..66cf627ce066e 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -81,6 +81,7 @@ public class TransportClusterStatsAction extends TransportNodesAction< private final IndicesService indicesService; private final RepositoriesService repositoriesService; private final SearchUsageHolder searchUsageHolder; + private final CCSUsageTelemetry ccsUsageHolder; private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; @@ -108,6 +109,7 @@ public TransportClusterStatsAction( this.indicesService = indicesService; this.repositoriesService = repositoriesService; this.searchUsageHolder = usageService.getSearchUsageHolder(); + this.ccsUsageHolder = usageService.getCcsUsageHolder(); this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); } @@ -249,6 +251,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq final SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats(); final RepositoryUsageStats repositoryUsageStats = repositoriesService.getUsageStats(); + final CCSTelemetrySnapshot ccsTelemetry = ccsUsageHolder.getCCSTelemetrySnapshot(); return new 
ClusterStatsNodeResponse( nodeInfo.getNode(), @@ -257,7 +260,8 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq nodeStats, shardsStats.toArray(new ShardStats[shardsStats.size()]), searchUsageStats, - repositoryUsageStats + repositoryUsageStats, + ccsTelemetry ); } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 23ff692da4887..30faae9c1a5fb 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -127,7 +127,7 @@ public class TransportSearchAction extends HandledTransportAction SHARD_COUNT_LIMIT_SETTING = Setting.longSetting( diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/VersionStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/VersionStatsTests.java index 20eae9833e4b0..b6f1ac46b4250 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/VersionStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/VersionStatsTests.java @@ -128,7 +128,8 @@ public void testCreation() { null, new ShardStats[] { shardStats }, new SearchUsageStats(), - RepositoryUsageStats.EMPTY + RepositoryUsageStats.EMPTY, + null ); stats = VersionStats.of(metadata, Collections.singletonList(nodeResponse)); diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java index 4a695f7c51e4c..279fec8cc99af 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java +++ 
b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java @@ -754,7 +754,38 @@ public void testToXContent() throws IOException { }, "repositories": {} }, - "repositories": {} + "repositories": {}, + "ccs": { + "_search": { + "total": 0, + "success": 0, + "skipped": 0, + "took": { + "max": 0, + "avg": 0, + "p90": 0 + }, + "took_mrt_true": { + "max": 0, + "avg": 0, + "p90": 0 + }, + "took_mrt_false": { + "max": 0, + "avg": 0, + "p90": 0 + }, + "remotes_per_search_max": 0, + "remotes_per_search_avg": 0.0, + "failure_reasons": { + }, + "features": { + }, + "clients": { + }, + "clusters": {} + } + } }, "cluster_state": { "nodes_hash": 1314980060,