From 47b55a66b5021c9b4d288aee1b4c00b21f0fdbe8 Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Thu, 11 Jul 2024 12:45:19 +0100 Subject: [PATCH 1/6] Control Plane latency recommendations for reliable clusters --- modules/control-plane-latency.adoc | 20 +++++++++++++++++++ ...recommendations-for-reliable-clusters.adoc | 11 ++++++++++ 2 files changed, 31 insertions(+) create mode 100644 modules/control-plane-latency.adoc create mode 100644 scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc diff --git a/modules/control-plane-latency.adoc b/modules/control-plane-latency.adoc new file mode 100644 index 000000000000..f08e5a166f71 --- /dev/null +++ b/modules/control-plane-latency.adoc @@ -0,0 +1,20 @@ +// Module included in the following assemblies: +// +// * scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc + +:_mod-docs-content-type: CONCEPT +[id="control-plane-latency_{context}"] += Recommended Control Plane latency for reliable clusters + +Latency between each of the control plane nodes must be less than 15ms to ensure a well performing and reliable cluster. Some of the metrics to keep track of include etcd gRPC requests latency, fysnc latency and any critical alerts. 
Here are the PromQL queries: + ++ +[source,terminal] +---- +histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) < 0.15 + +avg_over_time(histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))[10m:]) < 1 + +ALERTS{severity="critical", alertstate="firing"} > 0 + +---- diff --git a/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc b/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc new file mode 100644 index 000000000000..d08c8084a515 --- /dev/null +++ b/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc @@ -0,0 +1,11 @@ +:_mod-docs-content-type: ASSEMBLY +[id="control-plane-latency-recommendations-for-reliable-clusters"] += Control Plane latency recommendations for reliable clusters +include::_attributes/common-attributes.adoc[] +:context: control-plane-latency-recommendations-for-reliable-clusters + +toc::[] + +This topic provides recommended Control Plane latency for reliable clusters. 
+ +include::modules/control-plane-latency.adoc[leveloffset=+1] \ No newline at end of file From abbee2cc83d9543c11d3f74fd2a6ef1db5b1171b Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Tue, 25 Feb 2025 10:18:03 -0500 Subject: [PATCH 2/6] Update modules/control-plane-latency.adoc Co-authored-by: Steven Smith <77019920+stevsmit@users.noreply.github.com> --- modules/control-plane-latency.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/control-plane-latency.adoc b/modules/control-plane-latency.adoc index f08e5a166f71..c9e41c72cde9 100644 --- a/modules/control-plane-latency.adoc +++ b/modules/control-plane-latency.adoc @@ -4,7 +4,7 @@ :_mod-docs-content-type: CONCEPT [id="control-plane-latency_{context}"] -= Recommended Control Plane latency for reliable clusters += Recommended control plane latency for cluster reliability Latency between each of the control plane nodes must be less than 15ms to ensure a well performing and reliable cluster. Some of the metrics to keep track of include etcd gRPC requests latency, fysnc latency and any critical alerts. 
Here are the PromQL queries: From e2c03307dac1906e4bc58db307883eedfd08489c Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Tue, 25 Feb 2025 10:18:13 -0500 Subject: [PATCH 3/6] Update scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc Co-authored-by: Steven Smith <77019920+stevsmit@users.noreply.github.com> --- ...rol-plane-latency-recommendations-for-reliable-clusters.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc b/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc index d08c8084a515..82cbcf37a89f 100644 --- a/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc +++ b/scalability_and_performance/control-plane-latency-recommendations-for-reliable-clusters.adoc @@ -1,6 +1,6 @@ :_mod-docs-content-type: ASSEMBLY [id="control-plane-latency-recommendations-for-reliable-clusters"] -= Control Plane latency recommendations for reliable clusters += Control plane latency recommendations for reliable clusters include::_attributes/common-attributes.adoc[] :context: control-plane-latency-recommendations-for-reliable-clusters From 1169c18cc0dc8734e3fc781a75f10ad1ecee0fd4 Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Tue, 25 Feb 2025 10:31:00 -0500 Subject: [PATCH 4/6] Update modules/control-plane-latency.adoc Co-authored-by: Steven Smith <77019920+stevsmit@users.noreply.github.com> --- modules/control-plane-latency.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/control-plane-latency.adoc b/modules/control-plane-latency.adoc index c9e41c72cde9..36e858c0446e 100644 --- a/modules/control-plane-latency.adoc +++ b/modules/control-plane-latency.adoc @@ -9,7 +9,7 @@ Latency between each of the control plane nodes must be less than 15ms to ensure a well performing and reliable cluster. 
Some of the metrics to keep track of include etcd gRPC requests latency, fysnc latency and any critical alerts. Here are the PromQL queries: + -[source,terminal] +[source,promql] ---- histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) < 0.15 From 936bc5dd3751c2fa3ce383fd1403508695ab58ab Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Tue, 25 Feb 2025 11:43:28 -0500 Subject: [PATCH 5/6] making suggested changes --- modules/control-plane-latency.adoc | 6 +++--- ...latency-recommendations-for-reliable-clusters.adoc | 11 +++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 scalability_and_performance/recommended-performance-scale-practices/control-plane-latency-recommendations-for-reliable-clusters.adoc diff --git a/modules/control-plane-latency.adoc b/modules/control-plane-latency.adoc index 36e858c0446e..f9a33926fcd9 100644 --- a/modules/control-plane-latency.adoc +++ b/modules/control-plane-latency.adoc @@ -6,10 +6,10 @@ [id="control-plane-latency_{context}"] = Recommended control plane latency for cluster reliability -Latency between each of the control plane nodes must be less than 15ms to ensure a well performing and reliable cluster. Some of the metrics to keep track of include etcd gRPC requests latency, fysnc latency and any critical alerts. Here are the PromQL queries: +Latency between each of the control plane nodes must be less than 15 ms to ensure a well-performing and reliable cluster. Some of the metrics to keep track of include etcd gRPC request latency, fsync latency, and any critical alerts. 
-+ -[source,promql] +The following PromQL queries can help monitor latency requirements: +[source,terminal] ---- histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) < 0.15 diff --git a/scalability_and_performance/recommended-performance-scale-practices/control-plane-latency-recommendations-for-reliable-clusters.adoc b/scalability_and_performance/recommended-performance-scale-practices/control-plane-latency-recommendations-for-reliable-clusters.adoc new file mode 100644 index 000000000000..d1dee9950afb --- /dev/null +++ b/scalability_and_performance/recommended-performance-scale-practices/control-plane-latency-recommendations-for-reliable-clusters.adoc @@ -0,0 +1,11 @@ +:_mod-docs-content-type: ASSEMBLY +[id="recommended-control-plane-latency-recommendations-for-reliable-clusters"] += Recommended control plane latency for reliable clusters +include::_attributes/common-attributes.adoc[] +:context: recommended-control-plane-latency-recommendations-for-reliable-clusters + +toc::[] + +This topic provides the recommended control plane latency to ensure cluster reliability in {product-title}. 
+ +include::modules/control-plane-latency.adoc[leveloffset=+1] \ No newline at end of file From aa6efadc6b2472d8c0194f5eef09220a895c90c9 Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Tue, 25 Feb 2025 11:50:06 -0500 Subject: [PATCH 6/6] renaming file --- ...trol-plane-latency-recommendations-for-reliable-clusters.adoc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scalability_and_performance/recommended-performance-scale-practices/{control-plane-latency-recommendations-for-reliable-clusters.adoc => recommended-control-plane-latency-recommendations-for-reliable-clusters.adoc} (100%) diff --git a/scalability_and_performance/recommended-performance-scale-practices/control-plane-latency-recommendations-for-reliable-clusters.adoc b/scalability_and_performance/recommended-performance-scale-practices/recommended-control-plane-latency-recommendations-for-reliable-clusters.adoc similarity index 100% rename from scalability_and_performance/recommended-performance-scale-practices/control-plane-latency-recommendations-for-reliable-clusters.adoc rename to scalability_and_performance/recommended-performance-scale-practices/recommended-control-plane-latency-recommendations-for-reliable-clusters.adoc