From 54a7d3596b0900b526577e38ef6d56f6721eb847 Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 18 Jun 2025 18:09:25 -0400 Subject: [PATCH 1/7] grafana modular lib Couchbase --- couchbase-mixin/.lint | 88 +- couchbase-mixin/Makefile | 2 +- couchbase-mixin/{alerts => }/alerts.libsonnet | 20 +- couchbase-mixin/config.libsonnet | 51 +- couchbase-mixin/dashboards.libsonnet | 157 ++ .../couchbase-bucket-overview.libsonnet | 780 --------- .../couchbase-cluster-overview.libsonnet | 980 ------------ .../couchbase-node-overview.libsonnet | 1158 -------------- .../dashboards/dashboards.libsonnet | 3 - .../couchbase-bucket-overview.json | 965 ------------ .../couchbase-cluster-overview.json | 1184 -------------- .../couchbase-node-overview.json | 1399 ----------------- .../dashboards_out/couchbase_bucket_overview | 559 +++++++ .../dashboards_out/couchbase_cluster_overview | 705 +++++++++ .../dashboards_out/couchbase_node_overview | 769 +++++++++ couchbase-mixin/dashboards_out/logs | 310 ++++ couchbase-mixin/g.libsonnet | 1 + couchbase-mixin/jsonnetfile.json | 44 +- couchbase-mixin/links.libsonnet | 32 + couchbase-mixin/main.libsonnet | 33 + couchbase-mixin/mixin.libsonnet | 21 +- couchbase-mixin/panels.libsonnet | 448 ++++++ couchbase-mixin/rows.libsonnet | 56 + couchbase-mixin/targets.libsonnet | 332 ++++ couchbase-mixin/variables.libsonnet | 99 ++ 25 files changed, 3673 insertions(+), 6523 deletions(-) rename couchbase-mixin/{alerts => }/alerts.libsonnet (91%) create mode 100644 couchbase-mixin/dashboards.libsonnet delete mode 100644 couchbase-mixin/dashboards/couchbase-bucket-overview.libsonnet delete mode 100644 couchbase-mixin/dashboards/couchbase-cluster-overview.libsonnet delete mode 100644 couchbase-mixin/dashboards/couchbase-node-overview.libsonnet delete mode 100644 couchbase-mixin/dashboards/dashboards.libsonnet delete mode 100644 couchbase-mixin/dashboards_out/couchbase-bucket-overview.json delete mode 100644 
couchbase-mixin/dashboards_out/couchbase-cluster-overview.json delete mode 100644 couchbase-mixin/dashboards_out/couchbase-node-overview.json create mode 100644 couchbase-mixin/dashboards_out/couchbase_bucket_overview create mode 100644 couchbase-mixin/dashboards_out/couchbase_cluster_overview create mode 100644 couchbase-mixin/dashboards_out/couchbase_node_overview create mode 100644 couchbase-mixin/dashboards_out/logs create mode 100644 couchbase-mixin/g.libsonnet create mode 100644 couchbase-mixin/links.libsonnet create mode 100644 couchbase-mixin/main.libsonnet create mode 100644 couchbase-mixin/panels.libsonnet create mode 100644 couchbase-mixin/rows.libsonnet create mode 100644 couchbase-mixin/targets.libsonnet create mode 100644 couchbase-mixin/variables.libsonnet diff --git a/couchbase-mixin/.lint b/couchbase-mixin/.lint index 0c4773e01..8e24e7cfd 100644 --- a/couchbase-mixin/.lint +++ b/couchbase-mixin/.lint @@ -1,17 +1,79 @@ +--- exclusions: + target-instance-rule: + entries: + - dashboard: "Azure Blob storage" + reason: "Using 'Bucket' as instance label" + - dashboard: "GCP Blob storage" + reason: "Using 'Bucket' as instance label" + - dashboard: "Azure Elastic pool" + reason: "Using 'resourceName' as instance label" + - panel: "Average Message Size" + reason: "Overview which is not filterable by instance" + - dashboard: "Azure SQL database" + reason: "Overview which is not filterable by instance" + - dashboard: "Azure Load Balancing" + - dashboard: "GCP Load Balancing" + reason: "resourceName is the instance label" + - dashboard: "GCP Virtual private cloud" + reason: "project_id is the instance label" + - dashboard: "Azure Queue storage" + reason: "Using 'Bucket' as instance label" + - dashboard: "GCP Compute Engine" + reason: "Using instance_name as instance label" + - dashboard: "Azure Virtual Machines" + - dashboard: "Azure Front Door" + reason: "Aggregation is at the group level, and resourceName is used for instance label" + target-job-rule: + 
reason: "Using filtering selector with job" + template-instance-rule: + entries: + - dashboard: "Azure Blob storage" + reason: "Using 'Bucket' as instance label" + - dashboard: "GCP Blob storage" + reason: "Using 'Bucket' as instance label" + - dashboard: "Azure Elastic pool" + reason: "Using 'resourceName' as instance label" + - dashboard: "Azure SQL database" + reason: "Using 'resourceName' as instance label" + - dashboard: "Azure Load Balancing" + - dashboard: "GCP Load Balancing" + reason: "resourceName is the instance label" + - dashboard: "GCP Virtual private cloud" + reason: "project_id is the instance label" + - dashboard: "Azure Queue storage" + reason: "Using 'Bucket' as instance label" + - dashboard: "GCP Compute Engine" + reason: "Using instance_name as instance label" + - dashboard: "Azure Virtual Machines" + - dashboard: "Azure Front Door" + reason: "Aggregation is at the group level, and resourceName is used for instance label" + panel-datasource-rule: + reason: "Many panels use --Mixed-- DS" panel-units-rule: - reason: "Custom units are used for better user experience in these panels" + reason: | + Common-lib uses overrides for units in almost all cases, and the dashboard linter creates a false positive here. + When dashboard linter is updated, this exception should be removed and any remaining issues resolved. + # entries: + # - panel: "API requests by type" + # - panel: "eDTU utilization" + # - panel: "Concurrent sessions" + # - panel: "Requests Success Rate" + # - dashboard: "Azure SQL database" + # - dashboard: "Azure Load Balancing" + # - dashboard: "Azure Elastic pool" + # - dashboard: "Azure Blob storage" + # - dashboard: "GCP Blob storage" + # reason: "False positive.. Need to look into this" + # - dashboard: "GCP Virtual private cloud" + # reason: "False positive as above. Linter does not look at overrides." 
+ panel-title-description-rule: entries: - - panel: "XDCR docs received" - - panel: "Current connections" - - panel: "Top buckets by current items" - template-datasource-rule: - reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" - template-instance-rule: - reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" - template-job-rule: - reason: "Prometheus datasource variable is being named as prometheus_datasource now while linter expects 'datasource'" - target-instance-rule: - reason: "The dashboard is a 'cluster' dashboard where the instance refers to nodes, this dashboard focuses only on the cluster view." + - dashboard: Azure Service Bus + - dashboard: GCP Compute Engine + - dashboard: Azure Virtual Machines + - dashboard: Azure Front Door + target-rate-interval-rule: entries: - - dashboard: "Couchbase cluster overview" + - dashboard: GCP Compute Engine + diff --git a/couchbase-mixin/Makefile b/couchbase-mixin/Makefile index b4fdca560..37cc871c1 100644 --- a/couchbase-mixin/Makefile +++ b/couchbase-mixin/Makefile @@ -1 +1 @@ -include ../Makefile_mixin +include ../Makefile_mixin \ No newline at end of file diff --git a/couchbase-mixin/alerts/alerts.libsonnet b/couchbase-mixin/alerts.libsonnet similarity index 91% rename from couchbase-mixin/alerts/alerts.libsonnet rename to couchbase-mixin/alerts.libsonnet index 0dd16634d..1293719c5 100644 --- a/couchbase-mixin/alerts/alerts.libsonnet +++ b/couchbase-mixin/alerts.libsonnet @@ -1,6 +1,6 @@ { - prometheusAlerts+:: { - groups+: [ + new(this): { + groups: [ { name: 'couchbase', rules: [ @@ -8,7 +8,7 @@ alert: 'CouchbaseHighCPUUsage', expr: ||| (sys_cpu_utilization_rate) > %(alertsCriticalCPUUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -19,14 +19,14 @@ ( '{{ printf "%%.0f" $value }} percent CPU 
usage on node {{$labels.instance}} and on cluster {{$labels.couchbase_cluster}}, ' + 'which is above the threshold of %(alertsCriticalCPUUsage)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchbaseHighMemoryUsage', expr: ||| 100 * (sys_mem_actual_used / clamp_min(sys_mem_actual_used + sys_mem_actual_free, 1)) > %(alertsCriticalMemoryUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -37,14 +37,14 @@ ( '{{ printf "%%.0f" $value }} percent memory usage on node {{$labels.instance}} and on cluster {{$labels.couchbase_cluster}}, ' + 'which is above the threshold of %(alertsCriticalMemoryUsage)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchbaseMemoryEvictionRate', expr: ||| (kv_ep_num_value_ejects) > %(alertsWarningMemoryEvictionRate)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -55,14 +55,14 @@ ( '{{ printf "%%.0f" $value }} evictions in bucket {{$labels.bucket}}, on node {{$labels.instance}}, and on cluster {{$labels.couchbase_cluster}}, ' + 'which is above the threshold of %(alertsWarningMemoryEvictionRate)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'CouchbaseInvalidRequestVolume', expr: ||| sum without(instance, job) (rate(n1ql_invalid_requests[2m])) > %(alertsWarningInvalidRequestVolume)s - ||| % $._config, + ||| % this.config, 'for': '2m', labels: { severity: 'warning', @@ -73,7 +73,7 @@ ( '{{ printf "%%.0f" $value }} invalid requests to {{$labels.couchbase_cluster}}, ' + 'which is above the threshold of %(alertsWarningInvalidRequestVolume)s.' 
- ) % $._config, + ) % this.config, }, }, ], diff --git a/couchbase-mixin/config.libsonnet b/couchbase-mixin/config.libsonnet index 697857b35..b9da616df 100644 --- a/couchbase-mixin/config.libsonnet +++ b/couchbase-mixin/config.libsonnet @@ -1,19 +1,42 @@ { - _config+:: { - enableMultiCluster: false, - couchbaseSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"', - multiclusterSelector: 'job=~"$job"', - dashboardTags: ['couchbase-mixin'], - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', + // any modular library should include as inputs: + // 'dashboardNamePrefix' - Use as prefix for all Dashboards and (optional) rule groups + // 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules. + // 'groupLabels' - one or more labels that can be used to identify 'group' of instances. In simple cases, can be 'job' or 'cluster'. + // 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'. 
+ // 'uid' - UID to prefix all dashboards original uids - // alerts thresholds - alertsCriticalCPUUsage: 85, // % - alertsCriticalMemoryUsage: 85, // % - alertsWarningMemoryEvictionRate: 10, // count - alertsWarningInvalidRequestVolume: 1000, // count + enableMultiCluster: false, + filteringSelector: '', + groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'couchbase_cluster'] else ['job', 'couchbase_cluster'], + instanceLabels: ['instance'], + dashboardTags: ['couchbase-mixin'], + uid: 'couchbase', + dashboardNamePrefix: 'Couchbase', - enableLokiLogs: true, + local config = self, + // Dashboard-specific label configurations + dashboardVariables: { + cluster: if config.enableMultiCluster then ['job', 'couchbase_cluster', 'cluster'] else ['job', 'couchbase_cluster'], + node: if config.enableMultiCluster then ['job', 'instance', 'couchbase_cluster', 'cluster'] else ['job', 'instance', 'couchbase_cluster'], + bucket: if config.enableMultiCluster then ['job', 'instance', 'couchbase_cluster', 'cluster', 'bucket'] else ['job', 'instance', 'couchbase_cluster', 'bucket'], }, + + // additional params + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + + // logs lib related + enableLokiLogs: true, + logLabels: if self.enableMultiCluster then ['job', 'instance', 'cluster', 'level'] else ['job', 'instance', 'level'], + extraLogLabels: [], // Required by logs-lib + logsVolumeGroupBy: 'level', + showLogsVolume: true, + + // alerts thresholds + alertsCriticalCPUUsage: 85, // % + alertsCriticalMemoryUsage: 85, // % + alertsWarningMemoryEvictionRate: 10, // count + alertsWarningInvalidRequestVolume: 1000, // count } diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet new file mode 100644 index 000000000..4e425e553 --- /dev/null +++ b/couchbase-mixin/dashboards.libsonnet @@ -0,0 +1,157 @@ +local g = import './g.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; +{ + local root = 
self, + new(this):: + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = g.util.string.slugify(this.config.uid); + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + local panels = this.grafana.panels; + + { + couchbase_bucket_overview: + g.dashboard.new(prefix + ' bucket overview') + + g.dashboard.withPanels( + g.util.grid.wrapPanels( + [ + panels.bucket_topBucketsByMemoryUsedPanel { gridPos+: { w: 12 }}, + panels.bucket_topBucketsByDiskUsedPanel { gridPos+: { w: 12 }}, + panels.bucket_topBucketsByCurrentItemsPanel { gridPos+: { w: 8 }}, + panels.bucket_topBucketsByOperationsPanel { gridPos+: { w: 8 }}, + panels.bucket_topBucketsByOperationsFailedPanel { gridPos+: { w: 8 }}, + panels.bucket_topBucketsByHighPriorityRequestsPanel { gridPos+: { w: 12 }}, + panels.bucket_bottomBucketsByCacheHitRatioPanel { gridPos+: { w: 12 }}, + panels.bucket_topBucketsByVBucketsCountPanel { gridPos+: { w: 12 }}, + panels.bucket_topBucketsByVBucketQueueMemoryPanel { gridPos+: { w: 12 }}, + ], + ) + ) + + root.applyCommon( + vars.bucketVariables, + uid + '_couchbase_bucket_overview', + tags, + links { couchbaseBucketOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + couchbase_node_overview: + g.dashboard.new(prefix + ' node overview') + + g.dashboard.withPanels( + g.util.grid.wrapPanels( + [ + panels.node_memoryUtilizationPanel { gridPos+: { w: 12 }}, + panels.node_cpuUtilizationPanel { gridPos+: { w: 12 }}, + panels.node_totalMemoryUsedByServicePanel { gridPos+: { w: 8 }}, + panels.node_backupSizePanel { gridPos+: { w: 8 }}, + panels.node_currentConnectionsPanel { gridPos+: { w: 8 }}, + panels.node_httpResponseCodesPanel { gridPos+: { w: 12 }}, + panels.node_httpRequestMethodsPanel { gridPos+: { 
w: 12 }}, + panels.node_queryServiceRequestsPanel { gridPos+: { w: 12 }}, + panels.node_queryServiceRequestProcessingTimePanel { gridPos+: { w: 12 }}, + panels.node_indexServiceRequestsPanel { gridPos+: { w: 8 }}, + panels.node_indexCacheHitRatioPanel { gridPos+: { w: 8 }}, + panels.node_averageScanLatencyPanel { gridPos+: { w: 8 }}, + ] + ) + ) + + root.applyCommon( + vars.nodeVariables, + uid + '_couchbase_node_overview', + tags, + links { couchbaseNodeOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + couchbase_cluster_overview: + g.dashboard.new(prefix + ' cluster overview') + + g.dashboard.withPanels( + g.util.grid.wrapPanels( + [ + panels.cluster_topNodesByMemoryUsagePanel { gridPos+: { w: 12 }}, + panels.cluster_topNodesByHTTPRequestsPanel { gridPos+: { w: 12 }}, + panels.cluster_topNodesByQueryServiceRequestsPanel { gridPos+: { w: 12 }}, + panels.cluster_topNodesByIndexAverageScanLatencyPanel { gridPos+: { w: 12 }}, + panels.cluster_xdcrReplicationRatePanel { gridPos+: { w: 8 }}, + panels.cluster_xdcrDocsReceivedPanel { gridPos+: { w: 8 }}, + panels.cluster_localBackupSizePanel { gridPos+: { w: 8 }}, + panels.cluster_topBucketsByMemoryUsedPanel { gridPos+: { w: 12 }}, + panels.cluster_topBucketsByDiskUsedPanel { gridPos+: { w: 12 }}, + panels.cluster_topBucketsByOperationsPanel { gridPos+: { w: 12 }}, + panels.cluster_topBucketsByOperationsFailedPanel { gridPos+: { w: 12 }}, + panels.cluster_topBucketsByVBucketsCountPanel { gridPos+: { w: 12 }}, + panels.cluster_topBucketsByVBucketQueueMemoryPanel { gridPos+: { w: 12 }}, + ] + ) + ) + + root.applyCommon( + vars.clusterVariables, + uid + '_couchbase_cluster_overview', + tags, + links { couchbaseClusterOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + } + + + if this.config.enableLokiLogs then + { + logs: + logslib.new( + prefix + ' logs', + datasourceName=this.grafana.variables.datasources.loki.name, + 
datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.groupLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + ) + { + dashboards+: + { + logs+: + // reference to self, already generated variables, to keep them, but apply other common data in applyCommon + root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: + { + // modify log panel + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + // add prometheus datasource for annotations processing + toArray+: [ + this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } + else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/couchbase-mixin/dashboards/couchbase-bucket-overview.libsonnet b/couchbase-mixin/dashboards/couchbase-bucket-overview.libsonnet deleted file mode 100644 index d83ce880e..000000000 --- a/couchbase-mixin/dashboards/couchbase-bucket-overview.libsonnet +++ /dev/null @@ -1,780 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'couchbase-bucket-overview'; - -local promDatasourceName = 
'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local topBucketsByMemoryUsedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, kv_mem_used_bytes{' + matcher + ' })', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Top buckets by memory used', - description: 'Memory used for the top buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByDiskUsedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, couch_docs_actual_disk_size{' + matcher + ' })', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'bargauge', - title: 'Top buckets by disk used', - description: 'Total space on disk used for the top buckets.', - fieldConfig: { - defaults: { - color: { - fixedColor: 'green', - mode: 'fixed', - }, - mappings: [], - min: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { 
- color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - displayMode: 'basic', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.0.2-cloud.1.94a6f396', -}; - -local topBucketsByCurrentItemsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, kv_curr_items{' + matcher + ' })', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by current items', - description: 'Number of active items for the largest buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByOperationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{' + matcher + ' }[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}} - 
{{op}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by operations', - description: 'Rate of operations for the busiest buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: true, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByOperationsFailedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{' + matcher + ' }[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by operations failed', - description: 'Rate of failed operations for the most problematic buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - 
showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByHighPriorityRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{' + matcher + ' }))', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by high priority requests', - description: 'Rate of high priority requests processed by the KV engine for the top buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local bottomBucketsByCacheHitRatioPanel(matcher) = { - datasource: 
promDatasource, - targets: [ - prometheus.target( - 'bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{' + matcher + ' }[$__rate_interval]))) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{' + matcher + ' }[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{' + matcher + ' }[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Bottom buckets by cache hit ratio', - description: 'Worst buckets by cache hit ratio.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: true, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByVBucketsCountPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{' + matcher + ' }))', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'bargauge', - title: 'Top buckets by vBuckets count', - description: 'Number of virtual buckets 
across the cluster for the top buckets.', - fieldConfig: { - defaults: { - color: { - fixedColor: 'green', - mode: 'fixed', - }, - mappings: [], - min: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - displayMode: 'basic', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.0.2-cloud.1.94a6f396', -}; - -local topBucketsByVBucketQueueMemoryPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_vb_queue_memory_bytes{' + matcher + ' }))', - datasource=promDatasource, - legendFormat='{{instance}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by vBucket queue memory', - description: 'Memory occupied by the queue for a virtual bucket for the top buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - 
}, - }, -}; - -local getMatcher(cfg) = '%(couchbaseSelector)s, couchbase_cluster=~"$couchbase_cluster", instance=~"$instance", bucket=~"$bucket"' % cfg; - -{ - grafanaDashboards+:: { - 'couchbase-bucket-overview.json': - dashboard.new( - 'Couchbase bucket overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Couchbase dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(kv_mem_used_bytes,job)', - label='Job', - refresh=2, - includeAll=false, - multi=false, - allValues='', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(kv_mem_used_bytes{%(multiclusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.*', - hide=if $._config.enableMultiCluster then '' else 'variable', - sort=0 - ), - template.new( - 'couchbase_cluster', - promDatasource, - 'label_values(kv_mem_used_bytes{%(couchbaseSelector)s},couchbase_cluster)' % $._config, - label='Couchbase cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(kv_mem_used_bytes{%(couchbaseSelector)s},instance)' % $._config, - label='Instance', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'bucket', - promDatasource, - 'label_values(kv_mem_used_bytes{%(couchbaseSelector)s},bucket)' % $._config, - label='Bucket', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ] - ) - .addPanels( - [ - 
topBucketsByMemoryUsedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - topBucketsByDiskUsedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - topBucketsByCurrentItemsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 8 } }, - topBucketsByOperationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 8 } }, - topBucketsByOperationsFailedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 8 } }, - topBucketsByHighPriorityRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - bottomBucketsByCacheHitRatioPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - topBucketsByVBucketsCountPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 24 } }, - topBucketsByVBucketQueueMemoryPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 24 } }, - ] - ), - }, -} diff --git a/couchbase-mixin/dashboards/couchbase-cluster-overview.libsonnet b/couchbase-mixin/dashboards/couchbase-cluster-overview.libsonnet deleted file mode 100644 index 480ebbbca..000000000 --- a/couchbase-mixin/dashboards/couchbase-cluster-overview.libsonnet +++ /dev/null @@ -1,980 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'couchbase-cluster-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local topNodesByMemoryUsagePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{' + matcher + '})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{' + matcher + '}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{' + matcher + '}))', - 
datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top nodes by memory usage', - description: 'Top nodes by memory usage across the Couchbase cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topNodesByHTTPRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top nodes by HTTP requests', - description: 'Rate of HTTP requests handled by the cluster manager for the top nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topNodesByQueryServiceRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top nodes by query service requests', - description: 'Rate of N1QL requests processed by the query service for the top nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - 
mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topNodesByIndexAverageScanLatencyPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{' + matcher + '}))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top nodes by index average scan latency', - description: 'Average time to serve an index service scan request for the top nodes.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ns', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local xdcrReplicationRatePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}}', - ), - ], - type: 'timeseries', - title: 'XDCR replication rate', - description: 'Rate of replication through the Cross Data Center Replication feature.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - 
axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local xdcrDocsReceivedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}}', - ), - ], - type: 'timeseries', - title: 'XDCR docs received', - description: 'The rate of mutations received by this cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, 
- unit: 'mut/sec', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local localBackupSizePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, job, instance) (backup_data_size{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'bargauge', - title: 'Local backup size', - description: 'The size of the locally replicated data stored, per node.', - fieldConfig: { - defaults: { - color: { - fixedColor: 'green', - mode: 'fixed', - }, - mappings: [], - min: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - displayMode: 'basic', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.0.2-cloud.1.94a6f396', -}; - -local bucketsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Buckets', - collapsed: false, -}; - -local topBucketsByMemoryUsedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{' + matcher + '}))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by memory used', - description: 'Memory used for the top buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 
'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByDiskUsedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{' + matcher + '}))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{bucket}}', - ), - ], - type: 'bargauge', - title: 'Top buckets by disk used', - description: 'Space on disk used for the top buckets.', - fieldConfig: { - defaults: { - color: { - fixedColor: 'green', - mode: 'fixed', - }, - mappings: [], - min: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - displayMode: 'basic', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.0.2-cloud.1.94a6f396', -}; - -local topBucketsByOperationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{bucket}}', - ), 
- ], - type: 'timeseries', - title: 'Top buckets by operations', - description: 'Rate of operations for the busiest buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByOperationsFailedPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{bucket}}', - ), - ], - type: 'timeseries', - title: 'Top buckets by operations failed', - description: 'Rate of operations failed for the most problematic buckets.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, 
- stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topBucketsByVBucketsCountPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{' + matcher + '}))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{bucket}}', - ), - ], - type: 'bargauge', - title: 'Top buckets by vBuckets count', - description: 'Number of virtual buckets across the cluster for the top buckets.', - fieldConfig: { - defaults: { - color: { - fixedColor: 'green', - mode: 'fixed', - }, - mappings: [], - min: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - displayMode: 'basic', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.0.2-cloud.1.94a6f396', -}; - -local getMatcher(cfg) = '%(couchbaseSelector)s, couchbase_cluster=~"$couchbase_cluster"' % cfg; - -{ - grafanaDashboards+:: { - 'couchbase-cluster-overview.json': - dashboard.new( - 'Couchbase cluster overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Couchbase dashboards', - includeVars=true, - 
keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(kv_num_vbuckets,job)', - label='Job', - refresh=2, - includeAll=false, - multi=false, - allValues='', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(kv_num_vbuckets{%(multiclusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.*', - hide=if $._config.enableMultiCluster then '' else 'variable', - sort=0 - ), - template.new( - 'couchbase_cluster', - promDatasource, - 'label_values(kv_num_vbuckets{%(couchbaseSelector)s},couchbase_cluster)' % $._config, - label='Couchbase cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ] - ) - .addPanels( - [ - topNodesByMemoryUsagePanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - topNodesByHTTPRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - topNodesByQueryServiceRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - topNodesByIndexAverageScanLatencyPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - xdcrReplicationRatePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 16 } }, - xdcrDocsReceivedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 16 } }, - localBackupSizePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 16 } }, - bucketsRow { gridPos: { h: 1, w: 24, x: 0, y: 24 } }, - topBucketsByMemoryUsedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 25 } }, - topBucketsByDiskUsedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 25 } }, - topBucketsByOperationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 33 } }, - 
topBucketsByOperationsFailedPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 33 } }, - topBucketsByVBucketsCountPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 33 } }, - ] - ), - }, -} diff --git a/couchbase-mixin/dashboards/couchbase-node-overview.libsonnet b/couchbase-mixin/dashboards/couchbase-node-overview.libsonnet deleted file mode 100644 index dc227b7dc..000000000 --- a/couchbase-mixin/dashboards/couchbase-node-overview.libsonnet +++ /dev/null @@ -1,1158 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'couchbase-node-overview'; - -local promDatasourceName = 'prometheus_datasource'; -local lokiDatasourceName = 'loki_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local lokiDatasource = { - uid: '${%s}' % lokiDatasourceName, -}; - -local memoryUtilizationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sys_mem_actual_used{' + matcher + '} / (clamp_min(sys_mem_actual_free{' + matcher + '} + sys_mem_actual_used{' + matcher + '}, 1))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Memory utilization', - description: 'Percentage of memory allocated to Couchbase on this node actually in use.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: 
{ - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cpuUtilizationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'CPU utilization', - description: 'CPU utilization percentage across all available cores on this Couchbase node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local totalMemoryUsedByServicePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'index_memory_used_total{' + matcher + '}', - 
datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - index', - ), - prometheus.target( - 'cbas_direct_memory_used_bytes{' + matcher + '}', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - analytics', - ), - prometheus.target( - 'sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - data', - ), - ], - type: 'timeseries', - title: 'Total memory used by service', - description: 'Memory used by the index, analytics, and data services for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local backupSizePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, instance, job) (backup_data_size{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'bargauge', - title: 'Backup size', - description: 'Size of locally replicated cluster data for a Couchbase node.', - fieldConfig: { - defaults: { 
- color: { - fixedColor: 'green', - mode: 'fixed', - }, - mappings: [], - min: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - displayMode: 'basic', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'vertical', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.0.2-cloud.1.94a6f396', -}; - -local currentConnectionsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'kv_curr_connections{' + matcher + '}', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Current connections', - description: 'Number of active connections to a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'stepBefore', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local httpResponseCodesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, 
instance, couchbase_cluster, code) (rate(cm_http_requests_total{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - {{code}}', - ), - ], - type: 'timeseries', - title: 'HTTP response codes', - description: 'Rate of HTTP response codes handled by the cluster manager.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - min: 0.001, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local httpRequestMethodsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - {{method}}', - ), - ], - type: 'timeseries', - title: 'HTTP request methods', - description: 'Rate of HTTP request methods handled by the cluster manager.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - 
fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local queryServiceRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(n1ql_requests{' + matcher + '}[$__rate_interval]) + rate(n1ql_invalid_requests{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - total', - ), - prometheus.target( - 'rate(n1ql_errors{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - error', - ), - prometheus.target( - 'rate(n1ql_invalid_requests{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - invalid', - ), - ], - type: 'timeseries', - title: 'Query service requests', - description: 'Rate of N1QL requests processed by the query service for a node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - 
}, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local queryServiceRequestProcessingTimePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(n1ql_requests{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - >0ms', - ), - prometheus.target( - 'rate(n1ql_requests_250ms{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - >250ms', - ), - prometheus.target( - 'rate(n1ql_requests_500ms{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - >500ms', - ), - prometheus.target( - 'rate(n1ql_requests_1000ms{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - >1000ms', - ), - prometheus.target( - 'rate(n1ql_requests_5000ms{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - >5000ms', - ), - ], - type: 'timeseries', - title: 'Query service request processing time', - description: 'Rate of queries grouped by processing time.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, 
- lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local indexServiceRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, instance, job) (rate(index_num_requests{couchbase_cluster=~"$couchbase_cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Index service requests', - description: 'Rate of index service requests served.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - 
mode: 'multi', - sort: 'desc', - }, - }, -}; - -local indexCacheHitRatioPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{' + matcher + '}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{' + matcher + '}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Index cache hit ratio', - description: 'Ratio at which cache scans result in a hit rather than a miss.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: true, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local averageScanLatencyPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{couchbase_cluster}} - {{instance}} - {{index}}', - ), - ], - 
type: 'timeseries', - title: 'Average scan latency', - description: 'Average time to serve a scan request per index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ns', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local errorLogsPanel(matcher) = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{' + matcher + '} |~ `ns_server:error|couchbase.log.error`', - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'Error logs', - description: 'Recent error logs from a node.', - options: { - dedupStrategy: 'none', - enableLogDetails: true, - prettifyLogMessage: true, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, -}; - -local couchbaseLogsPanel(matcher) = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{' + matcher + '} |~ `couchdb`', - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'Couchbase logs', - description: 'Recent couchbase logs from a node.', - options: { - dedupStrategy: 'none', - enableLogDetails: 
true, - prettifyLogMessage: false, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, -}; - -local getMatcher(cfg) = '%(couchbaseSelector)s, couchbase_cluster=~"$couchbase_cluster", instance=~"$instance"' % cfg; - -{ - grafanaDashboards+:: { - 'couchbase-node-overview.json': - dashboard.new( - 'Couchbase node overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Couchbase dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - std.flattenArrays([ - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - ], - if $._config.enableLokiLogs then [ - template.datasource( - lokiDatasourceName, - 'loki', - null, - label='Loki Datasource', - refresh='load' - ), - ] else [], - [ - template.new( - 'job', - promDatasource, - 'label_values(sys_mem_actual_used,job)', - label='Job', - refresh=2, - includeAll=false, - multi=false, - allValues='', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(sys_mem_actual_used{%(multiclusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.*', - hide=if $._config.enableMultiCluster then '' else 'variable', - sort=0 - ), - template.new( - 'couchbase_cluster', - promDatasource, - 'label_values(sys_mem_actual_used{%(couchbaseSelector)s},couchbase_cluster)' % $._config, - label='Couchbase cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(sys_mem_actual_used{%(couchbaseSelector)s},instance)' % $._config, - label='Instance', - refresh=2, - 
includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ], - ]) - ) - .addPanels( - std.flattenArrays([ - [ - memoryUtilizationPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - cpuUtilizationPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - totalMemoryUsedByServicePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 8 } }, - backupSizePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 8 } }, - currentConnectionsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 8 } }, - httpResponseCodesPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - httpRequestMethodsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - queryServiceRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 24 } }, - queryServiceRequestProcessingTimePanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 24 } }, - indexServiceRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 32 } }, - indexCacheHitRatioPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 32 } }, - averageScanLatencyPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 32 } }, - ], - if $._config.enableLokiLogs then [ - errorLogsPanel(getMatcher($._config)) { gridPos: { h: 7, w: 24, x: 0, y: 40 } }, - ] else [], - [ - ], - if $._config.enableLokiLogs then [ - couchbaseLogsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 24, x: 0, y: 47 } }, - ] else [], - [ - ], - ]) - ), - }, -} diff --git a/couchbase-mixin/dashboards/dashboards.libsonnet b/couchbase-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index ac01ff83c..000000000 --- a/couchbase-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,3 +0,0 @@ -(import 'couchbase-bucket-overview.libsonnet') + -(import 'couchbase-cluster-overview.libsonnet') + -(import 'couchbase-node-overview.libsonnet') diff --git 
a/couchbase-mixin/dashboards_out/couchbase-bucket-overview.json b/couchbase-mixin/dashboards_out/couchbase-bucket-overview.json deleted file mode 100644 index 2e340e339..000000000 --- a/couchbase-mixin/dashboards_out/couchbase-bucket-overview.json +++ /dev/null @@ -1,965 +0,0 @@ -{ - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - { - "asDropdown": false, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "couchbase-mixin" - ], - "targetBlank": false, - "title": "Other Couchbase dashboards", - "type": "dashboards", - "url": "" - } - ], - "panels": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Memory used for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - 
"uid": "${prometheus_datasource}" - }, - "expr": "topk(5, kv_mem_used_bytes{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" })", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by memory used", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Total space on disk used for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [ ], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 3, - "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.0.2-cloud.1.94a6f396", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, couch_docs_actual_disk_size{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" })", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by disk used", - "type": "bargauge" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Number of active items for the largest buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - 
"gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, kv_curr_items{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" })", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by current items", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of operations for the busiest buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}} - {{op}}" - } - ], - "title": "Top buckets by operations", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of failed operations for the most problematic buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 6, - "options": { - "legend": { - 
"calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by operations failed", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of high priority requests processed by the KV engine for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(bucket, 
couchbase_cluster, instance, job) (kv_num_high_pri_requests{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by high priority requests", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Worst buckets by cache hit ratio.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }[$__rate_interval]))) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"$job\", 
couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Bottom buckets by cache hit ratio", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Number of virtual buckets across the cluster for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [ ], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 9, - "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.0.2-cloud.1.94a6f396", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by vBuckets count", - "type": "bargauge" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Memory occupied by the queue for a virtual bucket for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_vb_queue_memory_bytes{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\", bucket=~\"$bucket\" }))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{bucket}}" - } - ], - "title": "Top buckets by vBucket queue memory", - "type": "timeseries" - } - ], - "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "couchbase-mixin" - ], - "templating": { - "list": [ - { - "current": { }, - "hide": 0, - "label": "Data source", - "name": "prometheus_datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": 
false, - "name": "job", - "options": [ ], - "query": "label_values(kv_mem_used_bytes,job)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 2, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(kv_mem_used_bytes{job=~\"$job\"}, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Couchbase cluster", - "multi": true, - "name": "couchbase_cluster", - "options": [ ], - "query": "label_values(kv_mem_used_bytes{job=~\"$job\"},couchbase_cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(kv_mem_used_bytes{job=~\"$job\"},instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Bucket", - "multi": true, - "name": "bucket", - "options": [ ], - "query": "label_values(kv_mem_used_bytes{job=~\"$job\"},bucket)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - 
"from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "default", - "title": "Couchbase bucket overview", - "uid": "couchbase-bucket-overview", - "version": 0 - } \ No newline at end of file diff --git a/couchbase-mixin/dashboards_out/couchbase-cluster-overview.json b/couchbase-mixin/dashboards_out/couchbase-cluster-overview.json deleted file mode 100644 index 748eaa542..000000000 --- a/couchbase-mixin/dashboards_out/couchbase-cluster-overview.json +++ /dev/null @@ -1,1184 +0,0 @@ -{ - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - { - "asDropdown": false, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "couchbase-mixin" - ], - "targetBlank": false, - "title": "Other Couchbase dashboards", - "type": "dashboards", - "url": "" - } - ], - "panels": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Top nodes by memory usage across the Couchbase cluster.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Top nodes by memory usage", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of HTTP requests handled by the cluster manager for the top nodes.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, 
- { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Top nodes by HTTP requests", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of N1QL requests processed by the query service for the top nodes.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Top nodes by query service requests", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Average time to serve an index service scan request for the top nodes.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ns" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}))", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Top nodes by index average scan latency", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of replication through the Cross Data Center Replication feature.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 16 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}}" - } - ], - "title": "XDCR replication rate", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The rate of mutations received by this cluster.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - 
}, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "mut/sec" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 16 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}}" - } - ], - "title": "XDCR docs received", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The size of the locally replicated data stored, per node.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [ ], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 8, - "options": { - "displayMode": "basic", - 
"minVizHeight": 10, - "minVizWidth": 0, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.0.2-cloud.1.94a6f396", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, job, instance) (backup_data_size{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Local backup size", - "type": "bargauge" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 9, - "targets": [ ], - "title": "Buckets", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Memory used for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - 
"showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" - } - ], - "title": "Top buckets by memory used", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Space on disk used for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [ ], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 25 - }, - "id": 11, - "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.0.2-cloud.1.94a6f396", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" - } - ], - "title": "Top buckets by disk used", - "type": "bargauge" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of operations for the busiest buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - 
"axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 33 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" - } - ], - "title": "Top buckets by operations", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of operations failed for the most problematic buckets.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 33 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" - } - ], - "title": "Top buckets by operations failed", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Number of virtual buckets across the cluster for the top buckets.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [ ], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 33 - }, - "id": 14, - "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.0.2-cloud.1.94a6f396", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{job=~\"$job\", 
couchbase_cluster=~\"$couchbase_cluster\"}))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" - } - ], - "title": "Top buckets by vBuckets count", - "type": "bargauge" - } - ], - "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "couchbase-mixin" - ], - "templating": { - "list": [ - { - "current": { }, - "hide": 0, - "label": "Data source", - "name": "prometheus_datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": false, - "name": "job", - "options": [ ], - "query": "label_values(kv_num_vbuckets,job)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 2, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(kv_num_vbuckets{job=~\"$job\"}, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Couchbase cluster", - "multi": true, - "name": "couchbase_cluster", - "options": [ ], - "query": "label_values(kv_num_vbuckets{job=~\"$job\"},couchbase_cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", 
- "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "default", - "title": "Couchbase cluster overview", - "uid": "couchbase-cluster-overview", - "version": 0 - } \ No newline at end of file diff --git a/couchbase-mixin/dashboards_out/couchbase-node-overview.json b/couchbase-mixin/dashboards_out/couchbase-node-overview.json deleted file mode 100644 index c29407795..000000000 --- a/couchbase-mixin/dashboards_out/couchbase-node-overview.json +++ /dev/null @@ -1,1399 +0,0 @@ -{ - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - { - "asDropdown": false, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "couchbase-mixin" - ], - "targetBlank": false, - "title": "Other Couchbase dashboards", - "type": "dashboards", - "url": "" - } - ], - "panels": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Percentage of memory allocated to Couchbase on this node actually in use.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - 
} - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sys_mem_actual_used{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"} / (clamp_min(sys_mem_actual_free{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"} + sys_mem_actual_used{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}, 1))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Memory utilization", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "CPU utilization percentage across all available cores on this Couchbase node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 3, - 
"options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "CPU utilization", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Memory used by the index, analytics, and data services for a node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 8 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "index_memory_used_total{job=~\"$job\", 
couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - index" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "cbas_direct_memory_used_bytes{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - analytics" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - data" - } - ], - "title": "Total memory used by service", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Size of locally replicated cluster data for a Couchbase node.", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "mappings": [ ], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 8 - }, - "id": 5, - "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "vertical", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.0.2-cloud.1.94a6f396", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, instance, job) (backup_data_size{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"})", - 
"format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Backup size", - "type": "bargauge" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Number of active connections to a node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "stepBefore", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "kv_curr_connections{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Current connections", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of HTTP response codes handled by the cluster manager.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - 
"custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "min": 0.001, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{code}}" - } - ], - "title": "HTTP response codes", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of HTTP request methods handled by the cluster manager.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{method}}" - } - ], - "title": "HTTP request methods", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of N1QL requests processed by the query service for a node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_requests{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval]) + rate(n1ql_invalid_requests{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - total" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_errors{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - error" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_invalid_requests{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - invalid" - } - ], - "title": "Query service requests", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of queries grouped by processing time.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_requests{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >0ms" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_requests_250ms{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >250ms" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_requests_500ms{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >500ms" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_requests_1000ms{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", 
instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >1000ms" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(n1ql_requests_5000ms{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >5000ms" - } - ], - "title": "Query service request processing time", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Rate of index service requests served.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 32 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, instance, job) (rate(index_num_requests{couchbase_cluster=~\"$couchbase_cluster\", 
job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Index service requests", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Ratio at which cache scans result in a hit rather than a miss.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 32 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) 
(increase(index_cache_misses{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"}[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}}" - } - ], - "title": "Index cache hit ratio", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Average time to serve a scan request per index.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ns" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 32 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{index}}" - } - ], - "title": "Average scan latency", - "type": "timeseries" - }, - { - "datasource": { - 
"uid": "${loki_datasource}" - }, - "description": "Recent error logs from a node.", - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 14, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": true, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - "editorMode": "code", - "expr": "{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"} |~ `ns_server:error|couchbase.log.error`", - "queryType": "range", - "refId": "A" - } - ], - "title": "Error logs", - "type": "logs" - }, - { - "datasource": { - "uid": "${loki_datasource}" - }, - "description": "Recent couchbase logs from a node.", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 47 - }, - "id": 15, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - "editorMode": "code", - "expr": "{job=~\"$job\", couchbase_cluster=~\"$couchbase_cluster\", instance=~\"$instance\"} |~ `couchdb`", - "queryType": "range", - "refId": "A" - } - ], - "title": "Couchbase logs", - "type": "logs" - } - ], - "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "couchbase-mixin" - ], - "templating": { - "list": [ - { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "current": { }, - "hide": 0, - "label": "Loki Datasource", - "name": "loki_datasource", - "options": [ ], - "query": "loki", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": "", 
- "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": false, - "name": "job", - "options": [ ], - "query": "label_values(sys_mem_actual_used,job)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 2, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(sys_mem_actual_used{job=~\"$job\"}, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Couchbase cluster", - "multi": true, - "name": "couchbase_cluster", - "options": [ ], - "query": "label_values(sys_mem_actual_used{job=~\"$job\"},couchbase_cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Instance", - "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(sys_mem_actual_used{job=~\"$job\"},instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - 
"timezone": "default", - "title": "Couchbase node overview", - "uid": "couchbase-node-overview", - "version": 0 - } \ No newline at end of file diff --git a/couchbase-mixin/dashboards_out/couchbase_bucket_overview b/couchbase-mixin/dashboards_out/couchbase_bucket_overview new file mode 100644 index 000000000..f12aff7de --- /dev/null +++ b/couchbase-mixin/dashboards_out/couchbase_bucket_overview @@ -0,0 +1,559 @@ +{ + "annotations": { + "list": [ ] + }, + "links": [ + { + "keepTime": true, + "title": "Couchbase Cluster Overview", + "type": "link", + "url": "/d/couchbase_couchbase_cluster_overview" + }, + { + "keepTime": true, + "title": "Couchbase Node Overview", + "type": "link", + "url": "/d/couchbase_couchbase_node_overview" + }, + { + "keepTime": true, + "title": "Logs", + "type": "link", + "url": "/d/couchbase-logs" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "couchbase-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory used for the top buckets.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, kv_mem_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"})", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by memory used", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Disk used for the top buckets.", + "fieldConfig": { + "defaults": { + "min": 0, + "thresholds": { + "steps": [ + { + "color": "light-green", + "value": null + } + ] + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "orientation": "horizontal" + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, couch_docs_actual_disk_size{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"})", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by disk used", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Number of active items for the largest buckets.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_curr_items{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by current items", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of operations for the busiest buckets.", + "fieldConfig": 
{ + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])))", + "legendFormat": "{{instance}} - {{bucket}} - {{op}}" + } + ], + "title": "Top buckets by operations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of operations failed for the most problematic buckets.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])))", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by operations failed", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of high priority requests processed by the KV engine for the top buckets.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by high priority requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Worst buckets by cache hit ratio.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": true + }, + "max": 100, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])) / 
(clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval]))))", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Bottom buckets by cache hit ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of vBuckets for the top buckets.", + "fieldConfig": { + "defaults": { + "min": 0, + "thresholds": { + "steps": [ + { + "color": "light-green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + "orientation": "horizontal" + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by vBuckets count", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory occupied by the queue for a virtual bucket for the top buckets.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": 
"multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_vb_queue_memory_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", + "legendFormat": "{{instance}} - {{bucket}}" + } + ], + "title": "Top buckets by vBucket queue memory", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "couchbase-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Couchbase_cluster", + "multi": true, + "name": "couchbase_cluster", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",instance=~\"$instance\"}, couchbase_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Bucket", + "multi": true, + "name": "bucket", + "query": 
"label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}, bucket)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "Couchbase bucket overview", + "uid": "couchbase_couchbase_bucket_overview" + } \ No newline at end of file diff --git a/couchbase-mixin/dashboards_out/couchbase_cluster_overview b/couchbase-mixin/dashboards_out/couchbase_cluster_overview new file mode 100644 index 000000000..dc6c94f59 --- /dev/null +++ b/couchbase-mixin/dashboards_out/couchbase_cluster_overview @@ -0,0 +1,705 @@ +{ + "annotations": { + "list": [ ] + }, + "links": [ + { + "keepTime": true, + "title": "Couchbase Bucket Overview", + "type": "link", + "url": "/d/couchbase_couchbase_bucket_overview" + }, + { + "keepTime": true, + "title": "Couchbase Node Overview", + "type": "link", + "url": "/d/couchbase_couchbase_node_overview" + }, + { + "keepTime": true, + "title": "Logs", + "type": "link", + "url": "/d/couchbase-logs" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "couchbase-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Top nodes by memory usage across the Couchbase cluster.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" 
+ }, + "expr": "topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Top nodes by memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of HTTP requests handled by the cluster manager for the top nodes.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Top nodes by HTTP requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of N1QL requests processed by the query service for the top nodes.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Top nodes by query service requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Average time to serve an index service scan request for the top nodes.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "ns" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Top nodes by index average scan latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of replication through the Cross Data Center Replication feature.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, 
+ "showPoints": "never", + "spanNulls": false + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", + "legendFormat": "{{couchbase_cluster}}" + } + ], + "title": "XDCR replication rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "The rate of mutations received by this cluster.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "mut/sec" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", + "legendFormat": "{{couchbase_cluster}}" + } + ], + "title": "XDCR docs received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Size of the local backup for a node.", + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 7, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, job, instance) (backup_data_size{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"})", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Local backup size", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory used for the top buckets across the cluster.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", + "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + } + ], + "title": "Top buckets by memory used", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Disk used for the top buckets across the cluster.", + "fieldConfig": { + "defaults": { + "min": 0, + "thresholds": { + "steps": [ + { + "color": "light-green", + "value": null + } + ] + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 9, + "options": { + "orientation": "horizontal" + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", + 
"legendFormat": "{{couchbase_cluster}} - {{bucket}}" + } + ], + "title": "Top buckets by disk used", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of operations for the busiest buckets across the cluster.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", + "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + } + ], + "title": "Top buckets by operations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of operations failed for the most problematic buckets across the cluster.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) 
(rate(kv_ops_failed{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", + "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + } + ], + "title": "Top buckets by operations failed", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of vBuckets for the top buckets across the cluster.", + "fieldConfig": { + "defaults": { + "min": 0, + "thresholds": { + "steps": [ + { + "color": "light-green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 12, + "options": { + "orientation": "horizontal" + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", + "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + } + ], + "title": "Top buckets by vBuckets count", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", + "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + } + ], + "title": "Top buckets by 
vBucket queue memory", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "couchbase-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": false, + "name": "job", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Couchbase_cluster", + "multi": false, + "name": "couchbase_cluster", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, couchbase_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "Couchbase cluster overview", + "uid": "couchbase_couchbase_cluster_overview" + } \ No newline at end of file diff --git a/couchbase-mixin/dashboards_out/couchbase_node_overview b/couchbase-mixin/dashboards_out/couchbase_node_overview new file mode 100644 index 000000000..61e8a02ce --- /dev/null +++ b/couchbase-mixin/dashboards_out/couchbase_node_overview @@ -0,0 +1,769 @@ +{ + "annotations": { + "list": [ ] + }, + "links": [ + { + "keepTime": true, + "title": "Couchbase Bucket Overview", + "type": "link", + "url": "/d/couchbase_couchbase_bucket_overview" + }, + { + "keepTime": true, + "title": "Couchbase Cluster Overview", + "type": "link", + "url": "/d/couchbase_couchbase_cluster_overview" + }, + { + "keepTime": true, + "title": "Logs", + "type": "link", + "url": "/d/couchbase-logs" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "couchbase-mixin" + ], + "title": 
"All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Percentage of memory allocated to Couchbase on this node actually in use.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sys_mem_actual_used{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"} / (clamp_min(sys_mem_actual_free{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"} + sys_mem_actual_used{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}, 1))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Memory utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "CPU utilization percentage across all available cores on this Couchbase node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "CPU utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory used by the index, analytics, and data services for a node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - data" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "index_memory_used_total{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - index" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "cbas_direct_memory_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - analytics" + } + ], + "title": "Total memory used by service", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" 
+ }, + "description": "Size of the backup for a node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, instance, job) (backup_data_size{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Backup size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Number of active connections to a node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "kv_curr_connections{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Current connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of HTTP response codes handled by the 
cluster manager.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{code}}" + } + ], + "title": "HTTP response codes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of HTTP request methods handled by the cluster manager.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{method}}" + } + ], + "title": "HTTP request methods", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of N1QL requests processed by the query service for a node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]) + rate(n1ql_invalid_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - total" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_errors{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - error" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_invalid_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - invalid" + } + ], + "title": "Query service requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Rate of queries grouped by processing time.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", 
+ "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >0ms" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_requests_250ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >250ms" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_requests_500ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >500ms" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_requests_1000ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >1000ms" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "rate(n1ql_requests_5000ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >5000ms" + } + ], + "title": "Query service request processing time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + 
"description": "Rate of index service requests served.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, instance, job) (rate(index_num_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Index service requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Ratio at which cache scans result in a hit rather than a miss.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": true + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) 
(increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", + "legendFormat": "{{couchbase_cluster}} - {{instance}}" + } + ], + "title": "Index cache hit ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Average time to serve a scan request per index.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 0, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "spanNulls": false + }, + "unit": "ns" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", + "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{index}}" + } + ], + "title": "Average scan latency", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "couchbase-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, job)", + "refresh": 
2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Couchbase_cluster", + "multi": true, + "name": "couchbase_cluster", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",instance=~\"$instance\"}, couchbase_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "Couchbase node overview", + "uid": "couchbase_couchbase_node_overview" + } \ No newline at end of file diff --git a/couchbase-mixin/dashboards_out/logs b/couchbase-mixin/dashboards_out/logs new file mode 100644 index 000000000..21c5ac386 --- /dev/null +++ b/couchbase-mixin/dashboards_out/logs @@ -0,0 +1,310 @@ +{ + "annotations": { + "list": [ ] + }, + "links": [ + { + "keepTime": true, + "title": "Couchbase Bucket Overview", + "type": "link", + "url": "/d/couchbase_couchbase_bucket_overview" + }, + { + "keepTime": true, + "title": "Couchbase Cluster Overview", + "type": "link", + "url": "/d/couchbase_couchbase_cluster_overview" + }, + { + "keepTime": true, + "title": "Couchbase Node Overview", + "type": "link", + "url": "/d/couchbase_couchbase_node_overview" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "couchbase-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Logs volume grouped by \"level\" label.", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "bars", + "fillOpacity": 50, + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)(rr.*|RR.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(W|w)(arn.*|ARN.*|rn|RN)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(T|t)(race|RACE)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "logs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "maxDataPoints": 100, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "sum by (level) (count_over_time({job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "legendFormat": "{{ level }}" + 
} + ], + "title": "Logs volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value", + "renamePattern": "logs" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showTime": false, + "wrapLogMessage": false + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"} \n|~ \"$regex_search\"\n\n\n" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "couchbase-mixin" + ], + "templating": { + "list": [ + { + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values({job=~\"integrations/couchbase\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Couchbase_cluster", + "multi": true, + "name": "couchbase_cluster", + "query": "label_values({job=~\"integrations/couchbase\",job=~\"$job\"}, couchbase_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "Regex search", + "name": "regex_search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "type": "textbox" + }, + { + "hide": 2, + "label": "Prometheus data source", + "name": "prometheus_datasource", + 
"query": "prometheus", + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "Couchbase logs", + "uid": "couchbase-logs" + } \ No newline at end of file diff --git a/couchbase-mixin/g.libsonnet b/couchbase-mixin/g.libsonnet new file mode 100644 index 000000000..f89dcc064 --- /dev/null +++ b/couchbase-mixin/g.libsonnet @@ -0,0 +1 @@ +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.0.0/main.libsonnet' diff --git a/couchbase-mixin/jsonnetfile.json b/couchbase-mixin/jsonnetfile.json index 65cebf84b..69b58cc74 100644 --- a/couchbase-mixin/jsonnetfile.json +++ b/couchbase-mixin/jsonnetfile.json @@ -1,15 +1,33 @@ { - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } - }, - "version": "master" + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" } - ], - "legacyImports": true -} + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.0.0" + } + }, + "version": "main" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "logs-lib" + } + }, + "version": "master" + } + ], + "legacyImports": true +} \ No newline at end of file diff --git a/couchbase-mixin/links.libsonnet b/couchbase-mixin/links.libsonnet new file mode 100644 index 000000000..5b3e0cb10 --- /dev/null +++ b/couchbase-mixin/links.libsonnet @@ -0,0 +1,32 @@ +local g = import './g.libsonnet'; +{ + local link = g.dashboard.link, + new(this): + { + couchbaseBucketOverview: + link.link.new('Couchbase Bucket Overview', '/d/' + this.grafana.dashboards.couchbase_bucket_overview.uid) + + link.link.options.withKeepTime(true), + + couchbaseNodeOverview: + link.link.new('Couchbase Node Overview', '/d/'
+ this.grafana.dashboards.couchbase_node_overview.uid) + + link.link.options.withKeepTime(true), + + couchbaseClusterOverview: + link.link.new('Couchbase Cluster Overview', '/d/' + this.grafana.dashboards.couchbase_cluster_overview.uid) + + link.link.options.withKeepTime(true), + + otherDashboards: + link.dashboards.new('All dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + } + + + if this.config.enableLokiLogs then + { + logs: + link.link.new('Logs', '/d/' + this.grafana.dashboards.logs.uid) + + link.link.options.withKeepTime(true), + } + else {}, +} diff --git a/couchbase-mixin/main.libsonnet b/couchbase-mixin/main.libsonnet new file mode 100644 index 000000000..05b155dff --- /dev/null +++ b/couchbase-mixin/main.libsonnet @@ -0,0 +1,33 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local targets = import './targets.libsonnet'; +local variables = import './variables.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + + local this = self, + config: config, + + grafana: { + variables: variables.new(this, varMetric='kv_mem_used_bytes'), + targets: targets.new(this), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/couchbase-mixin/mixin.libsonnet b/couchbase-mixin/mixin.libsonnet index 4d987cf31..8c5556134 100644 --- a/couchbase-mixin/mixin.libsonnet +++ b/couchbase-mixin/mixin.libsonnet @@ -1,3 +1,18 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local couchbaselib = import 
'./main.libsonnet'; + +local couchbase = + couchbaselib.new() + + couchbaselib.withConfigMixin( + { + filteringSelector: 'job=~"integrations/couchbase"', + uid: 'couchbase', + enableLokiLogs: true, + } + ); + +// populate monitoring-mixin: +{ + grafanaDashboards+:: couchbase.grafana.dashboards, + prometheusAlerts+:: couchbase.prometheus.alerts, + prometheusRules+:: couchbase.prometheus.recordingRules, +} diff --git a/couchbase-mixin/panels.libsonnet b/couchbase-mixin/panels.libsonnet new file mode 100644 index 000000000..138472821 --- /dev/null +++ b/couchbase-mixin/panels.libsonnet @@ -0,0 +1,448 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +local utils = commonlib.utils; + +{ + new(this):: + { + local t = this.grafana.targets, + local barGauge = g.panel.barGauge, + + // + // Bucket Overview Dashboard Panels + // + + bucket_topBucketsByMemoryUsedPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by memory used', + targets=[ + t.topBucketsByMemoryUsedDetailed, + ], + description='Memory used for the top buckets.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + bucket_topBucketsByDiskUsedPanel: + barGauge.new( + 'Top buckets by disk used' + ) + + barGauge.queryOptions.withTargets([ + t.topBucketsByDiskUsedDetailed, + ]) + + barGauge.standardOptions.withUnit('decbytes') + + barGauge.standardOptions.withMin(0) + + barGauge.options.withOrientation('horizontal') + + barGauge.standardOptions.thresholds.withSteps([ + barGauge.thresholdStep.withColor('light-green') + + barGauge.thresholdStep.withValue(null), + ]) + + barGauge.panelOptions.withDescription('Disk used for the top buckets.'), + + bucket_topBucketsByCurrentItemsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by current items', + targets=[ + t.topBucketsByCurrentItems, + ], + description='Number of active items for the largest buckets.' + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + bucket_topBucketsByOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by operations', + targets=[ + t.topBucketsByOperationsWithOp, + ], + description='Rate of operations for the busiest buckets.' + ) + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + bucket_topBucketsByOperationsFailedPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by operations failed', + targets=[ + t.topBucketsByOperationsFailedDetailed, + ], + description='Rate of operations failed for the most problematic buckets.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + bucket_topBucketsByHighPriorityRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by high priority requests', + targets=[ + t.topBucketsByHighPriorityRequests, + ], + description='Rate of high priority requests processed by the KV engine for the top buckets.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + bucket_bottomBucketsByCacheHitRatioPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Bottom buckets by cache hit ratio', + targets=[ + t.bottomBucketsByCacheHitRatio, + ], + description='Worst buckets by cache hit ratio.' + ) + + g.panel.timeSeries.standardOptions.withMax(1) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(true), + + bucket_topBucketsByVBucketsCountPanel: + barGauge.new(title='Top buckets by vBuckets count') + + barGauge.queryOptions.withTargets([ + t.bucketTopBucketsByVBucketsCount, + ]) + + barGauge.panelOptions.withDescription('The number of vBuckets for the top buckets.') + + barGauge.standardOptions.withMin(0) + + barGauge.options.withOrientation('horizontal') + + barGauge.standardOptions.thresholds.withSteps([ + barGauge.thresholdStep.withColor('light-green') + + barGauge.thresholdStep.withValue(null), + ]) + + barGauge.standardOptions.withUnit('none'), + + bucket_topBucketsByVBucketQueueMemoryPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by vBucket queue memory', + targets=[ + t.topBucketsByVBucketQueueMemory, + ], + description='Memory occupied by the queue for a virtual 
bucket for the top buckets.' + ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + // + // Node Overview Dashboard Panels + // + + node_memoryUtilizationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Memory utilization', + targets=[ + t.memoryUtilization, + ], + description='Percentage of memory allocated to Couchbase on this node actually in use.' + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_cpuUtilizationPanel: + commonlib.panels.generic.timeSeries.base.new( + 'CPU utilization', + targets=[ + t.cpuUtilization, + ], + description='CPU utilization percentage across all available cores on this Couchbase node.' + ) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_totalMemoryUsedByServicePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Total memory used by service', + targets=[ + t.totalMemoryUsedByService, + t.totalMemoryUsedByIndexService, + t.totalMemoryUsedByAnalyticsService, + ], + description='Memory used by the index, analytics, and data services for a node.' + ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_backupSizePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Backup size', + targets=[ + t.backupSize, + ], + description='Size of the backup for a node.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_currentConnectionsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Current connections', + targets=[ + t.currentConnections, + ], + description='Number of active connections to a node.' + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_httpResponseCodesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'HTTP response codes', + targets=[ + t.httpResponseCodes, + ], + description='Rate of HTTP response codes handled by the cluster manager.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_httpRequestMethodsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'HTTP request methods', + targets=[ + t.httpRequestMethods, + ], + description='Rate of HTTP request methods handled by the cluster manager.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_queryServiceRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Query service requests', + targets=[ + t.queryServiceRequestsTotal, + t.queryServiceErrors, + t.queryServiceInvalidRequests, + ], + description='Rate of N1QL requests processed by the query service for a node.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_queryServiceRequestProcessingTimePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Query service request processing time', + targets=[ + t.queryServiceRequests, + t.queryServiceRequests250ms, + t.queryServiceRequests500ms, + t.queryServiceRequests1000ms, + t.queryServiceRequests5000ms, + ], + description='Rate of queries grouped by processing time.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_indexServiceRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Index service requests', + targets=[ + t.indexServiceRequests, + ], + description='Rate of index service requests served.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + node_indexCacheHitRatioPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Index cache hit ratio', + targets=[ + t.indexCacheHitRatio, + ], + description='Ratio at which cache scans result in a hit rather than a miss.' + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(true), + + node_averageScanLatencyPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Average scan latency', + targets=[ + t.indexAverageScanLatency, + ], + description='Average time to serve a scan request per index.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('ns') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + // + // Cluster Overview Dashboard Panels + // + + cluster_topNodesByMemoryUsagePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by memory usage', + targets=[ + t.topNodesByMemoryUsage, + ], + description='Top nodes by memory usage across the Couchbase cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_topNodesByHTTPRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by HTTP requests', + targets=[ + t.topNodesByHTTPRequests, + ], + description='Rate of HTTP requests handled by the cluster manager for the top nodes.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_topNodesByQueryServiceRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by query service requests', + targets=[ + t.topNodesByQueryServiceRequests, + ], + description='Rate of N1QL requests processed by the query service for the top nodes.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_topNodesByIndexAverageScanLatencyPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by index average scan latency', + targets=[ + t.topNodesByIndexAverageScanLatency, + ], + description='Average time to serve an index service scan request for the top nodes.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('ns') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_xdcrReplicationRatePanel: + commonlib.panels.generic.timeSeries.base.new( + 'XDCR replication rate', + targets=[ + t.xdcrReplicationRate, + ], + description='Rate of replication through the Cross Data Center Replication feature.' + ) + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_xdcrDocsReceivedPanel: + commonlib.panels.generic.timeSeries.base.new( + 'XDCR docs received', + targets=[ + t.xdcrDocsReceived, + ], + description='The rate of mutations received by this cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('mut/sec') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_localBackupSizePanel: + barGauge.new( + 'Local backup size' + ) + + barGauge.queryOptions.withTargets([ + t.localBackupSize, + ]) + + barGauge.panelOptions.withDescription('Size of the local backup for a node.'), + + cluster_topBucketsByMemoryUsedPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by memory used', + targets=[ + t.topBucketsByMemoryUsed, + ], + description='Memory used for the top buckets across the cluster.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_topBucketsByDiskUsedPanel: + barGauge.new( + 'Top buckets by disk used' + ) + + barGauge.queryOptions.withTargets([ + t.topBucketsByDiskUsed, + ]) + + barGauge.standardOptions.withUnit('decbytes') + + barGauge.standardOptions.withMin(0) + + barGauge.options.withOrientation('horizontal') + + barGauge.standardOptions.thresholds.withSteps([ + barGauge.thresholdStep.withColor('light-green') + + barGauge.thresholdStep.withValue(null), + ]) + + barGauge.panelOptions.withDescription('Disk used for the top buckets across the cluster.'), + + cluster_topBucketsByOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by operations', + targets=[ + t.clusterTopBucketsByOperations, + ], + description='Rate of operations for the busiest buckets across the cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_topBucketsByOperationsFailedPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by operations failed', + targets=[ + t.clusterTopBucketsByOperationsFailed, + ], + description='Rate of operations failed for the most problematic buckets across the cluster.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + cluster_topBucketsByVBucketsCountPanel: + barGauge.new(title='Top buckets by vBuckets count') + + barGauge.queryOptions.withTargets([ + t.clusterTopBucketsByVBucketsCount, + ]) + + barGauge.panelOptions.withDescription('The number of vBuckets for the top buckets across the cluster.') + + barGauge.standardOptions.withMin(0) + + barGauge.options.withOrientation('horizontal') + + barGauge.standardOptions.thresholds.withSteps([ + barGauge.thresholdStep.withColor('light-green') + + barGauge.thresholdStep.withValue(null), + ]) + + barGauge.standardOptions.withUnit('none'), + + cluster_topBucketsByVBucketQueueMemoryPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top buckets by vBucket queue memory', + targets=[ + t.clusterTopBucketsByVBucketQueueMemory, + ], description='Memory occupied by the queue for a virtual bucket for the top buckets across the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + }, +} diff --git a/couchbase-mixin/rows.libsonnet b/couchbase-mixin/rows.libsonnet new file mode 100644 index 000000000..0555f78b0 --- /dev/null +++ b/couchbase-mixin/rows.libsonnet @@ -0,0 +1,56 @@ +local g = import './g.libsonnet'; + +// Use g.util.grid.wrapPanels() to import into custom dashboard +{ + new(panels): { + row_1: + [ + g.panel.row.new('Row 1'), + panels.memoryUtilizationPanel { gridPos+: { h: 8, w: 12, x: 0, y: 0 } }, + panels.cpuUtilizationPanel { gridPos+: { h: 8, w: 12, x: 12, y: 0 } }, + ], + + row_2: + [ + g.panel.row.new('Row 2'), + panels.totalMemoryUsedByServicePanel { gridPos+: { h: 8, w: 8, x: 0, y: 8 } }, + panels.backupSizePanel { gridPos+: { h: 8, w: 8, x: 8, y: 8 } }, + panels.currentConnectionsPanel { gridPos+: { h: 8, w: 8, x: 16, y: 8 } }, + ], + + row_3: + [ + 
g.panel.row.new('Row 3'), + panels.httpResponseCodesPanel { gridPos+: { h: 8, w: 12, x: 0, y: 16 } }, + panels.httpRequestMethodsPanel { gridPos+: { h: 8, w: 12, x: 12, y: 16 } }, + ], + + row_4: + [ + g.panel.row.new('Row 4'), + panels.queryServiceRequestsPanel { gridPos+: { h: 8, w: 12, x: 0, y: 24 } }, + panels.queryServiceRequestProcessingTimePanel { gridPos+: { h: 8, w: 12, x: 12, y: 24 } }, + ], + + row_5: + [ + g.panel.row.new('Row 5'), + panels.indexServiceRequestsPanel { gridPos+: { h: 8, w: 8, x: 0, y: 32 } }, + panels.indexCacheHitRatioPanel { gridPos+: { h: 8, w: 8, x: 8, y: 32 } }, + panels.averageScanLatencyPanel { gridPos+: { h: 8, w: 8, x: 16, y: 32 } }, + ], + + row_6: + [ + g.panel.row.new('Row 6'), + panels.errorLogsPanel { gridPos+: { h: 7, w: 24, x: 0, y: 40 } }, + ], + + row_7: + [ + g.panel.row.new('Row 7'), + panels.couchbaseLogsPanel { gridPos+: { h: 8, w: 24, x: 0, y: 47 } }, + ], + + }, +} diff --git a/couchbase-mixin/targets.libsonnet b/couchbase-mixin/targets.libsonnet new file mode 100644 index 000000000..75668f5b6 --- /dev/null +++ b/couchbase-mixin/targets.libsonnet @@ -0,0 +1,332 @@ +local g = import './g.libsonnet'; +local prometheusQuery = g.query.prometheus; + +{ + new(this): { + local vars = this.grafana.variables, + local clusterSelector = vars.clusterSelector, + local nodeSelector = vars.nodeSelector, + local bucketSelector = vars.bucketSelector, + + // + // Cluster Overview Dashboard Targets + // + + // Top nodes metrics + topNodesByMemoryUsage: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{%(clusterSelector)s})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{%(clusterSelector)s}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{%(clusterSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + topNodesByHTTPRequests: + 
prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{%(clusterSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + topNodesByQueryServiceRequests: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{%(clusterSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + topNodesByIndexAverageScanLatency: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{%(clusterSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + // XDCR metrics + xdcrReplicationRate: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{%(clusterSelector)s}[$__rate_interval]))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}}'), + + xdcrDocsReceived: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{%(clusterSelector)s}[$__rate_interval]))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}}'), + + // Backup metrics + localBackupSize: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, job, instance) (backup_data_size{%(clusterSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + // Top buckets metrics (cluster level) + topBucketsByMemoryUsed: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{%(clusterSelector)s}))' % 
vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + topBucketsByDiskUsed: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{%(clusterSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + clusterTopBucketsByOperations: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{%(clusterSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + clusterTopBucketsByOperationsFailed: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{%(clusterSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + clusterTopBucketsByVBucketsCount: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{%(clusterSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + clusterTopBucketsByVBucketQueueMemory: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{%(clusterSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + // + // Node Overview Dashboard Targets + // + + // Node system metrics + memoryUtilization: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sys_mem_actual_used{%(nodeSelector)s} / (clamp_min(sys_mem_actual_free{%(nodeSelector)s} + sys_mem_actual_used{%(nodeSelector)s}, 1))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + cpuUtilization: + 
prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{%(nodeSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + // Memory by service + totalMemoryUsedByService: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{%(nodeSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - data'), + + totalMemoryUsedByIndexService: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'index_memory_used_total{%(nodeSelector)s}' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - index'), + + totalMemoryUsedByAnalyticsService: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'cbas_direct_memory_used_bytes{%(nodeSelector)s}' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - analytics'), + + // Node backup and connections + backupSize: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, instance, job) (backup_data_size{%(nodeSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + currentConnections: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'kv_curr_connections{%(nodeSelector)s}' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + // HTTP metrics + httpResponseCodes: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{%(nodeSelector)s}[$__rate_interval]))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - {{code}}'), + + httpRequestMethods: + prometheusQuery.new( + '${' + 
vars.datasources.prometheus.name + '}', + 'sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{%(nodeSelector)s}[$__rate_interval]))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - {{method}}'), + + // Query service metrics + queryServiceRequests: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_requests{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >0ms'), + + queryServiceRequestsTotal: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_requests{%(nodeSelector)s}[$__rate_interval]) + rate(n1ql_invalid_requests{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - total'), + + queryServiceErrors: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_errors{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - error'), + + queryServiceInvalidRequests: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_invalid_requests{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - invalid'), + + // Query service latency buckets + queryServiceRequests250ms: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_requests_250ms{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >250ms'), + + queryServiceRequests500ms: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_requests_500ms{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >500ms'), + + queryServiceRequests1000ms: + 
prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_requests_1000ms{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >1000ms'), + + queryServiceRequests5000ms: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'rate(n1ql_requests_5000ms{%(nodeSelector)s}[$__rate_interval])' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >5000ms'), + + // Index service metrics + indexServiceRequests: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, instance, job) (rate(index_num_requests{%(nodeSelector)s}[$__rate_interval]))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + indexCacheHitRatio: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{%(nodeSelector)s}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{%(nodeSelector)s}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{%(nodeSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), + + indexAverageScanLatency: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{%(nodeSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - {{index}}'), + + // + // Bucket Overview Dashboard Targets + // + + // Detailed bucket metrics (instance-level) + topBucketsByMemoryUsedDetailed: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, kv_mem_used_bytes{%(bucketSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + + 
topBucketsByDiskUsedDetailed: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, couch_docs_actual_disk_size{%(bucketSelector)s})' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + + topBucketsByCurrentItems: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_curr_items{%(bucketSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), + + // Bucket operations + topBucketsByOperationsWithOp: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{%(bucketSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}} - {{op}}'), + + topBucketsByOperationsFailedDetailed: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{%(bucketSelector)s}[$__rate_interval])))' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + + topBucketsByHighPriorityRequests: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{%(bucketSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + + // Bucket cache and performance + bottomBucketsByCacheHitRatio: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{%(bucketSelector)s}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{%(bucketSelector)s}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{%(bucketSelector)s}[$__rate_interval]))))' % vars + ) + + 
prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + + // Bucket vBuckets + bucketTopBucketsByVBucketsCount: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{%(bucketSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + + topBucketsByVBucketQueueMemory: + prometheusQuery.new( + '${' + vars.datasources.prometheus.name + '}', + 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_vb_queue_memory_bytes{%(bucketSelector)s}))' % vars + ) + + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), + }, +} diff --git a/couchbase-mixin/variables.libsonnet b/couchbase-mixin/variables.libsonnet new file mode 100644 index 000000000..9cbed87ea --- /dev/null +++ b/couchbase-mixin/variables.libsonnet @@ -0,0 +1,99 @@ +local g = import './g.libsonnet'; +local var = g.dashboard.variable; +local commonlib = import 'common-lib/common/main.libsonnet'; +local utils = commonlib.utils; + +// Generates chained variables to use on all dashboards +{ + new(this, varMetric): + { + local filteringSelector = this.config.filteringSelector, + local groupLabels = this.config.groupLabels, + local instanceLabels = this.config.instanceLabels, + + local root = self, + // Helper function to create variables from specific label list + local variablesFromSpecificLabels(labels, filteringSelector, multiInstance=true) = + local chainVarProto(index, chainVar) = + var.query.new(chainVar.label) + + var.query.withDatasourceFromVariable(root.datasources.prometheus) + + var.query.queryTypes.withLabelValues( + chainVar.label, + // Combine filteringSelector with chainSelector, avoiding duplicate label filters + local combinedSelector = + if std.length(std.stripChars(filteringSelector, ' ')) == 0 then chainVar.chainSelector + else if std.length(chainVar.chainSelector) == 0 then std.stripChars(filteringSelector, ' ') + else + // Both exist - need to 
merge intelligently to avoid duplicates + local filteringParts = std.split(std.stripChars(filteringSelector, ' '), ','); + local filteringLabels = std.set([ + std.stripChars(std.split(std.stripChars(part, ' '), '=')[0], ' ') + for part in filteringParts + if std.length(std.split(part, '=')) > 1 + ]); + local chainParts = std.split(chainVar.chainSelector, ','); + local chainFiltered = std.filter(function(part) + local label = std.stripChars(std.split(std.stripChars(part, ' '), '=')[0], ' '); + !std.setMember(label, filteringLabels), + chainParts + ); + std.join(',', std.filter(function(x) std.length(x) > 0, [std.stripChars(filteringSelector, ' ')] + chainFiltered)); + '%s{%s}' % [varMetric, combinedSelector], + ) + + var.query.generalOptions.withLabel(utils.toSentenceCase(chainVar.label)) + + var.query.selectionOptions.withIncludeAll( + value=true, + customAllValue='.+' + ) + + var.query.selectionOptions.withMulti(multiInstance) + + var.query.refresh.onTime() + + var.query.withSort( + i=1, + type='alphabetical', + asc=true, + caseInsensitive=false + ); + + // Create variables for all labels, let chainLabels handle the chaining without filteringSelector + std.mapWithIndex(chainVarProto, utils.chainLabels(labels, [])), + + datasources: { + prometheus: + var.datasource.new('prometheus_datasource', 'prometheus') + + var.datasource.generalOptions.withLabel('Prometheus data source') + + var.datasource.withRegex(''), + loki: + var.datasource.new('loki_datasource', 'loki') + + var.datasource.generalOptions.withLabel('Loki data source') + + var.datasource.withRegex(''), + }, + + // Dashboard-specific variable sets + clusterVariables: + [root.datasources.prometheus] + + variablesFromSpecificLabels(this.config.dashboardVariables.cluster, filteringSelector, multiInstance=false), + + nodeVariables: + [root.datasources.prometheus] + + variablesFromSpecificLabels(this.config.dashboardVariables.node, filteringSelector, multiInstance=true), + + bucketVariables: + 
[root.datasources.prometheus] + + variablesFromSpecificLabels(this.config.dashboardVariables.bucket, filteringSelector, multiInstance=true), + + clusterSelector: + '%s' % [ + utils.labelsToPromQLSelector(this.config.dashboardVariables.cluster), + ], + + nodeSelector: + '%s' % [ + utils.labelsToPromQLSelector(this.config.dashboardVariables.node), + ], + + bucketSelector: + '%s' % [ + utils.labelsToPromQLSelector(this.config.dashboardVariables.bucket), + ], + }, +} From 493b39afc7bf0d79acf44f0acdf23c54a0cc535e Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 19 Jun 2025 08:31:10 -0400 Subject: [PATCH 2/7] readd rows based off new guidance --- couchbase-mixin/.lint | 88 +++---------------- couchbase-mixin/Makefile | 2 +- couchbase-mixin/dashboards.libsonnet | 28 +++--- .../dashboards_out/couchbase_bucket_overview | 4 +- .../dashboards_out/couchbase_cluster_overview | 39 +++++--- couchbase-mixin/main.libsonnet | 2 + couchbase-mixin/panels.libsonnet | 8 +- couchbase-mixin/rows.libsonnet | 56 ++---------- couchbase-mixin/targets.libsonnet | 11 +-- couchbase-mixin/variables.libsonnet | 9 +- 10 files changed, 76 insertions(+), 171 deletions(-) diff --git a/couchbase-mixin/.lint b/couchbase-mixin/.lint index 8e24e7cfd..0c4773e01 100644 --- a/couchbase-mixin/.lint +++ b/couchbase-mixin/.lint @@ -1,79 +1,17 @@ ---- exclusions: - target-instance-rule: - entries: - - dashboard: "Azure Blob storage" - reason: "Using 'Bucket' as instance label" - - dashboard: "GCP Blob storage" - reason: "Using 'Bucket' as instance label" - - dashboard: "Azure Elastic pool" - reason: "Using 'resourceName' as instance label" - - panel: "Average Message Size" - reason: "Overview which is not filterable by instance" - - dashboard: "Azure SQL database" - reason: "Overview which is not filterable by instance" - - dashboard: "Azure Load Balancing" - - dashboard: "GCP Load Balancing" - reason: "resourceName is the instance label" - - dashboard: "GCP Virtual private cloud" - reason: "project_id is the 
instance label" - - dashboard: "Azure Queue storage" - reason: "Using 'Bucket' as instance label" - - dashboard: "GCP Compute Engine" - reason: "Using instance_name as instance label" - - dashboard: "Azure Virtual Machines" - - dashboard: "Azure Front Door" - reason: "Aggregation is at the group level, and resourceName is used for instance label" - target-job-rule: - reason: "Using filtering selector with job" - template-instance-rule: - entries: - - dashboard: "Azure Blob storage" - reason: "Using 'Bucket' as instance label" - - dashboard: "GCP Blob storage" - reason: "Using 'Bucket' as instance label" - - dashboard: "Azure Elastic pool" - reason: "Using 'resourceName' as instance label" - - dashboard: "Azure SQL database" - reason: "Using 'resourceName' as instance label" - - dashboard: "Azure Load Balancing" - - dashboard: "GCP Load Balancing" - reason: "resourceName is the instance label" - - dashboard: "GCP Virtual private cloud" - reason: "project_id is the instance label" - - dashboard: "Azure Queue storage" - reason: "Using 'Bucket' as instance label" - - dashboard: "GCP Compute Engine" - reason: "Using instance_name as instance label" - - dashboard: "Azure Virtual Machines" - - dashboard: "Azure Front Door" - reason: "Aggregation is at the group level, and resourceName is used for instance label" - panel-datasource-rule: - reason: "Many panels use --Mixed-- DS" panel-units-rule: - reason: | - Common-lib uses overrides for units in almost all cases, and the dashboard linter creates a false positive here. - When dashboard linter is updated, this exception should be removed and any remaining issues resolved. 
- # entries: - # - panel: "API requests by type" - # - panel: "eDTU utilization" - # - panel: "Concurrent sessions" - # - panel: "Requests Success Rate" - # - dashboard: "Azure SQL database" - # - dashboard: "Azure Load Balancing" - # - dashboard: "Azure Elastic pool" - # - dashboard: "Azure Blob storage" - # - dashboard: "GCP Blob storage" - # reason: "False positive.. Need to look into this" - # - dashboard: "GCP Virtual private cloud" - # reason: "False positive as above. Linter does not look at overrides." - panel-title-description-rule: + reason: "Custom units are used for better user experience in these panels" entries: - - dashboard: Azure Service Bus - - dashboard: GCP Compute Engine - - dashboard: Azure Virtual Machines - - dashboard: Azure Front Door - target-rate-interval-rule: + - panel: "XDCR docs received" + - panel: "Current connections" + - panel: "Top buckets by current items" + template-datasource-rule: + reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" + template-instance-rule: + reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" + template-job-rule: + reason: "Prometheus datasource variable is being named as prometheus_datasource now while linter expects 'datasource'" + target-instance-rule: + reason: "The dashboard is a 'cluster' dashboard where the instance refers to nodes, this dashboard focuses only on the cluster view." 
entries: - - dashboard: GCP Compute Engine - + - dashboard: "Couchbase cluster overview" diff --git a/couchbase-mixin/Makefile b/couchbase-mixin/Makefile index 37cc871c1..b4fdca560 100644 --- a/couchbase-mixin/Makefile +++ b/couchbase-mixin/Makefile @@ -1 +1 @@ -include ../Makefile_mixin \ No newline at end of file +include ../Makefile_mixin diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet index 4e425e553..2a13e5cdc 100644 --- a/couchbase-mixin/dashboards.libsonnet +++ b/couchbase-mixin/dashboards.libsonnet @@ -77,22 +77,18 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; couchbase_cluster_overview: g.dashboard.new(prefix + ' cluster overview') + g.dashboard.withPanels( - g.util.grid.wrapPanels( - [ - panels.cluster_topNodesByMemoryUsagePanel { gridPos+: { w: 12 }}, - panels.cluster_topNodesByHTTPRequestsPanel { gridPos+: { w: 12 }}, - panels.cluster_topNodesByQueryServiceRequestsPanel { gridPos+: { w: 12 }}, - panels.cluster_topNodesByIndexAverageScanLatencyPanel { gridPos+: { w: 12 }}, - panels.cluster_xdcrReplicationRatePanel { gridPos+: { w: 8 }}, - panels.cluster_xdcrDocsReceivedPanel { gridPos+: { w: 8 }}, - panels.cluster_localBackupSizePanel { gridPos+: { w: 8 }}, - panels.cluster_topBucketsByMemoryUsedPanel { gridPos+: { w: 12 }}, - panels.cluster_topBucketsByDiskUsedPanel { gridPos+: { w: 12 }}, - panels.cluster_topBucketsByOperationsPanel { gridPos+: { w: 12 }}, - panels.cluster_topBucketsByOperationsFailedPanel { gridPos+: { w: 12 }}, - panels.cluster_topBucketsByVBucketsCountPanel { gridPos+: { w: 12 }}, - panels.cluster_topBucketsByVBucketQueueMemoryPanel { gridPos+: { w: 12 }}, - ] + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + panels.cluster_topNodesByMemoryUsagePanel { gridPos+: { w: 12 } }, + panels.cluster_topNodesByHTTPRequestsPanel { gridPos+: { w: 12 } }, + panels.cluster_topNodesByQueryServiceRequestsPanel { gridPos+: { w: 12 } }, + 
panels.cluster_topNodesByIndexAverageScanLatencyPanel { gridPos+: { w: 12 } }, + panels.cluster_xdcrReplicationRatePanel { gridPos+: { w: 8 } }, + panels.cluster_xdcrDocsReceivedPanel { gridPos+: { w: 8 } }, + panels.cluster_localBackupSizePanel { gridPos+: { w: 8 } }, + ] + this.grafana.rows.clusterOverviewBucket, + ) ) ) + root.applyCommon( diff --git a/couchbase-mixin/dashboards_out/couchbase_bucket_overview b/couchbase-mixin/dashboards_out/couchbase_bucket_overview index f12aff7de..85723d96f 100644 --- a/couchbase-mixin/dashboards_out/couchbase_bucket_overview +++ b/couchbase-mixin/dashboards_out/couchbase_bucket_overview @@ -343,7 +343,7 @@ "showPoints": "never", "spanNulls": true }, - "max": 100, + "max": 1, "unit": "percentunit" } }, @@ -465,7 +465,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_vb_queue_memory_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", + "expr": "topk(5, kv_mem_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"})", "legendFormat": "{{instance}} - {{bucket}}" } ], diff --git a/couchbase-mixin/dashboards_out/couchbase_cluster_overview b/couchbase-mixin/dashboards_out/couchbase_cluster_overview index dc6c94f59..6c2603521 100644 --- a/couchbase-mixin/dashboards_out/couchbase_cluster_overview +++ b/couchbase-mixin/dashboards_out/couchbase_cluster_overview @@ -360,6 +360,19 @@ "title": "Local backup size", "type": "bargauge" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 24, + "y": 24 + }, + "id": 8, + "panels": [ ], + "title": "Buckets", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -383,9 +396,9 @@ "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 25 }, - "id": 8, + "id": 9, "options": { "legend": { "calcs": [ ], @@ -434,9 +447,9 @@ "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 25 }, - "id": 9, + "id": 
10, "options": { "orientation": "horizontal" }, @@ -477,9 +490,9 @@ "h": 8, "w": 12, "x": 0, - "y": 32 + "y": 33 }, - "id": 10, + "id": 11, "options": { "legend": { "calcs": [ ], @@ -527,9 +540,9 @@ "h": 8, "w": 12, "x": 12, - "y": 32 + "y": 33 }, - "id": 11, + "id": 12, "options": { "legend": { "calcs": [ ], @@ -578,9 +591,9 @@ "h": 8, "w": 12, "x": 0, - "y": 40 + "y": 41 }, - "id": 12, + "id": 13, "options": { "orientation": "horizontal" }, @@ -603,7 +616,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "", + "description": "Memory occupied by the queue for a virtual bucket for the top buckets across the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -621,9 +634,9 @@ "h": 8, "w": 12, "x": 12, - "y": 40 + "y": 41 }, - "id": 13, + "id": 14, "options": { "legend": { "calcs": [ ], diff --git a/couchbase-mixin/main.libsonnet b/couchbase-mixin/main.libsonnet index 05b155dff..3e70509cd 100644 --- a/couchbase-mixin/main.libsonnet +++ b/couchbase-mixin/main.libsonnet @@ -3,6 +3,7 @@ local config = import './config.libsonnet'; local dashboards = import './dashboards.libsonnet'; local links = import './links.libsonnet'; local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; local targets = import './targets.libsonnet'; local variables = import './variables.libsonnet'; @@ -23,6 +24,7 @@ local variables = import './variables.libsonnet'; links: links.new(this), panels: panels.new(this), dashboards: dashboards.new(this), + rows: rows.new(panels.new(this)), }, prometheus: { diff --git a/couchbase-mixin/panels.libsonnet b/couchbase-mixin/panels.libsonnet index 138472821..db4e6bf6b 100644 --- a/couchbase-mixin/panels.libsonnet +++ b/couchbase-mixin/panels.libsonnet @@ -1,6 +1,5 @@ local g = import './g.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; { new(this):: @@ -16,7 +15,7 @@ local utils = commonlib.utils; 
commonlib.panels.generic.timeSeries.base.new( 'Top buckets by memory used', targets=[ - t.topBucketsByMemoryUsedDetailed, + t.bucketTopBucketsByMemoryUsed, ], description='Memory used for the top buckets.' ) @@ -119,7 +118,7 @@ local utils = commonlib.utils; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by vBucket queue memory', targets=[ - t.topBucketsByVBucketQueueMemory, + t.bucketTopBucketsByMemoryUsed, ], description='Memory occupied by the queue for a virtual bucket for the top buckets.' ) @@ -433,13 +432,14 @@ local utils = commonlib.utils; + barGauge.thresholdStep.withValue(null), ]) + barGauge.standardOptions.withUnit('none'), - + cluster_topBucketsByVBucketQueueMemoryPanel: commonlib.panels.generic.timeSeries.base.new( 'Top buckets by vBucket queue memory', targets=[ t.clusterTopBucketsByVBucketQueueMemory, ], + description='Memory occupied by the queue for a virtual bucket for the top buckets across the cluster.' ) + g.panel.timeSeries.standardOptions.withUnit('decbytes') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(0) diff --git a/couchbase-mixin/rows.libsonnet b/couchbase-mixin/rows.libsonnet index 0555f78b0..5bea489bf 100644 --- a/couchbase-mixin/rows.libsonnet +++ b/couchbase-mixin/rows.libsonnet @@ -1,56 +1,18 @@ local g = import './g.libsonnet'; +local panels = import './panels.libsonnet'; // Use g.util.grid.wrapPanels() to import into custom dashboard { new(panels): { - row_1: + clusterOverviewBucket: [ - g.panel.row.new('Row 1'), - panels.memoryUtilizationPanel { gridPos+: { h: 8, w: 12, x: 0, y: 0 } }, - panels.cpuUtilizationPanel { gridPos+: { h: 8, w: 12, x: 12, y: 0 } }, + g.panel.row.new('Buckets'), + panels.cluster_topBucketsByMemoryUsedPanel { gridPos+: { w: 12 } }, + panels.cluster_topBucketsByDiskUsedPanel { gridPos+: { w: 12 } }, + panels.cluster_topBucketsByOperationsPanel { gridPos+: { w: 12 } }, + panels.cluster_topBucketsByOperationsFailedPanel { gridPos+: { w: 12 } }, + 
panels.cluster_topBucketsByVBucketsCountPanel { gridPos+: { w: 12 } }, + panels.cluster_topBucketsByVBucketQueueMemoryPanel { gridPos+: { w: 12 } }, ], - - row_2: - [ - g.panel.row.new('Row 2'), - panels.totalMemoryUsedByServicePanel { gridPos+: { h: 8, w: 8, x: 0, y: 8 } }, - panels.backupSizePanel { gridPos+: { h: 8, w: 8, x: 8, y: 8 } }, - panels.currentConnectionsPanel { gridPos+: { h: 8, w: 8, x: 16, y: 8 } }, - ], - - row_3: - [ - g.panel.row.new('Row 3'), - panels.httpResponseCodesPanel { gridPos+: { h: 8, w: 12, x: 0, y: 16 } }, - panels.httpRequestMethodsPanel { gridPos+: { h: 8, w: 12, x: 12, y: 16 } }, - ], - - row_4: - [ - g.panel.row.new('Row 4'), - panels.queryServiceRequestsPanel { gridPos+: { h: 8, w: 12, x: 0, y: 24 } }, - panels.queryServiceRequestProcessingTimePanel { gridPos+: { h: 8, w: 12, x: 12, y: 24 } }, - ], - - row_5: - [ - g.panel.row.new('Row 5'), - panels.indexServiceRequestsPanel { gridPos+: { h: 8, w: 8, x: 0, y: 32 } }, - panels.indexCacheHitRatioPanel { gridPos+: { h: 8, w: 8, x: 8, y: 32 } }, - panels.averageScanLatencyPanel { gridPos+: { h: 8, w: 8, x: 16, y: 32 } }, - ], - - row_6: - [ - g.panel.row.new('Row 6'), - panels.errorLogsPanel { gridPos+: { h: 7, w: 24, x: 0, y: 40 } }, - ], - - row_7: - [ - g.panel.row.new('Row 7'), - panels.couchbaseLogsPanel { gridPos+: { h: 8, w: 24, x: 0, y: 47 } }, - ], - }, } diff --git a/couchbase-mixin/targets.libsonnet b/couchbase-mixin/targets.libsonnet index 75668f5b6..628830572 100644 --- a/couchbase-mixin/targets.libsonnet +++ b/couchbase-mixin/targets.libsonnet @@ -11,7 +11,7 @@ local prometheusQuery = g.query.prometheus; // // Cluster Overview Dashboard Targets // - + // Top nodes metrics topNodesByMemoryUsage: prometheusQuery.new( @@ -263,7 +263,7 @@ local prometheusQuery = g.query.prometheus; // // Detailed bucket metrics (instance-level) - topBucketsByMemoryUsedDetailed: + bucketTopBucketsByMemoryUsed: prometheusQuery.new( '${' + vars.datasources.prometheus.name + '}', 'topk(5, 
kv_mem_used_bytes{%(bucketSelector)s})' % vars @@ -321,12 +321,5 @@ local prometheusQuery = g.query.prometheus; 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{%(bucketSelector)s}))' % vars ) + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - topBucketsByVBucketQueueMemory: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_vb_queue_memory_bytes{%(bucketSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), }, } diff --git a/couchbase-mixin/variables.libsonnet b/couchbase-mixin/variables.libsonnet index 9cbed87ea..519144a48 100644 --- a/couchbase-mixin/variables.libsonnet +++ b/couchbase-mixin/variables.libsonnet @@ -20,7 +20,7 @@ local utils = commonlib.utils; + var.query.queryTypes.withLabelValues( chainVar.label, // Combine filteringSelector with chainSelector, avoiding duplicate label filters - local combinedSelector = + local combinedSelector = if std.length(std.stripChars(filteringSelector, ' ')) == 0 then chainVar.chainSelector else if std.length(chainVar.chainSelector) == 0 then std.stripChars(filteringSelector, ' ') else @@ -32,9 +32,10 @@ local utils = commonlib.utils; if std.length(std.split(part, '=')) > 1 ]); local chainParts = std.split(chainVar.chainSelector, ','); - local chainFiltered = std.filter(function(part) - local label = std.stripChars(std.split(std.stripChars(part, ' '), '=')[0], ' '); - !std.setMember(label, filteringLabels), + local chainFiltered = std.filter( + function(part) + local label = std.stripChars(std.split(std.stripChars(part, ' '), '=')[0], ' '); + !std.setMember(label, filteringLabels), chainParts ); std.join(',', std.filter(function(x) std.length(x) > 0, [std.stripChars(filteringSelector, ' ')] + chainFiltered)); From a0fa2d88d565d7272696cfd29f580b7e7fabf7dc Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 19 Jun 2025 08:31:37 -0400 Subject: [PATCH 3/7] make 
fmt --- couchbase-mixin/dashboards.libsonnet | 42 ++++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet index 2a13e5cdc..9a55bc4e5 100644 --- a/couchbase-mixin/dashboards.libsonnet +++ b/couchbase-mixin/dashboards.libsonnet @@ -20,15 +20,15 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; + g.dashboard.withPanels( g.util.grid.wrapPanels( [ - panels.bucket_topBucketsByMemoryUsedPanel { gridPos+: { w: 12 }}, - panels.bucket_topBucketsByDiskUsedPanel { gridPos+: { w: 12 }}, - panels.bucket_topBucketsByCurrentItemsPanel { gridPos+: { w: 8 }}, - panels.bucket_topBucketsByOperationsPanel { gridPos+: { w: 8 }}, - panels.bucket_topBucketsByOperationsFailedPanel { gridPos+: { w: 8 }}, - panels.bucket_topBucketsByHighPriorityRequestsPanel { gridPos+: { w: 12 }}, - panels.bucket_bottomBucketsByCacheHitRatioPanel { gridPos+: { w: 12 }}, - panels.bucket_topBucketsByVBucketsCountPanel { gridPos+: { w: 12 }}, - panels.bucket_topBucketsByVBucketQueueMemoryPanel { gridPos+: { w: 12 }}, + panels.bucket_topBucketsByMemoryUsedPanel { gridPos+: { w: 12 } }, + panels.bucket_topBucketsByDiskUsedPanel { gridPos+: { w: 12 } }, + panels.bucket_topBucketsByCurrentItemsPanel { gridPos+: { w: 8 } }, + panels.bucket_topBucketsByOperationsPanel { gridPos+: { w: 8 } }, + panels.bucket_topBucketsByOperationsFailedPanel { gridPos+: { w: 8 } }, + panels.bucket_topBucketsByHighPriorityRequestsPanel { gridPos+: { w: 12 } }, + panels.bucket_bottomBucketsByCacheHitRatioPanel { gridPos+: { w: 12 } }, + panels.bucket_topBucketsByVBucketsCountPanel { gridPos+: { w: 12 } }, + panels.bucket_topBucketsByVBucketQueueMemoryPanel { gridPos+: { w: 12 } }, ], ) ) @@ -48,18 +48,18 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; + g.dashboard.withPanels( g.util.grid.wrapPanels( [ - panels.node_memoryUtilizationPanel { gridPos+: { w: 12 }}, - panels.node_cpuUtilizationPanel { gridPos+: 
{ w: 12 }}, - panels.node_totalMemoryUsedByServicePanel { gridPos+: { w: 8 }}, - panels.node_backupSizePanel { gridPos+: { w: 8 }}, - panels.node_currentConnectionsPanel { gridPos+: { w: 8 }}, - panels.node_httpResponseCodesPanel { gridPos+: { w: 12 }}, - panels.node_httpRequestMethodsPanel { gridPos+: { w: 12 }}, - panels.node_queryServiceRequestsPanel { gridPos+: { w: 12 }}, - panels.node_queryServiceRequestProcessingTimePanel { gridPos+: { w: 12 }}, - panels.node_indexServiceRequestsPanel { gridPos+: { w: 8 }}, - panels.node_indexCacheHitRatioPanel { gridPos+: { w: 8 }}, - panels.node_averageScanLatencyPanel { gridPos+: { w: 8 }}, + panels.node_memoryUtilizationPanel { gridPos+: { w: 12 } }, + panels.node_cpuUtilizationPanel { gridPos+: { w: 12 } }, + panels.node_totalMemoryUsedByServicePanel { gridPos+: { w: 8 } }, + panels.node_backupSizePanel { gridPos+: { w: 8 } }, + panels.node_currentConnectionsPanel { gridPos+: { w: 8 } }, + panels.node_httpResponseCodesPanel { gridPos+: { w: 12 } }, + panels.node_httpRequestMethodsPanel { gridPos+: { w: 12 } }, + panels.node_queryServiceRequestsPanel { gridPos+: { w: 12 } }, + panels.node_queryServiceRequestProcessingTimePanel { gridPos+: { w: 12 } }, + panels.node_indexServiceRequestsPanel { gridPos+: { w: 8 } }, + panels.node_indexCacheHitRatioPanel { gridPos+: { w: 8 } }, + panels.node_averageScanLatencyPanel { gridPos+: { w: 8 } }, ] ) ) From c4b7cc636fc322af9c87bffcc85cb218dbef7968 Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 12 Aug 2025 16:37:29 -0400 Subject: [PATCH 4/7] use signals API; wip --- couchbase-mixin/config.libsonnet | 12 + couchbase-mixin/dashboards.libsonnet | 14 +- ...verview => couchbase_bucket_overview.json} | 98 +++--- ...erview => couchbase_cluster_overview.json} | 126 +++++-- ..._overview => couchbase_node_overview.json} | 160 ++++++--- .../dashboards_out/{logs => logs.json} | 0 couchbase-mixin/g.libsonnet | 2 +- couchbase-mixin/jsonnetfile.json | 2 +- couchbase-mixin/links.libsonnet | 8 
+- couchbase-mixin/main.libsonnet | 20 +- couchbase-mixin/panels.libsonnet | 86 ++--- couchbase-mixin/signals/bucket.libsonnet | 130 +++++++ couchbase-mixin/signals/cluster.libsonnet | 193 +++++++++++ couchbase-mixin/signals/index.libsonnet | 61 ++++ couchbase-mixin/signals/node.libsonnet | 141 ++++++++ couchbase-mixin/signals/query.libsonnet | 124 +++++++ couchbase-mixin/targets.libsonnet | 325 ------------------ couchbase-mixin/variables.libsonnet | 100 ------ 18 files changed, 1002 insertions(+), 600 deletions(-) rename couchbase-mixin/dashboards_out/{couchbase_bucket_overview => couchbase_bucket_overview.json} (79%) rename couchbase-mixin/dashboards_out/{couchbase_cluster_overview => couchbase_cluster_overview.json} (80%) rename couchbase-mixin/dashboards_out/{couchbase_node_overview => couchbase_node_overview.json} (74%) rename couchbase-mixin/dashboards_out/{logs => logs.json} (100%) create mode 100644 couchbase-mixin/signals/bucket.libsonnet create mode 100644 couchbase-mixin/signals/cluster.libsonnet create mode 100644 couchbase-mixin/signals/index.libsonnet create mode 100644 couchbase-mixin/signals/node.libsonnet create mode 100644 couchbase-mixin/signals/query.libsonnet delete mode 100644 couchbase-mixin/targets.libsonnet delete mode 100644 couchbase-mixin/variables.libsonnet diff --git a/couchbase-mixin/config.libsonnet b/couchbase-mixin/config.libsonnet index b9da616df..9d859fded 100644 --- a/couchbase-mixin/config.libsonnet +++ b/couchbase-mixin/config.libsonnet @@ -39,4 +39,16 @@ alertsCriticalMemoryUsage: 85, // % alertsWarningMemoryEvictionRate: 10, // count alertsWarningInvalidRequestVolume: 1000, // count + + // metrics source for signals library + metricsSource: 'prometheus', + + // expose signals library + signals+: { + cluster: (import './signals/cluster.libsonnet')(config), + node: (import './signals/node.libsonnet')(config), + query: (import './signals/query.libsonnet')(config), + bucket: (import './signals/bucket.libsonnet')(config), + 
index: (import './signals/index.libsonnet')(config), + }, } diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet index 9a55bc4e5..b66fd6c92 100644 --- a/couchbase-mixin/dashboards.libsonnet +++ b/couchbase-mixin/dashboards.libsonnet @@ -15,7 +15,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; local panels = this.grafana.panels; { - couchbase_bucket_overview: + 'couchbase_bucket_overview.json': g.dashboard.new(prefix + ' bucket overview') + g.dashboard.withPanels( g.util.grid.wrapPanels( @@ -33,7 +33,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; ) ) + root.applyCommon( - vars.bucketVariables, + vars.multiInstance, uid + '_couchbase_bucket_overview', tags, links { couchbaseBucketOverview+:: {} }, @@ -43,7 +43,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; period ), - couchbase_node_overview: + 'couchbase_node_overview.json': g.dashboard.new(prefix + ' node overview') + g.dashboard.withPanels( g.util.grid.wrapPanels( @@ -64,7 +64,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; ) ) + root.applyCommon( - vars.nodeVariables, + vars.multiInstance, uid + '_couchbase_node_overview', tags, links { couchbaseNodeOverview+:: {} }, @@ -74,7 +74,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; period ), - couchbase_cluster_overview: + 'couchbase_cluster_overview.json': g.dashboard.new(prefix + ' cluster overview') + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( @@ -92,7 +92,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; ) ) + root.applyCommon( - vars.clusterVariables, + vars.multiInstance, uid + '_couchbase_cluster_overview', tags, links { couchbaseClusterOverview+:: {} }, @@ -106,7 +106,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; + if this.config.enableLokiLogs then { - logs: + 'logs.json': logslib.new( prefix + ' logs', datasourceName=this.grafana.variables.datasources.loki.name, diff --git 
a/couchbase-mixin/dashboards_out/couchbase_bucket_overview b/couchbase-mixin/dashboards_out/couchbase_bucket_overview.json similarity index 79% rename from couchbase-mixin/dashboards_out/couchbase_bucket_overview rename to couchbase-mixin/dashboards_out/couchbase_bucket_overview.json index 85723d96f..6911c5687 100644 --- a/couchbase-mixin/dashboards_out/couchbase_bucket_overview +++ b/couchbase-mixin/dashboards_out/couchbase_bucket_overview.json @@ -76,8 +76,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, kv_mem_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"})", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket memory used" } ], "title": "Top buckets by memory used", @@ -113,15 +116,18 @@ "options": { "orientation": "horizontal" }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, couch_docs_actual_disk_size{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"})", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, couch_docs_actual_disk_size{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket disk used" } ], "title": "Top buckets by disk used", @@ -170,8 +176,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) 
(kv_curr_items{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_curr_items{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket current items" } ], "title": "Top buckets by current items", @@ -220,8 +229,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])))", - "legendFormat": "{{instance}} - {{bucket}} - {{op}}" + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}} - {{op}}", + "refId": "Bucket operations by operation type" } ], "title": "Top buckets by operations", @@ -270,8 +282,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])))", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket operations failed" } ], "title": 
"Top buckets by operations failed", @@ -320,8 +335,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket high priority requests" } ], "title": "Top buckets by high priority requests", @@ -371,8 +389,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}[$__rate_interval]))))", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])), 1) + sum 
by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))))", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket cache hit ratio" } ], "title": "Bottom buckets by cache hit ratio", @@ -408,15 +429,18 @@ "options": { "orientation": "horizontal" }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"}))", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - {{bucket}}", + "refId": "Bucket vBuckets count" } ], "title": "Top buckets by vBuckets count", @@ -465,8 +489,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, kv_mem_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\",bucket=~\"$bucket\"})", - "legendFormat": "{{instance}} - {{bucket}}" + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by vBucket queue memory" } ], "title": "Top buckets by vBucket queue memory", @@ -502,21 +529,6 @@ "sort": 1, "type": "query" }, - { - "allValue": ".+", - "datasource": { - "type": 
"prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Instance", - "multi": true, - "name": "instance", - "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, instance)", - "refresh": 2, - "sort": 1, - "type": "query" - }, { "allValue": ".+", "datasource": { @@ -527,7 +539,7 @@ "label": "Couchbase_cluster", "multi": true, "name": "couchbase_cluster", - "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",instance=~\"$instance\"}, couchbase_cluster)", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\"}, couchbase_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -539,13 +551,21 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Bucket", + "label": "Instance", "multi": true, - "name": "bucket", - "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}, bucket)", + "name": "instance", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "", + "type": "datasource" } ] }, diff --git a/couchbase-mixin/dashboards_out/couchbase_cluster_overview b/couchbase-mixin/dashboards_out/couchbase_cluster_overview.json similarity index 80% rename from couchbase-mixin/dashboards_out/couchbase_cluster_overview rename to couchbase-mixin/dashboards_out/couchbase_cluster_overview.json index 6c2603521..1e6891d1f 100644 --- a/couchbase-mixin/dashboards_out/couchbase_cluster_overview +++ b/couchbase-mixin/dashboards_out/couchbase_cluster_overview.json @@ -76,8 +76,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(job, couchbase_cluster, instance) 
(sys_mem_actual_used{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Top nodes by memory usage" } ], "title": "Top nodes by memory usage", @@ -126,8 +129,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Top nodes by HTTP requests" } ], "title": "Top nodes by HTTP requests", @@ -176,8 +182,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(job, instance, 
couchbase_cluster) (rate(n1ql_requests{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Top nodes by query service requests" } ], "title": "Top nodes by query service requests", @@ -226,8 +235,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Top nodes by index average scan latency" } ], "title": "Top nodes by index average scan latency", @@ -276,8 +288,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "legendFormat": "{{couchbase_cluster}}" + "expr": "sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}}", + "refId": "XDCR replication rate" } ], "title": "XDCR replication rate", @@ -326,8 +341,11 @@ "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "legendFormat": "{{couchbase_cluster}}" + "expr": "sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}}", + "refId": "XDCR docs received" } ], "title": "XDCR docs received", @@ -346,15 +364,18 @@ "y": 16 }, "id": 7, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, job, instance) (backup_data_size{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"})", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "sum by(couchbase_cluster, job, instance) (backup_data_size{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Local backup size" } ], "title": "Local backup size", @@ -416,8 +437,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + "expr": "topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by memory used" } ], "title": "Top buckets by memory used", 
@@ -453,15 +477,18 @@ "options": { "orientation": "horizontal" }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + "expr": "topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by disk used" } ], "title": "Top buckets by disk used", @@ -510,8 +537,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by operations" } ], "title": "Top buckets by operations", @@ -560,8 +590,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + 
"format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by operations failed" } ], "title": "Top buckets by operations failed", @@ -597,15 +630,18 @@ "options": { "orientation": "horizontal" }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by vBuckets count" } ], "title": "Top buckets by vBuckets count", @@ -654,8 +690,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}))", - "legendFormat": "{{couchbase_cluster}} - {{bucket}}" + "expr": "topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{bucket}}", + "refId": "Top buckets by vBucket queue memory" } ], "title": "Top buckets by vBucket queue memory", @@ -684,7 +723,7 @@ }, "includeAll": true, "label": "Job", - "multi": false, + "multi": true, "name": "job", "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, job)", "refresh": 2, @@ -699,12 +738,35 @@ }, "includeAll": true, "label": "Couchbase_cluster", - "multi": false, + "multi": true, "name": 
"couchbase_cluster", - "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, couchbase_cluster)", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\"}, couchbase_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "", + "type": "datasource" } ] }, diff --git a/couchbase-mixin/dashboards_out/couchbase_node_overview b/couchbase-mixin/dashboards_out/couchbase_node_overview.json similarity index 74% rename from couchbase-mixin/dashboards_out/couchbase_node_overview rename to couchbase-mixin/dashboards_out/couchbase_node_overview.json index 61e8a02ce..dee945cc3 100644 --- a/couchbase-mixin/dashboards_out/couchbase_node_overview +++ b/couchbase-mixin/dashboards_out/couchbase_node_overview.json @@ -76,8 +76,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sys_mem_actual_used{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"} / (clamp_min(sys_mem_actual_free{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"} + sys_mem_actual_used{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}, 1))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "sys_mem_actual_used{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"} / 
(clamp_min(sys_mem_actual_free{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"} + sys_mem_actual_used{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}, 1))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Memory utilization" } ], "title": "Memory utilization", @@ -126,8 +129,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "CPU utilization" } ], "title": "CPU utilization", @@ -176,24 +182,33 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - data" + "expr": "sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - data", + "refId": "Data service memory used" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "index_memory_used_total{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}", - "legendFormat": "{{couchbase_cluster}} - 
{{instance}} - index" + "expr": "index_memory_used_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - index", + "refId": "Index service memory used" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "cbas_direct_memory_used_bytes{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - analytics" + "expr": "cbas_direct_memory_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - analytics", + "refId": "Analytics service memory used" } ], "title": "Total memory used by service", @@ -242,8 +257,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, instance, job) (backup_data_size{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "sum by(couchbase_cluster, instance, job) (backup_data_size{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Backup size" } ], "title": "Backup size", @@ -292,8 +310,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "kv_curr_connections{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "kv_curr_connections{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}", + 
"format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Current connections" } ], "title": "Current connections", @@ -342,8 +363,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{code}}" + "expr": "rate(sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{code}}", + "refId": "HTTP response codes" } ], "title": "HTTP response codes", @@ -392,8 +416,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{method}}" + "expr": "rate(sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{method}}", + "refId": "HTTP request methods" } ], "title": "HTTP request methods", @@ -442,24 +469,33 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]) + 
rate(n1ql_invalid_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - total" + "expr": "rate(rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]) + rate(n1ql_invalid_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - total", + "refId": "Query service requests total" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_errors{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - error" + "expr": "rate(rate(n1ql_errors{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - error", + "refId": "Query service errors" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_invalid_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - invalid" + "expr": "rate(rate(n1ql_invalid_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - invalid", + "refId": "Query service invalid requests" } ], "title": "Query service requests", @@ -508,40 
+544,55 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >0ms" + "expr": "rate(rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >0ms", + "refId": "Query service requests" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_requests_250ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >250ms" + "expr": "rate(rate(n1ql_requests_250ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >250ms", + "refId": "Query service requests >250ms" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_requests_500ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >500ms" + "expr": "rate(rate(n1ql_requests_500ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >500ms", + "refId": "Query service requests >500ms" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": 
"rate(n1ql_requests_1000ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >1000ms" + "expr": "rate(rate(n1ql_requests_1000ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >1000ms", + "refId": "Query service requests >1000ms" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(n1ql_requests_5000ms{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - >5000ms" + "expr": "rate(rate(n1ql_requests_5000ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - >5000ms", + "refId": "Query service requests >5000ms" } ], "title": "Query service request processing time", @@ -590,8 +641,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, instance, job) (rate(index_num_requests{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval]))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "rate(sum by(couchbase_cluster, instance, job) (rate(index_num_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Index service requests" } ], "title": "Index service requests", @@ -640,8 +694,11 @@ 
"type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"}[$__rate_interval])))", - "legendFormat": "{{couchbase_cluster}} - {{instance}}" + "expr": "sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}}", + "refId": "Index cache hit ratio" } ], "title": "Index cache hit ratio", @@ -690,8 +747,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{job=~\"$job\",instance=~\"$instance\",couchbase_cluster=~\"$couchbase_cluster\"})", - "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{index}}" + "expr": "sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"})", + 
"format": "time_series", + "instant": false, + "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{index}}", + "refId": "Index average scan latency" } ], "title": "Average scan latency", @@ -734,10 +794,10 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Instance", + "label": "Couchbase_cluster", "multi": true, - "name": "instance", - "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\"}, instance)", + "name": "couchbase_cluster", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\"}, couchbase_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -749,13 +809,21 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Couchbase_cluster", + "label": "Instance", "multi": true, - "name": "couchbase_cluster", - "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",instance=~\"$instance\"}, couchbase_cluster)", + "name": "instance", + "query": "label_values(kv_mem_used_bytes{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "", + "type": "datasource" } ] }, diff --git a/couchbase-mixin/dashboards_out/logs b/couchbase-mixin/dashboards_out/logs.json similarity index 100% rename from couchbase-mixin/dashboards_out/logs rename to couchbase-mixin/dashboards_out/logs.json diff --git a/couchbase-mixin/g.libsonnet b/couchbase-mixin/g.libsonnet index f89dcc064..e6a2060ee 100644 --- a/couchbase-mixin/g.libsonnet +++ b/couchbase-mixin/g.libsonnet @@ -1 +1 @@ -import 'github.com/grafana/grafonnet/gen/grafonnet-v11.0.0/main.libsonnet' +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/couchbase-mixin/jsonnetfile.json b/couchbase-mixin/jsonnetfile.json index 69b58cc74..89c9bd0da 100644 --- a/couchbase-mixin/jsonnetfile.json 
+++ b/couchbase-mixin/jsonnetfile.json @@ -14,7 +14,7 @@ "source": { "git": { "remote": "https://github.com/grafana/grafonnet.git", - "subdir": "gen/grafonnet-v10.0.0" + "subdir": "gen/grafonnet-v11.4.0" } }, "version": "main" diff --git a/couchbase-mixin/links.libsonnet b/couchbase-mixin/links.libsonnet index 5b3e0cb10..a53281aa5 100644 --- a/couchbase-mixin/links.libsonnet +++ b/couchbase-mixin/links.libsonnet @@ -4,15 +4,15 @@ local g = import './g.libsonnet'; new(this): { couchbaseBucketOverview: - link.link.new('Couchbase Bucket Overview', '/d/' + this.grafana.dashboards.couchbase_bucket_overview.uid) + link.link.new('Couchbase Bucket Overview', '/d/' + this.grafana.dashboards['couchbase_bucket_overview.json'].uid) + link.link.options.withKeepTime(true), couchbaseNodeOverview: - link.link.new('Couchbase Node Overview', '/d/' + this.grafana.dashboards.couchbase_node_overview.uid) + link.link.new('Couchbase Node Overview', '/d/' + this.grafana.dashboards['couchbase_node_overview.json'].uid) + link.link.options.withKeepTime(true), couchbaseClusterOverview: - link.link.new('Couchbase Cluster Overview', '/d/' + this.grafana.dashboards.couchbase_cluster_overview.uid) + link.link.new('Couchbase Cluster Overview', '/d/' + this.grafana.dashboards['couchbase_cluster_overview.json'].uid) + link.link.options.withKeepTime(true), otherDashboards: @@ -25,7 +25,7 @@ local g = import './g.libsonnet'; if this.config.enableLokiLogs then { logs: - link.link.new('Logs', '/d/' + this.grafana.dashboards.logs.uid) + link.link.new('Logs', '/d/' + this.grafana.dashboards['logs.json'].uid) + link.link.options.withKeepTime(true), } else {}, diff --git a/couchbase-mixin/main.libsonnet b/couchbase-mixin/main.libsonnet index 3e70509cd..fe43d48a8 100644 --- a/couchbase-mixin/main.libsonnet +++ b/couchbase-mixin/main.libsonnet @@ -5,7 +5,7 @@ local links = import './links.libsonnet'; local panels = import './panels.libsonnet'; local rows = import './rows.libsonnet'; local targets = import 
'./targets.libsonnet'; -local variables = import './variables.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; { withConfigMixin(config): { @@ -17,8 +17,24 @@ local variables = import './variables.libsonnet'; local this = self, config: config, + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + grafana: { - variables: variables.new(this, varMetric='kv_mem_used_bytes'), + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='kv_mem_used_bytes', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), targets: targets.new(this), annotations: {}, links: links.new(this), diff --git a/couchbase-mixin/panels.libsonnet b/couchbase-mixin/panels.libsonnet index db4e6bf6b..b4bb38e44 100644 --- a/couchbase-mixin/panels.libsonnet +++ b/couchbase-mixin/panels.libsonnet @@ -4,7 +4,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; { new(this):: { - local t = this.grafana.targets, + local signals = this.signals, local barGauge = g.panel.barGauge, // @@ -15,7 +15,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by memory used', targets=[ - t.bucketTopBucketsByMemoryUsed, + signals.bucket.bucketMemoryUsed.asTarget(), ], description='Memory used for the top buckets.' 
) @@ -28,7 +28,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; 'Top buckets by disk used' ) + barGauge.queryOptions.withTargets([ - t.topBucketsByDiskUsedDetailed, + signals.bucket.bucketDiskUsed.asTarget(), ]) + barGauge.standardOptions.withUnit('decbytes') + barGauge.standardOptions.withMin(0) @@ -43,7 +43,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by current items', targets=[ - t.topBucketsByCurrentItems, + signals.bucket.bucketCurrentItems.asTarget(), ], description='Number of active items for the largest buckets.' ) @@ -55,7 +55,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by operations', targets=[ - t.topBucketsByOperationsWithOp, + signals.bucket.bucketOperationsWithOp.asTarget(), ], description='Rate of operations for the busiest buckets.' ) @@ -67,7 +67,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by operations failed', targets=[ - t.topBucketsByOperationsFailedDetailed, + signals.bucket.bucketOperationsFailed.asTarget(), ], description='Rate of operations failed for the most problematic buckets.' ) @@ -79,7 +79,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by high priority requests', targets=[ - t.topBucketsByHighPriorityRequests, + signals.bucket.bucketHighPriorityRequests.asTarget(), ], description='Rate of high priority requests processed by the KV engine for the top buckets.' ) @@ -91,7 +91,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Bottom buckets by cache hit ratio', targets=[ - t.bottomBucketsByCacheHitRatio, + signals.bucket.bucketCacheHitRatio.asTarget(), ], description='Worst buckets by cache hit ratio.' 
) @@ -103,7 +103,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; bucket_topBucketsByVBucketsCountPanel: barGauge.new(title='Top buckets by vBuckets count') + barGauge.queryOptions.withTargets([ - t.bucketTopBucketsByVBucketsCount, + signals.bucket.bucketVBucketsCount.asTarget(), ]) + barGauge.panelOptions.withDescription('The number of vBuckets for the top buckets.') + barGauge.standardOptions.withMin(0) @@ -118,7 +118,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by vBucket queue memory', targets=[ - t.bucketTopBucketsByMemoryUsed, + signals.cluster.topBucketsByVBucketQueueMemory.asTarget(), ], description='Memory occupied by the queue for a virtual bucket for the top buckets.' ) @@ -134,7 +134,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Memory utilization', targets=[ - t.memoryUtilization, + signals.node.memoryUtilization.asTarget(), ], description='Percentage of memory allocated to Couchbase on this node actually in use.' ) @@ -146,7 +146,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'CPU utilization', targets=[ - t.cpuUtilization, + signals.node.cpuUtilization.asTarget(), ], description='CPU utilization percentage across all available cores on this Couchbase node.' ) @@ -158,9 +158,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Total memory used by service', targets=[ - t.totalMemoryUsedByService, - t.totalMemoryUsedByIndexService, - t.totalMemoryUsedByAnalyticsService, + signals.node.dataServiceMemoryUsed.asTarget(), + signals.node.indexServiceMemoryUsed.asTarget(), + signals.node.analyticsServiceMemoryUsed.asTarget(), ], description='Memory used by the index, analytics, and data services for a node.' 
) @@ -172,7 +172,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Backup size', targets=[ - t.backupSize, + signals.node.backupSize.asTarget(), ], description='Size of the backup for a node.' ) @@ -184,7 +184,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Current connections', targets=[ - t.currentConnections, + signals.node.currentConnections.asTarget(), ], description='Number of active connections to a node.' ) @@ -196,7 +196,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'HTTP response codes', targets=[ - t.httpResponseCodes, + signals.node.httpResponseCodes.asTarget(), ], description='Rate of HTTP response codes handled by the cluster manager.' ) @@ -208,7 +208,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'HTTP request methods', targets=[ - t.httpRequestMethods, + signals.node.httpRequestMethods.asTarget(), ], description='Rate of HTTP request methods handled by the cluster manager.' ) @@ -220,9 +220,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Query service requests', targets=[ - t.queryServiceRequestsTotal, - t.queryServiceErrors, - t.queryServiceInvalidRequests, + signals.query.queryServiceRequestsTotal.asTarget(), + signals.query.queryServiceErrors.asTarget(), + signals.query.queryServiceInvalidRequests.asTarget(), ], description='Rate of N1QL requests processed by the query service for a node.' 
) @@ -234,11 +234,11 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Query service request processing time', targets=[ - t.queryServiceRequests, - t.queryServiceRequests250ms, - t.queryServiceRequests500ms, - t.queryServiceRequests1000ms, - t.queryServiceRequests5000ms, + signals.query.queryServiceRequests.asTarget(), + signals.query.queryServiceRequests250ms.asTarget(), + signals.query.queryServiceRequests500ms.asTarget(), + signals.query.queryServiceRequests1000ms.asTarget(), + signals.query.queryServiceRequests5000ms.asTarget(), ], description='Rate of queries grouped by processing time.' ) @@ -250,7 +250,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Index service requests', targets=[ - t.indexServiceRequests, + signals.index.indexServiceRequests.asTarget(), ], description='Rate of index service requests served.' ) @@ -262,7 +262,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Index cache hit ratio', targets=[ - t.indexCacheHitRatio, + signals.index.indexCacheHitRatio.asTarget(), ], description='Ratio at which cache scans result in a hit rather than a miss.' ) @@ -274,7 +274,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Average scan latency', targets=[ - t.indexAverageScanLatency, + signals.index.indexAverageScanLatency.asTarget(), ], description='Average time to serve a scan request per index.' ) @@ -290,7 +290,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top nodes by memory usage', targets=[ - t.topNodesByMemoryUsage, + signals.cluster.topNodesByMemoryUsage.asTarget(), ], description='Top nodes by memory usage across the Couchbase cluster.' 
) @@ -302,7 +302,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top nodes by HTTP requests', targets=[ - t.topNodesByHTTPRequests, + signals.cluster.topNodesByHTTPRequests.asTarget(), ], description='Rate of HTTP requests handled by the cluster manager for the top nodes.' ) @@ -314,7 +314,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top nodes by query service requests', targets=[ - t.topNodesByQueryServiceRequests, + signals.cluster.topNodesByQueryServiceRequests.asTarget(), ], description='Rate of N1QL requests processed by the query service for the top nodes.' ) @@ -326,7 +326,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top nodes by index average scan latency', targets=[ - t.topNodesByIndexAverageScanLatency, + signals.cluster.topNodesByIndexAverageScanLatency.asTarget(), ], description='Average time to serve an index service scan request for the top nodes.' ) @@ -338,7 +338,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'XDCR replication rate', targets=[ - t.xdcrReplicationRate, + signals.cluster.xdcrReplicationRate.asTarget(), ], description='Rate of replication through the Cross Data Center Replication feature.' ) @@ -350,7 +350,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'XDCR docs received', targets=[ - t.xdcrDocsReceived, + signals.cluster.xdcrDocsReceived.asTarget(), ], description='The rate of mutations received by this cluster.' 
) @@ -363,7 +363,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; 'Local backup size' ) + barGauge.queryOptions.withTargets([ - t.localBackupSize, + signals.cluster.localBackupSize.asTarget(), ]) + barGauge.panelOptions.withDescription('Size of the local backup for a node.'), @@ -371,7 +371,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by memory used', targets=[ - t.topBucketsByMemoryUsed, + signals.cluster.topBucketsByMemoryUsed.asTarget(), ], description='Memory used for the top buckets across the cluster.' ) @@ -384,7 +384,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; 'Top buckets by disk used' ) + barGauge.queryOptions.withTargets([ - t.topBucketsByDiskUsed, + signals.cluster.topBucketsByDiskUsed.asTarget(), ]) + barGauge.standardOptions.withUnit('decbytes') + barGauge.standardOptions.withMin(0) @@ -399,7 +399,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by operations', targets=[ - t.clusterTopBucketsByOperations, + signals.cluster.topBucketsByOperations.asTarget(), ], description='Rate of operations for the busiest buckets across the cluster.' ) @@ -411,7 +411,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by operations failed', targets=[ - t.clusterTopBucketsByOperationsFailed, + signals.cluster.topBucketsByOperationsFailed.asTarget(), ], description='Rate of operations failed for the most problematic buckets across the cluster.' 
) @@ -422,7 +422,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; cluster_topBucketsByVBucketsCountPanel: barGauge.new(title='Top buckets by vBuckets count') + barGauge.queryOptions.withTargets([ - t.clusterTopBucketsByVBucketsCount, + signals.cluster.topBucketsByVBucketsCount.asTarget(), ]) + barGauge.panelOptions.withDescription('The number of vBuckets for the top buckets across the cluster.') + barGauge.standardOptions.withMin(0) @@ -437,7 +437,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Top buckets by vBucket queue memory', targets=[ - t.clusterTopBucketsByVBucketQueueMemory, + signals.cluster.topBucketsByVBucketQueueMemory.asTarget(), ], description='Memory occupied by the queue for a virtual bucket for the top buckets across the cluster.' ) diff --git a/couchbase-mixin/signals/bucket.libsonnet b/couchbase-mixin/signals/bucket.libsonnet new file mode 100644 index 000000000..e9a7ec95d --- /dev/null +++ b/couchbase-mixin/signals/bucket.libsonnet @@ -0,0 +1,130 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + + + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'kv_mem_used_bytes', + }, + signals: { + // Bucket memory and disk usage + bucketMemoryUsed: { + name: 'Bucket memory used', + nameShort: 'Memory', + type: 'gauge', + description: 'Memory used for the top buckets.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'topk(5, kv_mem_used_bytes{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}} - {{bucket}}', + }, + }, + }, + bucketDiskUsed: { + name: 'Bucket disk used', + nameShort: 'Disk', + type: 'gauge', + description: 'Disk used for the top buckets.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 
'topk(5, couch_docs_actual_disk_size{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}} - {{bucket}}', + }, + }, + }, + bucketCurrentItems: { + name: 'Bucket current items', + nameShort: 'Items', + type: 'gauge', + description: 'Number of active items for the largest buckets.', + unit: 'none', + sources: { + prometheus: { + expr: 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_curr_items{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + + // Bucket operations + bucketOperationsWithOp: { + name: 'Bucket operations by operation type', + nameShort: 'Operations', + type: 'raw', + description: 'Rate of operations for the busiest buckets by operation type.', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{instance}} - {{bucket}} - {{op}}', + }, + }, + }, + bucketOperationsFailed: { + name: 'Bucket operations failed', + nameShort: 'Failed Ops', + type: 'raw', + description: 'Rate of operations failed for the most problematic buckets.', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{instance}} - {{bucket}}', + }, + }, + }, + bucketHighPriorityRequests: { + name: 'Bucket high priority requests', + nameShort: 'High Priority', + type: 'gauge', + description: 'Rate of high priority requests processed by the KV engine for the top buckets.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{%(queriesSelector)s}))', + legendCustomTemplate: '{{instance}} - {{bucket}}', + }, + }, + }, + + // Bucket cache performance + bucketCacheHitRatio: { + name: 'Bucket cache hit ratio', + nameShort: 'Cache Hit %', + type: 'raw', + description: 'Worst
buckets by cache hit ratio.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{%(queriesSelector)s}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{%(queriesSelector)s}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{%(queriesSelector)s}[$__rate_interval]))))', + legendCustomTemplate: '{{instance}} - {{bucket}}', + }, + }, + }, + + // Bucket vBuckets + bucketVBucketsCount: { + name: 'Bucket vBuckets count', + nameShort: 'vBuckets', + type: 'raw', + description: 'The number of vBuckets for the top buckets.', + unit: 'none', + sources: { + prometheus: { + expr: 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{%(queriesSelector)s}))', + legendCustomTemplate: '{{instance}} - {{bucket}}', + }, + }, + }, + }, + } \ No newline at end of file diff --git a/couchbase-mixin/signals/cluster.libsonnet b/couchbase-mixin/signals/cluster.libsonnet new file mode 100644 index 000000000..e1aa5d189 --- /dev/null +++ b/couchbase-mixin/signals/cluster.libsonnet @@ -0,0 +1,193 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'sys_mem_actual_used', + }, + signals: { + // Top nodes metrics + topNodesByMemoryUsage: { + name: 'Top nodes by memory usage', + nameShort: 'Memory', + type: 'raw', + description: 'Top nodes by memory usage across the Couchbase cluster.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{%(queriesSelector)s})) / (sum by(job, couchbase_cluster, instance) 
(clamp_min(sys_mem_actual_free{%(queriesSelector)s}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + topNodesByHTTPRequests: { + name: 'Top nodes by HTTP requests', + nameShort: 'HTTP Requests', + type: 'raw', + description: 'Rate of HTTP requests handled by the cluster manager for the top nodes.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + topNodesByQueryServiceRequests: { + name: 'Top nodes by query service requests', + nameShort: 'N1QL Requests', + type: 'raw', + description: 'Rate of N1QL requests processed by the query service for the top nodes.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + topNodesByIndexAverageScanLatency: { + name: 'Top nodes by index average scan latency', + nameShort: 'Scan Latency', + type: 'raw', + description: 'Average time to serve an index service scan request for the top nodes.', + unit: 'ns', + sources: { + prometheus: { + expr: 'topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + + // XDCR metrics + xdcrReplicationRate: { + name: 'XDCR replication rate', + nameShort: 'XDCR Rate', + type: 'raw', + description: 'Rate of replication through the Cross Data Center Replication feature.', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 
'{{couchbase_cluster}}', + }, + }, + }, + xdcrDocsReceived: { + name: 'XDCR docs received', + nameShort: 'XDCR Docs', + type: 'raw', + description: 'The rate of mutations received by this cluster.', + unit: 'mut/sec', + sources: { + prometheus: { + expr: 'sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{couchbase_cluster}}', + }, + }, + }, + + // Backup metrics + localBackupSize: { + name: 'Local backup size', + nameShort: 'Backup', + type: 'raw', + description: 'Size of the local backup for a node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, job, instance) (backup_data_size{%(queriesSelector)s})', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + + // Top buckets metrics (cluster level) + topBucketsByMemoryUsed: { + name: 'Top buckets by memory used', + nameShort: 'Bucket Memory', + type: 'raw', + description: 'Memory used for the top buckets across the cluster.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + topBucketsByDiskUsed: { + name: 'Top buckets by disk used', + nameShort: 'Bucket Disk', + type: 'raw', + description: 'Disk used for the top buckets across the cluster.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + topBucketsByOperations: { + name: 'Top buckets by operations', + nameShort: 'Bucket Ops', + type: 'raw', + description: 'Rate of operations for the busiest buckets across the cluster.', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(5, sum by(couchbase_cluster, job, bucket) 
(rate(kv_ops{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + topBucketsByOperationsFailed: { + name: 'Top buckets by operations failed', + nameShort: 'Bucket Failed Ops', + type: 'raw', + description: 'Rate of operations failed for the most problematic buckets across the cluster.', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + topBucketsByVBucketsCount: { + name: 'Top buckets by vBuckets count', + nameShort: 'vBuckets', + type: 'raw', + description: 'The number of vBuckets for the top buckets across the cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + topBucketsByVBucketQueueMemory: { + name: 'Top buckets by vBucket queue memory', + nameShort: 'vBucket Memory', + type: 'raw', + description: 'Memory occupied by the queue for a virtual bucket for the top buckets across the cluster.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{%(queriesSelector)s}))', + legendCustomTemplate: '{{couchbase_cluster}} - {{bucket}}', + }, + }, + }, + }, + } \ No newline at end of file diff --git a/couchbase-mixin/signals/index.libsonnet b/couchbase-mixin/signals/index.libsonnet new file mode 100644 index 000000000..581ffcf20 --- /dev/null +++ b/couchbase-mixin/signals/index.libsonnet @@ -0,0 +1,61 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', 
+ aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'index_num_requests', + }, + signals: { + // Index service requests (type 'raw': the expr already applies rate(), so commonlib must not wrap it in a range function again) + indexServiceRequests: { + name: 'Index service requests', + nameShort: 'Index Requests', + type: 'raw', + description: 'Rate of index service requests served.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, instance, job) (rate(index_num_requests{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + + // Index cache hit ratio + indexCacheHitRatio: { + name: 'Index cache hit ratio', + nameShort: 'Cache Hit %', + type: 'raw', + description: 'Ratio at which cache scans result in a hit rather than a miss.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{%(queriesSelector)s}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{%(queriesSelector)s}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + + // Index average scan latency + indexAverageScanLatency: { + name: 'Index average scan latency', + nameShort: 'Scan Latency', + type: 'gauge', + description: 'Average time to serve a scan request per index.', + unit: 'ns', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{%(queriesSelector)s})', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - {{index}}', + }, + }, + }, + }, + } \ No newline at end of file diff --git a/couchbase-mixin/signals/node.libsonnet b/couchbase-mixin/signals/node.libsonnet new file mode 100644 index 000000000..c16f16b17 --- /dev/null +++ b/couchbase-mixin/signals/node.libsonnet @@ -0,0 +1,141 @@ +local commonlib = import
'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'sys_mem_actual_used', + }, + signals: { + // Node system metrics + memoryUtilization: { + name: 'Memory utilization', + nameShort: 'Memory %', + type: 'raw', + description: 'Percentage of memory allocated to Couchbase on this node actually in use.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'sys_mem_actual_used{%(queriesSelector)s} / (clamp_min(sys_mem_actual_free{%(queriesSelector)s} + sys_mem_actual_used{%(queriesSelector)s}, 1))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + cpuUtilization: { + name: 'CPU utilization', + nameShort: 'CPU %', + type: 'gauge', + description: 'CPU utilization percentage across all available cores on this Couchbase node.', + unit: 'percent', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{%(queriesSelector)s})', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + + // Memory by service + dataServiceMemoryUsed: { + name: 'Data service memory used', + nameShort: 'Data Memory', + type: 'gauge', + description: 'Memory used by the data service for a node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{%(queriesSelector)s})', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - data', + }, + }, + }, + indexServiceMemoryUsed: { + name: 'Index service memory used', + nameShort: 'Index Memory', + type: 'gauge', + description: 'Memory used by the index service for a node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'index_memory_used_total{%(queriesSelector)s}', + legendCustomTemplate: 
'{{couchbase_cluster}} - {{instance}} - index', + }, + }, + }, + analyticsServiceMemoryUsed: { + name: 'Analytics service memory used', + nameShort: 'Analytics Memory', + type: 'gauge', + description: 'Memory used by the analytics service for a node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'cbas_direct_memory_used_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - analytics', + }, + }, + }, + + // Node backup and connections + backupSize: { + name: 'Backup size', + nameShort: 'Backup', + type: 'gauge', + description: 'Size of the backup for a node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'sum by(couchbase_cluster, instance, job) (backup_data_size{%(queriesSelector)s})', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + currentConnections: { + name: 'Current connections', + nameShort: 'Connections', + type: 'gauge', + description: 'Number of active connections to a node.', + unit: 'none', + sources: { + prometheus: { + expr: 'kv_curr_connections{%(queriesSelector)s}', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}}', + }, + }, + }, + + // HTTP metrics (type 'raw': each expr already applies rate() inside its aggregation, so commonlib must not wrap it again) + httpResponseCodes: { + name: 'HTTP response codes', + nameShort: 'HTTP Codes', + type: 'raw', + description: 'Rate of HTTP response codes handled by the cluster manager.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - {{code}}', + }, + }, + }, + httpRequestMethods: { + name: 'HTTP request methods', + nameShort: 'HTTP Methods', + type: 'raw', + description: 'Rate of HTTP request methods handled by the cluster manager.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{%(queriesSelector)s}[$__rate_interval]))', +
legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - {{method}}', + }, + }, + }, + }, + } \ No newline at end of file diff --git a/couchbase-mixin/signals/query.libsonnet b/couchbase-mixin/signals/query.libsonnet new file mode 100644 index 000000000..1dc948c29 --- /dev/null +++ b/couchbase-mixin/signals/query.libsonnet @@ -0,0 +1,124 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'n1ql_requests', + }, + signals: { + // Query service requests (type 'raw': each expr already applies rate() itself, so commonlib must not wrap it again) + queryServiceRequests: { + name: 'Query service requests', + nameShort: 'N1QL >0ms', + type: 'raw', + description: 'Rate of N1QL requests processed by the query service for a node.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_requests{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >0ms', + }, + }, + }, + queryServiceRequestsTotal: { + name: 'Query service requests total', + nameShort: 'N1QL Total', + type: 'raw', + description: 'Total rate of N1QL requests processed by the query service (including valid and invalid).', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_requests{%(queriesSelector)s}[$__rate_interval]) + rate(n1ql_invalid_requests{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - total', + }, + }, + }, + queryServiceErrors: { + name: 'Query service errors', + nameShort: 'N1QL Errors', + type: 'raw', + description: 'Rate of N1QL query errors.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_errors{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - error', + }, + }, + }, +
queryServiceInvalidRequests: { + name: 'Query service invalid requests', + nameShort: 'N1QL Invalid', + type: 'raw', + description: 'Rate of invalid N1QL requests.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_invalid_requests{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - invalid', + }, + }, + }, + + // Query service latency buckets (type 'raw': each expr already applies rate() itself) + queryServiceRequests250ms: { + name: 'Query service requests >250ms', + nameShort: 'N1QL >250ms', + type: 'raw', + description: 'Rate of N1QL requests taking more than 250ms.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_requests_250ms{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >250ms', + }, + }, + }, + queryServiceRequests500ms: { + name: 'Query service requests >500ms', + nameShort: 'N1QL >500ms', + type: 'raw', + description: 'Rate of N1QL requests taking more than 500ms.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_requests_500ms{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >500ms', + }, + }, + }, + queryServiceRequests1000ms: { + name: 'Query service requests >1000ms', + nameShort: 'N1QL >1s', + type: 'raw', + description: 'Rate of N1QL requests taking more than 1000ms.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_requests_1000ms{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >1000ms', + }, + }, + }, + queryServiceRequests5000ms: { + name: 'Query service requests >5000ms', + nameShort: 'N1QL >5s', + type: 'raw', + description: 'Rate of N1QL requests taking more than 5000ms.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'rate(n1ql_requests_5000ms{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >5000ms', + }, + }, + }, +
}, + } \ No newline at end of file diff --git a/couchbase-mixin/targets.libsonnet b/couchbase-mixin/targets.libsonnet deleted file mode 100644 index 628830572..000000000 --- a/couchbase-mixin/targets.libsonnet +++ /dev/null @@ -1,325 +0,0 @@ -local g = import './g.libsonnet'; -local prometheusQuery = g.query.prometheus; - -{ - new(this): { - local vars = this.grafana.variables, - local clusterSelector = vars.clusterSelector, - local nodeSelector = vars.nodeSelector, - local bucketSelector = vars.bucketSelector, - - // - // Cluster Overview Dashboard Targets - // - - // Top nodes metrics - topNodesByMemoryUsage: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(job, couchbase_cluster, instance) (sys_mem_actual_used{%(clusterSelector)s})) / (sum by(job, couchbase_cluster, instance) (clamp_min(sys_mem_actual_free{%(clusterSelector)s}, 1)) + sum by(couchbase_cluster, instance, job) (sys_mem_actual_used{%(clusterSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - topNodesByHTTPRequests: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(job, couchbase_cluster, instance) (rate(cm_http_requests_total{%(clusterSelector)s}[$__rate_interval])))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - topNodesByQueryServiceRequests: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(job, instance, couchbase_cluster) (rate(n1ql_requests{%(clusterSelector)s}[$__rate_interval])))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - topNodesByIndexAverageScanLatency: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, avg by(instance, couchbase_cluster, job) (index_avg_scan_latency{%(clusterSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - // 
XDCR metrics - xdcrReplicationRate: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, job) (rate(xdcr_data_replicated_bytes{%(clusterSelector)s}[$__rate_interval]))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}}'), - - xdcrDocsReceived: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(job, couchbase_cluster) (rate(xdcr_docs_received_from_dcp_total{%(clusterSelector)s}[$__rate_interval]))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}}'), - - // Backup metrics - localBackupSize: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, job, instance) (backup_data_size{%(clusterSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - // Top buckets metrics (cluster level) - topBucketsByMemoryUsed: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(bucket, couchbase_cluster, job) (kv_mem_used_bytes{%(clusterSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), - - topBucketsByDiskUsed: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(job, couchbase_cluster, bucket) (couch_docs_actual_disk_size{%(clusterSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), - - clusterTopBucketsByOperations: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops{%(clusterSelector)s}[$__rate_interval])))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), - - clusterTopBucketsByOperationsFailed: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(couchbase_cluster, job, bucket) (rate(kv_ops_failed{%(clusterSelector)s}[$__rate_interval])))' % 
vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), - - clusterTopBucketsByVBucketsCount: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_num_vbuckets{%(clusterSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), - - clusterTopBucketsByVBucketQueueMemory: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_vb_queue_memory_bytes{%(clusterSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{bucket}}'), - - // - // Node Overview Dashboard Targets - // - - // Node system metrics - memoryUtilization: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sys_mem_actual_used{%(nodeSelector)s} / (clamp_min(sys_mem_actual_free{%(nodeSelector)s} + sys_mem_actual_used{%(nodeSelector)s}, 1))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - cpuUtilization: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, job, instance) (sys_cpu_utilization_rate{%(nodeSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - // Memory by service - totalMemoryUsedByService: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, instance, job) (kv_mem_used_bytes{%(nodeSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - data'), - - totalMemoryUsedByIndexService: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'index_memory_used_total{%(nodeSelector)s}' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - index'), - - totalMemoryUsedByAnalyticsService: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name 
+ '}', - 'cbas_direct_memory_used_bytes{%(nodeSelector)s}' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - analytics'), - - // Node backup and connections - backupSize: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, instance, job) (backup_data_size{%(nodeSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - currentConnections: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'kv_curr_connections{%(nodeSelector)s}' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - // HTTP metrics - httpResponseCodes: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{%(nodeSelector)s}[$__rate_interval]))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - {{code}}'), - - httpRequestMethods: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{%(nodeSelector)s}[$__rate_interval]))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - {{method}}'), - - // Query service metrics - queryServiceRequests: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_requests{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >0ms'), - - queryServiceRequestsTotal: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_requests{%(nodeSelector)s}[$__rate_interval]) + rate(n1ql_invalid_requests{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - total'), - - queryServiceErrors: - prometheusQuery.new( - '${' + 
vars.datasources.prometheus.name + '}', - 'rate(n1ql_errors{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - error'), - - queryServiceInvalidRequests: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_invalid_requests{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - invalid'), - - // Query service latency buckets - queryServiceRequests250ms: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_requests_250ms{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >250ms'), - - queryServiceRequests500ms: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_requests_500ms{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >500ms'), - - queryServiceRequests1000ms: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_requests_1000ms{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >1000ms'), - - queryServiceRequests5000ms: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'rate(n1ql_requests_5000ms{%(nodeSelector)s}[$__rate_interval])' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - >5000ms'), - - // Index service metrics - indexServiceRequests: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, instance, job) (rate(index_num_requests{%(nodeSelector)s}[$__rate_interval]))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - indexCacheHitRatio: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 
'sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{%(nodeSelector)s}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance) (increase(index_cache_hits{%(nodeSelector)s}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance) (increase(index_cache_misses{%(nodeSelector)s}[$__rate_interval])))' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}}'), - - indexAverageScanLatency: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'sum by(couchbase_cluster, index, instance, job) (index_avg_scan_latency{%(nodeSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{couchbase_cluster}} - {{instance}} - {{index}}'), - - // - // Bucket Overview Dashboard Targets - // - - // Detailed bucket metrics (instance-level) - bucketTopBucketsByMemoryUsed: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, kv_mem_used_bytes{%(bucketSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - topBucketsByDiskUsedDetailed: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, couch_docs_actual_disk_size{%(bucketSelector)s})' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - topBucketsByCurrentItems: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(couchbase_cluster, job, bucket) (kv_curr_items{%(bucketSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - // Bucket operations - topBucketsByOperationsWithOp: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(bucket, couchbase_cluster, instance, job, op) (rate(kv_ops{%(bucketSelector)s}[$__rate_interval])))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}} - {{op}}'), - - topBucketsByOperationsFailedDetailed: - prometheusQuery.new( - '${' + 
vars.datasources.prometheus.name + '}', - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (rate(kv_ops_failed{%(bucketSelector)s}[$__rate_interval])))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - topBucketsByHighPriorityRequests: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_high_pri_requests{%(bucketSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - // Bucket cache and performance - bottomBucketsByCacheHitRatio: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'bottomk(5, sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{%(bucketSelector)s}[$__rate_interval])) / (clamp_min(sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_hits{%(bucketSelector)s}[$__rate_interval])), 1) + sum by(couchbase_cluster, job, instance, bucket) (increase(index_cache_misses{%(bucketSelector)s}[$__rate_interval]))))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - - // Bucket vBuckets - bucketTopBucketsByVBucketsCount: - prometheusQuery.new( - '${' + vars.datasources.prometheus.name + '}', - 'topk(5, sum by(bucket, couchbase_cluster, instance, job) (kv_num_vbuckets{%(bucketSelector)s}))' % vars - ) - + prometheusQuery.withLegendFormat('{{instance}} - {{bucket}}'), - }, -} diff --git a/couchbase-mixin/variables.libsonnet b/couchbase-mixin/variables.libsonnet deleted file mode 100644 index 519144a48..000000000 --- a/couchbase-mixin/variables.libsonnet +++ /dev/null @@ -1,100 +0,0 @@ -local g = import './g.libsonnet'; -local var = g.dashboard.variable; -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; - -// Generates chained variables to use on all dashboards -{ - new(this, varMetric): - { - local filteringSelector = this.config.filteringSelector, - local 
groupLabels = this.config.groupLabels, - local instanceLabels = this.config.instanceLabels, - - local root = self, - // Helper function to create variables from specific label list - local variablesFromSpecificLabels(labels, filteringSelector, multiInstance=true) = - local chainVarProto(index, chainVar) = - var.query.new(chainVar.label) - + var.query.withDatasourceFromVariable(root.datasources.prometheus) - + var.query.queryTypes.withLabelValues( - chainVar.label, - // Combine filteringSelector with chainSelector, avoiding duplicate label filters - local combinedSelector = - if std.length(std.stripChars(filteringSelector, ' ')) == 0 then chainVar.chainSelector - else if std.length(chainVar.chainSelector) == 0 then std.stripChars(filteringSelector, ' ') - else - // Both exist - need to merge intelligently to avoid duplicates - local filteringParts = std.split(std.stripChars(filteringSelector, ' '), ','); - local filteringLabels = std.set([ - std.stripChars(std.split(std.stripChars(part, ' '), '=')[0], ' ') - for part in filteringParts - if std.length(std.split(part, '=')) > 1 - ]); - local chainParts = std.split(chainVar.chainSelector, ','); - local chainFiltered = std.filter( - function(part) - local label = std.stripChars(std.split(std.stripChars(part, ' '), '=')[0], ' '); - !std.setMember(label, filteringLabels), - chainParts - ); - std.join(',', std.filter(function(x) std.length(x) > 0, [std.stripChars(filteringSelector, ' ')] + chainFiltered)); - '%s{%s}' % [varMetric, combinedSelector], - ) - + var.query.generalOptions.withLabel(utils.toSentenceCase(chainVar.label)) - + var.query.selectionOptions.withIncludeAll( - value=true, - customAllValue='.+' - ) - + var.query.selectionOptions.withMulti(multiInstance) - + var.query.refresh.onTime() - + var.query.withSort( - i=1, - type='alphabetical', - asc=true, - caseInsensitive=false - ); - - // Create variables for all labels, let chainLabels handle the chaining without filteringSelector - 
std.mapWithIndex(chainVarProto, utils.chainLabels(labels, [])), - - datasources: { - prometheus: - var.datasource.new('prometheus_datasource', 'prometheus') - + var.datasource.generalOptions.withLabel('Prometheus data source') - + var.datasource.withRegex(''), - loki: - var.datasource.new('loki_datasource', 'loki') - + var.datasource.generalOptions.withLabel('Loki data source') - + var.datasource.withRegex(''), - }, - - // Dashboard-specific variable sets - clusterVariables: - [root.datasources.prometheus] - + variablesFromSpecificLabels(this.config.dashboardVariables.cluster, filteringSelector, multiInstance=false), - - nodeVariables: - [root.datasources.prometheus] - + variablesFromSpecificLabels(this.config.dashboardVariables.node, filteringSelector, multiInstance=true), - - bucketVariables: - [root.datasources.prometheus] - + variablesFromSpecificLabels(this.config.dashboardVariables.bucket, filteringSelector, multiInstance=true), - - clusterSelector: - '%s' % [ - utils.labelsToPromQLSelector(this.config.dashboardVariables.cluster), - ], - - nodeSelector: - '%s' % [ - utils.labelsToPromQLSelector(this.config.dashboardVariables.node), - ], - - bucketSelector: - '%s' % [ - utils.labelsToPromQLSelector(this.config.dashboardVariables.bucket), - ], - }, -} From 12a907bdab0650309727ee2bc30c2cab000c22ea Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 12 Aug 2025 16:52:13 -0400 Subject: [PATCH 5/7] fix queries from last commit --- couchbase-mixin/dashboards.libsonnet | 6 ++--- ...ew.json => couchbase-bucket-overview.json} | 0 ...w.json => couchbase-cluster-overview.json} | 0 ...view.json => couchbase-node-overview.json} | 22 +++++++++---------- couchbase-mixin/links.libsonnet | 6 ++--- couchbase-mixin/signals/bucket.libsonnet | 3 +-- couchbase-mixin/signals/cluster.libsonnet | 2 +- couchbase-mixin/signals/index.libsonnet | 6 ++--- couchbase-mixin/signals/node.libsonnet | 10 ++++----- couchbase-mixin/signals/query.libsonnet | 18 +++++++-------- 10 files changed, 36 
insertions(+), 37 deletions(-) rename couchbase-mixin/dashboards_out/{couchbase_bucket_overview.json => couchbase-bucket-overview.json} (100%) rename couchbase-mixin/dashboards_out/{couchbase_cluster_overview.json => couchbase-cluster-overview.json} (100%) rename couchbase-mixin/dashboards_out/{couchbase_node_overview.json => couchbase-node-overview.json} (92%) diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet index b66fd6c92..d58ca07b4 100644 --- a/couchbase-mixin/dashboards.libsonnet +++ b/couchbase-mixin/dashboards.libsonnet @@ -15,7 +15,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; local panels = this.grafana.panels; { - 'couchbase_bucket_overview.json': + 'couchbase-bucket-overview.json': g.dashboard.new(prefix + ' bucket overview') + g.dashboard.withPanels( g.util.grid.wrapPanels( @@ -43,7 +43,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; period ), - 'couchbase_node_overview.json': + 'couchbase-node-overview.json': g.dashboard.new(prefix + ' node overview') + g.dashboard.withPanels( g.util.grid.wrapPanels( @@ -74,7 +74,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; period ), - 'couchbase_cluster_overview.json': + 'couchbase-cluster-overview.json': g.dashboard.new(prefix + ' cluster overview') + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( diff --git a/couchbase-mixin/dashboards_out/couchbase_bucket_overview.json b/couchbase-mixin/dashboards_out/couchbase-bucket-overview.json similarity index 100% rename from couchbase-mixin/dashboards_out/couchbase_bucket_overview.json rename to couchbase-mixin/dashboards_out/couchbase-bucket-overview.json diff --git a/couchbase-mixin/dashboards_out/couchbase_cluster_overview.json b/couchbase-mixin/dashboards_out/couchbase-cluster-overview.json similarity index 100% rename from couchbase-mixin/dashboards_out/couchbase_cluster_overview.json rename to couchbase-mixin/dashboards_out/couchbase-cluster-overview.json diff --git 
a/couchbase-mixin/dashboards_out/couchbase_node_overview.json b/couchbase-mixin/dashboards_out/couchbase-node-overview.json similarity index 92% rename from couchbase-mixin/dashboards_out/couchbase_node_overview.json rename to couchbase-mixin/dashboards_out/couchbase-node-overview.json index dee945cc3..b1a9c629f 100644 --- a/couchbase-mixin/dashboards_out/couchbase_node_overview.json +++ b/couchbase-mixin/dashboards_out/couchbase-node-overview.json @@ -363,7 +363,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))[$__rate_interval])", + "expr": "sum by(job, instance, couchbase_cluster, code) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{code}}", @@ -416,7 +416,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))[$__rate_interval])", + "expr": "sum by(job, instance, couchbase_cluster, method) (rate(cm_http_requests_total{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - {{method}}", @@ -469,7 +469,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": 
"rate(rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]) + rate(n1ql_invalid_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]) + rate(n1ql_invalid_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - total", @@ -480,7 +480,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(rate(n1ql_errors{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_errors{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - error", @@ -491,7 +491,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(rate(n1ql_invalid_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_invalid_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - invalid", @@ -544,7 +544,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": 
"rate(rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - >0ms", @@ -555,7 +555,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(rate(n1ql_requests_250ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_requests_250ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - >250ms", @@ -566,7 +566,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(rate(n1ql_requests_500ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_requests_500ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - >500ms", @@ -577,7 +577,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(rate(n1ql_requests_1000ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_requests_1000ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", 
"format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - >1000ms", @@ -588,7 +588,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(rate(n1ql_requests_5000ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])[$__rate_interval])", + "expr": "rate(n1ql_requests_5000ms{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}} - >5000ms", @@ -641,7 +641,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(sum by(couchbase_cluster, instance, job) (rate(index_num_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))[$__rate_interval])", + "expr": "sum by(couchbase_cluster, instance, job) (rate(index_num_requests{job=~\"integrations/couchbase\",job=~\"$job\",couchbase_cluster=~\"$couchbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{couchbase_cluster}} - {{instance}}", diff --git a/couchbase-mixin/links.libsonnet b/couchbase-mixin/links.libsonnet index a53281aa5..4f7d2b4a2 100644 --- a/couchbase-mixin/links.libsonnet +++ b/couchbase-mixin/links.libsonnet @@ -4,15 +4,15 @@ local g = import './g.libsonnet'; new(this): { couchbaseBucketOverview: - link.link.new('Couchbase Bucket Overview', '/d/' + this.grafana.dashboards['couchbase_bucket_overview.json'].uid) + link.link.new('Couchbase Bucket Overview', '/d/' + this.grafana.dashboards['couchbase-bucket-overview.json'].uid) + link.link.options.withKeepTime(true), couchbaseNodeOverview: - link.link.new('Couchbase Node Overview', '/d/' + this.grafana.dashboards['couchbase_node_overview.json'].uid) + 
link.link.new('Couchbase Node Overview', '/d/' + this.grafana.dashboards['couchbase-node-overview.json'].uid) + link.link.options.withKeepTime(true), couchbaseClusterOverview: - link.link.new('Couchbase Cluster Overview', '/d/' + this.grafana.dashboards['couchbase_cluster_overview.json'].uid) + link.link.new('Couchbase Cluster Overview', '/d/' + this.grafana.dashboards['couchbase-cluster-overview.json'].uid) + link.link.options.withKeepTime(true), otherDashboards: diff --git a/couchbase-mixin/signals/bucket.libsonnet b/couchbase-mixin/signals/bucket.libsonnet index e9a7ec95d..cb42d0a6e 100644 --- a/couchbase-mixin/signals/bucket.libsonnet +++ b/couchbase-mixin/signals/bucket.libsonnet @@ -1,7 +1,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; - function(this) { filteringSelector: this.filteringSelector, @@ -127,4 +126,4 @@ function(this) }, }, }, - } \ No newline at end of file + } diff --git a/couchbase-mixin/signals/cluster.libsonnet b/couchbase-mixin/signals/cluster.libsonnet index e1aa5d189..38592fbea 100644 --- a/couchbase-mixin/signals/cluster.libsonnet +++ b/couchbase-mixin/signals/cluster.libsonnet @@ -190,4 +190,4 @@ function(this) }, }, }, - } \ No newline at end of file + } diff --git a/couchbase-mixin/signals/index.libsonnet b/couchbase-mixin/signals/index.libsonnet index 581ffcf20..4db3f9833 100644 --- a/couchbase-mixin/signals/index.libsonnet +++ b/couchbase-mixin/signals/index.libsonnet @@ -17,7 +17,7 @@ function(this) indexServiceRequests: { name: 'Index service requests', nameShort: 'Index Requests', - type: 'counter', + type: 'raw', description: 'Rate of index service requests served.', unit: 'reqps', sources: { @@ -47,7 +47,7 @@ function(this) indexAverageScanLatency: { name: 'Index average scan latency', nameShort: 'Scan Latency', - type: 'gauge', + type: 'raw', description: 'Average time to serve a scan request per index.', unit: 'ns', sources: { @@ -58,4 +58,4 @@ function(this) }, }, }, - } \ No newline at end of file + } 
diff --git a/couchbase-mixin/signals/node.libsonnet b/couchbase-mixin/signals/node.libsonnet index c16f16b17..0234b817b 100644 --- a/couchbase-mixin/signals/node.libsonnet +++ b/couchbase-mixin/signals/node.libsonnet @@ -30,7 +30,7 @@ function(this) cpuUtilization: { name: 'CPU utilization', nameShort: 'CPU %', - type: 'gauge', + type: 'raw', description: 'CPU utilization percentage across all available cores on this Couchbase node.', unit: 'percent', sources: { @@ -86,7 +86,7 @@ function(this) backupSize: { name: 'Backup size', nameShort: 'Backup', - type: 'gauge', + type: 'raw', description: 'Size of the backup for a node.', unit: 'decbytes', sources: { @@ -114,7 +114,7 @@ function(this) httpResponseCodes: { name: 'HTTP response codes', nameShort: 'HTTP Codes', - type: 'counter', + type: 'raw', description: 'Rate of HTTP response codes handled by the cluster manager.', unit: 'reqps', sources: { @@ -127,7 +127,7 @@ function(this) httpRequestMethods: { name: 'HTTP request methods', nameShort: 'HTTP Methods', - type: 'counter', + type: 'raw', description: 'Rate of HTTP request methods handled by the cluster manager.', unit: 'reqps', sources: { @@ -138,4 +138,4 @@ function(this) }, }, }, - } \ No newline at end of file + } diff --git a/couchbase-mixin/signals/query.libsonnet b/couchbase-mixin/signals/query.libsonnet index 1dc948c29..10c9262f9 100644 --- a/couchbase-mixin/signals/query.libsonnet +++ b/couchbase-mixin/signals/query.libsonnet @@ -22,7 +22,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_requests{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_requests{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >0ms', }, }, @@ -30,7 +30,7 @@ function(this) queryServiceRequestsTotal: { name: 'Query service requests total', nameShort: 'N1QL Total', - type: 'counter', + type: 'raw', description: 'Total rate of N1QL requests processed by the query service (including valid and invalid).', unit: 
'reqps', sources: { @@ -48,7 +48,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_errors{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_errors{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - error', }, }, @@ -61,7 +61,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_invalid_requests{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_invalid_requests{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - invalid', }, }, @@ -76,7 +76,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_requests_250ms{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_requests_250ms{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >250ms', }, }, @@ -89,7 +89,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_requests_500ms{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_requests_500ms{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >500ms', }, }, @@ -102,7 +102,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_requests_1000ms{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_requests_1000ms{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >1000ms', }, }, @@ -115,10 +115,10 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'rate(n1ql_requests_5000ms{%(queriesSelector)s}[$__rate_interval])', + expr: 'n1ql_requests_5000ms{%(queriesSelector)s}', legendCustomTemplate: '{{couchbase_cluster}} - {{instance}} - >5000ms', }, }, }, }, - } \ No newline at end of file + } From 6a8a9991e441eb47e0ad8d7bf45c248864faa595 Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 13 Aug 2025 09:53:15 -0400 Subject: [PATCH 6/7] cleanup config object after recent changes to signals --- couchbase-mixin/config.libsonnet | 19 +++---------------- 
couchbase-mixin/dashboards.libsonnet | 2 ++ couchbase-mixin/main.libsonnet | 2 -- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/couchbase-mixin/config.libsonnet b/couchbase-mixin/config.libsonnet index 9d859fded..c50b15c0c 100644 --- a/couchbase-mixin/config.libsonnet +++ b/couchbase-mixin/config.libsonnet @@ -1,27 +1,13 @@ { - // any modular library should include as inputs: - // 'dashboardNamePrefix' - Use as prefix for all Dashboards and (optional) rule groups - // 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules. - // 'groupLabels' - one or more labels that can be used to identify 'group' of instances. In simple cases, can be 'job' or 'cluster'. - // 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'. - // 'uid' - UID to prefix all dashboards original uids - enableMultiCluster: false, - filteringSelector: '', + filteringSelector: 'job=~"integrations/couchbase"', groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'couchbase_cluster'] else ['job', 'couchbase_cluster'], instanceLabels: ['instance'], dashboardTags: ['couchbase-mixin'], uid: 'couchbase', dashboardNamePrefix: 'Couchbase', - local config = self, - // Dashboard-specific label configurations - dashboardVariables: { - cluster: if config.enableMultiCluster then ['job', 'couchbase_cluster', 'cluster'] else ['job', 'couchbase_cluster'], - node: if config.enableMultiCluster then ['job', 'instance', 'couchbase_cluster', 'cluster'] else ['job', 'instance', 'couchbase_cluster'], - bucket: if config.enableMultiCluster then ['job', 'instance', 'couchbase_cluster', 'cluster', 'bucket'] else ['job', 'instance', 'couchbase_cluster', 'bucket'], - }, - + // additional params dashboardPeriod: 'now-1h', dashboardTimezone: 'default', @@ -44,6 +30,7 @@ metricsSource: 'prometheus', // expose signals library + local config = 
self, signals+: { cluster: (import './signals/cluster.libsonnet')(config), node: (import './signals/node.libsonnet')(config), diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet index d58ca07b4..1406ea0d3 100644 --- a/couchbase-mixin/dashboards.libsonnet +++ b/couchbase-mixin/dashboards.libsonnet @@ -46,6 +46,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; 'couchbase-node-overview.json': g.dashboard.new(prefix + ' node overview') + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( g.util.grid.wrapPanels( [ panels.node_memoryUtilizationPanel { gridPos+: { w: 12 } }, @@ -62,6 +63,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; panels.node_averageScanLatencyPanel { gridPos+: { w: 8 } }, ] ) + ) ) + root.applyCommon( vars.multiInstance, diff --git a/couchbase-mixin/main.libsonnet b/couchbase-mixin/main.libsonnet index fe43d48a8..7f26e8cee 100644 --- a/couchbase-mixin/main.libsonnet +++ b/couchbase-mixin/main.libsonnet @@ -4,7 +4,6 @@ local dashboards = import './dashboards.libsonnet'; local links = import './links.libsonnet'; local panels = import './panels.libsonnet'; local rows = import './rows.libsonnet'; -local targets = import './targets.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; { @@ -35,7 +34,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; customAllValue='.+', enableLokiLogs=this.config.enableLokiLogs, ), - targets: targets.new(this), annotations: {}, links: links.new(this), panels: panels.new(this), From 33decb52515a3bbf2d98efacbab27b8d39b84a4a Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 13 Aug 2025 10:01:50 -0400 Subject: [PATCH 7/7] make fmt --- couchbase-mixin/config.libsonnet | 2 +- couchbase-mixin/dashboards.libsonnet | 32 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/couchbase-mixin/config.libsonnet b/couchbase-mixin/config.libsonnet index c50b15c0c..38ee660a8 100644 --- 
a/couchbase-mixin/config.libsonnet +++ b/couchbase-mixin/config.libsonnet @@ -7,7 +7,7 @@ uid: 'couchbase', dashboardNamePrefix: 'Couchbase', - + // additional params dashboardPeriod: 'now-1h', dashboardTimezone: 'default', diff --git a/couchbase-mixin/dashboards.libsonnet b/couchbase-mixin/dashboards.libsonnet index 1406ea0d3..659321c20 100644 --- a/couchbase-mixin/dashboards.libsonnet +++ b/couchbase-mixin/dashboards.libsonnet @@ -47,22 +47,22 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; g.dashboard.new(prefix + ' node overview') + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( - g.util.grid.wrapPanels( - [ - panels.node_memoryUtilizationPanel { gridPos+: { w: 12 } }, - panels.node_cpuUtilizationPanel { gridPos+: { w: 12 } }, - panels.node_totalMemoryUsedByServicePanel { gridPos+: { w: 8 } }, - panels.node_backupSizePanel { gridPos+: { w: 8 } }, - panels.node_currentConnectionsPanel { gridPos+: { w: 8 } }, - panels.node_httpResponseCodesPanel { gridPos+: { w: 12 } }, - panels.node_httpRequestMethodsPanel { gridPos+: { w: 12 } }, - panels.node_queryServiceRequestsPanel { gridPos+: { w: 12 } }, - panels.node_queryServiceRequestProcessingTimePanel { gridPos+: { w: 12 } }, - panels.node_indexServiceRequestsPanel { gridPos+: { w: 8 } }, - panels.node_indexCacheHitRatioPanel { gridPos+: { w: 8 } }, - panels.node_averageScanLatencyPanel { gridPos+: { w: 8 } }, - ] - ) + g.util.grid.wrapPanels( + [ + panels.node_memoryUtilizationPanel { gridPos+: { w: 12 } }, + panels.node_cpuUtilizationPanel { gridPos+: { w: 12 } }, + panels.node_totalMemoryUsedByServicePanel { gridPos+: { w: 8 } }, + panels.node_backupSizePanel { gridPos+: { w: 8 } }, + panels.node_currentConnectionsPanel { gridPos+: { w: 8 } }, + panels.node_httpResponseCodesPanel { gridPos+: { w: 12 } }, + panels.node_httpRequestMethodsPanel { gridPos+: { w: 12 } }, + panels.node_queryServiceRequestsPanel { gridPos+: { w: 12 } }, + panels.node_queryServiceRequestProcessingTimePanel { 
gridPos+: { w: 12 } }, + panels.node_indexServiceRequestsPanel { gridPos+: { w: 8 } }, + panels.node_indexCacheHitRatioPanel { gridPos+: { w: 8 } }, + panels.node_averageScanLatencyPanel { gridPos+: { w: 8 } }, + ] + ) ) ) + root.applyCommon(