From 9b0a90911fd4f5bda6408cd3c8938e47bfea255d Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 25 Sep 2025 17:40:19 -0400 Subject: [PATCH 1/4] modernize the influxdb mixin to use modern libraries --- influxdb-mixin/{alerts => }/alerts.libsonnet | 42 +- influxdb-mixin/config.libsonnet | 49 +- influxdb-mixin/dashboards.libsonnet | 124 ++ .../dashboards/dashboards.libsonnet | 3 - .../influxdb-cluster-overview.libsonnet | 1443 ------------- .../influxdb-instance-overview.libsonnet | 1881 ----------------- .../influxdb-logs-overview.libsonnet | 32 - .../influxdb-cluster-overview.json | 1361 ++++-------- .../influxdb-instance-overview.json | 1747 +++++---------- .../dashboards_out/influxdb-logs.json | 116 +- influxdb-mixin/g.libsonnet | 1 + influxdb-mixin/jsonnetfile.json | 20 +- influxdb-mixin/links.libsonnet | 27 + influxdb-mixin/main.libsonnet | 49 + influxdb-mixin/mixin.libsonnet | 37 +- influxdb-mixin/panels.libsonnet | 525 +++++ .../prometheus_alerts.yaml | 16 +- influxdb-mixin/rows.libsonnet | 105 + influxdb-mixin/signals/instance.libsonnet | 486 +++++ influxdb-mixin/signals/overview.libsonnet | 411 ++++ 20 files changed, 2727 insertions(+), 5748 deletions(-) rename influxdb-mixin/{alerts => }/alerts.libsonnet (71%) create mode 100644 influxdb-mixin/dashboards.libsonnet delete mode 100644 influxdb-mixin/dashboards/dashboards.libsonnet delete mode 100644 influxdb-mixin/dashboards/influxdb-cluster-overview.libsonnet delete mode 100644 influxdb-mixin/dashboards/influxdb-instance-overview.libsonnet delete mode 100644 influxdb-mixin/dashboards/influxdb-logs-overview.libsonnet create mode 100644 influxdb-mixin/g.libsonnet create mode 100644 influxdb-mixin/links.libsonnet create mode 100644 influxdb-mixin/main.libsonnet create mode 100644 influxdb-mixin/panels.libsonnet create mode 100644 influxdb-mixin/rows.libsonnet create mode 100644 influxdb-mixin/signals/instance.libsonnet create mode 100644 influxdb-mixin/signals/overview.libsonnet diff --git 
a/influxdb-mixin/alerts/alerts.libsonnet b/influxdb-mixin/alerts.libsonnet similarity index 71% rename from influxdb-mixin/alerts/alerts.libsonnet rename to influxdb-mixin/alerts.libsonnet index 976bf7328..c6d5c6ade 100644 --- a/influxdb-mixin/alerts/alerts.libsonnet +++ b/influxdb-mixin/alerts.libsonnet @@ -1,14 +1,14 @@ { - prometheusAlerts+:: { + new(this): { groups+: [ { name: 'influxdb', rules: [ { - alert: 'InfluxDBWarningTaskSchedulerHighFailureRate', + alert: 'InfluxDBWarningTaskHighFailureRate', expr: ||| - 100 * rate(task_scheduler_total_execute_failure[5m])/clamp_min(rate(task_scheduler_total_execution_calls[5m]), 1) >= %(alertsWarningTaskSchedulerHighFailureRate)s - ||| % $._config, + 100 * rate(task_scheduler_total_execute_failure{%(filteringSelector)s}[5m])/clamp_min(rate(task_scheduler_total_execution_calls{%(filteringSelector)s}[5m]), 1) >= %(alertsWarningTaskSchedulerHighFailureRate)s + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -19,14 +19,14 @@ ( 'Task scheduler task executions for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} are failing at a rate of {{ printf "%%.0f" $value }} percent, ' + 'which is above the threshold of %(alertsWarningTaskSchedulerHighFailureRate)s percent.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'InfluxDBCriticalTaskSchedulerHighFailureRate', + alert: 'InfluxDBCriticalTaskHighFailureRate', expr: ||| - 100 * rate(task_scheduler_total_execute_failure[5m])/clamp_min(rate(task_scheduler_total_execution_calls[5m]), 1) >= %(alertsCriticalTaskSchedulerHighFailureRate)s - ||| % $._config, + 100 * rate(task_scheduler_total_execute_failure{%(filteringSelector)s}[5m])/clamp_min(rate(task_scheduler_total_execution_calls{%(filteringSelector)s}[5m]), 1) >= %(alertsCriticalTaskSchedulerHighFailureRate)s + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -37,14 +37,14 @@ ( 'Task scheduler task executions for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} are failing at a rate of {{ printf "%%.0f" $value }} percent, ' + 'which is above the threshold of %(alertsCriticalTaskSchedulerHighFailureRate)s percent.' - ) % $._config, + ) % this.config, }, }, { alert: 'InfluxDBHighBusyWorkerPercentage', expr: ||| - task_executor_workers_busy >= %(alertsWarningHighBusyWorkerPercentage)s - ||| % $._config, + task_executor_workers_busy{%(filteringSelector)s} >= %(alertsWarningHighBusyWorkerPercentage)s + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -55,14 +55,14 @@ ( 'The busy worker percentage for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%%.0f" $value }} percent, ' + 'which is above the threshold of %(alertsWarningHighBusyWorkerPercentage)s percent.' 
- ) % $._config, + ) % this.config, }, }, { alert: 'InfluxDBHighHeapMemoryUsage', expr: ||| - 100 * go_memstats_heap_alloc_bytes/clamp_min((go_memstats_heap_idle_bytes + go_memstats_heap_alloc_bytes), 1) >= %(alertsWarningHighHeapMemoryUsage)s - ||| % $._config, + 100 * go_memstats_heap_alloc_bytes{%(filteringSelector)s}/clamp_min((go_memstats_heap_idle_bytes{%(filteringSelector)s} + go_memstats_heap_alloc_bytes{%(filteringSelector)s}), 1) >= %(alertsWarningHighHeapMemoryUsage)s + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -73,14 +73,14 @@ ( 'The heap memory usage for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%%.0f" $value }} percent, ' + 'which is above the threshold of %(alertsWarningHighHeapMemoryUsage)s percent.' - ) % $._config, + ) % this.config, }, }, { alert: 'InfluxDBHighAverageAPIRequestLatency', expr: ||| - sum without(handler, method, path, response_code, status, user_agent) (increase(http_api_request_duration_seconds_sum[5m])/clamp_min(increase(http_api_requests_total[5m]), 1)) >= %(alertsWarningHighAverageAPIRequestLatency)s - ||| % $._config, + sum without(handler, method, path, response_code, status, user_agent) (increase(http_api_request_duration_seconds_sum{%(filteringSelector)s}[5m])/clamp_min(increase(http_api_requests_total{%(filteringSelector)s}[5m]), 1)) >= %(alertsWarningHighAverageAPIRequestLatency)s + ||| % this.config, 'for': '1m', labels: { severity: 'critical', @@ -90,14 +90,14 @@ description: ( 'The average API request latency for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%%.2f" $value }} seconds, which is above the threshold of %(alertsWarningHighAverageAPIRequestLatency)s seconds.' 
- ) % $._config, + ) % this.config, }, }, { alert: 'InfluxDBSlowAverageIQLExecutionTime', expr: ||| - sum without(result) (increase(influxql_service_executing_duration_seconds_sum[5m])/clamp_min(increase(influxql_service_requests_total[5m]), 1)) >= %(alertsWarningSlowAverageIQLExecutionTime)s - ||| % $._config, + sum without(result) (increase(influxql_service_executing_duration_seconds_sum{%(filteringSelector)s}[5m])/clamp_min(increase(influxql_service_requests_total{%(filteringSelector)s}[5m]), 1)) >= %(alertsWarningSlowAverageIQLExecutionTime)s + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -108,7 +108,7 @@ ( 'The average InfluxQL query execution time for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%%.2f" $value }} seconds, ' + 'which is above the threshold of %(alertsWarningSlowAverageIQLExecutionTime)s seconds.' - ) % $._config, + ) % this.config, }, }, ], diff --git a/influxdb-mixin/config.libsonnet b/influxdb-mixin/config.libsonnet index 3aabd38af..63b369828 100644 --- a/influxdb-mixin/config.libsonnet +++ b/influxdb-mixin/config.libsonnet @@ -1,23 +1,38 @@ { - _config+:: { - enableMultiCluster: false, - influxdbSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"', - multiclusterSelector: 'job=~"$job"', - filterSelector: 'job=~"integrations/influxdb"', + local this = self, + filteringSelector: 'job="integrations/influxdb"', + groupLabels: ['job', 'influxdb_cluster'], + instanceLabels: ['instance'], + dashboardTags: ['influxdb-mixin'], + uid: 'influxdb', + dashboardNamePrefix: 'InfluxDB', - dashboardTags: ['influxdb-mixin'], - dashboardPeriod: 'now-30m', - dashboardTimezone: 'default', - dashboardRefresh: '1m', + // additional params + dashboardPeriod: 'now-30m', + dashboardTimezone: 'default', + dashboardRefresh: '1m', - // alerts thresholds - alertsWarningTaskSchedulerHighFailureRate: 25, // % - alertsCriticalTaskSchedulerHighFailureRate: 50, // % - 
alertsWarningHighBusyWorkerPercentage: 80, // % - alertsWarningHighHeapMemoryUsage: 80, // % - alertsWarningHighAverageAPIRequestLatency: 0.3, // count - alertsWarningSlowAverageIQLExecutionTime: 0.1, // count + // logs lib related + enableLokiLogs: true, + logLabels: ['job', 'instance', 'influxdb_cluster', 'level'], + extraLogLabels: [], // Required by logs-lib + logsVolumeGroupBy: 'level', + showLogsVolume: true, - enableLokiLogs: true, + // alert thresholds + alertsWarningTaskSchedulerHighFailureRate: 25, // % + alertsCriticalTaskSchedulerHighFailureRate: 50, // % + alertsWarningHighBusyWorkerPercentage: 80, // % + alertsWarningHighHeapMemoryUsage: 80, // % + alertsWarningHighAverageAPIRequestLatency: 0.3, // count + alertsWarningSlowAverageIQLExecutionTime: 0.1, // count + + // metrics source for signals library + metricsSource: 'prometheus', + + legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), + signals+: { + overview: (import './signals/overview.libsonnet')(this), + instance: (import './signals/instance.libsonnet')(this), }, } diff --git a/influxdb-mixin/dashboards.libsonnet b/influxdb-mixin/dashboards.libsonnet new file mode 100644 index 000000000..3ea38efaa --- /dev/null +++ b/influxdb-mixin/dashboards.libsonnet @@ -0,0 +1,124 @@ +local g = import '../g.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; + +{ + local root = self, + new(this):: + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = g.util.string.slugify(this.config.uid); + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + + { + // InfluxDB cluster overview dashboard + 'influxdb-cluster-overview.json': + g.dashboard.new(prefix + ' cluster overview') + + 
g.dashboard.withDescription('Dashboard providing an overview of InfluxDB cluster performance and health.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.influxdbClusterOverview, + this.grafana.rows.influxdbClusterOverviewQueriesAndOperations, + this.grafana.rows.influxdbClusterOverviewTaskScheduler, + this.grafana.rows.influxdbClusterOverviewMemoryAndGC, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance + [ + g.dashboard.variable.custom.new( + 'k', + values=['5', '10', '20', '50'], + ) + g.dashboard.variable.custom.generalOptions.withCurrent('5') + + g.dashboard.variable.custom.generalOptions.withLabel('Top node count') + + g.dashboard.variable.custom.selectionOptions.withMulti(false) + + g.dashboard.variable.custom.selectionOptions.withIncludeAll(false), + ], + uid + '_cluster_overview', + tags, + links { influxdbClusterOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + // InfluxDB instance overview dashboard + 'influxdb-instance-overview.json': + g.dashboard.new(prefix + ' instance overview') + + g.dashboard.withDescription('Dashboard providing detailed overview of InfluxDB instance performance, including configuration stats, Go runtime performance, query/request load, and task scheduler activity.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.influxdbInstanceOverview, + this.grafana.rows.influxdbInstanceOverviewQueriesAndOperations, + this.grafana.rows.influxdbInstanceOverviewTaskScheduler, + this.grafana.rows.influxdbInstanceOverviewGo, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '_instance_overview', + tags, + links { influxdbInstanceOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + } + + + if this.config.enableLokiLogs then + { + 'influxdb-logs.json': + logslib.new( + prefix + ' logs', + 
datasourceName=this.grafana.variables.datasources.loki.name, + datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.groupLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + ) + { + dashboards+: + { + logs+: + root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: + { + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + toArray+: [ + this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } + else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/influxdb-mixin/dashboards/dashboards.libsonnet b/influxdb-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index d3c08ba62..000000000 --- a/influxdb-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,3 +0,0 @@ -(import 'influxdb-cluster-overview.libsonnet') + -(import 'influxdb-instance-overview.libsonnet') + -(import 'influxdb-logs-overview.libsonnet') diff --git a/influxdb-mixin/dashboards/influxdb-cluster-overview.libsonnet b/influxdb-mixin/dashboards/influxdb-cluster-overview.libsonnet deleted file mode 100644 index ec9e9bf74..000000000 --- a/influxdb-mixin/dashboards/influxdb-cluster-overview.libsonnet +++ /dev/null @@ -1,1443 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana 
= (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'influxdb-cluster-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local alertsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - ), - ], - type: 'alertlist', - title: 'Alerts', - description: 'Panel to report on the status of firing alerts.', - options: { - alertInstanceLabelFilter: '{job=~"${job:regex}", influxdb_cluster=~"${influxdb_cluster:regex}"}', - alertName: '', - dashboardAlerts: false, - groupBy: [], - groupMode: 'default', - maxItems: 20, - sortOrder: 1, - stateFilter: { - 'error': true, - firing: true, - noData: false, - normal: false, - pending: true, - }, - viewMode: 'list', - }, -}; - -local serversPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'influxdb_uptime_seconds{' + matcher + '}', - datasource=promDatasource, - legendFormat='Uptime', - format='table', - ), - prometheus.target( - 'influxdb_buckets_total{' + matcher + '}', - datasource=promDatasource, - legendFormat='Buckets', - format='table', - ), - prometheus.target( - 'influxdb_users_total{' + matcher + '}', - datasource=promDatasource, - legendFormat='Users', - format='table', - ), - prometheus.target( - 'influxdb_replications_total{' + matcher + '}', - datasource=promDatasource, - legendFormat='Replications', - format='table', - ), - prometheus.target( - 'influxdb_remotes_total{' + matcher + '}', - datasource=promDatasource, - legendFormat='Remotes', - format='table', - ), - prometheus.target( - 'influxdb_scrapers_total{' + matcher + '}', - datasource=promDatasource, - legendFormat='Scrapers', - format='table', - ), - prometheus.target( - 'influxdb_dashboards_total{' + matcher + '}', - 
datasource=promDatasource, - legendFormat='Dashboards', - format='table', - ), - ], - type: 'table', - title: 'Servers', - description: 'Statistics for each instance in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'left', - cellOptions: { - type: 'auto', - }, - inspect: false, - }, - links: [], - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'instance', - }, - properties: [ - { - id: 'links', - value: [ - { - title: 'Instance overview', - url: '/d/influxdb-instance-overview?from=${__from}&to=${__to}&var-instance=${__data.fields["Instance"]}', - }, - ], - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'Uptime', - }, - properties: [ - { - id: 'unit', - value: 's', - }, - ], - }, - ], - }, - options: { - cellHeight: 'sm', - footer: { - countRows: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.3.0-63516', - transformations: [ - { - id: 'joinByField', - options: { - byField: 'instance', - mode: 'outer', - }, - }, - { - id: 'organize', - options: { - excludeByName: { - Time: true, - 'Time 2': true, - 'Time 3': true, - 'Time 4': true, - 'Time 5': true, - 'Time 6': true, - 'Time 7': true, - 'Value #B': false, - 'Value #H': true, - __name__: true, - '__name__ 1': true, - '__name__ 2': true, - '__name__ 3': true, - '__name__ 4': true, - '__name__ 5': true, - '__name__ 6': true, - '__name__ 7': true, - id: true, - influxdb_cluster: false, - 'influxdb_cluster 2': true, - 'influxdb_cluster 3': true, - 'influxdb_cluster 4': true, - 'influxdb_cluster 5': true, - 'influxdb_cluster 6': true, - 'influxdb_cluster 7': true, - job: true, - 'job 2': true, - 'job 3': true, - 'job 4': true, - 'job 5': true, - 'job 6': true, - 'job 7': true, - 'cluster 7': true, - 'cluster 2': true, - cluster: false, - 'cluster 3': true, - 
'cluster 4': true, - 'cluster 5': true, - 'cluster 6': true, - }, - indexByName: {}, - renameByName: { - Dashboards: '', - 'Value #A': 'Uptime', - 'Value #B': 'Buckets', - 'Value #C': 'Users', - 'Value #D': 'Replications', - 'Value #E': 'Remotes', - 'Value #F': 'Scrapers', - 'Value #G': 'Dashboards', - influxdb_cluster: 'InfluxDB cluster', - instance: 'Instance', - cluster: 'K8s cluster', - }, - }, - }, - ], -}; - -local queriesAndOperationsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Queries and operations', - collapsed: false, -}; - -local topInstancesByHTTPAPIRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk($k, sum by(job, influxdb_cluster, instance) (rate(http_api_requests_total{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top instances by HTTP API requests', - description: 'HTTP API request rate for the instances with the most traffic in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, 
- }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local httpAPIRequestDurationPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'histogram_quantile(0.95, sum by(le, job, influxdb_cluster) (rate(http_api_request_duration_seconds_bucket{' + matcher + '}[$__rate_interval])))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}}', - ), - ], - type: 'histogram', - title: 'HTTP API request duration', - description: 'Time taken to respond to HTTP API requests for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - fillOpacity: 80, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineWidth: 1, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - bucketOffset: 0, - combine: false, - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - }, -}; - -local httpAPIResponseCodesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, response_code) (rate(http_api_requests_total{' + matcher + '}[$__rate_interval])) > 0', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - {{response_code}}', - ), - ], - type: 'piechart', - title: 'HTTP API response codes', - description: 'Rate of different HTTP response codes for the entire cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - }, - mappings: [], - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - pieType: 'pie', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - tooltip: { - mode: 'multi', - 
sort: 'desc', - }, - }, -}; - -local httpOperationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, status) (rate(http_query_request_count{' + matcher + '}[$__rate_interval])) > 0', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - query - {{status}}', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, status) (rate(http_write_request_count{' + matcher + '}[$__rate_interval])) > 0', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - write - {{status}}', - ), - ], - type: 'timeseries', - title: 'HTTP operations', - description: 'Rate of database operations from HTTP for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local httpOperationDataPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (rate(http_query_request_bytes{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - query - request', - ), - prometheus.target( - 'sum by(job, 
influxdb_cluster) (rate(http_query_response_bytes{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - query - response', - ), - prometheus.target( - 'sum by(job, influxdb_cluster) (rate(http_write_request_bytes{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - write - request', - ), - prometheus.target( - 'sum by(job, influxdb_cluster) (rate(http_write_response_bytes{' + matcher + '}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - write - response', - ), - ], - type: 'timeseries', - title: 'HTTP operation data', - description: 'Rate of database HTTP operation data for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topInstancesByIQLQueryRatePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk($k, sum by(job, influxdb_cluster, instance) (rate(influxql_service_requests_total{' + matcher + '}[$__rate_interval])))', - 
datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top instances by IQL query rate', - description: 'Rate of InfluxQL queries for the instances with the most traffic in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'queries/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local iqlQueryResponseTimePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, result) (increase(influxql_service_executing_duration_seconds_sum{' + matcher + '}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - {{result}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'IQL query response time / $__interval', - description: 'Response time for recent InfluxQL queries, organized by result.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 
'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local boltdbOperationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(boltdb_reads_total{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - reads', - ), - prometheus.target( - 'rate(boltdb_writes_total{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - writes', - ), - ], - type: 'timeseries', - title: 'BoltDB operations', - description: 'Rate of reads and writes to the underlying BoltDB storage engine for the entire cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - 
color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local taskSchedulerRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Task scheduler', - collapsed: false, -}; - -local activeTasksPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (task_scheduler_current_execution{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}}', - ), - ], - type: 'timeseries', - title: 'Active tasks', - description: 'Number of tasks currently being executed for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeWorkersPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (task_executor_total_runs_active{' + matcher + '})', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}}', 
- ), - ], - type: 'timeseries', - title: 'Active workers', - description: 'Number of workers currently running tasks on the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local executionsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(task_scheduler_total_execution_calls{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - total', - ), - prometheus.target( - 'rate(task_scheduler_total_execute_failure{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - failed', - ), - ], - type: 'timeseries', - title: 'Executions', - description: 'Rate of executions and execution failures for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: 
false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local schedulesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(task_scheduler_total_schedule_calls{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - total', - ), - prometheus.target( - 'rate(task_scheduler_total_schedule_fails{' + matcher + '}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - failed', - ), - ], - type: 'timeseries', - title: 'Schedules', - description: 'Rate of schedule operations and schedule operation failures for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - 
options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local goRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Go', - collapsed: false, -}; - -local topInstancesByHeapMemoryUsagePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk($k, go_memstats_heap_alloc_bytes{' + matcher + '}/clamp_min(go_memstats_heap_idle_bytes{' + matcher + '} + go_memstats_heap_alloc_bytes{' + matcher + '}, 1))', - datasource=promDatasource, - legendFormat='{{influxdb_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top instances by heap memory usage', - description: 'Heap memory usage for the largest instances in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local topInstancesByGCCPUUsagePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'go_memstats_gc_cpu_fraction{' + matcher + '}', - datasource=promDatasource, - 
legendFormat='{{influxdb_cluster}} - {{instance}}', - ), - ], - type: 'timeseries', - title: 'Top instances by GC CPU usage', - description: 'Fraction of CPU time used for garbage collection for the top instances in the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local getMatcher(cfg) = '%(influxdbSelector)s, influxdb_cluster=~"$influxdb_cluster"' % cfg; - -{ - grafanaDashboards+:: { - 'influxdb-cluster-overview.json': - dashboard.new( - 'InfluxDB cluster overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other InfluxDB dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 
'job', - promDatasource, - 'label_values(influxdb_uptime_seconds,job)', - label='Job', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(influxdb_uptime_seconds{%(multiclusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.*', - hide=if $._config.enableMultiCluster then '' else 'variable', - sort=0 - ), - template.new( - 'influxdb_cluster', - promDatasource, - 'label_values(influxdb_uptime_seconds{%(influxdbSelector)s}, influxdb_cluster)' % $._config, - label='InfluxDB cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.custom( - 'k', - query='5,10,20,50', - current='5', - label='Top node count', - refresh='never', - includeAll=false, - multi=false, - allValues='', - ), - ] - ) - .addPanels( - [ - alertsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 7, x: 0, y: 0 } }, - serversPanel(getMatcher($._config)) { gridPos: { h: 8, w: 17, x: 7, y: 0 } }, - queriesAndOperationsRow { gridPos: { h: 1, w: 24, x: 0, y: 17 } }, - topInstancesByHTTPAPIRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 18 } }, - httpAPIRequestDurationPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 18 } }, - httpAPIResponseCodesPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 18 } }, - httpOperationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 26 } }, - httpOperationDataPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 26 } }, - topInstancesByIQLQueryRatePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 34 } }, - iqlQueryResponseTimePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 34 } }, - boltdbOperationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 34 } }, - taskSchedulerRow { gridPos: { h: 1, w: 24, x: 0, y: 42 } }, - activeTasksPanel(getMatcher($._config)) { 
gridPos: { h: 8, w: 12, x: 0, y: 43 } }, - activeWorkersPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 43 } }, - executionsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 51 } }, - schedulesPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 51 } }, - goRow { gridPos: { h: 1, w: 24, x: 0, y: 8 } }, - topInstancesByHeapMemoryUsagePanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 9 } }, - topInstancesByGCCPUUsagePanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 9 } }, - ] - ), - }, -} diff --git a/influxdb-mixin/dashboards/influxdb-instance-overview.libsonnet b/influxdb-mixin/dashboards/influxdb-instance-overview.libsonnet deleted file mode 100644 index d6da53124..000000000 --- a/influxdb-mixin/dashboards/influxdb-instance-overview.libsonnet +++ /dev/null @@ -1,1881 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'influxdb-instance-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local uptimePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'influxdb_uptime_seconds{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Uptime', - description: 'Time that the InfluxDB process has been running.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - 
values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local bucketsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (influxdb_buckets_total{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Buckets', - description: 'Number of buckets on the server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local usersPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (influxdb_users_total{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Users', - description: 'Total number of users for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local replicationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (influxdb_replications_total{' + matcher + ', 
instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Replications', - description: 'Number of replication configurations on the server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local remotesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (influxdb_remotes_total{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Remotes', - description: 'Number of remote connections configured on the server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local scrapersPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (influxdb_scrapers_total{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Scrapers', - description: 'Number of scrapers on the server.', - fieldConfig: { - defaults: { - color: { - mode: 
'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local dashboardsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (influxdb_dashboards_total{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Dashboards', - description: 'Number of dashboards on the server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local threadsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster) (go_threads{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Threads', - description: 'Number of threads currently active on the server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 
'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local queriesAndOperationsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Queries and operations', - collapsed: false, -}; - -local httpAPIRequestsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, instance, status) (rate(http_api_requests_total{' + matcher + ', instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{instance}} - {{status}}', - ), - ], - type: 'timeseries', - title: 'HTTP API requests', - description: 'Rate of HTTP requests to the API, organized by response code.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeQueriesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (qc_compiling_active{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - 
legendFormat='{{instance}} - compiling', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (qc_queueing_active{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}} - queueing', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (qc_executing_active{' + matcher + ', instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}} - executing', - ), - ], - type: 'timeseries', - title: 'Active queries', - description: 'Number of active queries for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local httpOperationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, instance, status) (rate(http_query_request_count{' + matcher + ', instance=~"$instance"}[$__rate_interval])) > 0', - datasource=promDatasource, - legendFormat='{{instance}} - query - {{status}}', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, instance, status) (rate(http_write_request_count{' + matcher + ', 
instance=~"$instance"}[$__rate_interval])) > 0', - datasource=promDatasource, - legendFormat='{{instance}} - write - {{status}}', - ), - ], - type: 'timeseries', - title: 'HTTP operations', - description: 'Rate of database operations from HTTP for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local httpOperationDataPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (rate(http_query_request_bytes{' + matcher + ', instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{instance}} - query request', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (rate(http_query_response_bytes{' + matcher + ', instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{instance}} - query response', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (rate(http_write_request_bytes{' + matcher + ', instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{instance}} - 
write request', - ), - prometheus.target( - 'sum by(job, influxdb_cluster, instance) (rate(http_write_response_bytes{' + matcher + ', instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{instance}} - write response', - ), - ], - type: 'timeseries', - title: 'HTTP operation data', - description: 'Rate of database HTTP operation data for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local iqlQueryRatePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, instance, result) (rate(influxql_service_requests_total{' + matcher + ', instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{instance}} - {{result}}', - ), - ], - type: 'timeseries', - title: 'IQL query rate', - description: 'Rate of InfluxQL queries for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: 
'', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'queries/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local iqlQueryResponseTimePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, influxdb_cluster, instance, result) (increase(influxql_service_executing_duration_seconds_sum{' + matcher + ', instance=~"$instance"}[$__interval:]))', - datasource=promDatasource, - legendFormat='{{instance}} - {{result}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'IQL query response time / $__interval', - description: 'Response time for recent InfluxQL queries, organized by result.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - 
steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local boltdbOperationsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(boltdb_reads_total{' + matcher + ', instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - ), - prometheus.target( - 'rate(boltdb_writes_total{' + matcher + ', instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - ), - ], - type: 'timeseries', - title: 'BoltDB operations', - description: 'Rate of reads and writes to the underlying BoltDB storage engine for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local taskSchedulerRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Task scheduler', - collapsed: false, -}; - -local 
activeTasksPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'task_scheduler_current_execution{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Active tasks', - description: 'Number of tasks currently being executed for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeWorkersPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'task_executor_total_runs_active{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Active workers', - description: 'Number of workers currently running tasks on the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - 
legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local workerUsagePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'task_executor_workers_busy{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Worker usage', - description: 'Percentage of available workers that are currently busy.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - 
-local executionsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(task_scheduler_total_execution_calls{' + matcher + ', instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - total', - ), - prometheus.target( - 'rate(task_scheduler_total_execute_failure{' + matcher + ', instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - failed', - ), - ], - type: 'timeseries', - title: 'Executions', - description: 'Rate of executions and execution failures for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local schedulesPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(task_scheduler_total_schedule_calls{' + matcher + ', instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - total', - ), - prometheus.target( - 'rate(task_scheduler_total_schedule_fails{' + matcher + ', instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - 
legendFormat='{{instance}} - failed', - ), - ], - type: 'timeseries', - title: 'Schedules', - description: 'Rate of schedule operations and schedule operation failures for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local goRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Go', - collapsed: false, -}; - -local timeSinceLastGCPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'time() - go_memstats_last_gc_time_seconds{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'stat', - title: 'Time since last GC', - description: 'Amount of time since the last garbage collection cycle.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 
'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - wideLayout: true, - }, - pluginVersion: '10.3.0-63516', -}; - -local gcTimePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(go_gc_duration_seconds_sum{' + matcher + ', instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'GC time / $__interval', - description: 'Server CPU time spent on garbage collection.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local gcCPUUsagePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'go_memstats_gc_cpu_fraction{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'GC CPU usage', - description: 'Percent of server CPU time used for garbage collection.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - 
axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local heapMemoryUsagePanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'go_memstats_heap_alloc_bytes{' + matcher + ', instance=~"$instance"}/clamp_min((go_memstats_heap_idle_bytes{' + matcher + ', instance=~"$instance"} + go_memstats_heap_alloc_bytes{' + matcher + ', instance=~"$instance"}), 1)', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Heap memory usage', - description: 'Heap memory usage for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, 
- min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local goThreadsPanel(matcher) = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'go_threads{' + matcher + ', instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'timeseries', - title: 'Go threads', - description: 'Number of OS threads created for the server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 20, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local getMatcher(cfg) = '%(influxdbSelector)s, influxdb_cluster=~"$influxdb_cluster", instance=~"$instance"' % cfg; - -{ - grafanaDashboards+:: { - 'influxdb-instance-overview.json': - dashboard.new( - 'InfluxDB instance overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - 
refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other InfluxDB dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(influxdb_uptime_seconds,job)', - label='Job', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'cluster', - promDatasource, - 'label_values(influxdb_uptime_seconds{%(multiclusterSelector)s}, cluster)' % $._config, - label='Cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.*', - hide=if $._config.enableMultiCluster then '' else 'variable', - sort=0 - ), - template.new( - 'influxdb_cluster', - promDatasource, - 'label_values(influxdb_uptime_seconds{%(influxdbSelector)s}, influxdb_cluster)' % $._config, - label='InfluxDB cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(influxdb_uptime_seconds{%(influxdbSelector)s, influxdb_cluster=~"$influxdb_cluster"}, instance)' % $._config, - label='Instance', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addPanels( - [ - uptimePanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 0, y: 0 } }, - bucketsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 3, y: 0 } }, - usersPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 6, y: 0 } }, - replicationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 9, y: 0 } }, - remotesPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 12, y: 0 } }, - scrapersPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 15, y: 0 } }, - dashboardsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 18, y: 0 } }, - 
threadsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 3, x: 21, y: 0 } }, - queriesAndOperationsRow { gridPos: { h: 1, w: 24, x: 0, y: 25 } }, - httpAPIRequestsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 26 } }, - activeQueriesPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 26 } }, - httpOperationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 34 } }, - httpOperationDataPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 34 } }, - iqlQueryRatePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 42 } }, - iqlQueryResponseTimePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 42 } }, - boltdbOperationsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 42 } }, - taskSchedulerRow { gridPos: { h: 1, w: 24, x: 0, y: 50 } }, - activeTasksPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 0, y: 51 } }, - activeWorkersPanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 8, y: 51 } }, - workerUsagePanel(getMatcher($._config)) { gridPos: { h: 8, w: 8, x: 16, y: 51 } }, - executionsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 59 } }, - schedulesPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 59 } }, - goRow { gridPos: { h: 1, w: 24, x: 0, y: 8 } }, - timeSinceLastGCPanel(getMatcher($._config)) { gridPos: { h: 8, w: 6, x: 0, y: 9 } }, - gcTimePanel(getMatcher($._config)) { gridPos: { h: 8, w: 9, x: 6, y: 9 } }, - gcCPUUsagePanel(getMatcher($._config)) { gridPos: { h: 8, w: 9, x: 15, y: 9 } }, - heapMemoryUsagePanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 0, y: 17 } }, - goThreadsPanel(getMatcher($._config)) { gridPos: { h: 8, w: 12, x: 12, y: 17 } }, - ] - ), - }, -} diff --git a/influxdb-mixin/dashboards/influxdb-logs-overview.libsonnet b/influxdb-mixin/dashboards/influxdb-logs-overview.libsonnet deleted file mode 100644 index df3847d9e..000000000 --- a/influxdb-mixin/dashboards/influxdb-logs-overview.libsonnet +++ 
/dev/null @@ -1,32 +0,0 @@ -local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -local logsDashboard = import 'github.com/grafana/jsonnet-libs/logs-lib/logs/main.libsonnet'; -{ - grafanaDashboards+:: - if $._config.enableLokiLogs then { - local influxdbLogs = - logsDashboard.new( - 'InfluxDB logs overview', - datasourceName='loki_datasource', - datasourceRegex='', - filterSelector=$._config.filterSelector, - labels=['job', 'influxdb_cluster', 'instance', 'level', 'service', 'engine'], - formatParser=null, - showLogsVolume=true - ) - { - panels+: - { - logs+: - // InfluxDB logs already have timestamp - g.panel.logs.options.withShowTime(false), - }, - dashboards+: - { - logs+: g.dashboard.withLinksMixin($.grafanaDashboards['influxdb-cluster-overview.json'].links) - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.withRefresh($._config.dashboardRefresh), - }, - }, - 'influxdb-logs.json': influxdbLogs.dashboards.logs, - } else {}, -} diff --git a/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json b/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json index 8280859e8..a67f9820c 100644 --- a/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json +++ b/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json @@ -1,41 +1,52 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Dashboard providing an overview of InfluxDB cluster performance and health.", "links": [ { - "asDropdown": false, - "icon": "external link", + "keepTime": true, + "title": "InfluxDB instance overview", + "type": "link", + "url": "/d/influxdb_instance_overview" + }, + { + "keepTime": true, + "title": "InfluxDB logs", + "type": "link", + "url": "/d/influxdb-logs" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, - "tags": [ + "title": "All 
dashboards", + "type": "link", + "url": [ "influxdb-mixin" - ], - "targetBlank": false, - "title": "Other InfluxDB dashboards", - "type": "dashboards", - "url": "" + ] } ], "panels": [ { - "datasource": { - "uid": "${prometheus_datasource}" + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 }, - "description": "Panel to report on the status of firing alerts.", + "id": 1, + "panels": [ ], + "title": "InfluxDB cluster overview", + "type": "row" + }, + { "gridPos": { "h": 8, "w": 7, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { @@ -55,49 +66,17 @@ }, "viewMode": "list" }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "targets": [ ], "title": "Alerts", "type": "alertlist" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Statistics for each instance in the cluster.", "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "links": [ ], - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, "overrides": [ { "matcher": { @@ -105,12 +84,16 @@ "options": "instance" }, "properties": [ + { + "id": "displayName", + "value": "Instance" + }, { "id": "links", "value": [ { "title": "Instance overview", - "url": "/d/influxdb-instance-overview?from=${__from}&to=${__to}&var-instance=${__data.fields[\"Instance\"]}" + "url": "/d/influxdb_instance_overview?var-instance=${__data.fields.Instance}&${__url_time_range}&var-datasource=${datasource}" } ] } @@ -134,85 +117,87 @@ "h": 8, "w": 17, "x": 7, - "y": 0 + "y": 1 }, "id": 3, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - 
"pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_uptime_seconds{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Uptime" + "instant": true, + "legendFormat": "Uptime", + "refId": "Uptime" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_buckets_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_buckets_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Buckets" + "instant": true, + "legendFormat": "Buckets", + "refId": "Buckets" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_users_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_users_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Users" + "instant": true, + "legendFormat": "Users", + "refId": "Users" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_replications_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_replications_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Replications" + "instant": true, + "legendFormat": "Replications", + "refId": "Replications" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" 
}, - "expr": "influxdb_remotes_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_remotes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Remotes" + "instant": true, + "legendFormat": "Remotes", + "refId": "remotes" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_scrapers_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_scrapers_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Scrapers" + "instant": true, + "legendFormat": "Scrapers", + "refId": "Scrapers" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_dashboards_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "influxdb_dashboards_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "table", - "intervalFactor": 2, - "legendFormat": "Dashboards" + "instant": true, + "legendFormat": "Dashboards", + "refId": "Dashboards" } ], "title": "Servers", @@ -224,173 +209,102 @@ "mode": "outer" } }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "influxdb_cluster 1$|instance|^cluster$|cluster 1$|Value.+" + } + } + }, { "id": "organize", "options": { "excludeByName": { "Time": true, - "Time 2": true, - "Time 3": true, - "Time 4": true, - "Time 5": true, - "Time 6": true, - "Time 7": true, - "Value #B": false, - "Value #H": true, - "__name__": true, - "__name__ 1": true, - "__name__ 2": true, - "__name__ 3": true, - "__name__ 4": true, - "__name__ 5": true, - "__name__ 6": true, - "__name__ 7": true, - "cluster": false, - "cluster 2": true, - "cluster 3": true, - "cluster 
4": true, - "cluster 5": true, - "cluster 6": true, - "cluster 7": true, "id": true, - "influxdb_cluster": false, - "influxdb_cluster 2": true, - "influxdb_cluster 3": true, - "influxdb_cluster 4": true, - "influxdb_cluster 5": true, - "influxdb_cluster 6": true, - "influxdb_cluster 7": true, + "id 1": true, "job": true, - "job 2": true, - "job 3": true, - "job 4": true, - "job 5": true, - "job 6": true, - "job 7": true + "job 1": true }, - "indexByName": { }, "renameByName": { - "Dashboards": "", - "Value #A": "Uptime", - "Value #B": "Buckets", - "Value #C": "Users", - "Value #D": "Replications", - "Value #E": "Remotes", - "Value #F": "Scrapers", - "Value #G": "Dashboards", - "cluster": "K8s cluster", - "influxdb_cluster": "InfluxDB cluster", - "instance": "Instance" + "Time": "", + "cluster 1": "Cluster", + "influxdb_cluster 1": "InfluxDB cluster" } } + }, + { + "id": "renameByRegex", + "options": { + "regex": "Value #(.*)", + "renamePattern": "$1" + } } ], "type": "table" }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 17 + "y": 9 }, "id": 4, - "targets": [ ], + "panels": [ ], "title": "Queries and operations", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "HTTP API request rate for the instances with the most traffic in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 18 + "y": 10 }, "id": 5, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(http_api_requests_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])))", + "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - {{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - {{instance}}", + "refId": "Top instances by HTTP API requests" } ], "title": "Top instances by HTTP API requests", @@ -398,65 +312,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Time taken to respond to HTTP API requests for the cluster.", + "description": "Distribution of HTTP API request durations across the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "fillOpacity": 80, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineWidth": 1 - }, - "decimals": 0, - 
"mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 18 + "y": 10 }, "id": 6, - "options": { - "bucketOffset": 0, - "combine": false, - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "histogram_quantile(0.95, sum by(le, job, influxdb_cluster) (rate(http_api_request_duration_seconds_bucket{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le, job, influxdb_cluster) (rate(http_api_request_duration_seconds_bucket{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}}", + "refId": "HTTP API request duration" } ], "title": "HTTP API request duration", @@ -464,61 +348,45 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Rate of different HTTP response codes for the entire cluster.", + "description": "Share of HTTP API responses by status code across the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [ ], "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 18 + "y": 10 }, "id": 7, "options": { "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, - "pieType": "pie", "reduceOptions": { "calcs": [ - 
"lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" + "sum" + ] } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, response_code) (rate(http_api_requests_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])) > 0", + "expr": "sum by(job, influxdb_cluster, response_code) (rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - {{response_code}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - {{response_code}}", + "refId": "HTTP API response codes" } ], "title": "HTTP API response codes", @@ -526,310 +394,160 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of database operations from HTTP for the cluster.", + "description": "Rate of HTTP query operations by status in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - 
"value": null - } - ] + "showPoints": "never" }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "reqps" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 26 + "y": 18 }, "id": 8, "options": { "legend": { "calcs": [ ], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, status) (rate(http_query_request_count{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - query - {{status}}" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, influxdb_cluster, status) (rate(http_write_request_count{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])) > 0", + "expr": "sum by(job, influxdb_cluster, status) (rate(http_query_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])) > 0", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - write - {{status}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - query -{{status}}", + "refId": "HTTP operations" } ], - "title": "HTTP operations", + "title": "HTTP query operations", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of database HTTP operation data for the cluster.", + "description": "Rate of HTTP write operations by status in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "Bps" - }, - "overrides": [ ] + "unit": "reqps" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 26 + "y": 18 }, "id": 9, "options": { "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "calcs": [ ], + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (rate(http_query_request_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - query - request" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, influxdb_cluster) (rate(http_query_response_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - query - response" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, influxdb_cluster) (rate(http_write_request_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", - "format": "time_series", 
- "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - write - request" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job, influxdb_cluster) (rate(http_write_response_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", + "expr": "sum by(job, influxdb_cluster, status) (rate(http_write_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])) > 0", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - write - response" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - write -{{status}}", + "refId": "HTTP write operations" } ], - "title": "HTTP operation data", + "title": "HTTP write operations", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of InfluxQL queries for the instances with the most traffic in the cluster.", + "description": "Top 5 instances by InfluxQL query rate in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + 
"showPoints": "never" }, - "unit": "queries/s" - }, - "overrides": [ ] + "unit": "reqps" + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 34 + "y": 26 }, "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(influxql_service_requests_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])))", + "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(influxql_service_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - {{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - {{instance}}", + "refId": "Top instances by InfluxQL query rate" } ], "title": "Top instances by IQL query rate", @@ -837,187 +555,117 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Response time for recent InfluxQL queries, organized by result.", + "description": "Total time spent executing InfluxQL queries in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { 
- "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 34 + "y": 26 }, "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, result) (increase(influxql_service_executing_duration_seconds_sum{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__interval:]))", + "expr": "sum by(job, influxdb_cluster, result) (increase(influxql_service_executing_duration_seconds_sum{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - {{result}}" + "legendFormat": "{{influxdb_cluster}}", + "refId": "InfluxQL query response time" } ], - "title": "IQL query response time / $__interval", + "title": "IQL query response time", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of reads and writes to the underlying BoltDB storage engine for the entire cluster.", + "description": "Rate of BoltDB read and write operations in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 34 + "y": 26 }, "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(boltdb_reads_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", + "expr": "rate(boltdb_reads_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - reads" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - read", + "refId": "BoltDB read operations" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(boltdb_writes_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", + "expr": "rate(boltdb_writes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 
2, - "legendFormat": "{{influxdb_cluster}} - writes" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - write", + "refId": "BoltDB write operations" } ], "title": "BoltDB operations", @@ -1025,104 +673,65 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 42 + "y": 34 }, "id": 13, - "targets": [ ], + "panels": [ ], "title": "Task scheduler", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of tasks currently being executed for the cluster.", + "description": "Number of currently executing tasks in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 43 + "y": 35 }, "id": 14, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (task_scheduler_current_execution{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"})", + "expr": "sum by(job, influxdb_cluster) (task_scheduler_current_execution{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}}", + "refId": "Active tasks" } ], "title": "Active tasks", @@ -1130,88 +739,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of workers currently running tasks on the cluster.", + "description": "Number of active task executor workers in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 43 + "y": 35 }, "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": 
"desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (task_executor_total_runs_active{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"})", + "expr": "sum by(job, influxdb_cluster) (task_executor_total_runs_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}}", + "refId": "Active workers" } ], "title": "Active workers", @@ -1219,97 +792,64 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of executions and execution failures for the cluster.", + "description": "Total number of task executions in the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 51 + "y": 43 }, "id": 16, "options": { "legend": { "calcs": [ ], - "displayMode": 
"list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_execution_calls{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", + "expr": "sum by(job, influxdb_cluster) (task_scheduler_total_execution_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - total" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - total", + "refId": "Execution totals" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_execute_failure{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", + "expr": "sum by(job, influxdb_cluster) (task_scheduler_total_execute_failure{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - failed" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - failed", + "refId": "Execution failures" } ], "title": "Executions", @@ -1317,97 +857,64 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of schedule operations and schedule operation failures for the cluster.", + "description": "Total and failed task schedules across the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, 
- "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 43 }, "id": 17, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_schedule_calls{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_schedule_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - total" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - total", + "refId": "Schedule totals" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_schedule_fails{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_schedule_fails{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{influxdb_cluster}} - failed" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - failed", + "refId": "Schedule failures" } ], "title": "Schedules", @@ -1415,106 +922,65 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 8 + "y": 51 }, "id": 18, - "targets": [ ], + "panels": [ ], "title": "Go", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Heap memory usage for the largest instances in the cluster.", + "description": "Top instances by Go heap memory usage percentage.", "fieldConfig": { "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "percentunit" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 9 + "y": 52 }, "id": 19, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "topk($k, go_memstats_heap_alloc_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}/clamp_min(go_memstats_heap_idle_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"} + go_memstats_heap_alloc_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}, 1))", + "expr": "topk($k, sum by(job, influxdb_cluster, instance) (go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}/clamp_min(go_memstats_heap_idle_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"} + go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}, 1)))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - {{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - {{instance}}", + "refId": "Top instances by heap memory usage" } ], "title": "Top instances by heap memory usage", @@ -1522,94 +988,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Fraction of CPU time used for garbage collection for the top instances in the cluster.", + "description": "Instances with the highest Go garbage collection CPU usage.", "fieldConfig": { "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 9 + "y": 52 }, "id": 20, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "go_memstats_gc_cpu_fraction{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}", + "expr": "go_memstats_gc_cpu_fraction{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{influxdb_cluster}} - {{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - {{instance}}", + "refId": "Top instances by GC CPU usage" } ], "title": "Top instances by GC CPU usage", @@ -1617,122 +1041,105 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "influxdb-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", + "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": ".+", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds,job)", + "query": 
"label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".*", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 2, "includeAll": true, - "label": "Cluster", + "label": "InfluxDB cluster", "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds{job=~\"$job\"}, cluster)", + "name": "influxdb_cluster", + "query": "label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\"}, influxdb_cluster)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, - "label": "InfluxDB cluster", + "label": "Instance", "multi": true, - "name": "influxdb_cluster", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds{job=~\"$job\"}, influxdb_cluster)", + "name": "instance", + "query": "label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}, instance)", "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "datasource" }, { - "allValue": "", "current": { + "selected": false, "text": "5", "value": "5" }, - "hide": 0, "includeAll": false, "label": "Top node count", "multi": false, "name": "k", "options": [ { + "selected": true, "text": "5", "value": "5" }, { + "selected": false, "text": 
"10", "value": "10" }, { + "selected": false, "text": "20", "value": "20" }, { + "selected": false, "text": "50", "value": "50" } ], - "query": "5,10,20,50", - "refresh": 0, + "query": "5 : 5,10 : 10,20 : 20,50 : 50", "type": "custom" } ] @@ -1741,33 +1148,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "InfluxDB cluster overview", - "uid": "influxdb-cluster-overview", - "version": 0 + "uid": "influxdb_cluster_overview" } \ No newline at end of file diff --git a/influxdb-mixin/dashboards_out/influxdb-instance-overview.json b/influxdb-mixin/dashboards_out/influxdb-instance-overview.json index b31aea859..4b7d85dec 100644 --- a/influxdb-mixin/dashboards_out/influxdb-instance-overview.json +++ b/influxdb-mixin/dashboards_out/influxdb-instance-overview.json @@ -1,87 +1,83 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Dashboard providing detailed overview of InfluxDB instance performance, including configuration stats, Go runtime performance, query/request load, and task scheduler activity.", "links": [ { - "asDropdown": false, - "icon": "external link", + "keepTime": true, + "title": "InfluxDB cluster overview", + "type": "link", + "url": "/d/influxdb_cluster_overview" + }, + { + "keepTime": true, + "title": "InfluxDB logs", + "type": "link", + "url": "/d/influxdb-logs" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, - "tags": [ + "title": "All dashboards", + "type": "link", + "url": [ "influxdb-mixin" - ], - "targetBlank": false, - "title": "Other InfluxDB dashboards", - "type": "dashboards", - "url": "" + ] } ], "panels": [ + { 
+ "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "InfluxDB instance overview", + "type": "row" + }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Time that the InfluxDB process has been running.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_uptime_seconds{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Uptime", + "refId": "Uptime" } ], "title": "Uptime", @@ -89,60 +85,41 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of buckets on the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } 
}, "gridPos": { "h": 8, "w": 3, "x": 3, - "y": 0 + "y": 1 }, "id": 3, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (influxdb_buckets_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "influxdb_buckets_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Buckets", + "refId": "Buckets" } ], "title": "Buckets", @@ -150,60 +127,41 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Total number of users for the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 6, - "y": 0 + "y": 1 }, "id": 4, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) 
(influxdb_users_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "influxdb_users_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Users", + "refId": "Users" } ], "title": "Users", @@ -211,60 +169,41 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of replication configurations on the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 9, - "y": 0 + "y": 1 }, "id": 5, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (influxdb_replications_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "influxdb_replications_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Replications", + "refId": "Replications" } ], "title": "Replications", @@ -272,60 +211,41 @@ }, { "datasource": { + "type": "prometheus", "uid": 
"${prometheus_datasource}" }, "description": "Number of remote connections configured on the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 12, - "y": 0 + "y": 1 }, "id": 6, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (influxdb_remotes_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "influxdb_remotes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Remotes", + "refId": "remotes" } ], "title": "Remotes", @@ -333,60 +253,41 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of scrapers on the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 15, - "y": 0 + "y": 1 }, "id": 7, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": 
"auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (influxdb_scrapers_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "influxdb_scrapers_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Scrapers", + "refId": "Scrapers" } ], "title": "Scrapers", @@ -394,60 +295,41 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of dashboards on the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 18, - "y": 0 + "y": 1 }, "id": 8, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (influxdb_dashboards_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": 
"influxdb_dashboards_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "Dashboards", + "refId": "Dashboards" } ], "title": "Dashboards", @@ -455,60 +337,41 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of threads currently active on the server.", + "description": "Number of threads on the server.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 21, - "y": 0 + "y": 1 }, "id": 9, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (go_threads{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "go_threads{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Go threads" } ], "title": "Threads", @@ -516,104 +379,66 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 25 + "y": 9 }, "id": 
10, - "targets": [ ], + "panels": [ ], "title": "Queries and operations", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of HTTP requests to the API, organized by response code.", + "description": "Rate of HTTP API requests received by this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 26 + "y": 10 }, "id": 11, "options": { "legend": { "calcs": [ ], "displayMode": "list", - "placement": "bottom", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance, status) (rate(http_api_requests_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,influxdb_cluster,instance) 
(rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{status}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - {{instance}}", + "refId": "HTTP API requests" } ], "title": "HTTP API requests", @@ -621,107 +446,76 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of active queries for the server.", + "description": "Number of queries compiling, queuing, and executing on this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 26 + "y": 10 }, "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": 
"sum by(job, influxdb_cluster, instance) (qc_compiling_active{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "sum by (job,influxdb_cluster,instance) (qc_compiling_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - compiling" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - compiling", + "refId": "Compiling active queries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance) (qc_queueing_active{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "sum by (job,influxdb_cluster,instance) (qc_queueing_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - queueing" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - queuing", + "refId": "Queuing queries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance) (qc_executing_active{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"})", + "expr": "sum by (job,influxdb_cluster,instance) (qc_executing_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - executing" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - executing", + "refId": "Executing queries" } ], "title": "Active queries", @@ -729,97 +523,65 @@ }, { "datasource": { + "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "description": "Rate of database operations from HTTP for the server.", + "description": "Rate of HTTP query and write operations handled by this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "reqps" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 18 }, "id": 13, "options": { "legend": { "calcs": [ ], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance, status) (rate(http_query_request_count{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])) > 0", + "expr": "sum by (job,influxdb_cluster,instance) (rate(http_query_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{instance}} - query - {{status}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - query - {{status}}", + "refId": "HTTP operation queries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance, status) (rate(http_write_request_count{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])) > 0", + "expr": "sum by (job,influxdb_cluster,instance) (rate(http_write_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - write - {{status}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - write - {{status}}", + "refId": "HTTP operation writes" } ], "title": "HTTP operations", @@ -827,119 +589,89 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of database HTTP operation data for the server.", + "description": "Bytes per second for HTTP query and write request/response bodies.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "Bps" - }, - "overrides": [ ] + "unit": "bytes" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 18 }, "id": 14, "options": { "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "calcs": [ ], + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance) (rate(http_query_request_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance) (rate(http_query_request_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - query request" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - query request", + "refId": "HTTP operations data query requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance) (rate(http_query_response_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance) (rate(http_query_response_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - query response" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - query response", + "refId": "HTTP 
operations data query responses" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance) (rate(http_write_request_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance) (rate(http_write_request_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - write request" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - write request", + "refId": "HTTP operations data write requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance) (rate(http_write_response_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance) (rate(http_write_response_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - write response" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - write response", + "refId": "HTTP operations data write responses" } ], "title": "HTTP operation data", @@ -947,88 +679,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of InfluxQL queries for the server.", + "description": "Rate of InfluxQL queries executed by this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "queries/s" - }, - "overrides": [ ] + "unit": "query/s" + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 42 + "y": 26 }, "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance, result) (rate(influxql_service_requests_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance, result) (rate(influxql_service_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{result}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - {{result}}", + "refId": "IQL query rate" } ], "title": "IQL query rate", @@ -1036,187 +732,117 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Response time for recent InfluxQL queries, organized 
by result.", + "description": "Total time spent executing InfluxQL queries during each $__interval.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 42 + "y": 26 }, "id": 16, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, instance, result) (increase(influxql_service_executing_duration_seconds_sum{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__interval:]))", + "expr": "sum by (job,influxdb_cluster,instance, result) (increase(influxql_service_executing_duration_seconds_sum{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__interval] offset -$__interval))", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - 
{{result}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - {{result}}", + "refId": "IQL query response time" } ], - "title": "IQL query response time / $__interval", + "title": "IQL response time / $__interval", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of reads and writes to the underlying BoltDB storage engine for the server.", + "description": "Rate of BoltDB read and write operations performed by this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 42 + "y": 26 }, "id": 17, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(boltdb_reads_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", 
instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(boltdb_reads_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - read", + "refId": "BoltDB read operations" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(boltdb_writes_total{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(boltdb_writes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - write", + "refId": "BoltDB write operations" } ], "title": "BoltDB operations", @@ -1224,104 +850,65 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 50 + "y": 34 }, "id": 18, - "targets": [ ], + "panels": [ ], "title": "Task scheduler", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of tasks currently being executed for the server.", + "description": "Number of currently executing tasks on this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 
30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 51 + "y": 35 }, "id": 19, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "task_scheduler_current_execution{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "task_scheduler_current_execution{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}}", + "refId": "Active tasks" } ], "title": "Active tasks", @@ -1329,88 +916,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of workers currently running tasks on the server.", + "description": "Number of active task executor workers on this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - 
"tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 51 + "y": 35 }, "id": 20, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "task_executor_total_runs_active{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "task_executor_total_runs_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}}", + "refId": "Active workers" } ], "title": "Active workers", @@ -1418,90 +969,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Percentage of available workers that are currently busy.", + "description": "Worker utilization for task execution on this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, - "unit": "percent" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 51 + "y": 35 }, "id": 21, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "task_executor_workers_busy{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "task_executor_workers_busy{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}}", + "refId": "Worker usage" } ], "title": "Worker usage", @@ -1509,97 +1022,64 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of executions and execution failures for the server.", + "description": "Total and failed task executions on this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 43 }, "id": 22, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_execution_calls{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_execution_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - total" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - total", + "refId": "Executions total" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_execute_failure{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])", + 
"expr": "rate(task_scheduler_total_execute_failure{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - failed" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - failed", + "refId": "Executions failures" } ], "title": "Executions", @@ -1607,97 +1087,64 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of schedule operations and schedule operation failures for the server.", + "description": "Total and failed task schedules on this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 43 }, "id": 23, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, 
- "expr": "rate(task_scheduler_total_schedule_calls{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_schedule_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - total" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - total", + "refId": "Schedule totals" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_schedule_fails{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_schedule_fails{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - failed" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - failed", + "refId": "Schedule failures" } ], "title": "Schedules", @@ -1705,76 +1152,54 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 8 + "y": 51 }, "id": 24, - "targets": [ ], + "panels": [ ], "title": "Go", "type": "row" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Amount of time since the last garbage collection cycle.", + "description": "Elapsed seconds since the Go runtime last performed a GC.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "light-green", + "mode": "fixed" }, "unit": "s" - }, - "overrides": 
[ ] + } }, "gridPos": { "h": 8, "w": 6, "x": 0, - "y": 9 + "y": 52 }, "id": 25, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto", - "wideLayout": true + "graphMode": "none" }, - "pluginVersion": "10.3.0-63516", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "time() - go_memstats_last_gc_time_seconds{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "time() - go_memstats_last_gc_time_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Time since last GC" } ], "title": "Time since last GC", @@ -1782,180 +1207,104 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Server CPU time spent on garbage collection.", + "description": "Time spent in Go garbage collection during each $__interval.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 9, "x": 6, - "y": 9 + "y": 52 }, "id": 26, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "increase(go_gc_duration_seconds_sum{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}[$__interval:])", + "expr": "increase(go_gc_duration_seconds_sum{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "GC time / $__interval" } ], - "title": "GC time / $__interval", + "title": "GC time", "type": "timeseries" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Percent of server CPU time used for garbage collection.", + "description": "Fraction of CPU time used by Go garbage collection.", "fieldConfig": { "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - 
"scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 9, "x": 15, - "y": 9 + "y": 52 }, "id": 27, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "go_memstats_gc_cpu_fraction{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "go_memstats_gc_cpu_fraction{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{influxdb_cluster}} - {{instance}}", + "refId": "GC CPU usage" } ], "title": "GC CPU usage", @@ -1963,90 +1312,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Heap memory usage for the server.", + "description": "Estimated Go heap memory utilization of this instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": 
"opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, - "unit": "percentunit" - }, - "overrides": [ ] + "unit": "percent" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 17 + "y": 60 }, "id": 28, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "go_memstats_heap_alloc_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}/clamp_min((go_memstats_heap_idle_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"} + go_memstats_heap_alloc_bytes{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}), 1)", + "expr": "go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"} / clamp_min(go_memstats_heap_idle_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"} + go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}, 1)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Go heap memory usage" } ], "title": "Heap memory usage", @@ -2054,202 
+1364,123 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of OS threads created for the server.", + "description": "Number of OS threads created by this process.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 17 + "y": 60 }, "id": 29, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "go_threads{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\", instance=~\"$instance\", instance=~\"$instance\"}", + "expr": "go_threads{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Go threads" } ], - "title": "Go threads", + 
"title": "Threads", "type": "timeseries" } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "influxdb-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", + "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": ".+", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds,job)", + "query": "label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".*", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 2, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds{job=~\"$job\"}, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "InfluxDB cluster", "multi": true, "name": "influxdb_cluster", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds{job=~\"$job\"}, influxdb_cluster)", + "query": "label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\"}, influxdb_cluster)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { 
"allValue": ".+", - "current": { }, "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Instance", "multi": true, "name": "instance", - "options": [ ], - "query": "label_values(influxdb_uptime_seconds{job=~\"$job\", influxdb_cluster=~\"$influxdb_cluster\"}, instance)", + "query": "label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}, instance)", "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "datasource" } ] }, @@ -2257,33 +1488,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "InfluxDB instance overview", - "uid": "influxdb-instance-overview", - "version": 0 + "uid": "influxdb_instance_overview" } \ No newline at end of file diff --git a/influxdb-mixin/dashboards_out/influxdb-logs.json b/influxdb-mixin/dashboards_out/influxdb-logs.json index 26f8fecc3..10fb1d823 100644 --- a/influxdb-mixin/dashboards_out/influxdb-logs.json +++ b/influxdb-mixin/dashboards_out/influxdb-logs.json @@ -1,17 +1,29 @@ { + "annotations": { + "list": [ ] + }, "links": [ { - "asDropdown": false, - "icon": "external link", + "keepTime": true, + "title": "InfluxDB cluster overview", + "type": "link", + "url": "/d/influxdb_cluster_overview" + }, + { + "keepTime": true, + "title": "InfluxDB instance overview", + "type": "link", + "url": "/d/influxdb_instance_overview" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, - "tags": [ + "title": "All dashboards", + 
"type": "link", + "url": [ "influxdb-mixin" - ], - "targetBlank": false, - "title": "Other InfluxDB dashboards", - "type": "dashboards", - "url": "" + ] } ], "panels": [ @@ -161,7 +173,7 @@ "type": "loki", "uid": "${loki_datasource}" }, - "expr": "sum by (level) (count_over_time({job=~\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\",level=~\"$level\",service=~\"$service\",engine=~\"$engine\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "expr": "sum by (level) (count_over_time({job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", "legendFormat": "{{ level }}" } ], @@ -194,7 +206,7 @@ "enableLogDetails": true, "prettifyLogMessage": true, "showTime": false, - "wrapLogMessage": true + "wrapLogMessage": false }, "pluginVersion": "v11.0.0", "targets": [ @@ -203,7 +215,7 @@ "type": "loki", "uid": "${loki_datasource}" }, - "expr": "{job=~\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\",level=~\"$level\",service=~\"$service\",engine=~\"$engine\"} \n|~ \"$regex_search\"\n\n\n" + "expr": "{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"} \n|~ \"$regex_search\"\n\n\n" } ], "title": "Logs", @@ -234,7 +246,7 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values({job=~\"integrations/influxdb\"}, job)", + "query": "label_values({job=\"integrations/influxdb\"}, job)", "refresh": 2, "sort": 1, "type": "query" @@ -246,70 +258,10 @@ "uid": "${loki_datasource}" }, "includeAll": true, - "label": "Influxdb_cluster", + "label": "InfluxDB cluster", "multi": true, "name": "influxdb_cluster", - "query": "label_values({job=~\"integrations/influxdb\",job=~\"$job\"}, influxdb_cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - 
"label": "Instance", - "multi": true, - "name": "instance", - "query": "label_values({job=~\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}, instance)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Level", - "multi": true, - "name": "level", - "query": "label_values({job=~\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}, level)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Service", - "multi": true, - "name": "service", - "query": "label_values({job=~\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\",level=~\"$level\"}, service)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Engine", - "multi": true, - "name": "engine", - "query": "label_values({job=~\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\",level=~\"$level\",service=~\"$service\"}, engine)", + "query": "label_values({job=\"integrations/influxdb\",job=~\"$job\"}, influxdb_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -331,14 +283,22 @@ ], "query": "", "type": "textbox" + }, + { + "hide": 2, + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" } ] }, "time": { - "from": "now-6h", + "from": "now-30m", "to": "now" }, - "timezone": "utc", - "title": "InfluxDB logs overview", - "uid": "influxdb-logs-overview" + "timezone": "default", + "title": "InfluxDB logs", + "uid": "influxdb-logs" } \ No newline at end of file diff --git 
a/influxdb-mixin/g.libsonnet b/influxdb-mixin/g.libsonnet new file mode 100644 index 000000000..e6a2060ee --- /dev/null +++ b/influxdb-mixin/g.libsonnet @@ -0,0 +1 @@ +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/influxdb-mixin/jsonnetfile.json b/influxdb-mixin/jsonnetfile.json index 83a4fe35b..79c4d8a18 100644 --- a/influxdb-mixin/jsonnetfile.json +++ b/influxdb-mixin/jsonnetfile.json @@ -14,11 +14,29 @@ "source": { "git": { "remote": "https://github.com/grafana/grafonnet.git", - "subdir": "gen/grafonnet-latest" + "subdir": "gen/grafonnet-v11.4.0" } }, "version": "main" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + }, { "source": { "git": { diff --git a/influxdb-mixin/links.libsonnet b/influxdb-mixin/links.libsonnet new file mode 100644 index 000000000..6347d1e43 --- /dev/null +++ b/influxdb-mixin/links.libsonnet @@ -0,0 +1,27 @@ +local g = import './g.libsonnet'; + +{ + local link = g.dashboard.link, + new(this): { + influxdbClusterOverview: + link.link.new(this.config.dashboardNamePrefix + ' cluster overview','/d/' + this.grafana.dashboards['influxdb-cluster-overview.json'].uid) + + link.link.options.withKeepTime(true), + + influxdbInstanceOverview: + link.link.new(this.config.dashboardNamePrefix + ' instance overview','/d/' + this.grafana.dashboards['influxdb-instance-overview.json'].uid) + + link.link.options.withKeepTime(true), + + otherDashboards: + link.link.new('All dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + } + + + if this.config.enableLokiLogs then + { + logs: + 
link.link.new(this.config.dashboardNamePrefix + ' logs','/d/' + this.grafana.dashboards['influxdb-logs.json'].uid) + + link.link.options.withKeepTime(true), + } else {}, +} diff --git a/influxdb-mixin/main.libsonnet b/influxdb-mixin/main.libsonnet new file mode 100644 index 000000000..801bdb683 --- /dev/null +++ b/influxdb-mixin/main.libsonnet @@ -0,0 +1,49 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + + local this = self, + config: config, + + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='influxdb_uptime_seconds', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/influxdb-mixin/mixin.libsonnet b/influxdb-mixin/mixin.libsonnet index 4d987cf31..1535da7b3 100644 --- a/influxdb-mixin/mixin.libsonnet +++ b/influxdb-mixin/mixin.libsonnet @@ -1,3 +1,34 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local influxdblib = import './main.libsonnet'; +local config = (import './config.libsonnet'); +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + + 
+local influxdb = + influxdblib.new() + + influxdblib.withConfigMixin( + { + filteringSelector: config.filteringSelector, + uid: config.uid, + enableLokiLogs: config.enableLokiLogs, + } + ); + +local label_patch = { + influxdb_cluster+: { + allValue: '.*', + label: 'InfluxDB cluster', + }, +}; + +// populate monitoring-mixin: +{ + grafanaDashboards+:: { + [fname]: + local dashboard = influxdb.grafana.dashboards[fname]; + dashboard + util.patch_variables(dashboard, label_patch) + + for fname in std.objectFields(influxdb.grafana.dashboards) + }, + prometheusAlerts+:: influxdb.prometheus.alerts, + prometheusRules+:: influxdb.prometheus.recordingRules, +} diff --git a/influxdb-mixin/panels.libsonnet b/influxdb-mixin/panels.libsonnet new file mode 100644 index 000000000..f6ed9f692 --- /dev/null +++ b/influxdb-mixin/panels.libsonnet @@ -0,0 +1,525 @@ +local g = (import './g.libsonnet'); +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this):: + { + local signals = this.signals, + local groupLabels = this.config.groupLabels, + local shownGroupLabels = std.filter(function(l) l != 'job', groupLabels), + + // + // Cluster Overview Dashboard Panels + // + + // Alert panel + alertsPanel: { + title: 'Alerts', + type: 'alertlist', + targets: [], + options: { + alertInstanceLabelFilter: '{job=~"${job:regex}", influxdb_cluster=~"${influxdb_cluster:regex}"}', + alertName: '', + dashboardAlerts: false, + groupBy: [], + groupMode: 'default', + maxItems: 20, + sortOrder: 1, + stateFilter: { + 'error': true, + firing: true, + noData: false, + normal: false, + pending: true, + }, + viewMode: 'list', + }, + }, + + serversPanel: + commonlib.panels.generic.table.base.new( + 'Servers', + targets=[ + signals.overview.uptime.asTableTarget(), + signals.overview.buckets.asTableTarget(), + signals.overview.users.asTableTarget(), + signals.overview.replications.asTableTarget(), + signals.overview.remotes.asTableTarget(), + signals.overview.scrapers.asTableTarget(), + 
signals.overview.dashboards.asTableTarget(), + ], + description='Statistics for each instance in the cluster.', + ) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('instance') + + g.panel.table.fieldOverride.byName.withProperty('displayName', 'Instance') + + g.panel.table.fieldOverride.byName.withProperty('links', [ + { + title: 'Instance overview', + url: '/d/influxdb_instance_overview?var-instance=${__data.fields.Instance}&${__url_time_range}&var-datasource=${datasource}', + }, + ]), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Uptime') + + g.panel.table.fieldOverride.byName.withPropertiesFromOptions( + g.panel.table.standardOptions.withUnit('s') + ), + ]) + + g.panel.table.queryOptions.withTransformationsMixin([ + { + id: 'joinByField', + options: { + byField: 'instance', + mode: 'outer', + }, + }, + { + id: 'filterFieldsByName', + options: { + include: { + pattern: + std.join(' 1$|', shownGroupLabels) + ' 1$|' + + 'instance' + '|' + + '^cluster$|cluster 1$|' + + 'Value.+', + }, + }, + }, + { + id: 'organize', + options: { + excludeByName: { + Time: true, + id: true, + 'id 1': true, + job: true, + 'job 1': true, + }, + renameByName: { + Time: '', + 'cluster 1': 'Cluster', + 'influxdb_cluster 1': 'InfluxDB cluster', + }, + }, + }, + { + id: 'renameByRegex', + options: { + regex: 'Value #(.*)', + renamePattern: '$1', + }, + }, + ]), + + + // HTTP API panels + topInstancesByHTTPAPIRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top instances by HTTP API requests', + targets=[ + signals.overview.topInstancesByHTTPAPIRequests.asTarget() { interval: '1m' }, + ], + description='HTTP API request rate for the instances with the most traffic in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + + httpAPIRequestDurationPanel: + g.panel.histogram.new('HTTP API request duration') + + 
g.panel.histogram.panelOptions.withDescription('Distribution of HTTP API request durations across the cluster.') + + g.panel.histogram.queryOptions.withTargets([ + signals.overview.httpAPIRequestDuration.asTarget() { interval: '1m' }, + ]) + + g.panel.histogram.standardOptions.withUnit('s'), + + httpAPIResponseCodesPanel: + g.panel.pieChart.new( + 'HTTP API response codes', + ) + + g.panel.pieChart.panelOptions.withDescription('Share of HTTP API responses by status code across the cluster.') + + g.panel.pieChart.queryOptions.withTargets([ + signals.overview.httpAPIResponseCodes.asTarget() { interval: '1m' }, + ]) + + g.panel.pieChart.standardOptions.withUnit('reqps') + + g.panel.pieChart.options.legend.withPlacement('right') + + g.panel.pieChart.options.reduceOptions.withCalcs(['sum']), + + // Query operations panels + httpQueryOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'HTTP query operations', + targets=[ + signals.overview.httpQueryOperations.asTarget() { interval: '1m' }, + ], + description='Rate of HTTP query operations by status in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withPlacement('right'), + + + httpWriteOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'HTTP write operations', + targets=[ + signals.overview.httpWriteOperations.asTarget() { interval: '1m' }, + ], + description='Rate of HTTP write operations by status in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withPlacement('right'), + + + // InfluxQL panels + topInstancesByIQLQueryRatePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top instances by IQL query rate', + targets=[ + signals.overview.topInstancesByIQLQueryRate.asTarget() { interval: '1m' }, + ], + description='Top 5 instances by InfluxQL query rate in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + 
iqlQueryResponseTimePanel: + commonlib.panels.generic.timeSeries.base.new( + 'IQL query response time', + targets=[ + signals.overview.iqlQueryResponseTime.asTarget() { interval: '1m' }, + ], + description='Total time spent executing InfluxQL queries in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + boltdbOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'BoltDB operations', + targets=[ + signals.overview.boltdbReadOperations.asTarget() { interval: '1m' }, + signals.overview.boltdbWriteOperations.asTarget() { interval: '1m' }, + ], + description='Rate of BoltDB read and write operations in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + // Task scheduler panels + activeTasksPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Active tasks', + targets=[ + signals.overview.activeTasks.asTarget() { interval: '1m' }, + ], + description='Number of currently executing tasks in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + activeWorkersPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Active workers', + targets=[ + signals.overview.activeWorkers.asTarget() { interval: '1m' }, + ], + description='Number of active task executor workers in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + + executionTotalsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Executions', + targets=[ + signals.overview.executionTotals.asTarget() { interval: '1m' }, + signals.overview.executionFailures.asTarget() { interval: '1m' }, + ], + description='Total number of task executions in the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + + scheduleTotalsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Schedules', + targets=[ + signals.overview.scheduleTotals.asTarget() { interval: '1m' }, + signals.overview.scheduleFailures.asTarget() { interval: '1m' }, + ], + description='Total and failed task 
schedules across the cluster.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + // Memory and performance panels + topInstancesByHeapMemoryUsagePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top instances by heap memory usage', + targets=[ + signals.overview.topInstancesByHeapMemoryUsage.asTarget() { interval: '1m' }, + ], + description='Top instances by Go heap memory usage percentage.', + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit'), + + + topInstancesByGCCPUUsagePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Top instances by GC CPU usage', + targets=[ + signals.overview.topInstancesByGCCPUUsage.asTarget() { interval: '1m' }, + ], + description='Instances with the highest Go garbage collection CPU usage.', + ) + + g.panel.timeSeries.standardOptions.withUnit('percent'), + + // + // Instance Overview Dashboard Panels + // + + instanceUptimePanel: + commonlib.panels.generic.stat.base.new( + 'Uptime', + targets=[signals.instance.uptime.asTarget()], + description='Time that the InfluxDB process has been running.', + ) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceBucketsPanel: + commonlib.panels.generic.stat.base.new( + 'Buckets', + targets=[signals.instance.buckets.asTarget()], + description='Number of buckets on the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceUsersPanel: + commonlib.panels.generic.stat.base.new( + 'Users', + targets=[signals.instance.users.asTarget()], + description='Total number of users for the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + 
instanceReplicationsPanel: + commonlib.panels.generic.stat.base.new( + 'Replications', + targets=[signals.instance.replications.asTarget()], + description='Number of replication configurations on the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + + instanceRemotesPanel: + commonlib.panels.generic.stat.base.new( + 'Remotes', + targets=[signals.instance.remotes.asTarget()], + description='Number of remote connections configured on the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceScrapersPanel: + commonlib.panels.generic.stat.base.new( + 'Scrapers', + targets=[signals.instance.scrapers.asTarget()], + description='Number of scrapers on the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceDashboardsPanel: + commonlib.panels.generic.stat.base.new( + 'Dashboards', + targets=[signals.instance.dashboards.asTarget()], + description='Number of dashboards on the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceThreadsPanel: + commonlib.panels.generic.stat.base.new( + 'Threads', + targets=[signals.instance.goThreads.asTarget()], + description='Number of threads on the server.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceHTTPAPIRequestsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'HTTP API requests', + 
targets=[signals.instance.httpAPIRequests.asTarget() { interval: '1m' }], + description='Rate of HTTP API requests received by this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withPlacement('right'), + + instanceActiveQueriesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Active queries', + targets=[ + signals.instance.compilingActiveQueries.asTarget() { interval: '1m' }, + signals.instance.queuingQueries.asTarget() { interval: '1m' }, + signals.instance.executingQueries.asTarget() { interval: '1m' }, + ], + description='Number of queries compiling, queuing, and executing on this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + instanceHTTPOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'HTTP operations', + targets=[ + signals.instance.httpOperationQueries.asTarget() { interval: '1m' }, + signals.instance.httpOperationWrites.asTarget() { interval: '1m' }, + ], + description='Rate of HTTP query and write operations handled by this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withPlacement('right'), + + instanceHTTPOperationDataPanel: /* Throughput of HTTP query/write request and response bodies for one instance. */ + commonlib.panels.generic.timeSeries.base.new( + 'HTTP operation data', + targets=[ + signals.instance.httpOperationsDataQueryRequests.asTarget() { interval: '1m' }, + signals.instance.httpOperationsDataQueryResponses.asTarget() { interval: '1m' }, + signals.instance.httpOperationsDataWriteRequests.asTarget() { interval: '1m' }, + signals.instance.httpOperationsDataWriteResponses.asTarget() { interval: '1m' }, + ], + description='Bytes per second for HTTP query and write request/response bodies.', + ) + + g.panel.timeSeries.standardOptions.withUnit('binBps') /* every target is rate() of a *_bytes counter, so the value is bytes per second, not a byte count */ + + g.panel.timeSeries.options.legend.withPlacement('right'), + + instanceIQLRatePanel: + commonlib.panels.generic.timeSeries.base.new( + 'IQL query rate', + 
targets=[signals.instance.iqlQueryRate.asTarget() { interval: '1m' }], + description='Rate of InfluxQL queries executed by this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('query/s'), + + instanceIQLResponseTimePanel: + commonlib.panels.generic.timeSeries.base.new( + 'IQL response time / $__interval', + targets=[signals.instance.iqlQueryResponseTime.asTarget() { interval: '2m' }], + description='Total time spent executing InfluxQL queries during each $__interval.', + ) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + instanceBoltDBOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'BoltDB operations', + targets=[ + signals.instance.boltdbReadOperations.asTarget() { interval: '1m' }, + signals.instance.boltdbWriteOperations.asTarget() { interval: '1m' }, + ], + description='Rate of BoltDB read and write operations performed by this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + instanceActiveTasksPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Active tasks', + targets=[signals.instance.activeTasks.asTarget() { interval: '1m' }], + description='Number of currently executing tasks on this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + instanceActiveWorkersPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Active workers', + targets=[signals.instance.activeWorkers.asTarget() { interval: '1m' }], + description='Number of active task executor workers on this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + instanceWorkerUsagePanel: /* Share of task executor workers that are busy on one instance. */ + commonlib.panels.generic.timeSeries.base.new( + 'Worker usage', + targets=[signals.instance.workerUsage.asTarget() { interval: '1m' }], + description='Worker utilization for task execution on this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('percent'), /* task_executor_workers_busy is a 0-100 percentage; the busy-workers alert compares it against an 80 percent threshold */ + + instanceExecutionsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Executions', + targets=[ + 
signals.instance.executionsTotal.asTarget() { interval: '1m' }, + signals.instance.executionsFailures.asTarget() { interval: '1m' }, + ], + description='Total and failed task executions on this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + instanceSchedulesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Schedules', + targets=[ + signals.instance.scheduleTotals.asTarget() { interval: '1m' }, + signals.instance.scheduleFailures.asTarget() { interval: '1m' }, + ], + description='Total and failed task schedules on this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + + // Go metrics + instanceGoLastGCPanel: + commonlib.panels.generic.stat.base.new( + 'Time since last GC', + targets=[signals.instance.timeSinceLastGC.asTarget()], + description='Elapsed seconds since the Go runtime last performed a GC.', + ) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.stat.standardOptions.color.withFixedColor('light-green') + + g.panel.stat.options.withGraphMode('none'), + + instanceGoGCTimePanel: + commonlib.panels.generic.timeSeries.base.new( + 'GC time', + targets=[signals.instance.gcTime.asTarget() { interval: '2m' }], + description='Time spent in Go garbage collection during each $__interval.', + ) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + + instanceGoGCCPUUsagePanel: /* Go GC CPU usage for one instance, plotted from a 0-1 fraction. */ + commonlib.panels.generic.timeSeries.base.new( + 'GC CPU usage', + targets=[signals.instance.gcCPUUsage.asTarget()], + description='Fraction of CPU time used by Go garbage collection.', + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit'), /* go_memstats_gc_cpu_fraction is a 0-1 fraction; 'percentunit' renders 0.02 as 2% */ + + instanceGoHeapMemoryUsagePanel: /* Go heap utilization for one instance, plotted from a 0-1 ratio. */ + commonlib.panels.generic.timeSeries.base.new( + 'Heap memory usage', + targets=[signals.instance.goHeapMemoryUsage.asTarget()], + description='Estimated Go heap memory utilization of this instance.', + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit'), /* the signal is alloc / (idle + alloc), a 0-1 ratio declared as 'percentunit'; matches the cluster-level heap panel */ + + + instanceGoThreadsPanel: + 
commonlib.panels.generic.timeSeries.base.new( + 'Threads', + targets=[signals.instance.goThreads.asTarget()], + description='Number of OS threads created by this process.', + ) + + g.panel.timeSeries.standardOptions.withUnit('none'), + + + }, +} diff --git a/influxdb-mixin/prometheus_rules_out/prometheus_alerts.yaml b/influxdb-mixin/prometheus_rules_out/prometheus_alerts.yaml index bc4077612..36196e5a8 100644 --- a/influxdb-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/influxdb-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -1,21 +1,21 @@ groups: - name: influxdb rules: - - alert: InfluxDBWarningTaskSchedulerHighFailureRate + - alert: InfluxDBWarningTaskHighFailureRate annotations: description: Task scheduler task executions for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} are failing at a rate of {{ printf "%.0f" $value }} percent, which is above the threshold of 25 percent. summary: Automated data processing tasks are failing at a high rate. expr: | - 100 * rate(task_scheduler_total_execute_failure[5m])/clamp_min(rate(task_scheduler_total_execution_calls[5m]), 1) >= 25 + 100 * rate(task_scheduler_total_execute_failure{job="integrations/influxdb"}[5m])/clamp_min(rate(task_scheduler_total_execution_calls{job="integrations/influxdb"}[5m]), 1) >= 25 for: 5m labels: severity: warning - - alert: InfluxDBCriticalTaskSchedulerHighFailureRate + - alert: InfluxDBCriticalTaskHighFailureRate annotations: description: Task scheduler task executions for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} are failing at a rate of {{ printf "%.0f" $value }} percent, which is above the threshold of 50 percent. summary: Automated data processing tasks are failing at a critical rate. 
expr: | - 100 * rate(task_scheduler_total_execute_failure[5m])/clamp_min(rate(task_scheduler_total_execution_calls[5m]), 1) >= 50 + 100 * rate(task_scheduler_total_execute_failure{job="integrations/influxdb"}[5m])/clamp_min(rate(task_scheduler_total_execution_calls{job="integrations/influxdb"}[5m]), 1) >= 50 for: 5m labels: severity: critical @@ -24,7 +24,7 @@ groups: description: The busy worker percentage for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%.0f" $value }} percent, which is above the threshold of 80 percent. summary: There is a high percentage of busy workers. expr: | - task_executor_workers_busy >= 80 + task_executor_workers_busy{job="integrations/influxdb"} >= 80 for: 5m labels: severity: critical @@ -33,7 +33,7 @@ groups: description: The heap memory usage for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%.0f" $value }} percent, which is above the threshold of 80 percent. summary: There is a high amount of heap memory being used. expr: | - 100 * go_memstats_heap_alloc_bytes/clamp_min((go_memstats_heap_idle_bytes + go_memstats_heap_alloc_bytes), 1) >= 80 + 100 * go_memstats_heap_alloc_bytes{job="integrations/influxdb"}/clamp_min((go_memstats_heap_idle_bytes{job="integrations/influxdb"} + go_memstats_heap_alloc_bytes{job="integrations/influxdb"}), 1) >= 80 for: 5m labels: severity: critical @@ -42,7 +42,7 @@ groups: description: The average API request latency for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%.2f" $value }} seconds, which is above the threshold of 0.29999999999999999 seconds. summary: Average API request latency is too high. High latency will negatively affect system performance, degrading data availability and precision. 
expr: | - sum without(handler, method, path, response_code, status, user_agent) (increase(http_api_request_duration_seconds_sum[5m])/clamp_min(increase(http_api_requests_total[5m]), 1)) >= 0.29999999999999999 + sum without(handler, method, path, response_code, status, user_agent) (increase(http_api_request_duration_seconds_sum{job="integrations/influxdb"}[5m])/clamp_min(increase(http_api_requests_total{job="integrations/influxdb"}[5m]), 1)) >= 0.29999999999999999 for: 1m labels: severity: critical @@ -51,7 +51,7 @@ groups: description: The average InfluxQL query execution time for instance {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is {{ printf "%.2f" $value }} seconds, which is above the threshold of 0.10000000000000001 seconds. summary: InfluxQL execution times are too slow. Slow query execution times will negatively affect system performance, degrading data availability and precision. expr: | - sum without(result) (increase(influxql_service_executing_duration_seconds_sum[5m])/clamp_min(increase(influxql_service_requests_total[5m]), 1)) >= 0.10000000000000001 + sum without(result) (increase(influxql_service_executing_duration_seconds_sum{job="integrations/influxdb"}[5m])/clamp_min(increase(influxql_service_requests_total{job="integrations/influxdb"}[5m]), 1)) >= 0.10000000000000001 for: 5m labels: severity: warning diff --git a/influxdb-mixin/rows.libsonnet b/influxdb-mixin/rows.libsonnet new file mode 100644 index 000000000..731d633b1 --- /dev/null +++ b/influxdb-mixin/rows.libsonnet @@ -0,0 +1,105 @@ +local g = import './g.libsonnet'; + +{ + new(this): + { + // --- + // Cluster overview rows + // --- + + // Cluster overview row + influxdbClusterOverview: + g.panel.row.new('InfluxDB cluster overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.alertsPanel { gridPos+: { w: 7 } }, + this.grafana.panels.serversPanel { gridPos+: { w: 17 } }, + ]), + + influxdbClusterOverviewQueriesAndOperations: + 
g.panel.row.new('Queries and operations') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topInstancesByHTTPAPIRequestsPanel { gridPos+: { w: 8 } }, + this.grafana.panels.httpAPIRequestDurationPanel { gridPos+: { w: 8 } }, + this.grafana.panels.httpAPIResponseCodesPanel { gridPos+: { w: 8 } }, + this.grafana.panels.httpQueryOperationsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.httpWriteOperationsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.topInstancesByIQLQueryRatePanel { gridPos+: { w: 8 } }, + this.grafana.panels.iqlQueryResponseTimePanel { gridPos+: { w: 8 } }, + this.grafana.panels.boltdbOperationsPanel { gridPos+: { w: 8 } }, + ]), + + influxdbClusterOverviewTaskScheduler: + g.panel.row.new('Task scheduler') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.activeTasksPanel { gridPos+: { w: 12 } }, + this.grafana.panels.activeWorkersPanel { gridPos+: { w: 12 } }, + this.grafana.panels.executionTotalsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.scheduleTotalsPanel { gridPos+: { w: 12 } }, + ]), + + influxdbClusterOverviewMemoryAndGC: + g.panel.row.new('Go') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topInstancesByHeapMemoryUsagePanel { gridPos+: { w: 12 } }, + this.grafana.panels.topInstancesByGCCPUUsagePanel { gridPos+: { w: 12 } }, + ]), + + // --- + // Instance Dashboard Rows + // --- + + influxdbInstanceOverview: + g.panel.row.new('InfluxDB instance overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.instanceUptimePanel { gridPos+: { h: 8, w: 3 } }, + this.grafana.panels.instanceBucketsPanel { gridPos+: { h: 8, w: 3 } }, + this.grafana.panels.instanceUsersPanel { gridPos+: { h: 8, w: 3 } }, + this.grafana.panels.instanceReplicationsPanel { gridPos+: { h: 8, w: 3 } }, + this.grafana.panels.instanceRemotesPanel { gridPos+: { h: 8, w: 3 } }, + 
this.grafana.panels.instanceScrapersPanel { gridPos+: { h: 8, w: 3 } }, + this.grafana.panels.instanceDashboardsPanel { gridPos+: { h: 8, w: 3 } }, + this.grafana.panels.instanceThreadsPanel { gridPos+: { h: 8, w: 3 } }, + ]), + + + influxdbInstanceOverviewQueriesAndOperations: + g.panel.row.new('Queries and operations') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.instanceHTTPAPIRequestsPanel { gridPos+: { h: 8, w: 12 } }, + this.grafana.panels.instanceActiveQueriesPanel { gridPos+: { h: 8, w: 12 } }, + this.grafana.panels.instanceHTTPOperationsPanel { gridPos+: { h: 8, w: 12 } }, + this.grafana.panels.instanceHTTPOperationDataPanel { gridPos+: { h: 8, w: 12 } }, + this.grafana.panels.instanceIQLRatePanel { gridPos+: { h: 8, w: 8 } }, + this.grafana.panels.instanceIQLResponseTimePanel { gridPos+: { h: 8, w: 8 } }, + this.grafana.panels.instanceBoltDBOperationsPanel { gridPos+: { h: 8, w: 8 } }, + ]), + + influxdbInstanceOverviewTaskScheduler: + g.panel.row.new('Task scheduler') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.instanceActiveTasksPanel { gridPos+: { h: 8, w: 8 } }, + this.grafana.panels.instanceActiveWorkersPanel { gridPos+: { h: 8, w: 8 } }, + this.grafana.panels.instanceWorkerUsagePanel { gridPos+: { h: 8, w: 8 } }, + this.grafana.panels.instanceExecutionsPanel { gridPos+: { h: 8, w: 12 } }, + this.grafana.panels.instanceSchedulesPanel { gridPos+: { h: 8, w: 12 } }, + ]), + + influxdbInstanceOverviewGo: + g.panel.row.new('Go') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.instanceGoLastGCPanel { gridPos+: { h: 8, w: 6 } }, + this.grafana.panels.instanceGoGCTimePanel { gridPos+: { h: 8, w: 9 } }, + this.grafana.panels.instanceGoGCCPUUsagePanel { gridPos+: { h: 8, w: 9 } }, + this.grafana.panels.instanceGoHeapMemoryUsagePanel { gridPos+: { h: 8, w: 12 } }, + this.grafana.panels.instanceGoThreadsPanel { gridPos+: { h: 8, w: 12 } 
}, + ]), + }, +} diff --git a/influxdb-mixin/signals/instance.libsonnet b/influxdb-mixin/signals/instance.libsonnet new file mode 100644 index 000000000..f265e1b11 --- /dev/null +++ b/influxdb-mixin/signals/instance.libsonnet @@ -0,0 +1,486 @@ +function(this) + local groupAggListWithInstance = std.join(',', this.groupLabels) + (if std.length(this.instanceLabels) > 0 then ',' + std.join(',', this.instanceLabels) else ''); + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + legendCustomTemplate: '{{influxdb_cluster}} - ' + std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'influxdb_uptime_seconds', + }, + signals: { + // Instance stats + uptime: { + name: 'Uptime', + nameShort: 'Uptime', + type: 'gauge', + description: 'Uptime for an instance.', + unit: 'seconds', + sources: { + prometheus: { + expr: 'influxdb_uptime_seconds{%(queriesSelector)s}', + legendCustomTemplate: 'Uptime', + }, + }, + }, + + buckets: { + name: 'Buckets', + nameShort: 'Buckets', + type: 'gauge', + description: 'Number of buckets on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_buckets_total{%(queriesSelector)s}', + legendCustomTemplate: 'Buckets', + }, + }, + }, + + users: { + name: 'Users', + nameShort: 'Users', + type: 'gauge', + description: 'Number of users on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_users_total{%(queriesSelector)s}', + legendCustomTemplate: 'Users', + }, + }, + }, + + replications: { + name: 'Replications', + nameShort: 'Replications', + type: 'gauge', + description: 'Number of replications configured on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_replications_total{%(queriesSelector)s}', + legendCustomTemplate: 'Replications', + }, 
+ }, + }, + + remotes: { + name: 'remotes', + nameShort: 'Remotes', + type: 'gauge', + description: 'Number of remotes configured on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_remotes_total{%(queriesSelector)s}', + legendCustomTemplate: 'Remotes', + }, + }, + }, + + scrapers: { + name: 'Scrapers', + nameShort: 'Scrapers', + type: 'gauge', + description: 'Number of scrapers configured on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_scrapers_total{%(queriesSelector)s}', + legendCustomTemplate: 'Scrapers', + }, + }, + }, + + dashboards: { + name: 'Dashboards', + nameShort: 'Dashboards', + type: 'gauge', + description: 'Number of dashboards on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_dashboards_total{%(queriesSelector)s}', + legendCustomTemplate: 'Dashboards', + }, + }, + }, + + httpAPIRequests: { + name: 'HTTP API requests', + nameShort: 'HTTP API requests', + type: 'raw', + description: 'Rate of HTTP API requests on an instance.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggListWithInstance + ') (rate(http_api_requests_total{%(queriesSelector)s}[$__rate_interval]))', + }, + }, + }, + + compilingActiveQueries: { + name: 'Compiling active queries', + nameShort: 'Compiling active queries', + type: 'raw', + description: 'Number of active queries being compiled on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') (qc_compiling_active{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}} - compiling', + }, + }, + }, + + + queuingQueries: { + name: 'Queuing queries', + nameShort: 'Queuing queries', + type: 'raw', + description: 'Number of queries being queued on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') (qc_queueing_active{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}} - queuing', + }, + }, 
+ }, + + executingQueries: { + name: 'Executing queries', + nameShort: 'Executing queries', + type: 'raw', + description: 'Number of queries being executed on an instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') (qc_executing_active{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}} - executing', + }, + }, + }, + + httpOperationQueries: { /* Per-status rate of HTTP query requests; 'status' must stay in the aggregation because the legend references it. */ + name: 'HTTP operation queries', + nameShort: 'HTTP operation queries', + type: 'raw', + description: 'Rate of HTTP query requests handled by an instance, by status.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ', status) (rate(http_query_request_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - query - {{status}}', + }, + }, + }, + + httpOperationWrites: { /* Per-status rate of HTTP write requests; 'status' must stay in the aggregation because the legend references it. */ + name: 'HTTP operation writes', + nameShort: 'HTTP operation writes', + type: 'raw', + description: 'Rate of HTTP write requests handled by an instance, by status.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ', status) (rate(http_write_request_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - write - {{status}}', + }, + }, + }, + + httpOperationsDataQueryRequests: { + name: 'HTTP operations data query requests', + nameShort: 'HTTP operations data query requests', + type: 'raw', + description: 'Rate of database HTTP query requests.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') (rate(http_query_request_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - query request', + }, + }, + }, + + httpOperationsDataQueryResponses: { + name: 'HTTP operations data query responses', + nameShort: 'HTTP operations data query responses', + type: 'raw', + description: 'Rate of database HTTP query responses.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') 
(rate(http_query_response_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - query response', + }, + }, + }, + + httpOperationsDataWriteRequests: { + name: 'HTTP operations data write requests', + nameShort: 'HTTP operations data write requests', + type: 'raw', + description: 'Rate of database HTTP write requests.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') (rate(http_write_request_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - write request', + }, + }, + }, + + httpOperationsDataWriteResponses: { + name: 'HTTP operations data write responses', + nameShort: 'HTTP operations data write responses', + type: 'raw', + description: 'Rate of database HTTP write responses.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ') (rate(http_write_response_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - write response', + }, + }, + }, + + iqlQueryRate: { + name: 'IQL query rate', + nameShort: 'IQL query rate', + type: 'raw', + description: 'Rate of InfluxQL queries for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ', result) (rate(influxql_service_requests_total{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{instance}} - {{result}}', + }, + }, + }, + + + iqlQueryResponseTime: { + name: 'IQL query response time', + nameShort: 'IQL query response time', + type: 'raw', + description: 'Response time for recent InfluxQL queries.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by (' + groupAggListWithInstance + ', result) (increase(influxql_service_executing_duration_seconds_sum{%(queriesSelector)s}[$__interval] offset -$__interval))', + legendCustomTemplate: '{{instance}} - {{result}}', + }, + }, + }, + + + boltdbReadOperations: { + name: 'BoltDB read operations', + 
nameShort: 'BoltDB read operations', + type: 'counter', + description: 'Rate of reads to the underlying BoltDB storage engine for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'boltdb_reads_total{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - read', + }, + }, + }, + + boltdbWriteOperations: { + name: 'BoltDB write operations', + nameShort: 'BoltDB write operations', + type: 'counter', + description: 'Rate of writes to the underlying BoltDB storage engine for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'boltdb_writes_total{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - write', + }, + }, + }, + + activeTasks: { + name: 'Active tasks', + nameShort: 'Active tasks', + type: 'gauge', + description: 'Number of currently executing tasks in the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_current_execution{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + activeWorkers: { + name: 'Active workers', + nameShort: 'Active workers', + type: 'gauge', + description: 'Number of workers currently running tasks on the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_executor_total_runs_active{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + + workerUsage: { + name: 'Worker usage', + nameShort: 'Worker usage', + type: 'gauge', + description: 'Percentage of available workers that are currently busy.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_executor_workers_busy{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + + executionsTotal: { + name: 'Executions total', + nameShort: 'Executions total', + type: 'counter', + description: 'Rate of executions for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_total_execution_calls{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - total', + }, + }, + }, + + 
executionsFailures: { + name: 'Executions failures', + nameShort: 'Executions failures', + type: 'counter', + description: 'Rate of failures for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_total_execute_failure{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - failed', + }, + }, + }, + + scheduleTotals: { + name: 'Schedule totals', + nameShort: 'Schedule totals', + type: 'counter', + description: 'Rate of schedules for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_total_schedule_calls{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - total', + }, + }, + }, + + scheduleFailures: { + name: 'Schedule failures', + nameShort: 'Schedule failures', + type: 'counter', + description: 'Rate of failures for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_total_schedule_fails{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - failed', + }, + }, + }, + + + timeSinceLastGC: { + name: 'Time since last GC', + nameShort: 'Time since last GC', + type: 'raw', + description: 'Time since the last garbage collection.', + unit: 'none', + sources: { + prometheus: { + expr: 'time() - go_memstats_last_gc_time_seconds{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + + gcTime: { + name: 'GC time / $__interval', + nameShort: 'GC time', + type: 'counter', + description: 'Server CPU time spent on garbage collection.', + unit: 's', + sources: { + prometheus: { + expr: 'go_gc_duration_seconds_sum{%(queriesSelector)s}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + gcCPUUsage: { + name: 'GC CPU usage', + nameShort: 'GC CPU usage', + type: 'gauge', + description: 'Server CPU time spent on garbage collection.', + unit: 'percent', + sources: { + prometheus: { + expr: 'go_memstats_gc_cpu_fraction{%(queriesSelector)s}', + }, + }, + }, + + + goHeapMemoryUsage: { + name: 'Go heap 
memory usage', + nameShort: 'Go heap memory usage', + type: 'gauge', + description: 'Heap memory usage for the server.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'go_memstats_heap_alloc_bytes{%(queriesSelector)s} / clamp_min(go_memstats_heap_idle_bytes{%(queriesSelector)s} + go_memstats_heap_alloc_bytes{%(queriesSelector)s}, 1)', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + goThreads: { + name: 'Go threads', + nameShort: 'Go threads', + type: 'gauge', + description: 'Number of threads for the server.', + unit: 'none', + sources: { + prometheus: { + expr: 'go_threads{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + }, + } diff --git a/influxdb-mixin/signals/overview.libsonnet b/influxdb-mixin/signals/overview.libsonnet new file mode 100644 index 000000000..e5d73ebbe --- /dev/null +++ b/influxdb-mixin/signals/overview.libsonnet @@ -0,0 +1,411 @@ +function(this) + local groupAggListWithoutInstance = std.join(',', this.groupLabels); + local groupAggListWithInstance = groupAggListWithoutInstance + ', ' + std.join(',', this.instanceLabels); + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + legendCustomTemplate: '{{influxdb_cluster}} - ' + std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '2m', + discoveryMetric: { + prometheus: 'influxdb_uptime_seconds', + }, + signals: { + uptime: { + name: 'Uptime', + nameShort: 'Uptime', + type: 'gauge', + description: 'Uptime for a cluster.', + unit: 'seconds', + sources: { + prometheus: { + expr: 'influxdb_uptime_seconds{%(queriesSelector)s}', + legendCustomTemplate: 'Uptime', + }, + }, + }, + + buckets: { + name: 'Buckets', + nameShort: 'Buckets', + type: 'gauge', + description: 'Number of buckets in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 
'influxdb_buckets_total{%(queriesSelector)s}', + legendCustomTemplate: 'Buckets', + }, + }, + }, + + users: { + name: 'Users', + nameShort: 'Users', + type: 'gauge', + description: 'Number of users in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_users_total{%(queriesSelector)s}', + legendCustomTemplate: 'Users', + }, + }, + }, + + replications: { + name: 'Replications', + nameShort: 'Replications', + type: 'gauge', + description: 'Number of replications in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_replications_total{%(queriesSelector)s}', + legendCustomTemplate: 'Replications', + }, + }, + }, + + remotes: { + name: 'remotes', + nameShort: 'Remotes', + type: 'gauge', + description: 'Number of remotes in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_remotes_total{%(queriesSelector)s}', + legendCustomTemplate: 'Remotes', + }, + }, + }, + + scrapers: { + name: 'Scrapers', + nameShort: 'Scrapers', + type: 'gauge', + description: 'Number of scrapers in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_scrapers_total{%(queriesSelector)s}', + legendCustomTemplate: 'Scrapers', + }, + }, + }, + + dashboards: { + name: 'Dashboards', + nameShort: 'Dashboards', + type: 'gauge', + description: 'Number of dashboards in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'influxdb_dashboards_total{%(queriesSelector)s}', + legendCustomTemplate: 'Dashboards', + }, + }, + }, + + topInstancesByHTTPAPIRequests: { + name: 'Top instances by HTTP API requests', + nameShort: 'Top instances by HTTP API requests', + type: 'raw', + description: 'Top instances by HTTP API requests in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'topk($k, sum by(job, influxdb_cluster, instance) (rate(http_api_requests_total{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{influxdb_cluster}} - {{instance}}', + }, + }, + }, + +
httpAPIRequestDuration: { + name: 'HTTP API request duration', + nameShort: 'HTTP API request duration', + type: 'raw', + description: 'HTTP API request duration in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'histogram_quantile(0.95, sum by(le, job, influxdb_cluster) (rate(http_api_request_duration_seconds_bucket{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{influxdb_cluster}}', + }, + }, + }, + + httpAPIResponseCodes: { + name: 'HTTP API response codes', + nameShort: 'HTTP API response codes', + type: 'raw', + description: 'HTTP API response codes in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster, response_code) (rate(http_api_requests_total{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{influxdb_cluster}} - {{response_code}}', + }, + }, + }, + + httpQueryOperations: { + name: 'HTTP operations', + nameShort: 'HTTP operations', + type: 'raw', + description: 'HTTP operations in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster, status) (rate(http_query_request_count{%(queriesSelector)s}[$__rate_interval])) > 0', + legendCustomTemplate: '{{influxdb_cluster}} - query -{{status}}', + }, + }, + }, + + httpWriteOperations: { + name: 'HTTP write operations', + nameShort: 'HTTP write operations', + type: 'raw', + description: 'HTTP write operations in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster, status) (rate(http_write_request_count{%(queriesSelector)s}[$__rate_interval])) > 0', + legendCustomTemplate: '{{influxdb_cluster}} - write -{{status}}', + }, + }, + }, + + httpQueryRequestOperationsData: { + name: 'HTTP query request operations data', + nameShort: 'HTTP query request operations data', + type: 'raw', + description: 'HTTP query request operations data in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) 
(rate(http_query_request_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{influxdb_cluster}} - query - request', + }, + }, + }, + + httpQueryResponseOperationsData: { + name: 'HTTP query response operations data', + nameShort: 'HTTP query response operations data', + type: 'raw', + description: 'HTTP query response operations data in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (rate(http_query_response_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{influxdb_cluster}} - query - response', + }, + }, + }, + + httpWriteRequestOperationsData: { + name: 'HTTP write request operations data', + nameShort: 'HTTP write request operations data', + type: 'raw', + description: 'HTTP write request operations data in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (rate(http_write_request_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{influxdb_cluster}} - write - request', + }, + }, + }, + + httpWriteResponseOperationsData: { + name: 'HTTP write response operations data', + nameShort: 'HTTP write response operations data', + type: 'raw', + description: 'HTTP write response operations data in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (rate(http_write_response_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{influxdb_cluster}} - write - response', + }, + }, + }, + + topInstancesByIQLQueryRate: { + name: 'Top instances by InfluxQL query rate', + nameShort: 'Top instances by InfluxQL query rate', + type: 'raw', + description: 'Top instances by InfluxQL query rate in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'topk($k, sum by(job, influxdb_cluster, instance) (rate(influxql_service_requests_total{%(queriesSelector)s}[$__rate_interval])))', + legendCustomTemplate: '{{influxdb_cluster}} - 
{{instance}}', + }, + }, + }, + + + iqlQueryResponseTime: { + name: 'InfluxQL query response time', + nameShort: 'InfluxQL query response time', + type: 'raw', + description: 'InfluxQL query response time in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster, result) (increase(influxql_service_executing_duration_seconds_sum{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{influxdb_cluster}}', + }, + }, + }, + + boltdbReadOperations: { + name: 'BoltDB read operations', + nameShort: 'BoltDB read operations', + type: 'counter', + description: 'BoltDB read operations in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'boltdb_reads_total{%(queriesSelector)s}', + legendCustomTemplate: '{{influxdb_cluster}} - read', + }, + }, + }, + + boltdbWriteOperations: { + name: 'BoltDB write operations', + nameShort: 'BoltDB write operations', + type: 'counter', + description: 'BoltDB write operations in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'boltdb_writes_total{%(queriesSelector)s}', + legendCustomTemplate: '{{influxdb_cluster}} - write', + }, + }, + }, + + activeTasks: { + name: 'Active tasks', + nameShort: 'Active tasks', + type: 'raw', + description: 'Active tasks in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (task_scheduler_current_execution{%(queriesSelector)s})', + legendCustomTemplate: '{{influxdb_cluster}}', + }, + }, + }, + + activeWorkers: { + name: 'Active workers', + nameShort: 'Active workers', + type: 'raw', + description: 'Active workers in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (task_executor_total_runs_active{%(queriesSelector)s})', + legendCustomTemplate: '{{influxdb_cluster}}', + }, + }, + }, + + executionTotals: { + name: 'Execution totals', + nameShort: 'Execution totals', + type: 'raw', + description: 'Execution totals in a cluster.', + 
unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (task_scheduler_total_execution_calls{%(queriesSelector)s})', + legendCustomTemplate: '{{influxdb_cluster}} - total', + }, + }, + }, + + executionFailures: { + name: 'Execution failures', + nameShort: 'Execution failures', + type: 'raw', + description: 'Execution failures in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum by(job, influxdb_cluster) (task_scheduler_total_execute_failure{%(queriesSelector)s})', + legendCustomTemplate: '{{influxdb_cluster}} - failed', + }, + }, + }, + + + scheduleTotals: { + name: 'Schedule totals', + nameShort: 'Schedule totals', + type: 'counter', + description: 'Schedule totals in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_total_schedule_calls{%(queriesSelector)s}', + legendCustomTemplate: '{{influxdb_cluster}} - total', + }, + }, + }, + + scheduleFailures: { + name: 'Schedule failures', + nameShort: 'Schedule failures', + type: 'counter', + description: 'Schedule failures in a cluster.', + unit: 'none', + sources: { + prometheus: { + expr: 'task_scheduler_total_schedule_fails{%(queriesSelector)s}', + legendCustomTemplate: '{{influxdb_cluster}} - failed', + }, + }, + }, + + topInstancesByHeapMemoryUsage: { + name: 'Top instances by heap memory usage', + nameShort: 'Top instances by heap memory usage', + type: 'raw', + description: 'Top instances by heap memory usage in a cluster.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'topk($k, sum by(job, influxdb_cluster, instance) (go_memstats_heap_alloc_bytes{%(queriesSelector)s}/clamp_min(go_memstats_heap_idle_bytes{%(queriesSelector)s} + go_memstats_heap_alloc_bytes{%(queriesSelector)s}, 1)))', + legendCustomTemplate: '{{influxdb_cluster}} - {{instance}}', + }, + }, + }, + + topInstancesByGCCPUUsage: { + name: 'Top instances by GC CPU usage', + nameShort: 'Top instances by GC CPU usage', + type: 'gauge', + description: 'Top 
instances by GC CPU usage in a cluster.', + unit: 'percent', + sources: { + prometheus: { + expr: 'go_memstats_gc_cpu_fraction{%(queriesSelector)s}', + legendCustomTemplate: '{{influxdb_cluster}} - {{instance}}', + }, + }, + }, + }, + } From 0cffac6475e5ca7d8f95e40918dcdce6be660fe9 Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 25 Sep 2025 17:40:56 -0400 Subject: [PATCH 2/4] make fmt --- influxdb-mixin/dashboards.libsonnet | 2 +- influxdb-mixin/links.libsonnet | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/influxdb-mixin/dashboards.libsonnet b/influxdb-mixin/dashboards.libsonnet index 3ea38efaa..acd09f067 100644 --- a/influxdb-mixin/dashboards.libsonnet +++ b/influxdb-mixin/dashboards.libsonnet @@ -81,7 +81,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; if this.config.enableLokiLogs then { 'influxdb-logs.json': - logslib.new( + logslib.new( prefix + ' logs', datasourceName=this.grafana.variables.datasources.loki.name, datasourceRegex=this.grafana.variables.datasources.loki.regex, diff --git a/influxdb-mixin/links.libsonnet b/influxdb-mixin/links.libsonnet index 6347d1e43..39bd0536e 100644 --- a/influxdb-mixin/links.libsonnet +++ b/influxdb-mixin/links.libsonnet @@ -4,11 +4,11 @@ local g = import './g.libsonnet'; local link = g.dashboard.link, new(this): { influxdbClusterOverview: - link.link.new(this.config.dashboardNamePrefix + ' cluster overview','/d/' + this.grafana.dashboards['influxdb-cluster-overview.json'].uid) + link.link.new(this.config.dashboardNamePrefix + ' cluster overview', '/d/' + this.grafana.dashboards['influxdb-cluster-overview.json'].uid) + link.link.options.withKeepTime(true), influxdbInstanceOverview: - link.link.new(this.config.dashboardNamePrefix + ' instance overview','/d/' + this.grafana.dashboards['influxdb-instance-overview.json'].uid) + link.link.new(this.config.dashboardNamePrefix + ' instance overview', '/d/' + this.grafana.dashboards['influxdb-instance-overview.json'].uid) + 
link.link.options.withKeepTime(true), otherDashboards: @@ -21,7 +21,7 @@ local g = import './g.libsonnet'; if this.config.enableLokiLogs then { logs: - link.link.new(this.config.dashboardNamePrefix + ' logs','/d/' + this.grafana.dashboards['influxdb-logs.json'].uid) + link.link.new(this.config.dashboardNamePrefix + ' logs', '/d/' + this.grafana.dashboards['influxdb-logs.json'].uid) + link.link.options.withKeepTime(true), } else {}, } From cab87a4a6273c2090ebe26b864d7b479f792f2e9 Mon Sep 17 00:00:00 2001 From: schmikei Date: Fri, 26 Sep 2025 15:44:00 -0400 Subject: [PATCH 3/4] fix some miscellaneous issues --- .../dashboards_out/influxdb-cluster-overview.json | 8 ++++---- .../dashboards_out/influxdb-instance-overview.json | 12 ++++++------ influxdb-mixin/dashboards_out/influxdb-logs.json | 8 ++++---- influxdb-mixin/links.libsonnet | 2 +- influxdb-mixin/signals/instance.libsonnet | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json b/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json index a67f9820c..325aafba1 100644 --- a/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json +++ b/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json @@ -20,11 +20,11 @@ "asDropdown": true, "includeVars": true, "keepTime": true, - "title": "All dashboards", - "type": "link", - "url": [ + "tags": [ "influxdb-mixin" - ] + ], + "title": "All dashboards", + "type": "dashboards" } ], "panels": [ diff --git a/influxdb-mixin/dashboards_out/influxdb-instance-overview.json b/influxdb-mixin/dashboards_out/influxdb-instance-overview.json index 4b7d85dec..773591dfe 100644 --- a/influxdb-mixin/dashboards_out/influxdb-instance-overview.json +++ b/influxdb-mixin/dashboards_out/influxdb-instance-overview.json @@ -20,11 +20,11 @@ "asDropdown": true, "includeVars": true, "keepTime": true, - "title": "All dashboards", - "type": "link", - "url": [ + "tags": [ "influxdb-mixin" - ] + ], + "title": "All
dashboards", + "type": "dashboards" } ], "panels": [ @@ -564,7 +564,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,influxdb_cluster,instance) (rate(http_query_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance, status) (rate(http_query_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "interval": "1m", @@ -576,7 +576,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,influxdb_cluster,instance) (rate(http_write_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by (job,influxdb_cluster,instance, status) (rate(http_write_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "interval": "1m", diff --git a/influxdb-mixin/dashboards_out/influxdb-logs.json b/influxdb-mixin/dashboards_out/influxdb-logs.json index 10fb1d823..f1b76cd6e 100644 --- a/influxdb-mixin/dashboards_out/influxdb-logs.json +++ b/influxdb-mixin/dashboards_out/influxdb-logs.json @@ -19,11 +19,11 @@ "asDropdown": true, "includeVars": true, "keepTime": true, - "title": "All dashboards", - "type": "link", - "url": [ + "tags": [ "influxdb-mixin" - ] + ], + "title": "All dashboards", + "type": "dashboards" } ], "panels": [ diff --git a/influxdb-mixin/links.libsonnet b/influxdb-mixin/links.libsonnet index 39bd0536e..e68cfe6c0 100644 --- a/influxdb-mixin/links.libsonnet +++ b/influxdb-mixin/links.libsonnet @@ -12,7 +12,7 @@ local g = import './g.libsonnet'; + link.link.options.withKeepTime(true), otherDashboards: - 
link.link.new('All dashboards', this.config.dashboardTags) + link.dashboards.new('All dashboards', this.config.dashboardTags) + link.dashboards.options.withIncludeVars(true) + link.dashboards.options.withKeepTime(true) + link.dashboards.options.withAsDropdown(true), diff --git a/influxdb-mixin/signals/instance.libsonnet b/influxdb-mixin/signals/instance.libsonnet index f265e1b11..ce0dc6c1b 100644 --- a/influxdb-mixin/signals/instance.libsonnet +++ b/influxdb-mixin/signals/instance.libsonnet @@ -176,7 +176,7 @@ function(this) unit: 'none', sources: { prometheus: { - expr: 'sum by (' + groupAggListWithInstance + ') (rate(http_query_request_count{%(queriesSelector)s}[$__rate_interval]))', + expr: 'sum by (' + groupAggListWithInstance + ', status) (rate(http_query_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: '{{instance}} - query - {{status}}', }, }, @@ -190,7 +190,7 @@ function(this) unit: 'none', sources: { prometheus: { - expr: 'sum by (' + groupAggListWithInstance + ') (rate(http_write_request_count{%(queriesSelector)s}[$__rate_interval]))', + expr: 'sum by (' + groupAggListWithInstance + ', status) (rate(http_write_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: '{{instance}} - write - {{status}}', }, }, From 0747061c56f1dc65f93da55f28011c274844fb98 Mon Sep 17 00:00:00 2001 From: schmikei Date: Mon, 6 Oct 2025 13:08:46 -0400 Subject: [PATCH 4/4] adjust dashboard so that instance filteringSelector is not present on cluster overview. 
Also fix some placeholder descriptions --- influxdb-mixin/dashboards.libsonnet | 5 +- .../influxdb-cluster-overview.json | 149 +++++++++++------- .../influxdb-instance-overview.json | 26 +-- influxdb-mixin/panels.libsonnet | 69 ++++---- influxdb-mixin/rows.libsonnet | 4 +- influxdb-mixin/signals/overview.libsonnet | 9 +- 6 files changed, 150 insertions(+), 112 deletions(-) diff --git a/influxdb-mixin/dashboards.libsonnet b/influxdb-mixin/dashboards.libsonnet index acd09f067..478d93be8 100644 --- a/influxdb-mixin/dashboards.libsonnet +++ b/influxdb-mixin/dashboards.libsonnet @@ -32,7 +32,10 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; ) ) + root.applyCommon( - vars.multiInstance + [ + std.filter( + function(x) x.name != 'instance', + vars.multiInstance, + ) + [ g.dashboard.variable.custom.new( 'k', values=['5', '10', '20', '50'], diff --git a/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json b/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json index 325aafba1..f22c9eac3 100644 --- a/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json +++ b/influxdb-mixin/dashboards_out/influxdb-cluster-overview.json @@ -127,7 +127,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", "instant": true, "legendFormat": "Uptime", @@ -138,7 +138,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_buckets_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_buckets_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", "instant": true, "legendFormat": "Buckets", @@ -149,7 +149,7 @@ 
"type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_users_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_users_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", "instant": true, "legendFormat": "Users", @@ -160,7 +160,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_replications_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_replications_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", "instant": true, "legendFormat": "Replications", @@ -171,7 +171,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_remotes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_remotes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", "instant": true, "legendFormat": "Remotes", @@ -182,7 +182,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_scrapers_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_scrapers_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", "instant": true, "legendFormat": "Scrapers", @@ -193,7 +193,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "influxdb_dashboards_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "influxdb_dashboards_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "table", 
"instant": true, "legendFormat": "Dashboards", @@ -299,7 +299,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])))", "format": "time_series", "instant": false, "interval": "1m", @@ -315,7 +315,7 @@ "type": "datasource", "uid": "-- Mixed --" }, - "description": "Distribution of HTTP API request durations across the cluster.", + "description": "Time taken to respond to HTTP API requests for the cluster.", "fieldConfig": { "defaults": { "unit": "s" @@ -335,7 +335,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "histogram_quantile(0.95, sum by(le, job, influxdb_cluster) (rate(http_api_request_duration_seconds_bucket{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le, job, influxdb_cluster) (rate(http_api_request_duration_seconds_bucket{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])))", "format": "time_series", "instant": false, "interval": "1m", @@ -351,7 +351,7 @@ "type": "datasource", "uid": "-- Mixed --" }, - "description": "Share of HTTP API responses by status code across the cluster.", + "description": "Rate of different HTTP response codes for the entire cluster.", "fieldConfig": { "defaults": { "unit": "reqps" @@ -381,7 +381,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, response_code) 
(rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job, influxdb_cluster, response_code) (rate(http_api_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", "format": "time_series", "instant": false, "interval": "1m", @@ -397,7 +397,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of HTTP query operations by status in the cluster.", + "description": "Rate of database operations from HTTP for the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -435,15 +435,27 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, status) (rate(http_query_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])) > 0", + "expr": "sum by(job, influxdb_cluster, status) (rate(http_query_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])) > 0", "format": "time_series", "instant": false, "interval": "1m", - "legendFormat": "{{influxdb_cluster}} - query -{{status}}", + "legendFormat": "{{influxdb_cluster}} - query - {{status}}", "refId": "HTTP operations" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, influxdb_cluster, status) (rate(http_write_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])) > 0", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - write - {{status}}", + "refId": "HTTP write operations" } ], - "title": "HTTP query operations", + "title": "HTTP operations", "type": "timeseries" }, { @@ -451,7 +463,7 @@ "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "description": "Rate of HTTP write operations by status in the cluster.", + "description": "Rate of data transferred for HTTP query and write operations in the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -461,7 +473,7 @@ "lineWidth": 2, "showPoints": "never" }, - "unit": "reqps" + "unit": "bytes" } }, "gridPos": { @@ -489,15 +501,51 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, status) (rate(http_write_request_count{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])) > 0", + "expr": "sum by(job, influxdb_cluster) (rate(http_query_request_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", "format": "time_series", "instant": false, "interval": "1m", - "legendFormat": "{{influxdb_cluster}} - write -{{status}}", - "refId": "HTTP write operations" + "legendFormat": "{{influxdb_cluster}} - query - request", + "refId": "HTTP query request operations data" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, influxdb_cluster) (rate(http_query_response_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - query - response", + "refId": "HTTP query response operations data" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, influxdb_cluster) (rate(http_write_request_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - write - request", + "refId": "HTTP write request operations data" + 
}, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job, influxdb_cluster) (rate(http_write_response_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{influxdb_cluster}} - write - response", + "refId": "HTTP write response operations data" } ], - "title": "HTTP write operations", + "title": "HTTP operation data", "type": "timeseries" }, { @@ -505,7 +553,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Top 5 instances by InfluxQL query rate in the cluster.", + "description": "Rate of InfluxQL queries for the instances with the most traffic in the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -542,7 +590,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(influxql_service_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])))", + "expr": "topk($k, sum by(job, influxdb_cluster, instance) (rate(influxql_service_requests_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])))", "format": "time_series", "instant": false, "interval": "1m", @@ -558,7 +606,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Total time spent executing InfluxQL queries in the cluster.", + "description": "Response time for recent InfluxQL queries, organized by result.", "fieldConfig": { "defaults": { "custom": { @@ -595,7 +643,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster, result) 
(increase(influxql_service_executing_duration_seconds_sum{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job, influxdb_cluster, result) (increase(influxql_service_executing_duration_seconds_sum{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval]))", "format": "time_series", "instant": false, "interval": "1m", @@ -611,7 +659,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of BoltDB read and write operations in the cluster.", + "description": "Rate of reads and writes to the underlying BoltDB storage engine for the entire cluster.", "fieldConfig": { "defaults": { "custom": { @@ -648,7 +696,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(boltdb_reads_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(boltdb_reads_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", "format": "time_series", "instant": false, "interval": "1m", @@ -660,7 +708,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(boltdb_writes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(boltdb_writes_total{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", "format": "time_series", "instant": false, "interval": "1m", @@ -689,7 +737,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of currently executing tasks in the cluster.", + "description": "Number of tasks currently being executed for the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -726,7 +774,7 @@ "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (task_scheduler_current_execution{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, influxdb_cluster) (task_scheduler_current_execution{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"})", "format": "time_series", "instant": false, "interval": "1m", @@ -742,7 +790,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of active task executor workers in the cluster.", + "description": "Number of workers currently running tasks on the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -779,7 +827,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (task_executor_total_runs_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, influxdb_cluster) (task_executor_total_runs_active{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"})", "format": "time_series", "instant": false, "interval": "1m", @@ -795,7 +843,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Total number of task executions in the cluster.", + "description": "Rate of execution operations and execution failures for the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -832,7 +880,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (task_scheduler_total_execution_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, influxdb_cluster) (task_scheduler_total_execution_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"})", "format": "time_series", "instant": false, "interval": "1m", @@ -844,7 +892,7 
@@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, influxdb_cluster) (task_scheduler_total_execute_failure{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job, influxdb_cluster) (task_scheduler_total_execute_failure{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"})", "format": "time_series", "instant": false, "interval": "1m", @@ -860,7 +908,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Total and failed task schedules across the cluster.", + "description": "Rate of schedule operations and schedule operation failures for the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -897,7 +945,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_schedule_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_schedule_calls{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", "format": "time_series", "instant": false, "interval": "1m", @@ -909,7 +957,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(task_scheduler_total_schedule_fails{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(task_scheduler_total_schedule_fails{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}[$__rate_interval])", "format": "time_series", "instant": false, "interval": "1m", @@ -938,7 +986,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Top instances by Go heap memory usage percentage.", + "description": "Heap memory usage for the largest instances in the cluster.", "fieldConfig": { "defaults": { 
"custom": { @@ -975,7 +1023,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk($k, sum by(job, influxdb_cluster, instance) (go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}/clamp_min(go_memstats_heap_idle_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"} + go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}, 1)))", + "expr": "topk($k, sum by(job, influxdb_cluster, instance) (go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}/clamp_min(go_memstats_heap_idle_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"} + go_memstats_heap_alloc_bytes{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}, 1)))", "format": "time_series", "instant": false, "interval": "1m", @@ -991,7 +1039,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Instances with the highest Go garbage collection CPU usage.", + "description": "Fraction of CPU time used for garbage collection for the top instances in the cluster.", "fieldConfig": { "defaults": { "custom": { @@ -1028,7 +1076,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "go_memstats_gc_cpu_fraction{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\",instance=~\"$instance\"}", + "expr": "go_memstats_gc_cpu_fraction{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}", "format": "time_series", "instant": false, "interval": "1m", @@ -1084,21 +1132,6 @@ "sort": 1, "type": "query" }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": 
"Instance", - "multi": true, - "name": "instance", - "query": "label_values(influxdb_uptime_seconds{job=\"integrations/influxdb\",job=~\"$job\",influxdb_cluster=~\"$influxdb_cluster\"}, instance)", - "refresh": 2, - "sort": 1, - "type": "query" - }, { "hide": 2, "label": "Loki data source", diff --git a/influxdb-mixin/dashboards_out/influxdb-instance-overview.json b/influxdb-mixin/dashboards_out/influxdb-instance-overview.json index 773591dfe..d8d1f0c9d 100644 --- a/influxdb-mixin/dashboards_out/influxdb-instance-overview.json +++ b/influxdb-mixin/dashboards_out/influxdb-instance-overview.json @@ -340,7 +340,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of threads on the server.", + "description": "Number of threads currenty active on the server.", "fieldConfig": { "defaults": { "color": { @@ -395,7 +395,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of HTTP API requests received by this instance.", + "description": "Rate of HTTP API requests to the API, organized by response code.", "fieldConfig": { "defaults": { "custom": { @@ -788,7 +788,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Rate of BoltDB read and write operations performed by this instance.", + "description": "Rate of reads and writes to the underlying BoltDB storage engine for the server.", "fieldConfig": { "defaults": { "custom": { @@ -866,7 +866,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of currently executing tasks on this instance.", + "description": "Number of tasks currently being executed for the server.", "fieldConfig": { "defaults": { "custom": { @@ -919,7 +919,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Number of active task executor workers on this instance.", + "description": "Number of workers currently running tasks on the server.", "fieldConfig": { "defaults": { "custom": { @@ -972,7 
+972,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Worker utilization for task execution on this instance.", + "description": "Percentage of available workers that are currently busy.", "fieldConfig": { "defaults": { "custom": { @@ -1025,7 +1025,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Total and failed task executions on this instance.", + "description": "Rate of executions and execution failures for the server.", "fieldConfig": { "defaults": { "custom": { @@ -1090,7 +1090,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Total and failed task schedules on this instance.", + "description": "Rate of schedule operations and schedule operation failures for the server.", "fieldConfig": { "defaults": { "custom": { @@ -1168,7 +1168,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Elapsed seconds since the Go runtime last performed a GC.", + "description": "Amount of time since the last garbage collection cycle.", "fieldConfig": { "defaults": { "color": { @@ -1210,7 +1210,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Time spent in Go garbage collection during each $__interval.", + "description": "Server CPU time spent on garbage collection.", "fieldConfig": { "defaults": { "custom": { @@ -1263,7 +1263,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Fraction of CPU time used by Go garbage collection.", + "description": "Percent of server CPU time used for garbage collection.", "fieldConfig": { "defaults": { "custom": { @@ -1315,7 +1315,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "Estimated Go heap memory utilization of this instance.", + "description": "Heap memory usage for the server.", "fieldConfig": { "defaults": { "custom": { @@ -1367,7 +1367,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": 
"Number of OS threads created by this process.", + "description": "Number of OS threads created for the server.", "fieldConfig": { "defaults": { "custom": { diff --git a/influxdb-mixin/panels.libsonnet b/influxdb-mixin/panels.libsonnet index f6ed9f692..10f482fa8 100644 --- a/influxdb-mixin/panels.libsonnet +++ b/influxdb-mixin/panels.libsonnet @@ -127,7 +127,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; httpAPIRequestDurationPanel: g.panel.histogram.new('HTTP API request duration') - + g.panel.histogram.panelOptions.withDescription('Distribution of HTTP API request durations across the cluster.') + + g.panel.histogram.panelOptions.withDescription('Time taken to respond to HTTP API requests for the cluster.') + g.panel.histogram.queryOptions.withTargets([ signals.overview.httpAPIRequestDuration.asTarget() { interval: '1m' }, ]) @@ -137,7 +137,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; g.panel.pieChart.new( 'HTTP API response codes', ) - + g.panel.pieChart.panelOptions.withDescription('Share of HTTP API responses by status code across the cluster.') + + g.panel.pieChart.panelOptions.withDescription('Rate of different HTTP response codes for the entire cluster.') + g.panel.pieChart.queryOptions.withTargets([ signals.overview.httpAPIResponseCodes.asTarget() { interval: '1m' }, ]) @@ -146,27 +146,30 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.pieChart.options.reduceOptions.withCalcs(['sum']), // Query operations panels - httpQueryOperationsPanel: + httpOperationsPanel: commonlib.panels.generic.timeSeries.base.new( - 'HTTP query operations', + 'HTTP operations', targets=[ signals.overview.httpQueryOperations.asTarget() { interval: '1m' }, + signals.overview.httpWriteOperations.asTarget() { interval: '1m' }, ], - description='Rate of HTTP query operations by status in the cluster.', + description='Rate of database operations from HTTP for the cluster.', ) + 
g.panel.timeSeries.standardOptions.withUnit('reqps') + g.panel.timeSeries.options.legend.withPlacement('right'), - - httpWriteOperationsPanel: + httpOperationsDataPanel: commonlib.panels.generic.timeSeries.base.new( - 'HTTP write operations', + 'HTTP operation data', targets=[ - signals.overview.httpWriteOperations.asTarget() { interval: '1m' }, + signals.overview.httpQueryRequestOperationsData.asTarget() { interval: '1m' }, + signals.overview.httpQueryResponseOperationsData.asTarget() { interval: '1m' }, + signals.overview.httpWriteRequestOperationsData.asTarget() { interval: '1m' }, + signals.overview.httpWriteResponseOperationsData.asTarget() { interval: '1m' }, ], - description='Rate of HTTP write operations by status in the cluster.', + description='Rate of data transferred for HTTP query and write operations in the cluster.', ) - + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.standardOptions.withUnit('bytes') + g.panel.timeSeries.options.legend.withPlacement('right'), @@ -177,7 +180,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[ signals.overview.topInstancesByIQLQueryRate.asTarget() { interval: '1m' }, ], - description='Top 5 instances by InfluxQL query rate in the cluster.', + description='Rate of InfluxQL queries for the instances with the most traffic in the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('reqps'), @@ -187,7 +190,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[ signals.overview.iqlQueryResponseTime.asTarget() { interval: '1m' }, ], - description='Total time spent executing InfluxQL queries in the cluster.', + description='Response time for recent InfluxQL queries, organized by result.', ) + g.panel.timeSeries.standardOptions.withUnit('s'), @@ -198,7 +201,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.overview.boltdbReadOperations.asTarget() { interval: '1m' }, signals.overview.boltdbWriteOperations.asTarget() { 
interval: '1m' }, ], - description='Rate of BoltDB read and write operations in the cluster.', + description='Rate of reads and writes to the underlying BoltDB storage engine for the entire cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -209,7 +212,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[ signals.overview.activeTasks.asTarget() { interval: '1m' }, ], - description='Number of currently executing tasks in the cluster.', + description='Number of tasks currently being executed for the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -219,7 +222,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[ signals.overview.activeWorkers.asTarget() { interval: '1m' }, ], - description='Number of active task executor workers in the cluster.', + description='Number of workers currently running tasks on the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -231,7 +234,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.overview.executionTotals.asTarget() { interval: '1m' }, signals.overview.executionFailures.asTarget() { interval: '1m' }, ], - description='Total number of task executions in the cluster.', + description='Rate of execution operations and execution failures for the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -243,7 +246,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.overview.scheduleTotals.asTarget() { interval: '1m' }, signals.overview.scheduleFailures.asTarget() { interval: '1m' }, ], - description='Total and failed task schedules across the cluster.', + description='Rate of schedule operations and schedule operation failures for the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -254,7 +257,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[ signals.overview.topInstancesByHeapMemoryUsage.asTarget() { interval: '1m' }, ], 
- description='Top instances by Go heap memory usage percentage.', + description='Heap memory usage for the largest instances in the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('percentunit'), @@ -265,7 +268,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; targets=[ signals.overview.topInstancesByGCCPUUsage.asTarget() { interval: '1m' }, ], - description='Instances with the highest Go garbage collection CPU usage.', + description='Fraction of CPU time used for garbage collection for the top instances in the cluster.', ) + g.panel.timeSeries.standardOptions.withUnit('percent'), @@ -348,7 +351,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.stat.base.new( 'Threads', targets=[signals.instance.goThreads.asTarget()], - description='Number of threads on the server.', + description='Number of threads currenty active on the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none') + g.panel.stat.standardOptions.color.withFixedColor('light-green') @@ -358,7 +361,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'HTTP API requests', targets=[signals.instance.httpAPIRequests.asTarget() { interval: '1m' }], - description='Rate of HTTP API requests received by this instance.', + description='Rate of HTTP API requests to the API, organized by response code.', ) + g.panel.timeSeries.standardOptions.withUnit('reqps') + g.panel.timeSeries.options.legend.withPlacement('right'), @@ -424,7 +427,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.instance.boltdbReadOperations.asTarget() { interval: '1m' }, signals.instance.boltdbWriteOperations.asTarget() { interval: '1m' }, ], - description='Rate of BoltDB read and write operations performed by this instance.', + description='Rate of reads and writes to the underlying BoltDB storage engine for the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ 
-432,7 +435,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Active tasks', targets=[signals.instance.activeTasks.asTarget() { interval: '1m' }], - description='Number of currently executing tasks on this instance.', + description='Number of tasks currently being executed for the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -440,7 +443,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Active workers', targets=[signals.instance.activeWorkers.asTarget() { interval: '1m' }], - description='Number of active task executor workers on this instance.', + description='Number of workers currently running tasks on the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -448,7 +451,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Worker usage', targets=[signals.instance.workerUsage.asTarget() { interval: '1m' }], - description='Worker utilization for task execution on this instance.', + description='Percentage of available workers that are currently busy.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -459,7 +462,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.instance.executionsTotal.asTarget() { interval: '1m' }, signals.instance.executionsFailures.asTarget() { interval: '1m' }, ], - description='Total and failed task executions on this instance.', + description='Rate of executions and execution failures for the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -470,7 +473,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.instance.scheduleTotals.asTarget() { interval: '1m' }, signals.instance.scheduleFailures.asTarget() { interval: '1m' }, ], - description='Total and failed task schedules on this instance.', + description='Rate of schedule operations and 
schedule operation failures for the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), @@ -480,7 +483,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.stat.base.new( 'Time since last GC', targets=[signals.instance.timeSinceLastGC.asTarget()], - description='Elapsed seconds since the Go runtime last performed a GC.', + description='Amount of time since the last garbage collection cycle.', ) + g.panel.timeSeries.standardOptions.withUnit('s') + g.panel.stat.standardOptions.color.withFixedColor('light-green') @@ -490,7 +493,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'GC time', targets=[signals.instance.gcTime.asTarget() { interval: '2m' }], - description='Time spent in Go garbage collection during each $__interval.', + description='Server CPU time spent on garbage collection.', ) + g.panel.timeSeries.standardOptions.withUnit('s'), @@ -499,7 +502,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'GC CPU usage', targets=[signals.instance.gcCPUUsage.asTarget()], - description='Fraction of CPU time used by Go garbage collection.', + description='Percent of server CPU time used for garbage collection.', ) + g.panel.timeSeries.standardOptions.withUnit('percent'), @@ -507,7 +510,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Heap memory usage', targets=[signals.instance.goHeapMemoryUsage.asTarget()], - description='Estimated Go heap memory utilization of this instance.', + description='Heap memory usage for the server.', ) + g.panel.timeSeries.standardOptions.withUnit('percent'), @@ -516,7 +519,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Threads', targets=[signals.instance.goThreads.asTarget()], - description='Number of OS threads created by this process.', 
+ description='Number of OS threads created for the server.', ) + g.panel.timeSeries.standardOptions.withUnit('none'), diff --git a/influxdb-mixin/rows.libsonnet b/influxdb-mixin/rows.libsonnet index 731d633b1..d6dd0211d 100644 --- a/influxdb-mixin/rows.libsonnet +++ b/influxdb-mixin/rows.libsonnet @@ -23,8 +23,8 @@ local g = import './g.libsonnet'; this.grafana.panels.topInstancesByHTTPAPIRequestsPanel { gridPos+: { w: 8 } }, this.grafana.panels.httpAPIRequestDurationPanel { gridPos+: { w: 8 } }, this.grafana.panels.httpAPIResponseCodesPanel { gridPos+: { w: 8 } }, - this.grafana.panels.httpQueryOperationsPanel { gridPos+: { w: 12 } }, - this.grafana.panels.httpWriteOperationsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.httpOperationsPanel { gridPos+: { w: 12 } }, + this.grafana.panels.httpOperationsDataPanel { gridPos+: { w: 12 } }, this.grafana.panels.topInstancesByIQLQueryRatePanel { gridPos+: { w: 8 } }, this.grafana.panels.iqlQueryResponseTimePanel { gridPos+: { w: 8 } }, this.grafana.panels.boltdbOperationsPanel { gridPos+: { w: 8 } }, diff --git a/influxdb-mixin/signals/overview.libsonnet b/influxdb-mixin/signals/overview.libsonnet index e5d73ebbe..615578f60 100644 --- a/influxdb-mixin/signals/overview.libsonnet +++ b/influxdb-mixin/signals/overview.libsonnet @@ -1,10 +1,9 @@ function(this) - local groupAggListWithoutInstance = std.join(',', this.groupLabels); - local groupAggListWithInstance = groupAggListWithoutInstance + ', ' + std.join(',', this.instanceLabels); { filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, + // note for the cluster overview we are intentionally dropping the instance labels + instanceLabels: [], enableLokiLogs: this.enableLokiLogs, legendCustomTemplate: '{{influxdb_cluster}} - ' + std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), aggLevel: 'none', @@ -163,7 +162,7 @@ function(this) sources: { prometheus: { expr: 'sum by(job, 
influxdb_cluster, status) (rate(http_query_request_count{%(queriesSelector)s}[$__rate_interval])) > 0', - legendCustomTemplate: '{{influxdb_cluster}} - query -{{status}}', + legendCustomTemplate: '{{influxdb_cluster}} - query - {{status}}', }, }, }, @@ -177,7 +176,7 @@ function(this) sources: { prometheus: { expr: 'sum by(job, influxdb_cluster, status) (rate(http_write_request_count{%(queriesSelector)s}[$__rate_interval])) > 0', - legendCustomTemplate: '{{influxdb_cluster}} - write -{{status}}', + legendCustomTemplate: '{{influxdb_cluster}} - write - {{status}}', }, }, },