diff --git a/pkg/query-service/app/integrations/builtin_integrations/kafka/assets/dashboards/overview.json b/pkg/query-service/app/integrations/builtin_integrations/kafka/assets/dashboards/overview.json new file mode 100644 index 00000000000..aa1570f265f --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/assets/dashboards/overview.json @@ -0,0 +1,575 @@ +{ + "description": "Kafka Server overview built from the OpenTelemetry Kafka metrics receiver.", + "id": "kafka-overview", + "layout": [ + { + "h": 3, + "i": "brokers", + "moved": false, + "static": false, + "w": 6, + "x": 0, + "y": 0 + }, + { + "h": 3, + "i": "topic-partitions", + "moved": false, + "static": false, + "w": 6, + "x": 6, + "y": 0 + }, + { + "h": 3, + "i": "consumer-lag", + "moved": false, + "static": false, + "w": 6, + "x": 0, + "y": 3 + }, + { + "h": 3, + "i": "group-members", + "moved": false, + "static": false, + "w": 6, + "x": 6, + "y": 3 + }, + { + "h": 3, + "i": "offset-growth", + "moved": false, + "static": false, + "w": 6, + "x": 0, + "y": 6 + }, + { + "h": 3, + "i": "replicas-in-sync", + "moved": false, + "static": false, + "w": 6, + "x": 6, + "y": 6 + } + ], + "tags": ["kafka", "messaging"], + "title": "Kafka overview", + "uploadedGrafana": false, + "uuid": "d997250d-e803-48f9-85e7-c8579816b7d2", + "variables": { + "cluster_alias": { + "allSelected": true, + "customValue": "", + "description": "Kafka cluster alias", + "id": "cluster_alias", + "key": "cluster_alias", + "modificationUUID": "cluster-alias-mod", + "multiSelect": true, + "name": "cluster_alias", + "order": 0, + "queryValue": "SELECT JSONExtractString(labels, 'kafka.cluster.alias') AS cluster_alias FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'kafka_brokers' GROUP BY cluster_alias", + "selectedValue": [], + "showALLOption": true, + "sort": "ASC", + "textboxValue": "", + "type": "QUERY" + }, + "topic": { + "allSelected": true, + "customValue": "", + "description": "Kafka 
topic name", + "id": "topic", + "key": "topic", + "modificationUUID": "topic-mod", + "multiSelect": true, + "name": "topic", + "order": 1, + "queryValue": "SELECT JSONExtractString(labels, 'topic') AS topic FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'kafka_topic_partitions' GROUP BY topic", + "selectedValue": [], + "showALLOption": true, + "sort": "ASC", + "textboxValue": "", + "type": "QUERY" + }, + "group": { + "allSelected": true, + "customValue": "", + "description": "Kafka consumer group", + "id": "group", + "key": "group", + "modificationUUID": "group-mod", + "multiSelect": true, + "name": "group", + "order": 2, + "queryValue": "SELECT JSONExtractString(labels, 'group') AS consumer_group FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'kafka_consumer_group_lag_sum' GROUP BY consumer_group", + "selectedValue": [], + "showALLOption": true, + "sort": "ASC", + "textboxValue": "", + "type": "QUERY" + } + }, + "version": "v5", + "widgets": [ + { + "description": "Broker count reported by the receiver.", + "fillSpans": false, + "id": "brokers", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_brokers", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "kafka.cluster.alias IN $cluster_alias" + }, + "functions": [], + "groupBy": [], + "having": { + "expression": "" + }, + "legend": "Brokers", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "brokers-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + 
"queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Broker count", + "yAxisUnit": "none" + }, + { + "description": "Partition count per topic.", + "fillSpans": false, + "id": "topic-partitions", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_topic_partitions", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "topic-partitions-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Topic partitions", + "yAxisUnit": "none" + }, + { + "description": "Lag summed across partitions per consumer group and topic.", + "fillSpans": false, + "id": "consumer-lag", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_consumer_group_lag_sum", + "reduceTo": "max", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "max" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + 
"filter": { + "expression": "group IN $group AND topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "group--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "group", + "type": "tag" + }, + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{group}} / {{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "consumer-lag-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Consumer lag", + "yAxisUnit": "none" + }, + { + "description": "Member count per consumer group.", + "fillSpans": false, + "id": "group-members", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_consumer_group_members", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "group IN $group" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "group--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "group", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{group}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": 
"group-members-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Consumer group members", + "yAxisUnit": "none" + }, + { + "description": "Current partition offsets by topic.", + "fillSpans": false, + "id": "offset-growth", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_partition_current_offset", + "reduceTo": "max", + "spaceAggregation": "max", + "temporality": null, + "timeAggregation": "max" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "offset-growth-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Partition current offsets", + "yAxisUnit": "none" + }, + { + "description": "In-sync replica count across topic partitions.", + "fillSpans": false, + "id": "replicas-in-sync", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_partition_replicas_in_sync", + "reduceTo": "avg", + 
"spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "replicas-in-sync-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Replicas in sync", + "yAxisUnit": "none" + } + ] +} diff --git a/pkg/query-service/app/integrations/builtin_integrations/kafka/assets/dashboards/overview_dot.json b/pkg/query-service/app/integrations/builtin_integrations/kafka/assets/dashboards/overview_dot.json new file mode 100644 index 00000000000..aa1570f265f --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/assets/dashboards/overview_dot.json @@ -0,0 +1,575 @@ +{ + "description": "Kafka Server overview built from the OpenTelemetry Kafka metrics receiver.", + "id": "kafka-overview", + "layout": [ + { + "h": 3, + "i": "brokers", + "moved": false, + "static": false, + "w": 6, + "x": 0, + "y": 0 + }, + { + "h": 3, + "i": "topic-partitions", + "moved": false, + "static": false, + "w": 6, + "x": 6, + "y": 0 + }, + { + "h": 3, + "i": "consumer-lag", + "moved": false, + "static": false, + "w": 6, + "x": 0, + "y": 3 + }, + { + "h": 3, + "i": "group-members", + "moved": false, + "static": false, + "w": 6, + "x": 6, + "y": 3 + }, + { + "h": 3, + "i": "offset-growth", + "moved": 
false, + "static": false, + "w": 6, + "x": 0, + "y": 6 + }, + { + "h": 3, + "i": "replicas-in-sync", + "moved": false, + "static": false, + "w": 6, + "x": 6, + "y": 6 + } + ], + "tags": ["kafka", "messaging"], + "title": "Kafka overview", + "uploadedGrafana": false, + "uuid": "d997250d-e803-48f9-85e7-c8579816b7d2", + "variables": { + "cluster_alias": { + "allSelected": true, + "customValue": "", + "description": "Kafka cluster alias", + "id": "cluster_alias", + "key": "cluster_alias", + "modificationUUID": "cluster-alias-mod", + "multiSelect": true, + "name": "cluster_alias", + "order": 0, + "queryValue": "SELECT JSONExtractString(labels, 'kafka.cluster.alias') AS cluster_alias FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'kafka_brokers' GROUP BY cluster_alias", + "selectedValue": [], + "showALLOption": true, + "sort": "ASC", + "textboxValue": "", + "type": "QUERY" + }, + "topic": { + "allSelected": true, + "customValue": "", + "description": "Kafka topic name", + "id": "topic", + "key": "topic", + "modificationUUID": "topic-mod", + "multiSelect": true, + "name": "topic", + "order": 1, + "queryValue": "SELECT JSONExtractString(labels, 'topic') AS topic FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'kafka_topic_partitions' GROUP BY topic", + "selectedValue": [], + "showALLOption": true, + "sort": "ASC", + "textboxValue": "", + "type": "QUERY" + }, + "group": { + "allSelected": true, + "customValue": "", + "description": "Kafka consumer group", + "id": "group", + "key": "group", + "modificationUUID": "group-mod", + "multiSelect": true, + "name": "group", + "order": 2, + "queryValue": "SELECT JSONExtractString(labels, 'group') AS consumer_group FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'kafka_consumer_group_lag_sum' GROUP BY consumer_group", + "selectedValue": [], + "showALLOption": true, + "sort": "ASC", + "textboxValue": "", + "type": "QUERY" + } + }, + "version": "v5", + "widgets": [ 
+ { + "description": "Broker count reported by the receiver.", + "fillSpans": false, + "id": "brokers", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_brokers", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "kafka.cluster.alias IN $cluster_alias" + }, + "functions": [], + "groupBy": [], + "having": { + "expression": "" + }, + "legend": "Brokers", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "brokers-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Broker count", + "yAxisUnit": "none" + }, + { + "description": "Partition count per topic.", + "fillSpans": false, + "id": "topic-partitions", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_topic_partitions", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + 
"stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "topic-partitions-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Topic partitions", + "yAxisUnit": "none" + }, + { + "description": "Lag summed across partitions per consumer group and topic.", + "fillSpans": false, + "id": "consumer-lag", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_consumer_group_lag_sum", + "reduceTo": "max", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "max" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "group IN $group AND topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "group--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "group", + "type": "tag" + }, + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{group}} / {{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "consumer-lag-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Consumer lag", + "yAxisUnit": "none" + }, + { + "description": "Member count 
per consumer group.", + "fillSpans": false, + "id": "group-members", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_consumer_group_members", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "group IN $group" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "group--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "group", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{group}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "group-members-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Consumer group members", + "yAxisUnit": "none" + }, + { + "description": "Current partition offsets by topic.", + "fillSpans": false, + "id": "offset-growth", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_partition_current_offset", + "reduceTo": "max", + "spaceAggregation": "max", + "temporality": null, + "timeAggregation": "max" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": 
"tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "offset-growth-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Partition current offsets", + "yAxisUnit": "none" + }, + { + "description": "In-sync replica count across topic partitions.", + "fillSpans": false, + "id": "replicas-in-sync", + "isStacked": false, + "nullZeroValues": "zero", + "opacity": "1", + "panelTypes": "graph", + "query": { + "builder": { + "queryData": [ + { + "aggregations": [ + { + "metricName": "kafka_partition_replicas_in_sync", + "reduceTo": "avg", + "spaceAggregation": "sum", + "temporality": null, + "timeAggregation": "avg" + } + ], + "dataSource": "metrics", + "disabled": false, + "expression": "A", + "filter": { + "expression": "topic IN $topic" + }, + "functions": [], + "groupBy": [ + { + "dataType": "string", + "id": "topic--string--tag--false", + "isColumn": false, + "isJSON": false, + "key": "topic", + "type": "tag" + } + ], + "having": { + "expression": "" + }, + "legend": "{{topic}}", + "limit": null, + "orderBy": [], + "queryName": "A", + "stepInterval": 60 + } + ], + "queryFormulas": [] + }, + "clickhouse_sql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "id": "replicas-in-sync-query", + "promql": [ + { + "disabled": false, + "legend": "", + "name": "A", + "query": "" + } + ], + "queryType": "builder" + }, + "softMax": null, + "softMin": 0, + "thresholds": [], + "timePreferance": "GLOBAL_TIME", + "title": "Replicas in sync", + "yAxisUnit": "none" + } + ] +} diff --git 
a/pkg/query-service/app/integrations/builtin_integrations/kafka/config/collect-metrics.md b/pkg/query-service/app/integrations/builtin_integrations/kafka/config/collect-metrics.md new file mode 100644 index 00000000000..963c10ad400 --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/config/collect-metrics.md @@ -0,0 +1,60 @@ +### Collect Kafka Metrics + +You can collect Kafka broker, topic, partition, and consumer group metrics with the OpenTelemetry `kafkametricsreceiver`. + +#### Create collector config file + +Save the following configuration as `kafka-metrics-collection-config.yaml`: + +```yaml +receivers: + kafkametrics: + brokers: + - ${env:KAFKA_BROKERS} + cluster_alias: ${env:KAFKA_CLUSTER_ALIAS} + collection_interval: 60s + protocol_version: 3.0.0 + scrapers: + - brokers + - topics + - consumers + +processors: + batch: + +exporters: + otlp: + endpoint: "${env:OTLP_DESTINATION_ENDPOINT}" + tls: + insecure: false + headers: + signoz-access-token: "${env:SIGNOZ_INGESTION_KEY}" + +service: + pipelines: + metrics: + receivers: [kafkametrics] + processors: [batch] + exporters: [otlp] +``` + +If your brokers require authentication, add the relevant `tls`, `sasl`, or `kerberos` settings supported by the receiver. + +#### Set Environment Variables + +```bash +export KAFKA_BROKERS="broker-1:9092" +export KAFKA_CLUSTER_ALIAS="kafka-prod" +export OTLP_DESTINATION_ENDPOINT="ingest.us.signoz.cloud:443" +export SIGNOZ_INGESTION_KEY="signoz-ingestion-key" +``` + +#### Use collector config file + +Start the collector with: + +```bash +otelcol-contrib --config kafka-metrics-collection-config.yaml +``` + +If you already run a collector, merge this receiver and pipeline into the existing configuration instead of launching a separate process. 
diff --git a/pkg/query-service/app/integrations/builtin_integrations/kafka/config/prerequisites.md b/pkg/query-service/app/integrations/builtin_integrations/kafka/config/prerequisites.md new file mode 100644 index 00000000000..04f86061c44 --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/config/prerequisites.md @@ -0,0 +1,15 @@ +## Before You Begin + +To monitor Kafka with SigNoz, make sure the following requirements are already covered. + +- **A reachable Kafka cluster** + The OpenTelemetry Collector must be able to connect to at least one broker in the cluster. + +- **Broker metadata access** + If the cluster enforces authentication or ACLs, the credentials the collector uses must allow it to read broker, topic, partition, and consumer-group metadata. + +- **An OTEL Collector in your environment** + If needed, install an OpenTelemetry Collector first. Use a distribution that includes the `kafkametricsreceiver` (for example `otelcol-contrib`), and make sure the collector can forward metrics to SigNoz. + +- **Optional cluster alias** + If you operate more than one Kafka cluster, set `cluster_alias` in the receiver config so the dashboard can filter clusters cleanly.
diff --git a/pkg/query-service/app/integrations/builtin_integrations/kafka/icon.svg b/pkg/query-service/app/integrations/builtin_integrations/kafka/icon.svg new file mode 100644 index 00000000000..69852b90956 --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/icon.svg @@ -0,0 +1,20 @@ + + Kafka + + + + + + + + + + + + diff --git a/pkg/query-service/app/integrations/builtin_integrations/kafka/integration.json b/pkg/query-service/app/integrations/builtin_integrations/kafka/integration.json new file mode 100644 index 00000000000..6d72d0ffcc9 --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/integration.json @@ -0,0 +1,78 @@ +{ + "id": "kafka", + "title": "Kafka", + "description": "Monitor Kafka brokers, consumer lag, partition offsets, and replication health", + "author": { + "name": "SigNoz", + "email": "integrations@signoz.io", + "homepage": "https://signoz.io" + }, + "icon": "file://icon.svg", + "categories": ["Messaging Queue"], + "overview": "file://overview.md", + "configuration": [ + { + "title": "Prerequisites", + "instructions": "file://config/prerequisites.md" + }, + { + "title": "Collect Metrics", + "instructions": "file://config/collect-metrics.md" + } + ], + "assets": { + "logs": { + "pipelines": [] + }, + "dashboards": ["file://assets/dashboards/overview.json"], + "alerts": [] + }, + "connection_tests": {}, + "data_collected": { + "logs": [], + "metrics": [ + { + "name": "kafka_brokers", + "type": "sum", + "unit": "number", + "description": "Number of brokers detected in the cluster." + }, + { + "name": "kafka_consumer_group_lag", + "type": "gauge", + "unit": "number", + "description": "Approximate lag of a consumer group per topic partition." + }, + { + "name": "kafka_consumer_group_lag_sum", + "type": "gauge", + "unit": "number", + "description": "Approximate lag of a consumer group across all partitions in a topic." 
+ }, + { + "name": "kafka_partition_current_offset", + "type": "gauge", + "unit": "number", + "description": "Current partition offset." + }, + { + "name": "kafka_partition_oldest_offset", + "type": "gauge", + "unit": "number", + "description": "Oldest retained partition offset." + }, + { + "name": "kafka_partition_replicas_in_sync", + "type": "sum", + "unit": "number", + "description": "Number of in-sync replicas for a partition." + }, + { + "name": "kafka_topic_partitions", + "type": "sum", + "unit": "number", + "description": "Number of partitions in a topic." + } + ] + } +} diff --git a/pkg/query-service/app/integrations/builtin_integrations/kafka/overview.md b/pkg/query-service/app/integrations/builtin_integrations/kafka/overview.md new file mode 100644 index 00000000000..539a281bbb9 --- /dev/null +++ b/pkg/query-service/app/integrations/builtin_integrations/kafka/overview.md @@ -0,0 +1,3 @@ +### Monitor Kafka with SigNoz + +This integration gives you a focused Kafka Server dashboard built on top of the OpenTelemetry Kafka metrics receiver. It covers brokers, consumer groups, topic growth, partition health, and replication state so you can quickly spot lag or durability regressions.