Add configuration for JMX metrics.

jefchien · jefchien · commit 230140a22e8e · 2024-10-01T14:54:25.000-04:00
diff --git a/instrumentation/jmx-metrics/src/main/resources/README.md b/instrumentation/jmx-metrics/src/main/resources/README.md
@@ -4,6 +4,13 @@ instrumentation support the same metrics as the [JMX Metric Gatherer](https://gi
 
 It is required at least until [open-telemetry/opentelemetry-java-instrumentation#9765](https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/9765) is addressed.
 
+### view.yaml
+A [Metric View](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#view) is an OpenTelemetry SDK feature
+that allows users to customize the metrics emitted by the SDK. The SDK also supports [configuring views
+via YAML](https://github.com/open-telemetry/opentelemetry-java/tree/main/sdk-extensions/incubator#view-file-configuration),
+which can be specified via property or environment variable. In this case, the view is configured to only retain metrics
+from the JMX Metric Insight instrumentation.
+
 ```
 OTEL_EXPERIMENTAL_METRICS_VIEW_CONFIG: classpath:/jmx/view.yaml 
 ```
diff --git a/instrumentation/jmx-metrics/src/main/resources/jmx/rules/kafka-consumer.yaml b/instrumentation/jmx-metrics/src/main/resources/jmx/rules/kafka-consumer.yaml
@@ -0,0 +1,51 @@
+---
+rules:
+  - bean: kafka.consumer:client-id=*,type=consumer-fetch-manager-metrics
+    metricAttribute:
+      client-id: param(client-id)
+    mapping:
+      fetch-rate:
+        metric: kafka.consumer.fetch-rate
+        type: gauge
+        desc: The number of fetch requests for all topics per second
+        unit: "1"
+      records-lag-max:
+        metric: kafka.consumer.records-lag-max
+        type: gauge
+        desc: Number of messages the consumer lags behind the producer
+        unit: "1"
+      bytes-consumed-rate:
+        metric: kafka.consumer.total.bytes-consumed-rate
+        type: gauge
+        desc: The average number of bytes consumed for all topics per second
+        unit: by
+      fetch-size-avg:
+        metric: kafka.consumer.total.fetch-size-avg
+        type: gauge
+        desc: The average number of bytes fetched per request for all topics
+        unit: by
+      records-consumed-rate:
+        metric: kafka.consumer.total.records-consumed-rate
+        type: gauge
+        desc: The average number of records consumed for all topics per second
+        unit: "1"
+  - bean: kafka.consumer:client-id=*,topic=*,type=consumer-fetch-manager-metrics
+    metricAttribute:
+      client-id: param(client-id)
+      topic: param(topic)
+    mapping:
+      bytes-consumed-rate:
+        metric: kafka.consumer.bytes-consumed-rate
+        type: gauge
+        desc: The average number of bytes consumed per second
+        unit: by
+      fetch-size-avg:
+        metric: kafka.consumer.fetch-size-avg
+        type: gauge
+        desc: The average number of bytes fetched per request
+        unit: by
+      records-consumed-rate:
+        metric: kafka.consumer.records-consumed-rate
+        type: gauge
+        desc: The average number of records consumed per second
+        unit: "1"
diff --git a/instrumentation/jmx-metrics/src/main/resources/jmx/rules/kafka-producer.yaml b/instrumentation/jmx-metrics/src/main/resources/jmx/rules/kafka-producer.yaml
@@ -0,0 +1,61 @@
+---
+rules:
+  - bean: kafka.producer:client-id=*,type=producer-metrics
+    metricAttribute:
+      client-id: param(client-id)
+    mapping:
+      io-wait-time-ns-avg:
+        metric: kafka.producer.io-wait-time-ns-avg
+        type: gauge
+        desc: The average length of time the I/O thread spent waiting for a socket ready for reads or writes
+        unit: ns
+      outgoing-byte-rate:
+        metric: kafka.producer.outgoing-byte-rate
+        type: gauge
+        desc: The average number of outgoing bytes sent per second to all servers
+        unit: by
+      request-latency-avg:
+        metric: kafka.producer.request-latency-avg
+        type: gauge
+        desc: The average request latency
+        unit: ms
+      request-rate:
+        metric: kafka.producer.request-rate  # same kafka.producer. prefix as the sibling metrics
+        type: gauge
+        desc: The average number of requests sent per second
+        unit: "1"
+      response-rate:
+        metric: kafka.producer.response-rate
+        type: gauge
+        desc: Responses received per second
+        unit: "1"
+  - bean: kafka.producer:client-id=*,topic=*,type=producer-topic-metrics
+    metricAttribute:
+      client-id: param(client-id)
+      topic: param(topic)
+    mapping:
+      byte-rate:
+        metric: kafka.producer.byte-rate
+        type: gauge
+        desc: The average number of bytes sent per second for a topic
+        unit: by
+      compression-rate:
+        metric: kafka.producer.compression-rate
+        type: gauge
+        desc: The average compression rate of record batches for a topic
+        unit: "1"
+      record-error-rate:
+        metric: kafka.producer.record-error-rate
+        type: gauge
+        desc: The average per-second number of record sends that resulted in errors for a topic
+        unit: "1"
+      record-retry-rate:
+        metric: kafka.producer.record-retry-rate
+        type: gauge
+        desc: The average per-second number of retried record sends for a topic
+        unit: "1"
+      record-send-rate:
+        metric: kafka.producer.record-send-rate
+        type: gauge
+        desc: The average number of records sent per second for a topic
+        unit: "1"
diff --git a/instrumentation/jmx-metrics/src/main/resources/jmx/rules/kafka.yaml b/instrumentation/jmx-metrics/src/main/resources/jmx/rules/kafka.yaml
@@ -0,0 +1,234 @@
+---
+rules:
+  - bean: kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec
+    mapping:
+      Count:
+        metric: kafka.message.count
+        type: counter
+        desc: The number of messages received by the broker
+        unit: "{messages}"
+  - bean: kafka.server:type=BrokerTopicMetrics,name=TotalProduceRequestsPerSec
+    metricAttribute:
+      type: const(produce)
+    mapping:
+      Count:
+        metric: kafka.request.count
+        type: counter
+        desc: The number of requests received by the broker
+        unit: "{requests}"
+  - bean: kafka.server:type=BrokerTopicMetrics,name=TotalFetchRequestsPerSec
+    metricAttribute:
+      type: const(fetch)
+    mapping:
+      Count:
+        metric: kafka.request.count
+        type: counter
+        desc: The number of requests received by the broker
+        unit: "{requests}"
+  - bean: kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec
+    metricAttribute:
+      type: const(produce)
+    mapping:
+      Count:
+        metric: kafka.request.failed
+        type: counter
+        desc: The number of requests to the broker resulting in a failure
+        unit: "{requests}"
+  - bean: kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec
+    metricAttribute:
+      type: const(fetch)
+    mapping:
+      Count:
+        metric: kafka.request.failed
+        type: counter
+        desc: The number of requests to the broker resulting in a failure
+        unit: "{requests}"
+  - bean: kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce
+    metricAttribute:
+      type: const(produce)
+    unit: ms
+    mapping:
+      Count:
+        metric: kafka.request.time.total
+        type: counter
+        desc: The total time the broker has taken to service requests
+      50thPercentile:
+        metric: kafka.request.time.50p
+        type: gauge
+        desc: The 50th percentile time the broker has taken to service requests
+      99thPercentile:
+        metric: kafka.request.time.99p
+        type: gauge
+        desc: The 99th percentile time the broker has taken to service requests
+      Mean:
+        metric: kafka.request.time.avg
+        type: gauge
+        desc: The average time the broker has taken to service requests
+  - bean: kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer
+    metricAttribute:
+      type: const(fetchconsumer)
+    unit: ms
+    mapping:
+      Count:
+        metric: kafka.request.time.total
+        type: counter
+        desc: The total time the broker has taken to service requests
+      50thPercentile:
+        metric: kafka.request.time.50p
+        type: gauge
+        desc: The 50th percentile time the broker has taken to service requests
+      99thPercentile:
+        metric: kafka.request.time.99p
+        type: gauge
+        desc: The 99th percentile time the broker has taken to service requests
+      Mean:
+        metric: kafka.request.time.avg
+        type: gauge
+        desc: The average time the broker has taken to service requests
+  - bean: kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower
+    metricAttribute:
+      type: const(fetchfollower)
+    unit: ms
+    mapping:
+      Count:
+        metric: kafka.request.time.total
+        type: counter
+        desc: The total time the broker has taken to service requests
+      50thPercentile:
+        metric: kafka.request.time.50p
+        type: gauge
+        desc: The 50th percentile time the broker has taken to service requests
+      99thPercentile:
+        metric: kafka.request.time.99p
+        type: gauge
+        desc: The 99th percentile time the broker has taken to service requests
+      Mean:
+        metric: kafka.request.time.avg
+        type: gauge
+        desc: The average time the broker has taken to service requests
+  - bean: kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec
+    metricAttribute:
+      direction: const(in)
+    mapping:
+      Count:
+        metric: kafka.network.io
+        type: counter
+        desc: The bytes received or sent by the broker
+        unit: by
+  - bean: kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec
+    metricAttribute:
+      direction: const(out)
+    mapping:
+      Count:
+        metric: kafka.network.io
+        type: counter
+        desc: The bytes received or sent by the broker
+        unit: by
+  - bean: kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Produce
+    metricAttribute:
+      type: const(produce)
+    mapping:
+      Value:
+        metric: kafka.purgatory.size
+        type: gauge
+        desc: The number of requests waiting in purgatory
+        unit: "{requests}"
+  - bean: kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Fetch
+    metricAttribute:
+      type: const(fetch)
+    mapping:
+      Value:
+        metric: kafka.purgatory.size
+        type: gauge
+        desc: The number of requests waiting in purgatory
+        unit: "{requests}"
+  - bean: kafka.server:type=ReplicaManager,name=PartitionCount
+    mapping:
+      Value:
+        metric: kafka.partition.count
+        type: gauge
+        desc: The number of partitions on the broker
+        unit: "{partitions}"
+  - bean: kafka.controller:type=KafkaController,name=OfflinePartitionsCount
+    mapping:
+      Value:
+        metric: kafka.partition.offline
+        type: gauge
+        desc: The number of partitions offline
+        unit: "{partitions}"
+  - bean: kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions
+    mapping:
+      Value:
+        metric: kafka.partition.under_replicated
+        type: gauge
+        desc: The number of under replicated partitions
+        unit: "{partitions}"
+  - bean: kafka.server:type=ReplicaManager,name=IsrShrinksPerSec
+    metricAttribute:
+      operation: const(shrink)
+    mapping:
+      Count:
+        metric: kafka.isr.operation.count
+        type: counter
+        desc: The number of in-sync replica shrink and expand operations
+        unit: "{operations}"
+  - bean: kafka.server:type=ReplicaManager,name=IsrExpandsPerSec
+    metricAttribute:
+      operation: const(expand)
+    mapping:
+      Count:
+        metric: kafka.isr.operation.count
+        type: counter
+        desc: The number of in-sync replica shrink and expand operations
+        unit: "{operations}"
+  - bean: kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica
+    mapping:
+      Value:
+        metric: kafka.max.lag
+        type: gauge
+        desc: max lag in messages between follower and leader replicas
+        unit: "{messages}"
+  - bean: kafka.controller:type=KafkaController,name=ActiveControllerCount
+    mapping:
+      Value:
+        metric: kafka.controller.active.count
+        type: gauge
+        desc: controller is active on broker
+        unit: "{controllers}"
+  - bean: kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs
+    mapping:
+      Count:
+        metric: kafka.leader.election.rate
+        type: counter
+        desc: leader election rate - increasing indicates broker failures
+        unit: "{elections}"
+  - bean: kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec
+    mapping:
+      Count:
+        metric: kafka.unclean.election.rate
+        type: counter
+        desc: unclean leader election rate - increasing indicates broker failures
+        unit: "{elections}"
+  - bean: kafka.network:type=RequestChannel,name=RequestQueueSize
+    mapping:
+      Value:
+        metric: kafka.request.queue
+        type: gauge
+        desc: size of the request queue
+        unit: "{requests}"
+  - bean: kafka.log:type=LogFlushStats,name=LogFlushRateAndTimeMs
+    unit: ms
+    prefix: kafka.logs.flush.time.
+    mapping:
+      Count:
+        metric: count
+        type: counter
+        desc: log flush count
+      50thPercentile:
+        metric: median
+        type: gauge
+        desc: log flush time - 50th percentile
+      99thPercentile:
+        metric: 99p
+        type: gauge
+        desc: log flush time - 99th percentile
diff --git a/instrumentation/jmx-metrics/src/main/resources/jmx/rules/tomcat.yaml b/instrumentation/jmx-metrics/src/main/resources/jmx/rules/tomcat.yaml
diff --git a/instrumentation/jmx-metrics/src/main/resources/jmx/view.yaml b/instrumentation/jmx-metrics/src/main/resources/jmx/view.yaml