Merged

Changes from 17 commits
2 changes: 1 addition & 1 deletion instrumentation/jmx-metrics/README.md
@@ -31,7 +31,7 @@ No targets are enabled by default. The supported target environments are listed
- [kafka-broker](javaagent/kafka-broker.md)
- [tomcat](library/tomcat.md)
- [wildfly](library/wildfly.md)
- [hadoop](javaagent/hadoop.md)
- [hadoop](library/hadoop.md)

The [jvm](library/jvm.md) metrics definitions are also included in the [jmx-metrics library](./library)
to allow reusing them without instrumentation. When using instrumentation, the [runtime-telemetry](../runtime-telemetry)
15 changes: 0 additions & 15 deletions instrumentation/jmx-metrics/javaagent/hadoop.md

This file was deleted.

This file was deleted.

@@ -31,8 +31,7 @@
class JmxMetricInsightInstallerTest {
private static final String PATH_TO_ALL_EXISTING_RULES = "src/main/resources/jmx/rules";
private static final Set<String> FILES_TO_BE_TESTED =
new HashSet<>(
Arrays.asList("activemq.yaml", "camel.yaml", "hadoop.yaml", "kafka-broker.yaml"));
new HashSet<>(Arrays.asList("activemq.yaml", "camel.yaml", "kafka-broker.yaml"));

@Test
void testToVerifyExistingRulesAreValid() throws Exception {
16 changes: 16 additions & 0 deletions instrumentation/jmx-metrics/library/hadoop.md
@@ -0,0 +1,16 @@
# Hadoop Metrics

Here is the list of metrics based on MBeans exposed by Hadoop.

| Metric Name | Type | Unit | Attributes | Description |
|---------------------------------|---------------|--------------|-------------------|--------------------------------------------------------|
| hadoop.dfs.capacity | UpDownCounter | By | hadoop.node.name | Current raw capacity of data nodes. |
| hadoop.dfs.capacity.used | UpDownCounter | By | hadoop.node.name | Current used capacity across all data nodes. |
| hadoop.dfs.block.count | UpDownCounter | {block} | hadoop.node.name | Current number of allocated blocks in the system. |
| hadoop.dfs.block.missing | UpDownCounter | {block} | hadoop.node.name | Current number of missing blocks. |
| hadoop.dfs.block.corrupt | UpDownCounter | {block} | hadoop.node.name | Current number of blocks with corrupt replicas. |
| hadoop.dfs.volume.failure.count | Counter | {failure} | hadoop.node.name | Total number of volume failures across all data nodes. |
| hadoop.dfs.file.count | UpDownCounter | {file} | hadoop.node.name | Current number of files and directories. |
| hadoop.dfs.connection.count | UpDownCounter | {connection} | hadoop.node.name | Current number of connections. |
| hadoop.dfs.data_node.live | UpDownCounter | {node} | hadoop.node.name | Number of data nodes which are currently live. |
| hadoop.dfs.data_node.dead | UpDownCounter | {node} | hadoop.node.name | Number of data nodes which are currently dead. |
@@ -0,0 +1,68 @@
---
rules:
  - bean: Hadoop:service=NameNode,name=FSNamesystem
    prefix: hadoop.dfs.
    metricAttribute:
      hadoop.node.name: beanattr(tag\.Hostname)
    mapping:
      # hadoop.dfs.capacity
      CapacityTotal:
        metric: capacity
        type: updowncounter
        unit: By
        desc: Current raw capacity of DataNodes.
      # hadoop.dfs.capacity.used
      CapacityUsed:
        metric: capacity.used
        type: updowncounter
        unit: By
        desc: Current used capacity across all DataNodes.
      # hadoop.dfs.block.count
      BlocksTotal:
        metric: block.count
        type: updowncounter
        unit: "{block}"
        desc: Current number of allocated blocks in the system.
      # hadoop.dfs.block.missing
      MissingBlocks:
        metric: block.missing
        type: updowncounter
        unit: "{block}"
        desc: Current number of missing blocks.
      # hadoop.dfs.block.corrupt
      CorruptBlocks:
        metric: block.corrupt
        type: updowncounter
        unit: "{block}"
        desc: Current number of blocks with corrupt replicas.
      # hadoop.dfs.volume.failure.count
      VolumeFailuresTotal:
        metric: volume.failure.count
        type: counter
        unit: "{failure}"
        desc: Total number of volume failures across all DataNodes.
      # hadoop.dfs.file.count
      FilesTotal:
        metric: file.count
        type: updowncounter
        unit: "{file}"
        desc: Current number of files and directories.
      # hadoop.dfs.connection.count
      TotalLoad:
        metric: connection.count
        type: updowncounter
        unit: "{connection}"
        desc: Current number of connections.

      # hadoop.dfs.data_node.live
      NumLiveDataNodes:
        metric: data_node.live
        type: updowncounter
        unit: "{node}"
        desc: Number of data nodes which are currently live.
      # hadoop.dfs.data_node.dead
      NumDeadDataNodes:
        metric: data_node.dead
        type: updowncounter
        unit: "{node}"
        desc: Number of data nodes which are currently dead.
Contributor
The datanode and namenode wording is quite widespread in the Hadoop docs, so I think we could get rid of the `_` (though this is probably my personal preference). Also, these metrics relate to the cluster and its nodes, not to the dfs part, even though they are captured on the same MBean object as the others.

So I would suggest simplifying those to:

  • hadoop.datanode.live instead of hadoop.dfs.data_node.live
  • hadoop.datanode.dead instead of hadoop.dfs.data_node.dead
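For illustration, a rough sketch (not part of this PR) of how that suggestion could be expressed in hadoop.yaml, assuming a second rule for the same MBean with a plain `hadoop.` prefix so the `dfs` segment is dropped:

```yaml
# hypothetical rule reflecting the suggested names; the existing
# NumLiveDataNodes / NumDeadDataNodes entries under the hadoop.dfs. rule
# would move here
- bean: Hadoop:service=NameNode,name=FSNamesystem
  prefix: hadoop.
  metricAttribute:
    hadoop.node.name: beanattr(tag\.Hostname)
  mapping:
    # hadoop.datanode.live
    NumLiveDataNodes:
      metric: datanode.live
      type: updowncounter
      unit: "{node}"
      desc: Number of data nodes which are currently live.
    # hadoop.datanode.dead
    NumDeadDataNodes:
      metric: datanode.dead
      type: updowncounter
      unit: "{node}"
      desc: Number of data nodes which are currently dead.
```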

Contributor Author
Hadoop docs are quite inconsistent here. I saw the DataNode and NameNode pascal-case notation in many places, so I converted them to snake case. It follows this recommendation (see the bullet starting with "For each multi-word dot-delimited component") and matches other metric names, like jvm.file_descriptor.count, http.client.open_connections, etc...
I agree that lowercase is easier to read, so I'd gladly change it. The question is what others think about it...

I'll get rid of the dfs segment - good observation.

@@ -0,0 +1,182 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.instrumentation.jmx.rules;

import static io.opentelemetry.instrumentation.jmx.rules.assertions.DataPointAttributes.attribute;

import io.opentelemetry.instrumentation.jmx.rules.assertions.AttributeMatcher;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.wait.strategy.Wait;
import org.testcontainers.images.builder.Transferable;

class HadoopTest extends TargetSystemTest {

public static final String ENDPOINT_PLACEHOLDER = "<<ENDPOINT_PLACEHOLDER>>";

@Test
void testMetrics_Hadoop2x() throws URISyntaxException, IOException {
List<String> yamlFiles = Collections.singletonList("hadoop.yaml");

yamlFiles.forEach(this::validateYamlSyntax);

// Hadoop startup script does not propagate env vars to launched hadoop daemons,
// so all the env vars need to be embedded inside the hadoop-env.sh file
GenericContainer<?> target =
new GenericContainer<>("bmedora/hadoop:2.9-base")
.withCopyToContainer(
Transferable.of(readAndPreprocessEnvFile("hadoop2-env.sh")),
"/hadoop/etc/hadoop/hadoop-env.sh")
.withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
.withStartupTimeout(Duration.ofMinutes(3))
.withExposedPorts(50070)
.waitingFor(Wait.forListeningPorts(50070));

copyAgentToTarget(target);
copyYamlFilesToTarget(target, yamlFiles);

startTarget(target);

verifyMetrics(createMetricsVerifier());
}

private String readAndPreprocessEnvFile(String fileName) throws URISyntaxException, IOException {
Path path = Paths.get(getClass().getClassLoader().getResource(fileName).toURI());

String data;
try (Stream<String> lines = Files.lines(path)) {
data =
lines
.map(line -> line.replace(ENDPOINT_PLACEHOLDER, getOtlpEndpoint()))
.collect(Collectors.joining("\n"));
}

return data;
}

@Test
void testMetrics_Hadoop3x() throws URISyntaxException, IOException {
List<String> yamlFiles = Collections.singletonList("hadoop.yaml");

yamlFiles.forEach(this::validateYamlSyntax);

// Hadoop startup script does not propagate env vars to launched hadoop daemons,
// so all the env vars need to be embedded inside the hadoop-env.sh file
GenericContainer<?> target =
new GenericContainer<>("loum/hadoop-pseudo:3.3.6")
.withExposedPorts(9870, 9000)
.withCopyToContainer(
Transferable.of(readAndPreprocessEnvFile("hadoop3-env.sh")),
"/opt/hadoop/etc/hadoop/hadoop-env.sh")
.withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
.waitingFor(
Wait.forListeningPorts(9870, 9000).withStartupTimeout(Duration.ofMinutes(3)));

copyAgentToTarget(target);
copyYamlFilesToTarget(target, yamlFiles);

startTarget(target);

verifyMetrics(createMetricsVerifier());
}

private static MetricsVerifier createMetricsVerifier() {
AttributeMatcher nodeNameAttribute = attribute("hadoop.node.name", "test-host");

return MetricsVerifier.create()
.disableStrictMode()
.add(
"hadoop.dfs.capacity",
metric ->
metric
.hasDescription("Current raw capacity of DataNodes.")
.hasUnit("By")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.capacity.used",
metric ->
metric
.hasDescription("Current used capacity across all DataNodes.")
.hasUnit("By")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.block.count",
metric ->
metric
.hasDescription("Current number of allocated blocks in the system.")
.hasUnit("{block}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.block.missing",
metric ->
metric
.hasDescription("Current number of missing blocks.")
.hasUnit("{block}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.block.corrupt",
metric ->
metric
.hasDescription("Current number of blocks with corrupt replicas.")
.hasUnit("{block}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.volume.failure.count",
metric ->
metric
.hasDescription("Total number of volume failures across all DataNodes.")
.hasUnit("{failure}")
.isCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.file.count",
metric ->
metric
.hasDescription("Current number of files and directories.")
.hasUnit("{file}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.connection.count",
metric ->
metric
.hasDescription("Current number of connections.")
.hasUnit("{connection}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.data_node.live",
metric ->
metric
.hasDescription("Number of data nodes which are currently live.")
.hasUnit("{node}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute))
.add(
"hadoop.dfs.data_node.dead",
metric ->
metric
.hasDescription("Number of data nodes which are currently dead.")
.hasUnit("{node}")
.isUpDownCounter()
.hasDataPointsWithOneAttribute(nodeNameAttribute));
}
}
@@ -63,6 +63,7 @@ public class TargetSystemTest {
private static OtlpGrpcServer otlpServer;
private static Path agentPath;
private static Path testAppPath;

private static String otlpEndpoint;

private GenericContainer<?> targetSystem;
@@ -120,6 +121,10 @@ static void afterAll() {
}
}

protected static String getOtlpEndpoint() {
return otlpEndpoint;
}

protected static String javaAgentJvmArgument() {
return "-javaagent:" + AGENT_PATH;
}
@@ -150,7 +155,6 @@ protected static Map<String, String> otelConfigProperties(List<String> yamlFiles
// disable runtime telemetry metrics
config.put("otel.instrumentation.runtime-telemetry.enabled", "false");
// set yaml config files to test
config.put("otel.jmx.target", "tomcat");
config.put(
"otel.jmx.config",
yamlFiles.stream()