Skip to content

Commit 3df64da

Browse files
robsunday, otelbot[bot], SylvainJuge, trask
authored
[JMX Insight] Hadoop jmx metrics semconv alignment (#14411)
Co-authored-by: otelbot <[email protected]> Co-authored-by: SylvainJuge <[email protected]> Co-authored-by: Trask Stalnaker <[email protected]>
1 parent bbc55b5 commit 3df64da

File tree

10 files changed

+813
-82
lines changed

10 files changed

+813
-82
lines changed

instrumentation/jmx-metrics/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ No targets are enabled by default. The supported target environments are listed
3131
- [kafka-broker](javaagent/kafka-broker.md)
3232
- [tomcat](library/tomcat.md)
3333
- [wildfly](library/wildfly.md)
34-
- [hadoop](javaagent/hadoop.md)
34+
- [hadoop](library/hadoop.md)
3535

3636
The [jvm](library/jvm.md) metrics definitions are also included in the [jmx-metrics library](./library)
3737
to allow reusing them without instrumentation. When using instrumentation, the [runtime-telemetry](../runtime-telemetry)

instrumentation/jmx-metrics/javaagent/hadoop.md

Lines changed: 0 additions & 15 deletions
This file was deleted.

instrumentation/jmx-metrics/javaagent/src/main/resources/jmx/rules/hadoop.yaml

Lines changed: 0 additions & 63 deletions
This file was deleted.

instrumentation/jmx-metrics/javaagent/src/test/java/io/opentelemetry/instrumentation/javaagent/jmx/JmxMetricInsightInstallerTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@
3131
class JmxMetricInsightInstallerTest {
3232
private static final String PATH_TO_ALL_EXISTING_RULES = "src/main/resources/jmx/rules";
3333
private static final Set<String> FILES_TO_BE_TESTED =
34-
new HashSet<>(
35-
Arrays.asList("activemq.yaml", "camel.yaml", "hadoop.yaml", "kafka-broker.yaml"));
34+
new HashSet<>(Arrays.asList("activemq.yaml", "camel.yaml", "kafka-broker.yaml"));
3635

3736
@Test
3837
void testToVerifyExistingRulesAreValid() throws Exception {
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Hadoop Metrics
2+
3+
Here is the list of metrics based on MBeans exposed by Hadoop.
4+
5+
| Metric Name | Type | Unit | Attributes | Description |
6+
|---------------------------------|---------------|--------------|-------------------|--------------------------------------------------------|
7+
| hadoop.dfs.capacity.limit      | UpDownCounter | By           | hadoop.node.name  | Current raw capacity of DataNodes.                     |
8+
| hadoop.dfs.capacity.used       | UpDownCounter | By           | hadoop.node.name  | Current used capacity across all DataNodes.            |
9+
| hadoop.dfs.block.count | UpDownCounter | {block} | hadoop.node.name | Current number of allocated blocks in the system. |
10+
| hadoop.dfs.block.missing | UpDownCounter | {block} | hadoop.node.name | Current number of missing blocks. |
11+
| hadoop.dfs.block.corrupt | UpDownCounter | {block} | hadoop.node.name | Current number of blocks with corrupt replicas. |
12+
| hadoop.dfs.volume.failure.count | Counter       | {failure}    | hadoop.node.name  | Total number of volume failures across all DataNodes.  |
13+
| hadoop.dfs.file.count | UpDownCounter | {file} | hadoop.node.name | Current number of files and directories. |
14+
| hadoop.dfs.connection.count | UpDownCounter | {connection} | hadoop.node.name | Current number of connections. |
15+
| hadoop.datanode.live | UpDownCounter | {node} | hadoop.node.name | Number of data nodes which are currently live. |
16+
| hadoop.datanode.dead | UpDownCounter | {node} | hadoop.node.name | Number of data nodes which are currently dead. |
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
---
# JMX metric rules for Hadoop NameNode, aligned with OpenTelemetry semantic conventions.
# All metrics below are read from attributes of the FSNamesystem MBean and carry the
# reporting host name as the hadoop.node.name metric attribute.
rules:
  - bean: Hadoop:service=NameNode,name=FSNamesystem
    prefix: hadoop.
    metricAttribute:
      # beanattr() reads an MBean attribute; the backslash escapes the literal dot
      # in the attribute name "tag.Hostname".
      hadoop.node.name: beanattr(tag\.Hostname)
    mapping:
      # hadoop.dfs.capacity.limit
      CapacityTotal:
        metric: dfs.capacity.limit
        type: updowncounter
        unit: By
        desc: Current raw capacity of DataNodes.
      # hadoop.dfs.capacity.used
      CapacityUsed:
        metric: dfs.capacity.used
        type: updowncounter
        unit: By
        desc: Current used capacity across all DataNodes.
      # hadoop.dfs.block.count
      BlocksTotal:
        metric: dfs.block.count
        type: updowncounter
        unit: "{block}"
        desc: Current number of allocated blocks in the system.
      # hadoop.dfs.block.missing
      MissingBlocks:
        metric: dfs.block.missing
        type: updowncounter
        unit: "{block}"
        desc: Current number of missing blocks.
      # hadoop.dfs.block.corrupt
      CorruptBlocks:
        metric: dfs.block.corrupt
        type: updowncounter
        unit: "{block}"
        desc: Current number of blocks with corrupt replicas.
      # hadoop.dfs.volume.failure.count
      # Monotonic counter, unlike the gauges above.
      VolumeFailuresTotal:
        metric: dfs.volume.failure.count
        type: counter
        unit: "{failure}"
        desc: Total number of volume failures across all DataNodes.
      # hadoop.dfs.file.count
      FilesTotal:
        metric: dfs.file.count
        type: updowncounter
        unit: "{file}"
        desc: Current number of files and directories.
      # hadoop.dfs.connection.count
      TotalLoad:
        metric: dfs.connection.count
        type: updowncounter
        unit: "{connection}"
        desc: Current number of connections.

      # hadoop.datanode.live
      NumLiveDataNodes:
        metric: datanode.live
        type: updowncounter
        unit: "{node}"
        desc: Number of data nodes which are currently live.
      # hadoop.datanode.dead
      NumDeadDataNodes:
        metric: datanode.dead
        type: updowncounter
        unit: "{node}"
        desc: Number of data nodes which are currently dead.
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
 * Copyright The OpenTelemetry Authors
 * SPDX-License-Identifier: Apache-2.0
 */

package io.opentelemetry.instrumentation.jmx.rules;

import static io.opentelemetry.instrumentation.jmx.rules.assertions.DataPointAttributes.attribute;

import io.opentelemetry.instrumentation.jmx.rules.assertions.AttributeMatcher;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.wait.strategy.Wait;
import org.testcontainers.images.builder.Transferable;

/**
 * Integration test for the Hadoop JMX metric rules (hadoop.yaml), run against containerized
 * Hadoop 2.x and 3.x NameNodes. Both tests assert the same set of metrics, since the rule file
 * is shared across versions.
 */
class HadoopTest extends TargetSystemTest {

  // Token in the bundled hadoop*-env.sh resources that is replaced with the actual
  // OTLP endpoint of the test collector before the file is copied into the container.
  public static final String ENDPOINT_PLACEHOLDER = "<<ENDPOINT_PLACEHOLDER>>";

  /** Verifies Hadoop 2.x metrics; the NameNode HTTP port is 50070 in this version. */
  @Test
  void testMetrics_Hadoop2x() throws URISyntaxException, IOException {
    List<String> yamlFiles = Collections.singletonList("hadoop.yaml");

    yamlFiles.forEach(this::validateYamlSyntax);

    // Hadoop startup script does not propagate env vars to launched hadoop daemons,
    // so all the env vars need to be embedded inside the hadoop-env.sh file
    GenericContainer<?> target =
        new GenericContainer<>("bmedora/hadoop:2.9-base")
            .withCopyToContainer(
                Transferable.of(readAndPreprocessEnvFile("hadoop2-env.sh")),
                "/hadoop/etc/hadoop/hadoop-env.sh")
            // fixed hostname so the hadoop.node.name attribute is predictable in assertions
            .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
            .withStartupTimeout(Duration.ofMinutes(3))
            .withExposedPorts(50070)
            .waitingFor(Wait.forListeningPorts(50070));

    copyAgentToTarget(target);
    copyYamlFilesToTarget(target, yamlFiles);

    startTarget(target);

    verifyMetrics(createMetricsVerifier());
  }

  /**
   * Reads the given env-file resource and substitutes {@link #ENDPOINT_PLACEHOLDER} with the
   * OTLP endpoint of the test collector, returning the resulting file content.
   *
   * @param fileName name of the env file on the test classpath
   * @return file content with the endpoint placeholder resolved
   * @throws URISyntaxException if the resource URL cannot be converted to a URI
   * @throws IOException if the resource cannot be read
   */
  private String readAndPreprocessEnvFile(String fileName) throws URISyntaxException, IOException {
    Path path = Paths.get(getClass().getClassLoader().getResource(fileName).toURI());

    String data;
    try (Stream<String> lines = Files.lines(path)) {
      data =
          lines
              .map(line -> line.replace(ENDPOINT_PLACEHOLDER, getOtlpEndpoint()))
              .collect(Collectors.joining("\n"));
    }

    return data;
  }

  /** Verifies Hadoop 3.x metrics; NameNode ports moved to 9870 (HTTP) and 9000 (RPC). */
  @Test
  void testMetrics_Hadoop3x() throws URISyntaxException, IOException {
    List<String> yamlFiles = Collections.singletonList("hadoop.yaml");

    yamlFiles.forEach(this::validateYamlSyntax);

    // Hadoop startup script does not propagate env vars to launched hadoop daemons,
    // so all the env vars need to be embedded inside the hadoop-env.sh file
    GenericContainer<?> target =
        new GenericContainer<>("loum/hadoop-pseudo:3.3.6")
            .withExposedPorts(9870, 9000)
            .withCopyToContainer(
                Transferable.of(readAndPreprocessEnvFile("hadoop3-env.sh")),
                "/opt/hadoop/etc/hadoop/hadoop-env.sh")
            // fixed hostname so the hadoop.node.name attribute is predictable in assertions
            .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
            .waitingFor(
                Wait.forListeningPorts(9870, 9000).withStartupTimeout(Duration.ofMinutes(3)));

    copyAgentToTarget(target);
    copyYamlFilesToTarget(target, yamlFiles);

    startTarget(target);

    verifyMetrics(createMetricsVerifier());
  }

  /**
   * Builds the verifier asserting every metric defined in hadoop.yaml: name, description, unit,
   * instrument type, and the hadoop.node.name attribute (which must equal the container hostname
   * "test-host" configured above). Descriptions/units must match the yaml rule file exactly.
   */
  private static MetricsVerifier createMetricsVerifier() {
    AttributeMatcher nodeNameAttribute = attribute("hadoop.node.name", "test-host");

    return MetricsVerifier.create()
        // strict mode disabled: the target may emit additional metrics not covered here
        .disableStrictMode()
        .add(
            "hadoop.dfs.capacity.limit",
            metric ->
                metric
                    .hasDescription("Current raw capacity of DataNodes.")
                    .hasUnit("By")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.capacity.used",
            metric ->
                metric
                    .hasDescription("Current used capacity across all DataNodes.")
                    .hasUnit("By")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.block.count",
            metric ->
                metric
                    .hasDescription("Current number of allocated blocks in the system.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.block.missing",
            metric ->
                metric
                    .hasDescription("Current number of missing blocks.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.block.corrupt",
            metric ->
                metric
                    .hasDescription("Current number of blocks with corrupt replicas.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.volume.failure.count",
            metric ->
                metric
                    .hasDescription("Total number of volume failures across all DataNodes.")
                    .hasUnit("{failure}")
                    .isCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.file.count",
            metric ->
                metric
                    .hasDescription("Current number of files and directories.")
                    .hasUnit("{file}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.connection.count",
            metric ->
                metric
                    .hasDescription("Current number of connections.")
                    .hasUnit("{connection}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.datanode.live",
            metric ->
                metric
                    .hasDescription("Number of data nodes which are currently live.")
                    .hasUnit("{node}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.datanode.dead",
            metric ->
                metric
                    .hasDescription("Number of data nodes which are currently dead.")
                    .hasUnit("{node}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute));
  }
}

instrumentation/jmx-metrics/library/src/test/java/io/opentelemetry/instrumentation/jmx/rules/TargetSystemTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public class TargetSystemTest {
6363
private static OtlpGrpcServer otlpServer;
6464
private static Path agentPath;
6565
private static Path testAppPath;
66+
6667
private static String otlpEndpoint;
6768

6869
private GenericContainer<?> targetSystem;
@@ -120,6 +121,10 @@ static void afterAll() {
120121
}
121122
}
122123

124+
protected static String getOtlpEndpoint() {
125+
return otlpEndpoint;
126+
}
127+
123128
protected static String javaAgentJvmArgument() {
124129
return "-javaagent:" + AGENT_PATH;
125130
}
@@ -150,7 +155,6 @@ protected static Map<String, String> otelConfigProperties(List<String> yamlFiles
150155
// disable runtime telemetry metrics
151156
config.put("otel.instrumentation.runtime-telemetry.enabled", "false");
152157
// set yaml config files to test
153-
config.put("otel.jmx.target", "tomcat");
154158
config.put(
155159
"otel.jmx.config",
156160
yamlFiles.stream()

0 commit comments

Comments
 (0)