[JMX Insight] Hadoop JMX metrics semconv alignment #14411
Changes from 17 commits
This file was deleted.
This file was deleted.
@@ -0,0 +1,16 @@
# Hadoop Metrics

Here is the list of metrics based on MBeans exposed by Hadoop.

| Metric Name                     | Type          | Unit         | Attributes        | Description                                             |
|---------------------------------|---------------|--------------|-------------------|---------------------------------------------------------|
| hadoop.dfs.capacity             | UpDownCounter | By           | hadoop.node.name  | Current raw capacity of DataNodes.                      |
| hadoop.dfs.capacity.used        | UpDownCounter | By           | hadoop.node.name  | Current used capacity across all DataNodes.             |
| hadoop.dfs.block.count          | UpDownCounter | {block}      | hadoop.node.name  | Current number of allocated blocks in the system.       |
| hadoop.dfs.block.missing        | UpDownCounter | {block}      | hadoop.node.name  | Current number of missing blocks.                       |
| hadoop.dfs.block.corrupt        | UpDownCounter | {block}      | hadoop.node.name  | Current number of blocks with corrupt replicas.         |
| hadoop.dfs.volume.failure.count | Counter       | {failure}    | hadoop.node.name  | Total number of volume failures across all DataNodes.   |
| hadoop.dfs.file.count           | UpDownCounter | {file}       | hadoop.node.name  | Current number of files and directories.                |
| hadoop.dfs.connection.count     | UpDownCounter | {connection} | hadoop.node.name  | Current number of connections.                          |
| hadoop.dfs.data_node.live       | UpDownCounter | {node}       | hadoop.node.name  | Number of data nodes which are currently live.          |
| hadoop.dfs.data_node.dead       | UpDownCounter | {node}       | hadoop.node.name  | Number of data nodes which are currently dead.          |
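All of these values come from a single MBean on the NameNode, `Hadoop:service=NameNode,name=FSNamesystem`, and the `hadoop.node.name` attribute is taken from its `tag.Hostname` bean attribute. For orientation only (not part of this change), here is a minimal sketch of reading two of those attributes over a remote JMX connection; the service URL, port, and class name are illustrative assumptions:

```java
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

public class HadoopFsNamesystemProbe {
  public static void main(String[] args) throws Exception {
    // Illustrative endpoint; the NameNode must be started with remote JMX enabled.
    JMXServiceURL url =
        new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:9999/jmxrmi");
    JMXConnector connector = JMXConnectorFactory.connect(url);
    try {
      MBeanServerConnection connection = connector.getMBeanServerConnection();
      ObjectName fsNamesystem = new ObjectName("Hadoop:service=NameNode,name=FSNamesystem");

      // Source of hadoop.dfs.capacity (bytes)
      Object capacityTotal = connection.getAttribute(fsNamesystem, "CapacityTotal");
      // Source of the hadoop.node.name metric attribute
      Object hostname = connection.getAttribute(fsNamesystem, "tag.Hostname");

      System.out.println("CapacityTotal=" + capacityTotal + ", tag.Hostname=" + hostname);
    } finally {
      connector.close();
    }
  }
}
```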
@@ -0,0 +1,68 @@
---
rules:
  - bean: Hadoop:service=NameNode,name=FSNamesystem
    prefix: hadoop.dfs.
    metricAttribute:
      hadoop.node.name: beanattr(tag\.Hostname)
    mapping:
      # hadoop.dfs.capacity
      CapacityTotal:
        metric: capacity
        type: updowncounter
        unit: By
        desc: Current raw capacity of DataNodes.
      # hadoop.dfs.capacity.used
      CapacityUsed:
        metric: capacity.used
        type: updowncounter
        unit: By
        desc: Current used capacity across all DataNodes.
      # hadoop.dfs.block.count
      BlocksTotal:
        metric: block.count
        type: updowncounter
        unit: "{block}"
        desc: Current number of allocated blocks in the system.
      # hadoop.dfs.block.missing
      MissingBlocks:
        metric: block.missing
        type: updowncounter
        unit: "{block}"
        desc: Current number of missing blocks.
      # hadoop.dfs.block.corrupt
      CorruptBlocks:
        metric: block.corrupt
        type: updowncounter
        unit: "{block}"
        desc: Current number of blocks with corrupt replicas.
      # hadoop.dfs.volume.failure.count
      VolumeFailuresTotal:
        metric: volume.failure.count
        type: counter
        unit: "{failure}"
        desc: Total number of volume failures across all DataNodes.
      # hadoop.dfs.file.count
      FilesTotal:
        metric: file.count
        type: updowncounter
        unit: "{file}"
        desc: Current number of files and directories.
      # hadoop.dfs.connection.count
      TotalLoad:
        metric: connection.count
        type: updowncounter
        unit: "{connection}"
        desc: Current number of connections.
      # hadoop.dfs.data_node.live
      NumLiveDataNodes:
        metric: data_node.live
        type: updowncounter
        unit: "{node}"
        desc: Number of data nodes which are currently live.
      # hadoop.dfs.data_node.dead
      NumDeadDataNodes:
        metric: data_node.dead
        type: updowncounter
        unit: "{node}"
        desc: Number of data nodes which are currently dead.
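Each mapping entry reads as: the bean attribute on the left becomes an instrument whose name is the rule's `prefix` plus the `metric` field, with the given `type`, `unit`, and `desc`, and with `hadoop.node.name` attached from `beanattr(tag\.Hostname)`. As a rough sketch of those semantics only (the JMX rule engine does the real work; the class and helper methods below are hypothetical stand-ins), the `TotalLoad` entry corresponds to an observable up-down counter along these lines:

```java
import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.Meter;

class ConnectionCountRuleSketch {
  // Hypothetical stand-ins for the JMX reads performed by the rule engine.
  static long readTotalLoad() {
    return 42; // value of the TotalLoad bean attribute
  }

  static String readHostname() {
    return "test-host"; // value of the tag.Hostname bean attribute
  }

  static void register() {
    Meter meter = GlobalOpenTelemetry.getMeter("hadoop-rule-sketch");
    Attributes attributes =
        Attributes.of(AttributeKey.stringKey("hadoop.node.name"), readHostname());

    // prefix "hadoop.dfs." + metric "connection.count", type updowncounter, unit "{connection}"
    meter
        .upDownCounterBuilder("hadoop.dfs.connection.count")
        .setUnit("{connection}")
        .setDescription("Current number of connections.")
        .buildWithCallback(measurement -> measurement.record(readTotalLoad(), attributes));
  }
}
```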
@@ -0,0 +1,182 @@
/*
 * Copyright The OpenTelemetry Authors
 * SPDX-License-Identifier: Apache-2.0
 */

package io.opentelemetry.instrumentation.jmx.rules;

import static io.opentelemetry.instrumentation.jmx.rules.assertions.DataPointAttributes.attribute;

import io.opentelemetry.instrumentation.jmx.rules.assertions.AttributeMatcher;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.wait.strategy.Wait;
import org.testcontainers.images.builder.Transferable;

class HadoopTest extends TargetSystemTest {

  public static final String ENDPOINT_PLACEHOLDER = "<<ENDPOINT_PLACEHOLDER>>";

  @Test
  void testMetrics_Hadoop2x() throws URISyntaxException, IOException {
    List<String> yamlFiles = Collections.singletonList("hadoop.yaml");

    yamlFiles.forEach(this::validateYamlSyntax);

    // The Hadoop startup script does not propagate env vars to the launched Hadoop daemons,
    // so all the env vars need to be embedded inside the hadoop-env.sh file.
    GenericContainer<?> target =
        new GenericContainer<>("bmedora/hadoop:2.9-base")
            .withCopyToContainer(
                Transferable.of(readAndPreprocessEnvFile("hadoop2-env.sh")),
                "/hadoop/etc/hadoop/hadoop-env.sh")
            .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
            .withStartupTimeout(Duration.ofMinutes(3))
            .withExposedPorts(50070)
            .waitingFor(Wait.forListeningPorts(50070));

    copyAgentToTarget(target);
    copyYamlFilesToTarget(target, yamlFiles);

    startTarget(target);

    verifyMetrics(createMetricsVerifier());
  }

  // Reads the given env file from test resources and replaces the OTLP endpoint placeholder
  // with the actual collector endpoint.
  private String readAndPreprocessEnvFile(String fileName) throws URISyntaxException, IOException {
    Path path = Paths.get(getClass().getClassLoader().getResource(fileName).toURI());

    String data;
    try (Stream<String> lines = Files.lines(path)) {
      data =
          lines
              .map(line -> line.replace(ENDPOINT_PLACEHOLDER, getOtlpEndpoint()))
              .collect(Collectors.joining("\n"));
    }

    return data;
  }

  @Test
  void testMetrics_Hadoop3x() throws URISyntaxException, IOException {
    List<String> yamlFiles = Collections.singletonList("hadoop.yaml");

    yamlFiles.forEach(this::validateYamlSyntax);

    // The Hadoop startup script does not propagate env vars to the launched Hadoop daemons,
    // so all the env vars need to be embedded inside the hadoop-env.sh file.
    GenericContainer<?> target =
        new GenericContainer<>("loum/hadoop-pseudo:3.3.6")
            .withExposedPorts(9870, 9000)
            .withCopyToContainer(
                Transferable.of(readAndPreprocessEnvFile("hadoop3-env.sh")),
                "/opt/hadoop/etc/hadoop/hadoop-env.sh")
            .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
            .waitingFor(
                Wait.forListeningPorts(9870, 9000).withStartupTimeout(Duration.ofMinutes(3)));

    copyAgentToTarget(target);
    copyYamlFilesToTarget(target, yamlFiles);

    startTarget(target);

    verifyMetrics(createMetricsVerifier());
  }

  private static MetricsVerifier createMetricsVerifier() {
    AttributeMatcher nodeNameAttribute = attribute("hadoop.node.name", "test-host");

    return MetricsVerifier.create()
        .disableStrictMode()
        .add(
            "hadoop.dfs.capacity",
            metric ->
                metric
                    .hasDescription("Current raw capacity of DataNodes.")
                    .hasUnit("By")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.capacity.used",
            metric ->
                metric
                    .hasDescription("Current used capacity across all DataNodes.")
                    .hasUnit("By")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.block.count",
            metric ->
                metric
                    .hasDescription("Current number of allocated blocks in the system.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.block.missing",
            metric ->
                metric
                    .hasDescription("Current number of missing blocks.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.block.corrupt",
            metric ->
                metric
                    .hasDescription("Current number of blocks with corrupt replicas.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.volume.failure.count",
            metric ->
                metric
                    .hasDescription("Total number of volume failures across all DataNodes.")
                    .hasUnit("{failure}")
                    .isCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.file.count",
            metric ->
                metric
                    .hasDescription("Current number of files and directories.")
                    .hasUnit("{file}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.connection.count",
            metric ->
                metric
                    .hasDescription("Current number of connections.")
                    .hasUnit("{connection}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.data_node.live",
            metric ->
                metric
                    .hasDescription("Number of data nodes which are currently live.")
                    .hasUnit("{node}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.dfs.data_node.dead",
            metric ->
                metric
                    .hasDescription("Number of data nodes which are currently dead.")
                    .hasUnit("{node}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute));
  }
}