Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modules/apm/NAMING.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ Examples :

### Attributes

Do not use **high cardinality** attributes, this might result in the APM Java agent dropping events.

Attribute names should follow the same rules. In particular, these rules apply to attributes too:
* elements and separators
* hierarchy/namespaces
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@

package org.elasticsearch.telemetry.apm.internal;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.common.regex.Regex;

import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -40,6 +45,20 @@ public class MetricNameValidator {
"security-crypto"
);

// forbidden attributes known to cause issues due to mapping conflicts or high cardinality
static final Predicate<String> FORBIDDEN_ATTRIBUTE_NAMES = Regex.simpleMatcher(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBC this is just assuming the naming that a developer would use, but doesn't guarantee the same metric isn't used with a different name, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's two issues:

  • some fields get mapped to timestamps (see ES-13074), we have to prevent usage of these to prevent such metrics from being silently dropped at ingestion. such errors don't surface anywhere unfortunately.
  • high cardinality metrics, adding index here just as precaution for this not to happen again. i also tried to forbid *_id as it indicates some higher cardinality label but there's still two usages of it we have to fix first

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering if we should change the interfaces to require metric names to be models as enums to prevent uncontrolled cardinality. Though, that's a massive change :(

"index",
"*.timestamp",
"*_timestamp",
"created",
"*.created",
"*.creation_date",
"ingested",
"*.ingested",
"*.start",
"*.end"
);

private MetricNameValidator() {}

/**
Expand All @@ -65,6 +84,20 @@ public static String validate(String metricName) {
return metricName;
}

public static boolean validateAttributeNames(Map<String, Object> attributes) {
if (attributes == null && attributes.isEmpty()) {
return true;
}
for (String attribute : attributes.keySet()) {
if (FORBIDDEN_ATTRIBUTE_NAMES.test(attribute)) {
LogManager.getLogger(MetricNameValidator.class)
.warn("Attribute name [{}] is forbidden due to potential mapping conflicts or assumed high cardinality", attribute);
return false;
}
}
return true;
}

/**
* Due to backwards compatibility some metric names would have to skip validation.
* This is for instance where a threadpool name is too long, or contains `-`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.telemetry.apm.internal.MetricNameValidator;
import org.elasticsearch.telemetry.metric.DoubleWithAttributes;
import org.elasticsearch.telemetry.metric.LongWithAttributes;

Expand All @@ -30,6 +31,7 @@ static Attributes fromMap(Map<String, Object> attributes) {
if (attributes == null || attributes.isEmpty()) {
return Attributes.empty();
}
assert MetricNameValidator.validateAttributeNames(attributes) : "invalid metric attributes";
var builder = Attributes.builder();
attributes.forEach((k, v) -> {
if (v instanceof String value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,31 @@ public void testDoubleAsyncAdapter() throws Exception {
assertThat(metrics, hasSize(0));
}

public void testNullGaugeRecord() throws Exception {
public void testLongWithInvalidAttribute() {
registry.registerLongAsyncCounter(
"es.test.name.total",
"desc",
"unit",
() -> new LongWithAttributes(1, Map.of("high_cardinality_id", "27932451"))
);

AssertionError error = assertThrows(AssertionError.class, otelMeter::collectMetrics);
assertThat(error.getMessage(), equalTo("invalid metric attributes"));
}

public void testDoubleWithInvalidAttribute() {
registry.registerDoubleAsyncCounter(
"es.test.name.total",
"desc",
"unit",
() -> new DoubleWithAttributes(1.0, Map.of("has_timestamp", "false"))
);

AssertionError error = assertThrows(AssertionError.class, otelMeter::collectMetrics);
assertThat(error.getMessage(), equalTo("invalid metric attributes"));
}

public void testNullRecord() throws Exception {
DoubleAsyncCounter dcounter = registry.registerDoubleAsyncCounter(
"es.test.name.total",
"desc",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,28 @@ public void testNullGaugeRecord() throws Exception {
metrics = otelMeter.getRecorder().getMeasurements(lgauge);
assertThat(metrics, hasSize(0));
}

public void testLongGaugeWithInvalidAttribute() {
registry.registerLongGauge(
"es.test.name.total",
"desc",
"unit",
() -> new LongWithAttributes(1, Map.of("high_cardinality_id", "27932451"))
);

AssertionError error = assertThrows(AssertionError.class, otelMeter::collectMetrics);
assertThat(error.getMessage(), equalTo("invalid metric attributes"));
}

public void testDoubleGaugeWithInvalidAttribute() {
registry.registerDoubleGauge(
"es.test.name.total",
"desc",
"unit",
() -> new DoubleWithAttributes(1.0, Map.of("has_timestamp", "false"))
);

AssertionError error = assertThrows(AssertionError.class, otelMeter::collectMetrics);
assertThat(error.getMessage(), equalTo("invalid metric attributes"));
}
}