Skip to content

Commit a593d4e

Browse files
committed
Fix escaping bugs
Signed-off-by: Federico Torres <[email protected]>
1 parent 8444ae9 commit a593d4e

File tree

3 files changed

+94
-47
lines changed

3 files changed

+94
-47
lines changed

integration-tests/it-common/src/test/java/io/prometheus/client/it/common/ExporterTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.testcontainers.containers.BindMode;
2424
import org.testcontainers.containers.GenericContainer;
2525

26-
public abstract class ExporterTest {
26+
public abstract class ExporterTest {
2727
private final GenericContainer<?> sampleAppContainer;
2828
private final Volume sampleAppVolume;
2929
protected final String sampleApp;

prometheus-metrics-model/src/main/java/io/prometheus/metrics/model/snapshots/PrometheusNaming.java

Lines changed: 41 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
import java.util.regex.Matcher;
99
import java.util.regex.Pattern;
1010

11-
import static java.lang.Character.MAX_LOW_SURROGATE;
12-
import static java.lang.Character.MIN_HIGH_SURROGATE;
11+
import static java.lang.Character.*;
1312

1413
/**
1514
* Utility for Prometheus Metric and Label naming.
@@ -45,8 +44,6 @@ public class PrometheusNaming {
4544
*/
4645
public static final String ESCAPING_KEY = "escaping";
4746

48-
private static final String LOWERHEX = "0123456789abcdef";
49-
5047
private static final String METRIC_NAME_LABEL= "__name__";
5148

5249
/** Legal characters for metric names, including dot. */
@@ -586,54 +583,51 @@ public static String escapeName(String name, EscapingScheme scheme) {
586583
if (isValidLegacyMetricName(name)) {
587584
return name;
588585
}
589-
for (int i = 0; i < name.length(); i++) {
590-
char c = name.charAt(i);
586+
for (int i = 0; i < name.length(); ) {
587+
int c = name.codePointAt(i);
591588
if (isValidLegacyChar(c, i)) {
592-
escaped.append(c);
589+
escaped.appendCodePoint(c);
593590
} else {
594591
escaped.append('_');
595592
}
593+
i += Character.charCount(c);
596594
}
597595
return escaped.toString();
598596
case DOTS_ESCAPING:
599597
// Do not early return for legacy valid names, we still escape underscores.
600-
for (int i = 0; i < name.length(); i++) {
601-
char c = name.charAt(i);
598+
for (int i = 0; i < name.length(); ) {
599+
int c = name.codePointAt(i);
602600
if (c == '_') {
603601
escaped.append("__");
604602
} else if (c == '.') {
605603
escaped.append("_dot_");
606604
} else if (isValidLegacyChar(c, i)) {
607-
escaped.append(c);
605+
escaped.appendCodePoint(c);
608606
} else {
609-
escaped.append('_');
607+
escaped.append("__");
610608
}
609+
i += Character.charCount(c);
611610
}
612611
return escaped.toString();
613612
case VALUE_ENCODING_ESCAPING:
614613
if (isValidLegacyMetricName(name)) {
615614
return name;
616615
}
617616
escaped.append("U__");
618-
for (int i = 0; i < name.length(); i++) {
619-
char c = name.charAt(i);
620-
if (isValidLegacyChar(c, i)) {
621-
escaped.append(c);
617+
for (int i = 0; i < name.length(); ) {
618+
int c = name.codePointAt(i);
619+
if (c == '_') {
620+
escaped.append("__");
621+
} else if (isValidLegacyChar(c, i)) {
622+
escaped.appendCodePoint(c);
622623
} else if (!isValidUTF8Char(c)) {
623624
escaped.append("_FFFD_");
624-
} else if (c < 0x100) {
625-
escaped.append('_');
626-
for (int s = 4; s >= 0; s -= 4) {
627-
escaped.append(LOWERHEX.charAt((c >> s) & 0xF));
628-
}
629-
escaped.append('_');
630625
} else {
631626
escaped.append('_');
632-
for (int s = 12; s >= 0; s -= 4) {
633-
escaped.append(LOWERHEX.charAt((c >> s) & 0xF));
634-
}
627+
escaped.append(Integer.toHexString(c));
635628
escaped.append('_');
636629
}
630+
i += Character.charCount(c);
637631
}
638632
return escaped.toString();
639633
default:
@@ -666,37 +660,42 @@ static String unescapeName(String name, EscapingScheme scheme) {
666660
if (matcher.find()) {
667661
String escapedName = name.substring(matcher.end());
668662
StringBuilder unescaped = new StringBuilder();
669-
TOP:
670-
for (int i = 0; i < escapedName.length(); i++) {
663+
for (int i = 0; i < escapedName.length(); ) {
671664
// All non-underscores are treated normally.
672-
if (escapedName.charAt(i) != '_') {
673-
unescaped.append(escapedName.charAt(i));
665+
int c = escapedName.codePointAt(i);
666+
if (c != '_') {
667+
unescaped.appendCodePoint(c);
668+
i += Character.charCount(c);
674669
continue;
675670
}
676671
i++;
677672
if (i >= escapedName.length()) {
678673
return name;
679674
}
680675
// A double underscore is a single underscore.
681-
if (escapedName.charAt(i) == '_') {
676+
if (escapedName.codePointAt(i) == '_') {
682677
unescaped.append('_');
678+
i++;
683679
continue;
684680
}
685681
// We think we are in a UTF-8 code, process it.
686-
long utf8Val = 0;
682+
int utf8Val = 0;
683+
boolean foundClosingUnderscore = false;
687684
for (int j = 0; i < escapedName.length(); j++) {
688685
// This is too many characters for a UTF-8 value.
689-
if (j > 4) {
686+
if (j >= 6) {
690687
return name;
691688
}
692689
// Found a closing underscore, convert to a char, check validity, and append.
693-
if (escapedName.charAt(i) == '_') {
694-
char utf8Char = (char) utf8Val;
695-
if (!isValidUTF8Char(utf8Char)) {
690+
if (escapedName.codePointAt(i) == '_') {
691+
//char utf8Char = (char) utf8Val;
692+
foundClosingUnderscore = true;
693+
if (!isValidUTF8Char(utf8Val)) {
696694
return name;
697695
}
698-
unescaped.append(utf8Char);
699-
continue TOP;
696+
unescaped.appendCodePoint(utf8Val);
697+
i++;
698+
break;
700699
}
701700
char r = Character.toLowerCase(escapedName.charAt(i));
702701
utf8Val *= 16;
@@ -709,8 +708,9 @@ static String unescapeName(String name, EscapingScheme scheme) {
709708
}
710709
i++;
711710
}
712-
// Didn't find closing underscore, invalid.
713-
return name;
711+
if (!foundClosingUnderscore) {
712+
return name;
713+
}
714714
}
715715
return unescaped.toString();
716716
} else {
@@ -721,12 +721,11 @@ static String unescapeName(String name, EscapingScheme scheme) {
721721
}
722722
}
723723

724-
static boolean isValidLegacyChar(char c, int i) {
724+
static boolean isValidLegacyChar(int c, int i) {
725725
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':' || (c >= '0' && c <= '9' && i > 0);
726726
}
727727

728-
private static boolean isValidUTF8Char(char b) {
729-
return ((b < MIN_HIGH_SURROGATE || b > MAX_LOW_SURROGATE) &&
730-
(b < 0xFFFE));
728+
private static boolean isValidUTF8Char(int c) {
729+
return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
731730
}
732731
}

prometheus-metrics-model/src/test/java/io/prometheus/metrics/model/snapshots/PrometheusNamingTest.java

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,22 @@ public void testEscapeName() {
206206
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
207207
assertThat(got).isEqualTo("mysystem.prod.west.cpu.load");
208208

209+
// name with dots and underscore
210+
got = escapeName("mysystem.prod.west.cpu.load_total", EscapingScheme.UNDERSCORE_ESCAPING);
211+
assertThat(got).isEqualTo("mysystem_prod_west_cpu_load_total");
212+
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
213+
assertThat(got).isEqualTo("mysystem_prod_west_cpu_load_total");
214+
215+
got = escapeName("mysystem.prod.west.cpu.load_total", EscapingScheme.DOTS_ESCAPING);
216+
assertThat(got).isEqualTo("mysystem_dot_prod_dot_west_dot_cpu_dot_load__total");
217+
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
218+
assertThat(got).isEqualTo("mysystem.prod.west.cpu.load_total");
219+
220+
got = escapeName("mysystem.prod.west.cpu.load_total", EscapingScheme.VALUE_ENCODING_ESCAPING);
221+
assertThat(got).isEqualTo("U__mysystem_2e_prod_2e_west_2e_cpu_2e_load__total");
222+
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
223+
assertThat(got).isEqualTo("mysystem.prod.west.cpu.load_total");
224+
209225
// name with dots and colon
210226
got = escapeName("http.status:sum", EscapingScheme.UNDERSCORE_ESCAPING);
211227
assertThat(got).isEqualTo("http_status:sum");
@@ -222,24 +238,56 @@ public void testEscapeName() {
222238
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
223239
assertThat(got).isEqualTo("http.status:sum");
224240

241+
// name with spaces and emoji
242+
got = escapeName("label with 😱", EscapingScheme.UNDERSCORE_ESCAPING);
243+
assertThat(got).isEqualTo("label_with__");
244+
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
245+
assertThat(got).isEqualTo("label_with__");
246+
247+
got = escapeName("label with 😱", EscapingScheme.DOTS_ESCAPING);
248+
assertThat(got).isEqualTo("label__with____");
249+
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
250+
assertThat(got).isEqualTo("label_with__");
251+
252+
got = escapeName("label with 😱", EscapingScheme.VALUE_ENCODING_ESCAPING);
253+
assertThat(got).isEqualTo("U__label_20_with_20__1f631_");
254+
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
255+
assertThat(got).isEqualTo("label with 😱");
256+
225257
// name with unicode characters > 0x100
226258
got = escapeName("花火", EscapingScheme.UNDERSCORE_ESCAPING);
227259
assertThat(got).isEqualTo("__");
228260
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
229261
assertThat(got).isEqualTo("__");
230262

231263
got = escapeName("花火", EscapingScheme.DOTS_ESCAPING);
232-
assertThat(got).isEqualTo("__");
264+
assertThat(got).isEqualTo("____");
233265
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
234266
// Dots-replacement does not know the difference between two replaced
235267
// characters and a single underscore.
236-
assertThat(got).isEqualTo("_");
268+
assertThat(got).isEqualTo("__");
237269

238270
got = escapeName("花火", EscapingScheme.VALUE_ENCODING_ESCAPING);
239271
assertThat(got).isEqualTo("U___82b1__706b_");
240272
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
241273
assertThat(got).isEqualTo("花火");
242274

275+
// name with spaces and edge-case value
276+
got = escapeName("label with Ā", EscapingScheme.UNDERSCORE_ESCAPING);
277+
assertThat(got).isEqualTo("label_with__");
278+
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
279+
assertThat(got).isEqualTo("label_with__");
280+
281+
got = escapeName("label with Ā", EscapingScheme.DOTS_ESCAPING);
282+
assertThat(got).isEqualTo("label__with____");
283+
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
284+
assertThat(got).isEqualTo("label_with__");
285+
286+
got = escapeName("label with Ā", EscapingScheme.VALUE_ENCODING_ESCAPING);
287+
assertThat(got).isEqualTo("U__label_20_with_20__100_");
288+
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
289+
assertThat(got).isEqualTo("label with Ā");
290+
243291
nameValidationScheme = ValidationScheme.LEGACY_VALIDATION;
244292
}
245293

@@ -435,13 +483,13 @@ public void testEscapeMetricSnapshotGaugeEscapingNeeded() {
435483
.build();
436484
MetricSnapshot got = escapeMetricSnapshot(original, EscapingScheme.DOTS_ESCAPING);
437485

438-
assertThat(got.getMetadata().getName()).isEqualTo("unicode_dot_and_dot_dots_dot___");
486+
assertThat(got.getMetadata().getName()).isEqualTo("unicode_dot_and_dot_dots_dot_____");
439487
assertThat(got.getMetadata().getHelp()).isEqualTo("some help text");
440488
assertThat(got.getDataPoints().size()).isEqualTo(1);
441489
GaugeSnapshot.GaugeDataPointSnapshot data = (GaugeSnapshot.GaugeDataPointSnapshot) got.getDataPoints().get(0);
442490
assertThat(data.getValue()).isEqualTo(34.2);
443491
assertThat((Iterable<? extends Label>) data.getLabels()).isEqualTo(Labels.builder()
444-
.label("__name__", "unicode_dot_and_dot_dots_dot___")
492+
.label("__name__", "unicode_dot_and_dot_dots_dot_____")
445493
.label("some_label", "label??value")
446494
.build());
447495
assertThat(original.getMetadata().getName()).isEqualTo("unicode.and.dots.花火");

0 commit comments

Comments
 (0)