Skip to content

Commit a4ddc93

Browse files
committed
Fix histogram and timer handling to use quantile instead of le bucket
1 parent 904ba26 commit a4ddc93

File tree

2 files changed

+117
-116
lines changed

2 files changed

+117
-116
lines changed

spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java

Lines changed: 106 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -190,57 +190,54 @@ protected String formatCounter(String name, Counter counter) {
190190

191191
protected String formatHistogram(String name, Histogram histogram) {
192192
if (histogram != null && histogram.getSnapshot() != null) {
193-
StringBuilder stringBuilder = new StringBuilder(300);
194193
String baseName = sanitize(name);
195194
Snapshot snap = histogram.getSnapshot();
196195
long count = histogram.getCount();
197-
stringBuilder
198-
.append("# HELP ")
199-
.append(baseName)
200-
.append(" Histogram metric\n# TYPE ")
201-
.append(baseName)
202-
.append(" histogram\n");
203196
boolean isNanosHistogram = baseName.contains("nanos");
204197
if (isNanosHistogram) {
205198
baseName = nanosMetricsNameToSeconds(baseName);
206199
}
207-
appendBucket(
208-
stringBuilder,
209-
baseName,
210-
"le=\"0.5\"",
211-
isNanosHistogram ? nanosToSeconds(snap.getMedian()) : snap.getMean());
212-
appendBucket(
213-
stringBuilder,
214-
baseName,
215-
"le=\"0.75\"",
216-
isNanosHistogram ? nanosToSeconds(snap.get75thPercentile()) : snap.get75thPercentile());
217-
appendBucket(
218-
stringBuilder,
219-
baseName,
220-
"le=\"0.95\"",
221-
isNanosHistogram ? nanosToSeconds(snap.get95thPercentile()) : snap.get95thPercentile());
222-
appendBucket(
223-
stringBuilder,
224-
baseName,
225-
"le=\"0.98\"",
226-
isNanosHistogram ? nanosToSeconds(snap.get98thPercentile()) : snap.get98thPercentile());
227-
appendBucket(
228-
stringBuilder,
229-
baseName,
230-
"le=\"0.99\"",
231-
isNanosHistogram ? nanosToSeconds(snap.get99thPercentile()) : snap.get99thPercentile());
232200
double sum =
233201
isNanosHistogram ? nanosToSeconds(snap.getMean() * count) : snap.getMean() * count;
234-
stringBuilder
235-
.append(baseName)
236-
.append("_count ")
237-
.append(count)
238-
.append('\n')
239-
.append(baseName)
240-
.append("_sum ")
241-
.append(sum)
242-
.append("\n\n");
243-
return stringBuilder.toString();
202+
return "# HELP "
203+
+ baseName
204+
+ " Histogram metric\n# TYPE "
205+
+ baseName
206+
+ " summary\n"
207+
+ baseName
208+
+ "{quantile=\"0.5\"} "
209+
+ (isNanosHistogram ? nanosToSeconds(snap.getMedian()) : snap.getMean())
210+
+ "\n"
211+
+ baseName
212+
+ "{quantile=\"0.75\"} "
213+
+ (isNanosHistogram ? nanosToSeconds(snap.get75thPercentile()) : snap.get75thPercentile())
214+
+ "\n"
215+
+ baseName
216+
+ "{quantile=\"0.95\"} "
217+
+ (isNanosHistogram ? nanosToSeconds(snap.get95thPercentile()) : snap.get95thPercentile())
218+
+ "\n"
219+
+ baseName
220+
+ "{quantile=\"0.98\"} "
221+
+ (isNanosHistogram ? nanosToSeconds(snap.get98thPercentile()) : snap.get98thPercentile())
222+
+ "\n"
223+
+ baseName
224+
+ "{quantile=\"0.99\"} "
225+
+ (isNanosHistogram ? nanosToSeconds(snap.get99thPercentile()) : snap.get99thPercentile())
226+
+ "\n"
227+
+ baseName
228+
+ "{quantile=\"0.999\"} "
229+
+ (isNanosHistogram
230+
? nanosToSeconds(snap.get999thPercentile())
231+
: snap.get99thPercentile())
232+
+ "\n"
233+
+ baseName
234+
+ "_count "
235+
+ count
236+
+ "\n"
237+
+ baseName
238+
+ "_sum "
239+
+ sum
240+
+ "\n\n";
244241
}
245242
return null;
246243
}
@@ -260,15 +257,15 @@ protected String formatMeter(String name, Meter meter) {
260257
+ baseName
261258
+ "_rate gauge\n"
262259
+ baseName
263-
+ "_rate{interval=\"1m\"} "
260+
+ "_m1_rate "
264261
+ meter.getOneMinuteRate()
265-
+ '\n'
262+
+ "\n"
266263
+ baseName
267-
+ "_rate{interval=\"5m\"} "
264+
+ "_m5_rate "
268265
+ meter.getFiveMinuteRate()
269-
+ '\n'
266+
+ "\n"
270267
+ baseName
271-
+ "_rate{interval=\"15m\"} "
268+
+ "_m15_rate "
272269
+ meter.getFifteenMinuteRate()
273270
+ "\n\n";
274271
}
@@ -277,72 +274,75 @@ protected String formatMeter(String name, Meter meter) {
277274

278275
protected String formatTimer(String name, Timer timer) {
279276
if (timer != null && timer.getSnapshot() != null) {
280-
StringBuilder stringBuilder = new StringBuilder(300);
281277
String baseName = sanitize(name);
282278
Snapshot snap = timer.getSnapshot();
283279
long count = timer.getCount();
284-
stringBuilder
285-
.append("# HELP ")
286-
.append(baseName)
287-
.append("_duration_seconds Timer histogram\n# TYPE ")
288-
.append(baseName)
289-
.append("_duration_seconds histogram\n");
290-
appendBucket(
291-
stringBuilder,
292-
baseName + "_duration_seconds",
293-
"le=\"0.5\"",
294-
nanosToSeconds(snap.getMedian()));
295-
appendBucket(
296-
stringBuilder,
297-
baseName + "_duration_seconds",
298-
"le=\"0.75\"",
299-
nanosToSeconds(snap.get75thPercentile()));
300-
appendBucket(
301-
stringBuilder,
302-
baseName + "_duration_seconds",
303-
"le=\"0.95\"",
304-
nanosToSeconds(snap.get95thPercentile()));
305-
appendBucket(
306-
stringBuilder,
307-
baseName + "_duration_seconds",
308-
"le=\"0.98\"",
309-
nanosToSeconds(snap.get98thPercentile()));
310-
appendBucket(
311-
stringBuilder,
312-
baseName + "_duration_seconds",
313-
"le=\"0.99\"",
314-
nanosToSeconds(snap.get99thPercentile()));
315-
stringBuilder
316-
.append(baseName)
317-
.append("_duration_seconds_count ")
318-
.append(count)
319-
.append('\n')
320-
.append(baseName)
321-
.append("_duration_seconds_sum ")
322-
.append(nanosToSeconds(snap.getMean() * count))
323-
.append("\n\n# TYPE ")
324-
.append(baseName)
325-
.append("_calls_total counter\n")
326-
.append(baseName)
327-
.append("_calls_total ")
328-
.append(count)
329-
.append("\n\n");
330-
return stringBuilder.toString();
280+
return "# HELP "
281+
+ baseName
282+
+ "_duration_seconds Timer summary\n# TYPE "
283+
+ baseName
284+
+ "_duration_seconds summary\n"
285+
+ "\n"
286+
+ baseName
287+
+ "_duration_seconds"
288+
+ "{quantile=\"0.5\"} "
289+
+ nanosToSeconds(snap.getMedian())
290+
+ "\n"
291+
+ baseName
292+
+ "_duration_seconds"
293+
+ "{quantile=\"0.75\"} "
294+
+ nanosToSeconds(snap.get75thPercentile())
295+
+ "\n"
296+
+ baseName
297+
+ "_duration_seconds"
298+
+ "{quantile=\"0.95\"} "
299+
+ nanosToSeconds(snap.get95thPercentile())
300+
+ "\n"
301+
+ baseName
302+
+ "_duration_seconds"
303+
+ "{quantile=\"0.98\"} "
304+
+ nanosToSeconds(snap.get98thPercentile())
305+
+ "\n"
306+
+ baseName
307+
+ "_duration_seconds"
308+
+ "{quantile=\"0.99\"} "
309+
+ nanosToSeconds(snap.get99thPercentile())
310+
+ "\n"
311+
+ baseName
312+
+ "_duration_seconds"
313+
+ "{quantile=\"0.999\"} "
314+
+ nanosToSeconds(snap.get999thPercentile())
315+
+ "\n"
316+
+ baseName
317+
+ "_duration_seconds_count "
318+
+ count
319+
+ "\n"
320+
+ baseName
321+
+ "_duration_seconds_sum "
322+
+ nanosToSeconds(snap.getMean() * count)
323+
+ "\n\n# TYPE "
324+
+ baseName
325+
+ " gauge\n"
326+
+ baseName
327+
+ "_count "
328+
+ count
329+
+ "\n"
330+
+ baseName
331+
+ "_m1_rate "
332+
+ timer.getOneMinuteRate()
333+
+ "\n"
334+
+ baseName
335+
+ "_m5_rate "
336+
+ timer.getFiveMinuteRate()
337+
+ "\n"
338+
+ baseName
339+
+ "_m15_rate "
340+
+ timer.getFifteenMinuteRate()
341+
+ "\n\n";
331342
}
332343
return null;
333344
}
334345

335-
protected void appendBucket(
336-
StringBuilder builder, String baseName, String leLabel, double value) {
337-
builder
338-
.append(baseName)
339-
.append("_bucket{")
340-
.append(leLabel)
341-
.append("} ")
342-
.append(value)
343-
.append('\n');
344-
}
345-
346346
protected double nanosToSeconds(double nanos) {
347347
return nanos / 1_000_000_000.0;
348348
}

spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
import static org.junit.jupiter.api.Assertions.*;
2323

24+
import java.time.Duration;
25+
import java.time.temporal.ChronoUnit;
2426
import java.util.Objects;
2527
import java.util.Properties;
2628

@@ -70,7 +72,7 @@ void testFormatMetricsSnapshotIncludesHistogram() throws Exception {
7072

7173
String output = handler.formatMetricsSnapshot();
7274

73-
assertTrue(output.contains("# TYPE foo_histogram histogram"));
75+
assertTrue(output.contains("# TYPE foo_histogram summary"));
7476
assertTrue(output.contains("foo_histogram_count 2"));
7577
assertTrue(output.contains("foo_histogram_sum"));
7678
}
@@ -87,7 +89,7 @@ void testFormatMetricsSnapshotIncludesHistogramWithNanos() throws Exception {
8789

8890
String output = handler.formatMetricsSnapshot();
8991

90-
assertTrue(output.contains("# TYPE foo_nanos_histogram histogram"));
92+
assertTrue(output.contains("# TYPE foo_seconds_histogram summary"));
9193
assertTrue(output.contains("foo_seconds_histogram_count 3"));
9294
assertTrue(output.contains("foo_seconds_histogram_sum 0.001572032"));
9395
}
@@ -103,24 +105,23 @@ void testFormatMetricsSnapshotIncludesMeter() throws Exception {
103105
String output = handler.formatMetricsSnapshot();
104106
assertTrue(output.contains("# TYPE foo_meter_total counter"));
105107
assertTrue(output.contains("foo_meter_total 3"));
106-
assertTrue(output.contains("foo_meter_rate{interval=\"1m\"}"));
108+
assertTrue(output.contains("foo_meter_m1_rate"));
107109
}
108110

109111
@Test
110112
void testFormatMetricsSnapshotIncludesTimer() throws Exception {
111113
MetricRegistry registry = new MetricRegistry();
112114
Timer timer = registry.timer("foo_timer");
113115

114-
Timer.Context context = timer.time();
115-
Thread.sleep(10);
116-
context.stop();
117-
116+
timer.update(Duration.of(500, ChronoUnit.MILLIS));
117+
timer.update(Duration.of(1000, ChronoUnit.MILLIS));
118118
PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry);
119119

120120
String output = handler.formatMetricsSnapshot();
121-
assertTrue(output.contains("# TYPE foo_timer_duration_seconds histogram"));
122-
assertTrue(output.contains("foo_timer_duration_seconds_count 1"));
123-
assertTrue(output.contains("foo_timer_duration_seconds_sum"));
121+
assertTrue(output.contains("# TYPE foo_timer_duration_seconds summary"));
122+
assertTrue(output.contains("foo_timer_duration_seconds_count 2"));
123+
assertTrue(output.contains("foo_timer_duration_seconds_sum 1.5"));
124+
assertTrue(output.contains("foo_timer_m1_rate"));
124125
}
125126

126127
@Test

0 commit comments

Comments
 (0)