Skip to content

Commit dffc026

Browse files
yinggehmc-nv
authored and committed
feat: Add new histogram metric type (#386)
1 parent 9ed1544 commit dffc026

File tree

7 files changed

+502
-14
lines changed

7 files changed

+502
-14
lines changed

include/triton/core/tritonserver.h

Lines changed: 75 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ struct TRITONSERVER_Server;
6464
struct TRITONSERVER_ServerOptions;
6565
struct TRITONSERVER_Metric;
6666
struct TRITONSERVER_MetricFamily;
67+
struct TRITONSERVER_MetricArgs;
6768

6869
///
6970
/// TRITONSERVER API Version
@@ -91,7 +92,7 @@ struct TRITONSERVER_MetricFamily;
9192
/// }
9293
///
9394
#define TRITONSERVER_API_VERSION_MAJOR 1
94-
#define TRITONSERVER_API_VERSION_MINOR 33
95+
#define TRITONSERVER_API_VERSION_MINOR 34
9596

9697
/// Get the TRITONBACKEND API version supported by the Triton shared
9798
/// library. This value can be compared against the
@@ -2615,7 +2616,8 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerInferAsync(
26152616
///
26162617
typedef enum TRITONSERVER_metrickind_enum {
26172618
TRITONSERVER_METRIC_KIND_COUNTER,
2618-
TRITONSERVER_METRIC_KIND_GAUGE
2619+
TRITONSERVER_METRIC_KIND_GAUGE,
2620+
TRITONSERVER_METRIC_KIND_HISTOGRAM
26192621
} TRITONSERVER_MetricKind;
26202622

26212623
/// Create a new metric family object. The caller takes ownership of the
@@ -2644,6 +2646,44 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew(
26442646
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
26452647
TRITONSERVER_MetricFamilyDelete(struct TRITONSERVER_MetricFamily* family);
26462648

2649+
/// Get the TRITONSERVER_MetricKind of the metric family.
2650+
///
2651+
/// \param family The metric family object to query.
2652+
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
2653+
/// \return a TRITONSERVER_Error indicating success or failure.
2654+
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
2655+
TRITONSERVER_GetMetricFamilyKind(
2656+
struct TRITONSERVER_MetricFamily* family, TRITONSERVER_MetricKind* kind);
2657+
2658+
/// Create a new metric args object. The caller takes ownership of the
2659+
/// TRITONSERVER_MetricArgs object and must call TRITONSERVER_MetricArgsDelete
2660+
/// to release the object.
2661+
///
2662+
/// \param args Returns the new metric args object.
2663+
/// \return a TRITONSERVER_Error indicating success or failure.
2664+
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsNew(
2665+
struct TRITONSERVER_MetricArgs** args);
2666+
2667+
/// Set the histogram parameters (bucket boundaries) on a metric args object.
2668+
///
2669+
/// \param args The metric args object to set.
2670+
/// \param buckets The array of bucket boundaries for the expected range of
2671+
/// observed values.
2672+
///
2673+
/// \param buckets_count The number of bucket boundaries.
2674+
/// \return a TRITONSERVER_Error indicating success or failure.
2675+
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
2676+
TRITONSERVER_MetricArgsSetHistogram(
2677+
struct TRITONSERVER_MetricArgs* args, const double* buckets,
2678+
const uint64_t buckets_count);
2679+
2680+
/// Delete a metric args object.
2681+
///
2682+
/// \param args The metric args object.
2683+
/// \return a TRITONSERVER_Error indicating success or failure.
2684+
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsDelete(
2685+
struct TRITONSERVER_MetricArgs* args);
2686+
26472687
/// Create a new metric object. The caller takes ownership of the
26482688
/// TRITONSERVER_Metric object and must call
26492689
/// TRITONSERVER_MetricDelete to release the object. The caller is also
@@ -2661,6 +2701,28 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNew(
26612701
struct TRITONSERVER_MetricFamily* family,
26622702
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count);
26632703

2704+
/// Create a new metric object using additional metric args. The caller takes
2705+
/// ownership of the TRITONSERVER_Metric object and must call
2706+
/// TRITONSERVER_MetricDelete to release the object. The caller is also
2707+
/// responsible for ownership of the labels passed in.
2708+
/// Each label can be deleted immediately after creating the metric with
2709+
/// TRITONSERVER_ParameterDelete if not re-using the labels.
2710+
/// Metric args can be deleted immediately after creating the metric with
2711+
/// TRITONSERVER_MetricArgsDelete if not re-using the metric args.
2712+
///
2713+
/// \param metric Returns the new metric object.
2714+
/// \param family The metric family to add this new metric to.
2715+
/// \param labels The array of labels to associate with this new metric.
2716+
/// \param label_count The number of labels.
2717+
/// \param args Metric args that store additional arguments to construct
2718+
/// particular metric types, e.g. histogram.
2719+
/// \return a TRITONSERVER_Error indicating success or failure.
2720+
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNewWithArgs(
2721+
struct TRITONSERVER_Metric** metric,
2722+
struct TRITONSERVER_MetricFamily* family,
2723+
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count,
2724+
const struct TRITONSERVER_MetricArgs* args);
2725+
26642726
/// Delete a metric object.
26652727
/// All TRITONSERVER_Metric* objects should be deleted BEFORE their
26662728
/// corresponding TRITONSERVER_MetricFamily* objects have been deleted.
@@ -2705,7 +2767,17 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricIncrement(
27052767
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricSet(
27062768
struct TRITONSERVER_Metric* metric, double value);
27072769

2708-
/// Get the TRITONSERVER_MetricKind of metric and its corresponding family.
2770+
/// Sample an observation and count it in the appropriate bucket of a metric.
2771+
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_HISTOGRAM and returns
2772+
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
2773+
///
2774+
/// \param metric The metric object to update.
2775+
/// \param value The observed value to sample into the metric.
2776+
/// \return a TRITONSERVER_Error indicating success or failure.
2777+
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricObserve(
2778+
struct TRITONSERVER_Metric* metric, double value);
2779+
2780+
/// Get the TRITONSERVER_MetricKind of the metric, as set by its metric family.
27092781
///
27102782
/// \param metric The metric object to query.
27112783
/// \param kind Returns the TRITONSERVER_MetricKind of metric.

src/metric_family.cc

Lines changed: 94 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights
2+
// reserved.
23
//
34
// Redistribution and use in source and binary forms, with or without
45
// modification, are permitted provided that the following conditions
@@ -54,6 +55,12 @@ MetricFamily::MetricFamily(
5455
.Help(description)
5556
.Register(*registry));
5657
break;
58+
case TRITONSERVER_METRIC_KIND_HISTOGRAM:
59+
family_ = reinterpret_cast<void*>(&prometheus::BuildHistogram()
60+
.Name(name)
61+
.Help(description)
62+
.Register(*registry));
63+
break;
5764
default:
5865
throw std::invalid_argument(
5966
"Unsupported kind passed to MetricFamily constructor.");
@@ -63,24 +70,49 @@ MetricFamily::MetricFamily(
6370
}
6471

6572
void*
66-
MetricFamily::Add(std::map<std::string, std::string> label_map, Metric* metric)
73+
MetricFamily::Add(
74+
std::map<std::string, std::string> label_map, Metric* metric,
75+
const TritonServerMetricArgs* args)
6776
{
6877
void* prom_metric = nullptr;
6978
switch (kind_) {
7079
case TRITONSERVER_METRIC_KIND_COUNTER: {
80+
if (args != nullptr) {
81+
throw std::invalid_argument(
82+
"Unexpected args found in counter Metric constructor.");
83+
}
7184
auto counter_family_ptr =
7285
reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
7386
auto counter_ptr = &counter_family_ptr->Add(label_map);
7487
prom_metric = reinterpret_cast<void*>(counter_ptr);
7588
break;
7689
}
7790
case TRITONSERVER_METRIC_KIND_GAUGE: {
91+
if (args != nullptr) {
92+
throw std::invalid_argument(
93+
"Unexpected args found in gauge Metric constructor.");
94+
}
7895
auto gauge_family_ptr =
7996
reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
8097
auto gauge_ptr = &gauge_family_ptr->Add(label_map);
8198
prom_metric = reinterpret_cast<void*>(gauge_ptr);
8299
break;
83100
}
101+
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
102+
if (args == nullptr) {
103+
throw std::invalid_argument(
104+
"Bucket boundaries not found in Metric args.");
105+
}
106+
if (args->kind() != TRITONSERVER_METRIC_KIND_HISTOGRAM) {
107+
throw std::invalid_argument("Metric args not set to histogram kind.");
108+
}
109+
auto histogram_family_ptr =
110+
reinterpret_cast<prometheus::Family<prometheus::Histogram>*>(family_);
111+
auto histogram_ptr =
112+
&histogram_family_ptr->Add(label_map, args->buckets());
113+
prom_metric = reinterpret_cast<void*>(histogram_ptr);
114+
break;
115+
}
84116
default:
85117
throw std::invalid_argument(
86118
"Unsupported family kind passed to Metric constructor.");
@@ -134,6 +166,14 @@ MetricFamily::Remove(void* prom_metric, Metric* metric)
134166
gauge_family_ptr->Remove(gauge_ptr);
135167
break;
136168
}
169+
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
170+
auto histogram_family_ptr =
171+
reinterpret_cast<prometheus::Family<prometheus::Histogram>*>(family_);
172+
auto histogram_ptr =
173+
reinterpret_cast<prometheus::Histogram*>(prom_metric);
174+
histogram_family_ptr->Remove(histogram_ptr);
175+
break;
176+
}
137177
default:
138178
// Invalid kind should be caught in constructor
139179
LOG_ERROR << "Unsupported kind in Metric destructor.";
@@ -169,7 +209,8 @@ MetricFamily::~MetricFamily()
169209
//
170210
Metric::Metric(
171211
TRITONSERVER_MetricFamily* family,
172-
std::vector<const InferenceParameter*> labels)
212+
std::vector<const InferenceParameter*> labels,
213+
const TritonServerMetricArgs* args)
173214
{
174215
family_ = reinterpret_cast<MetricFamily*>(family);
175216
kind_ = family_->Kind();
@@ -188,7 +229,7 @@ Metric::Metric(
188229
std::string(reinterpret_cast<const char*>(param->ValuePointer()));
189230
}
190231

191-
metric_ = family_->Add(label_map, this);
232+
metric_ = family_->Add(label_map, this, args);
192233
}
193234

194235
Metric::~Metric()
@@ -235,6 +276,11 @@ Metric::Value(double* value)
235276
*value = gauge_ptr->Value();
236277
break;
237278
}
279+
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
280+
return TRITONSERVER_ErrorNew(
281+
TRITONSERVER_ERROR_UNSUPPORTED,
282+
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Value");
283+
}
238284
default:
239285
return TRITONSERVER_ErrorNew(
240286
TRITONSERVER_ERROR_UNSUPPORTED,
@@ -279,6 +325,11 @@ Metric::Increment(double value)
279325
}
280326
break;
281327
}
328+
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
329+
return TRITONSERVER_ErrorNew(
330+
TRITONSERVER_ERROR_UNSUPPORTED,
331+
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Increment");
332+
}
282333
default:
283334
return TRITONSERVER_ErrorNew(
284335
TRITONSERVER_ERROR_UNSUPPORTED,
@@ -308,6 +359,45 @@ Metric::Set(double value)
308359
gauge_ptr->Set(value);
309360
break;
310361
}
362+
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
363+
return TRITONSERVER_ErrorNew(
364+
TRITONSERVER_ERROR_UNSUPPORTED,
365+
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Set");
366+
}
367+
default:
368+
return TRITONSERVER_ErrorNew(
369+
TRITONSERVER_ERROR_UNSUPPORTED,
370+
"Unsupported TRITONSERVER_MetricKind");
371+
}
372+
373+
return nullptr; // Success
374+
}
375+
376+
TRITONSERVER_Error*
377+
Metric::Observe(double value)
378+
{
379+
if (metric_ == nullptr) {
380+
return TRITONSERVER_ErrorNew(
381+
TRITONSERVER_ERROR_INTERNAL,
382+
"Could not set metric value. Metric has been invalidated.");
383+
}
384+
385+
switch (kind_) {
386+
case TRITONSERVER_METRIC_KIND_COUNTER: {
387+
return TRITONSERVER_ErrorNew(
388+
TRITONSERVER_ERROR_UNSUPPORTED,
389+
"TRITONSERVER_METRIC_KIND_COUNTER does not support Observe");
390+
}
391+
case TRITONSERVER_METRIC_KIND_GAUGE: {
392+
return TRITONSERVER_ErrorNew(
393+
TRITONSERVER_ERROR_UNSUPPORTED,
394+
"TRITONSERVER_METRIC_KIND_GAUGE does not support Observe");
395+
}
396+
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
397+
auto histogram_ptr = reinterpret_cast<prometheus::Histogram*>(metric_);
398+
histogram_ptr->Observe(value);
399+
break;
400+
}
311401
default:
312402
return TRITONSERVER_ErrorNew(
313403
TRITONSERVER_ERROR_UNSUPPORTED,

src/metric_family.h

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights
2+
// reserved.
23
//
34
// Redistribution and use in source and binary forms, with or without
45
// modification, are permitted provided that the following conditions
@@ -27,6 +28,7 @@
2728

2829
#ifdef TRITON_ENABLE_METRICS
2930

31+
#include <cstring>
3032
#include <mutex>
3133
#include <set>
3234
#include <unordered_map>
@@ -37,6 +39,29 @@
3739

3840
namespace triton { namespace core {
3941

42+
//
43+
// TritonServerMetricArgs
44+
//
45+
// Implementation for TRITONSERVER_MetricArgs.
46+
//
47+
class TritonServerMetricArgs {
48+
public:
49+
TritonServerMetricArgs() = default;
50+
51+
void* SetHistogramArgs(const double* buckets, uint64_t bucket_count)
52+
{
53+
kind_ = TRITONSERVER_METRIC_KIND_HISTOGRAM;
54+
buckets_ = std::vector<double>(buckets, buckets + bucket_count);
55+
return nullptr;
56+
}
57+
TRITONSERVER_MetricKind kind() const { return kind_; }
58+
const std::vector<double>& buckets() const { return buckets_; }
59+
60+
private:
61+
TRITONSERVER_MetricKind kind_;
62+
std::vector<double> buckets_;
63+
};
64+
4065
//
4166
// Implementation for TRITONSERVER_MetricFamily.
4267
//
@@ -50,7 +75,9 @@ class MetricFamily {
5075
void* Family() const { return family_; }
5176
TRITONSERVER_MetricKind Kind() const { return kind_; }
5277

53-
void* Add(std::map<std::string, std::string> label_map, Metric* metric);
78+
void* Add(
79+
std::map<std::string, std::string> label_map, Metric* metric,
80+
const TritonServerMetricArgs* args);
5481
void Remove(void* prom_metric, Metric* metric);
5582

5683
int NumMetrics()
@@ -86,7 +113,8 @@ class Metric {
86113
public:
87114
Metric(
88115
TRITONSERVER_MetricFamily* family,
89-
std::vector<const InferenceParameter*> labels);
116+
std::vector<const InferenceParameter*> labels,
117+
const TritonServerMetricArgs* args);
90118
~Metric();
91119

92120
MetricFamily* Family() const { return family_; }
@@ -95,6 +123,7 @@ class Metric {
95123
TRITONSERVER_Error* Value(double* value);
96124
TRITONSERVER_Error* Increment(double value);
97125
TRITONSERVER_Error* Set(double value);
126+
TRITONSERVER_Error* Observe(double value);
98127

99128
// If a MetricFamily is deleted before its dependent Metric, we want to
100129
// invalidate the references so we don't access invalid memory.

src/metrics.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -35,6 +35,7 @@
3535

3636
#include "prometheus/counter.h"
3737
#include "prometheus/gauge.h"
38+
#include "prometheus/histogram.h"
3839
#include "prometheus/registry.h"
3940
#include "prometheus/serializer.h"
4041
#include "prometheus/summary.h"

0 commit comments

Comments
 (0)