2424# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2525# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
27- from typing import Dict , Union
27+ from typing import Dict , List , Union
2828
2929import triton_python_backend_utils as pb_utils
3030from vllm .engine .metrics import StatLoggerBase as VllmStatLoggerBase
@@ -46,6 +46,16 @@ def __init__(self, labels):
4646 description = "Number of generation tokens processed." ,
4747 kind = pb_utils .MetricFamily .COUNTER ,
4848 )
49+ self .histogram_time_to_first_token_family = pb_utils .MetricFamily (
50+ name = "vllm:time_to_first_token_seconds" ,
51+ description = "Histogram of time to first token in seconds." ,
52+ kind = pb_utils .MetricFamily .HISTOGRAM ,
53+ )
54+ self .histogram_time_per_output_token_family = pb_utils .MetricFamily (
55+ name = "vllm:time_per_output_token_seconds" ,
56+ description = "Histogram of time per output token in seconds." ,
57+ kind = pb_utils .MetricFamily .HISTOGRAM ,
58+ )
4959
5060 # Initialize metrics
5161 # Iteration stats
@@ -55,6 +65,49 @@ def __init__(self, labels):
5565 self .counter_generation_tokens = self .counter_generation_tokens_family .Metric (
5666 labels = labels
5767 )
68+ self .histogram_time_to_first_token = (
69+ self .histogram_time_to_first_token_family .Metric (
70+ labels = labels ,
71+ buckets = [
72+ 0.001 ,
73+ 0.005 ,
74+ 0.01 ,
75+ 0.02 ,
76+ 0.04 ,
77+ 0.06 ,
78+ 0.08 ,
79+ 0.1 ,
80+ 0.25 ,
81+ 0.5 ,
82+ 0.75 ,
83+ 1.0 ,
84+ 2.5 ,
85+ 5.0 ,
86+ 7.5 ,
87+ 10.0 ,
88+ ],
89+ )
90+ )
91+ self .histogram_time_per_output_token = (
92+ self .histogram_time_per_output_token_family .Metric (
93+ labels = labels ,
94+ buckets = [
95+ 0.01 ,
96+ 0.025 ,
97+ 0.05 ,
98+ 0.075 ,
99+ 0.1 ,
100+ 0.15 ,
101+ 0.2 ,
102+ 0.3 ,
103+ 0.4 ,
104+ 0.5 ,
105+ 0.75 ,
106+ 1.0 ,
107+ 2.5 ,
108+ ],
109+ )
110+ )
58111
59112
60113class VllmStatLogger (VllmStatLoggerBase ):
@@ -93,6 +146,19 @@ def _log_counter(self, counter, data: Union[int, float]) -> None:
93146 """
94147 if data != 0 :
95148 counter .increment (data )
149+
150+ def _log_histogram (self , histogram , data : Union [List [int ], List [float ]]) -> None :
151+ """Convenience function for logging list to histogram.
152+
153+ Args:
154+ histogram: A histogram metric instance.
155+ data: A list of int or float data to observe into the histogram metric.
156+
157+ Returns:
158+ None
159+ """
160+ for datum in data :
161+ histogram .observe (datum )
96162
97163 def log (self , stats : VllmStats ) -> None :
98164 """Logs tracked stats to triton metrics server every iteration.
@@ -108,4 +174,10 @@ def log(self, stats: VllmStats) -> None:
108174 )
109175 self ._log_counter (
110176 self .metrics .counter_generation_tokens , stats .num_generation_tokens_iter
177+ self ._log_histogram (
178+ self .metrics .histogram_time_to_first_token , stats .time_to_first_tokens_iter
179+ )
180+ self ._log_histogram (
181+ self .metrics .histogram_time_per_output_token ,
182+ stats .time_per_output_tokens_iter ,
111183 )
0 commit comments