2424# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2525# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
27- from typing import Dict , Union
27+ from typing import Dict , List , Union
2828
2929import triton_python_backend_utils as pb_utils
3030from vllm .engine .metrics import StatLoggerBase as VllmStatLoggerBase
@@ -46,6 +46,16 @@ def __init__(self, labels):
4646 description = "Number of generation tokens processed." ,
4747 kind = pb_utils .MetricFamily .COUNTER ,
4848 )
49+ self .histogram_time_to_first_token_family = pb_utils .MetricFamily (
50+ name = "vllm:time_to_first_token_seconds" ,
51+ description = "Histogram of time to first token in seconds." ,
52+ kind = pb_utils .MetricFamily .HISTOGRAM ,
53+ )
54+ self .histogram_time_per_output_token_family = pb_utils .MetricFamily (
55+ name = "vllm:time_per_output_token_seconds" ,
56+ description = "Histogram of time per output token in seconds." ,
57+ kind = pb_utils .MetricFamily .HISTOGRAM ,
58+ )
4959
5060 # Initialize metrics
5161 # Iteration stats
@@ -55,6 +65,49 @@ def __init__(self, labels):
5565 self .counter_generation_tokens = self .counter_generation_tokens_family .Metric (
5666 labels = labels
5767 )
68+ self .histogram_time_to_first_token = (
69+ self .histogram_time_to_first_token_family .Metric (
70+ labels = labels ,
71+ buckets = [
72+ 0.001 ,
73+ 0.005 ,
74+ 0.01 ,
75+ 0.02 ,
76+ 0.04 ,
77+ 0.06 ,
78+ 0.08 ,
79+ 0.1 ,
80+ 0.25 ,
81+ 0.5 ,
82+ 0.75 ,
83+ 1.0 ,
84+ 2.5 ,
85+ 5.0 ,
86+ 7.5 ,
87+ 10.0 ,
88+ ],
89+ )
90+ )
91+ self .histogram_time_per_output_token = (
92+ self .histogram_time_per_output_token_family .Metric (
93+ labels = labels ,
94+ buckets = [
95+ 0.01 ,
96+ 0.025 ,
97+ 0.05 ,
98+ 0.075 ,
99+ 0.1 ,
100+ 0.15 ,
101+ 0.2 ,
102+ 0.3 ,
103+ 0.4 ,
104+ 0.5 ,
105+ 0.75 ,
106+ 1.0 ,
107+ 2.5 ,
108+ ],
109+ )
110+ )
58111
59112
60113class VllmStatLogger (VllmStatLoggerBase ):
@@ -82,6 +135,19 @@ def _log_counter(self, counter, data: Union[int, float]) -> None:
82135 if data != 0 :
83136 counter .increment (data )
84137
138+ def _log_histogram (self , histogram , data : Union [List [int ], List [float ]]) -> None :
139+ """Convenience function for logging list to histogram.
140+
141+ Args:
142+ histogram: A histogram metric instance.
143+ data: A list of int or float data to observe into the histogram metric.
144+
145+ Returns:
146+ None
147+ """
148+ for datum in data :
149+ histogram .observe (datum )
150+
85151 def log (self , stats : VllmStats ) -> None :
86152 """Logs to triton metrics server every iteration.
87153
@@ -97,3 +163,10 @@ def log(self, stats: VllmStats) -> None:
97163 self ._log_counter (
98164 self .metrics .counter_generation_tokens , stats .num_generation_tokens_iter
99165 )
166+ self ._log_histogram (
167+ self .metrics .histogram_time_to_first_token , stats .time_to_first_tokens_iter
168+ )
169+ self ._log_histogram (
170+ self .metrics .histogram_time_per_output_token ,
171+ stats .time_per_output_tokens_iter ,
172+ )
0 commit comments