
Commit 9e6f8c1

fix sending latency metrics, use WriteToChannel function
Signed-off-by: Maya Barnea <[email protected]>
1 parent 94be0aa commit 9e6f8c1
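
The commit replaces direct, potentially blocking channel sends (ch <- value) for latency metrics with calls to common.WriteToChannel at every call site shown in the diff below. The helper itself is not part of this diff; the following is a minimal sketch of what such a function might look like. The signature is inferred from the call sites; the non-blocking-send behavior and the logr.Logger type are assumptions, not the repository's actual implementation.

package common

import "github.com/go-logr/logr"

// WriteToChannel is a hypothetical sketch matching the call sites in this commit.
// It attempts a non-blocking send and, if the channel cannot accept the value
// (full buffer or no reader), logs the channel name instead of blocking the caller.
func WriteToChannel[T any](ch chan T, value T, logger logr.Logger, channelName string) {
	select {
	case ch <- value:
		// Value delivered; the metrics collector reads it from the channel.
	default:
		// Avoid stalling request handling (several call sites send from defer blocks);
		// record that the metric value could not be written.
		logger.Info("failed to write to channel", "channel", channelName)
	}
}

A helper along these lines would fit the commit message: a bare ch <- value inside a defer or in the dequeue path blocks the request goroutine whenever the metrics consumer is slow or absent, whereas routing every send through one function gives a single place to make the write safe and to log failures per channel name.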


3 files changed: +6 −6 lines changed


pkg/llm-d-inference-sim/simulator.go

Lines changed: 4 additions & 4 deletions
@@ -492,7 +492,7 @@ func (s *VllmSimulator) addRequestToQueue(reqCtx *openaiserverapi.CompletionReqC
 func (s *VllmSimulator) handleCompletions(ctx *fasthttp.RequestCtx, isChatCompletion bool) {
 	startTime := time.Now()
 	defer func() {
-		s.metrics.e2eReqLatencyChan <- time.Since(startTime).Seconds()
+		common.WriteToChannel(s.metrics.e2eReqLatencyChan, time.Since(startTime).Seconds(), s.logger, "metrics.e2eReqLatencyChan")
 	}()

 	// Check if we should inject a failure
@@ -623,7 +623,7 @@ func (s *VllmSimulator) sendResponse(reqCtx *openaiserverapi.CompletionReqCtx, r
 		// report tpot in seconds
 		common.WriteToChannel(s.metrics.tpotChan, (float64(perTokenLatency) / 1000), s.logger, "metrics.tpotChan")
 	}
-	s.metrics.reqDecodeTimeChan <- time.Since(startDecode).Seconds()
+	common.WriteToChannel(s.metrics.reqDecodeTimeChan, time.Since(startDecode).Seconds(), s.logger, "metrics.reqDecodeTimeChan")

 	s.sendCompletionResponse(reqCtx.HTTPReqCtx, resp)
 	s.responseSentCallback(modelName, reqCtx.IsChatCompletion, reqCtx.CompletionReq.GetRequestID())
@@ -683,7 +683,7 @@ func (s *VllmSimulator) dequeue() *openaiserverapi.CompletionReqCtx {
 		if ok && item.reqCtx != nil && s.loraIsLoaded(item.reqCtx.CompletionReq.GetModel()) {
 			s.waitingQueue.Remove(elem)
 			s.incrementLora(item.reqCtx.CompletionReq.GetModel())
-			s.metrics.reqQueueTimeChan <- time.Since(item.enqueueTime).Seconds()
+			common.WriteToChannel(s.metrics.reqQueueTimeChan, time.Since(item.enqueueTime).Seconds(), s.logger, "metrics.reqQueueTimeChan")
 			return item.reqCtx
 		}
 	}
@@ -693,7 +693,7 @@ func (s *VllmSimulator) dequeue() *openaiserverapi.CompletionReqCtx {
 		item, ok := elem.Value.(waitingQueueItem)
 		if ok && item.reqCtx != nil && s.loadLora(item.reqCtx.CompletionReq.GetModel()) {
 			s.waitingQueue.Remove(elem)
-			s.metrics.reqQueueTimeChan <- time.Since(item.enqueueTime).Seconds()
+			common.WriteToChannel(s.metrics.reqQueueTimeChan, time.Since(item.enqueueTime).Seconds(), s.logger, "metrics.reqQueueTimeChan")
 			return item.reqCtx
 		}
 	}

pkg/llm-d-inference-sim/streaming.go

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ func (s *VllmSimulator) sendTokenChunks(context *streamingContext, w *bufio.Writ
 		}
 	}

-	s.metrics.reqDecodeTimeChan <- time.Since(startDecode).Seconds()
+	common.WriteToChannel(s.metrics.reqDecodeTimeChan, time.Since(startDecode).Seconds(), s.logger, "metrics.reqDecodeTimeChan")

 	// send the last chunk if finish reason is stop
 	var chunk openaiserverapi.CompletionRespChunk

pkg/llm-d-inference-sim/worker.go

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ type requestProcessor interface {
 func (s *VllmSimulator) processRequest(reqCtx *openaiserverapi.CompletionReqCtx) {
 	start := time.Now()
 	defer func() {
-		s.metrics.reqInferenceTimeChan <- time.Since(start).Seconds()
+		common.WriteToChannel(s.metrics.reqInferenceTimeChan, time.Since(start).Seconds(), s.logger, "metrics.reqInferenceTimeChan")
 	}()

 	req := reqCtx.CompletionReq
