@@ -22,7 +22,6 @@ import (
 	"fmt"
 	"time"

-	"github.com/google/uuid"
 	"github.com/llm-d/llm-d-inference-sim/pkg/common"
 	openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
 	"github.com/valyala/fasthttp"
@@ -154,7 +153,7 @@ func (s *VllmSimulator) sendTokenChunks(context *streamingContext, w *bufio.Writ
 // supports both modes (text and chat)
 func (s *VllmSimulator) createUsageChunk(context *streamingContext, usageData *openaiserverapi.Usage) openaiserverapi.CompletionRespChunk {
 	baseChunk := openaiserverapi.BaseCompletionResponse{
-		ID:      chatComplIDPrefix + uuid.NewString(),
+		ID:      chatComplIDPrefix + common.GenerateUUIDString(),
 		Created: context.creationTime,
 		Model:   context.model,
 		Usage:   usageData,
@@ -179,7 +178,7 @@ func (s *VllmSimulator) createUsageChunk(context *streamingContext, usageData *o
 func (s *VllmSimulator) createTextCompletionChunk(context *streamingContext, token string, finishReason *string) openaiserverapi.CompletionRespChunk {
 	return &openaiserverapi.TextCompletionResponse{
 		BaseCompletionResponse: openaiserverapi.BaseCompletionResponse{
-			ID:      chatComplIDPrefix + uuid.NewString(),
+			ID:      chatComplIDPrefix + common.GenerateUUIDString(),
 			Created: context.creationTime,
 			Model:   context.model,
 			Object:  textCompletionObject,
@@ -199,7 +198,7 @@ func (s *VllmSimulator) createChatCompletionChunk(context *streamingContext, tok
 	role string, finishReason *string) openaiserverapi.CompletionRespChunk {
 	chunk := openaiserverapi.ChatCompletionRespChunk{
 		BaseCompletionResponse: openaiserverapi.BaseCompletionResponse{
-			ID:      chatComplIDPrefix + uuid.NewString(),
+			ID:      chatComplIDPrefix + common.GenerateUUIDString(),
 			Created: context.creationTime,
 			Model:   context.model,
 			Object:  chatCompletionChunkObject,