Skip to content

Commit 847a06a

Browse files
authored
fix: Use "cmpl-" prefix for /completions response IDs (#270) (#275)
* fix: Use "cmpl-" prefix for /completions response IDs (#270)

Signed-off-by: RohanDSkaria <[email protected]>

* fix: standardize completion response ID prefixing

- move chat/text prefixing into CreateChatCompletionResponse and CreateTextCompletionResponse (and the chat chunk constructor) so callers only pass request IDs
- drop the id argument from CreateBaseCompletionResponse

Signed-off-by: RohanDSkaria <[email protected]>

---------

Signed-off-by: RohanDSkaria <[email protected]>
1 parent 943f9bc commit 847a06a

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

pkg/llm-d-inference-sim/simulator.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ import (
4343
)
4444

4545
const (
46-
chatComplIDPrefix = "chatcmpl-"
4746
textCompletionObject = "text_completion"
4847
chatCompletionObject = "chat.completion"
4948
chatCompletionChunkObject = "chat.completion.chunk"
@@ -583,7 +582,7 @@ func (s *VllmSimulator) responseSentCallback(model string, isChatCompletion bool
583582
// from --served-model-name (for a base-model request) or the LoRA adapter name (for a LoRA request).
584583
func (s *VllmSimulator) createCompletionResponse(logprobs *int, isChatCompletion bool, respTokens []string, toolCalls []openaiserverapi.ToolCall,
585584
finishReason *string, usageData *openaiserverapi.Usage, modelName string, doRemoteDecode bool, requestID string) openaiserverapi.CompletionResponse {
586-
baseResp := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+requestID,
585+
baseResp := openaiserverapi.CreateBaseCompletionResponse(
587586
time.Now().Unix(), modelName, usageData, requestID)
588587

589588
if doRemoteDecode {

pkg/llm-d-inference-sim/streaming.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ func (s *VllmSimulator) sendTokenChunks(context *streamingContext, w *bufio.Writ
179179
// createUsageChunk creates and returns a CompletionRespChunk with usage data, a single chunk of streamed completion API response,
180180
// supports both modes (text and chat)
181181
func (s *VllmSimulator) createUsageChunk(context *streamingContext, usageData *openaiserverapi.Usage) openaiserverapi.CompletionRespChunk {
182-
baseChunk := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+context.requestID,
182+
baseChunk := openaiserverapi.CreateBaseCompletionResponse(
183183
context.creationTime, context.model, usageData, context.requestID)
184184

185185
if context.isChatCompletion {
@@ -194,7 +194,7 @@ func (s *VllmSimulator) createUsageChunk(context *streamingContext, usageData *o
194194
// createTextCompletionChunk creates and returns a CompletionRespChunk, a single chunk of streamed completion API response,
195195
// for text completion.
196196
func (s *VllmSimulator) createTextCompletionChunk(context *streamingContext, token string, finishReason *string) openaiserverapi.CompletionRespChunk {
197-
baseChunk := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+context.requestID,
197+
baseChunk := openaiserverapi.CreateBaseCompletionResponse(
198198
context.creationTime, context.model, nil, context.requestID)
199199
baseChunk.Object = textCompletionObject
200200

@@ -217,7 +217,7 @@ func (s *VllmSimulator) createTextCompletionChunk(context *streamingContext, tok
217217
// API response, for chat completion. It sets either role, or token, or tool call info in the message.
218218
func (s *VllmSimulator) createChatCompletionChunk(context *streamingContext, token string, tool *openaiserverapi.ToolCall,
219219
role string, finishReason *string) openaiserverapi.CompletionRespChunk {
220-
baseChunk := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+context.requestID,
220+
baseChunk := openaiserverapi.CreateBaseCompletionResponse(
221221
context.creationTime, context.model, nil, context.requestID)
222222
baseChunk.Object = chatCompletionChunkObject
223223
chunk := openaiserverapi.CreateChatCompletionRespChunk(baseChunk,

pkg/openai-server-api/response.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ import (
2525
"github.com/valyala/fasthttp"
2626
)
2727

28+
const (
29+
chatComplIDPrefix = "chatcmpl-"
30+
textComplIDPrefix = "cmpl-"
31+
)
32+
2833
// CompletionResponse interface representing both completion response types (text and chat)
2934
type CompletionResponse interface {
3035
GetRequestID() string
@@ -307,8 +312,8 @@ func CreateTextRespChoice(base baseResponseChoice, text string) TextRespChoice {
307312
return TextRespChoice{baseResponseChoice: base, Text: text, Logprobs: nil}
308313
}
309314

310-
func CreateBaseCompletionResponse(id string, created int64, model string, usage *Usage, requestID string) baseCompletionResponse {
311-
return baseCompletionResponse{ID: id, Created: created, Model: model, Usage: usage, RequestID: requestID}
315+
func CreateBaseCompletionResponse(created int64, model string, usage *Usage, requestID string) baseCompletionResponse {
316+
return baseCompletionResponse{Created: created, Model: model, Usage: usage, RequestID: requestID}
312317
}
313318

314319
// GetRequestID returns the request ID from the response
@@ -317,13 +322,16 @@ func (b baseCompletionResponse) GetRequestID() string {
317322
}
318323

319324
func CreateChatCompletionResponse(base baseCompletionResponse, choices []ChatRespChoice) *ChatCompletionResponse {
325+
base.ID = chatComplIDPrefix + base.RequestID
320326
return &ChatCompletionResponse{baseCompletionResponse: base, Choices: choices}
321327
}
322328

323329
func CreateTextCompletionResponse(base baseCompletionResponse, choices []TextRespChoice) *TextCompletionResponse {
330+
base.ID = textComplIDPrefix + base.RequestID
324331
return &TextCompletionResponse{baseCompletionResponse: base, Choices: choices}
325332
}
326333

327334
func CreateChatCompletionRespChunk(base baseCompletionResponse, choices []ChatRespChunkChoice) *ChatCompletionRespChunk {
335+
base.ID = chatComplIDPrefix + base.RequestID
328336
return &ChatCompletionRespChunk{baseCompletionResponse: base, Choices: choices}
329337
}

0 commit comments

Comments (0)