Skip to content

Commit 847a06a

Browse files
authored
fix: Use "cmpl-" prefix for /completions response IDs (#270) (#275)
* fix: Use "cmpl-" prefix for /completions response IDs (#270)

Signed-off-by: RohanDSkaria <[email protected]>

* fix: standardize completion response ID prefixing

- move chat/text prefixing into CreateChatCompletionResponse and CreateTextCompletionResponse (and the chat chunk constructor) so callers only pass request IDs
- drop the id argument from CreateBaseCompletionResponse

Signed-off-by: RohanDSkaria <[email protected]>

---------

Signed-off-by: RohanDSkaria <[email protected]>
1 parent 943f9bc commit 847a06a

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

pkg/llm-d-inference-sim/simulator.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ import (
4343
)
4444

4545
const (
46-
chatComplIDPrefix = "chatcmpl-"
4746
textCompletionObject = "text_completion"
4847
chatCompletionObject = "chat.completion"
4948
chatCompletionChunkObject = "chat.completion.chunk"
@@ -583,7 +582,7 @@ func (s *VllmSimulator) responseSentCallback(model string, isChatCompletion bool
583582
// from --served-model-name (for a base-model request) or the LoRA adapter name (for a LoRA request).
584583
func (s *VllmSimulator) createCompletionResponse(logprobs *int, isChatCompletion bool, respTokens []string, toolCalls []openaiserverapi.ToolCall,
585584
finishReason *string, usageData *openaiserverapi.Usage, modelName string, doRemoteDecode bool, requestID string) openaiserverapi.CompletionResponse {
586-
baseResp := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+requestID,
585+
baseResp := openaiserverapi.CreateBaseCompletionResponse(
587586
time.Now().Unix(), modelName, usageData, requestID)
588587

589588
if doRemoteDecode {

pkg/llm-d-inference-sim/streaming.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ func (s *VllmSimulator) sendTokenChunks(context *streamingContext, w *bufio.Writ
179179
// createUsageChunk creates and returns a CompletionRespChunk with usage data, a single chunk of streamed completion API response,
180180
// supports both modes (text and chat)
181181
func (s *VllmSimulator) createUsageChunk(context *streamingContext, usageData *openaiserverapi.Usage) openaiserverapi.CompletionRespChunk {
182-
baseChunk := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+context.requestID,
182+
baseChunk := openaiserverapi.CreateBaseCompletionResponse(
183183
context.creationTime, context.model, usageData, context.requestID)
184184

185185
if context.isChatCompletion {
@@ -194,7 +194,7 @@ func (s *VllmSimulator) createUsageChunk(context *streamingContext, usageData *o
194194
// createTextCompletionChunk creates and returns a CompletionRespChunk, a single chunk of streamed completion API response,
195195
// for text completion.
196196
func (s *VllmSimulator) createTextCompletionChunk(context *streamingContext, token string, finishReason *string) openaiserverapi.CompletionRespChunk {
197-
baseChunk := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+context.requestID,
197+
baseChunk := openaiserverapi.CreateBaseCompletionResponse(
198198
context.creationTime, context.model, nil, context.requestID)
199199
baseChunk.Object = textCompletionObject
200200

@@ -217,7 +217,7 @@ func (s *VllmSimulator) createTextCompletionChunk(context *streamingContext, tok
217217
// API response, for chat completion. It sets either role, or token, or tool call info in the message.
218218
func (s *VllmSimulator) createChatCompletionChunk(context *streamingContext, token string, tool *openaiserverapi.ToolCall,
219219
role string, finishReason *string) openaiserverapi.CompletionRespChunk {
220-
baseChunk := openaiserverapi.CreateBaseCompletionResponse(chatComplIDPrefix+context.requestID,
220+
baseChunk := openaiserverapi.CreateBaseCompletionResponse(
221221
context.creationTime, context.model, nil, context.requestID)
222222
baseChunk.Object = chatCompletionChunkObject
223223
chunk := openaiserverapi.CreateChatCompletionRespChunk(baseChunk,

pkg/openai-server-api/response.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ import (
2525
"github.com/valyala/fasthttp"
2626
)
2727

28+
const (
29+
chatComplIDPrefix = "chatcmpl-"
30+
textComplIDPrefix = "cmpl-"
31+
)
32+
2833
// CompletionResponse interface representing both completion response types (text and chat)
2934
type CompletionResponse interface {
3035
GetRequestID() string
@@ -307,8 +312,8 @@ func CreateTextRespChoice(base baseResponseChoice, text string) TextRespChoice {
307312
return TextRespChoice{baseResponseChoice: base, Text: text, Logprobs: nil}
308313
}
309314

310-
func CreateBaseCompletionResponse(id string, created int64, model string, usage *Usage, requestID string) baseCompletionResponse {
311-
return baseCompletionResponse{ID: id, Created: created, Model: model, Usage: usage, RequestID: requestID}
315+
func CreateBaseCompletionResponse(created int64, model string, usage *Usage, requestID string) baseCompletionResponse {
316+
return baseCompletionResponse{Created: created, Model: model, Usage: usage, RequestID: requestID}
312317
}
313318

314319
// GetRequestID returns the request ID from the response
@@ -317,13 +322,16 @@ func (b baseCompletionResponse) GetRequestID() string {
317322
}
318323

319324
func CreateChatCompletionResponse(base baseCompletionResponse, choices []ChatRespChoice) *ChatCompletionResponse {
325+
base.ID = chatComplIDPrefix + base.RequestID
320326
return &ChatCompletionResponse{baseCompletionResponse: base, Choices: choices}
321327
}
322328

323329
func CreateTextCompletionResponse(base baseCompletionResponse, choices []TextRespChoice) *TextCompletionResponse {
330+
base.ID = textComplIDPrefix + base.RequestID
324331
return &TextCompletionResponse{baseCompletionResponse: base, Choices: choices}
325332
}
326333

327334
func CreateChatCompletionRespChunk(base baseCompletionResponse, choices []ChatRespChunkChoice) *ChatCompletionRespChunk {
335+
base.ID = chatComplIDPrefix + base.RequestID
328336
return &ChatCompletionRespChunk{baseCompletionResponse: base, Choices: choices}
329337
}

0 commit comments

Comments (0)