2 changes: 1 addition & 1 deletion go.mod
@@ -11,7 +11,7 @@ require (
     github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1
     github.com/onsi/ginkgo/v2 v2.23.4
     github.com/onsi/gomega v1.37.0
-    github.com/openai/openai-go v0.1.0-beta.10
+    github.com/openai/openai-go/v3 v3.6.1
     github.com/pebbe/zmq4 v1.4.0
     github.com/prometheus/client_golang v1.22.0
     github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
4 changes: 2 additions & 2 deletions go.sum
@@ -85,8 +85,8 @@ github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus
 github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
 github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
 github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
-github.com/openai/openai-go v0.1.0-beta.10 h1:CknhGXe8aXQMRuqg255PFnWzgRY9nEryMxoNIBBM9tU=
-github.com/openai/openai-go v0.1.0-beta.10/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
+github.com/openai/openai-go/v3 v3.6.1 h1:f8J6jhT9wkYnNvHTKR7bxHXSZrSvvcfpHGkmBra04tI=
+github.com/openai/openai-go/v3 v3.6.1/go.mod h1:UOpNxkqC9OdNXNUfpNByKOtB4jAL0EssQXq5p8gO0Xs=
 github.com/pebbe/zmq4 v1.4.0 h1:gO5P92Ayl8GXpPZdYcD62Cwbq0slSBVVQRIXwGSJ6eQ=
 github.com/pebbe/zmq4 v1.4.0/go.mod h1:nqnPueOapVhE2wItZ0uOErngczsJdLOGkebMxaO8r48=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
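Note on the dependency bump: moving from github.com/openai/openai-go v0.1.0-beta.10 to v3.6.1 crosses a major version, so under Go's semantic import versioning the module path gains a /v3 suffix and every import site must be updated to match. The test-file hunks below are exactly that mechanical rewrite, plus a typo fix renaming getOpenAIClentAndChatParams to getOpenAIClientAndChatParams. A minimal sketch of client code against the v3 import paths (the base URL and model name are illustrative placeholders, not values from this PR):

package main

import (
    "context"
    "fmt"

    "github.com/openai/openai-go/v3"        // was: github.com/openai/openai-go
    "github.com/openai/openai-go/v3/option" // was: github.com/openai/openai-go/option
)

func main() {
    // Placeholder endpoint; the tests in this PR instead wire the client to
    // the simulator through a custom HTTP client.
    client := openai.NewClient(option.WithBaseURL("http://localhost:8000/v1"))

    resp, err := client.Chat.Completions.New(context.Background(), openai.ChatCompletionNewParams{
        Model: "my-model", // placeholder model name
        Messages: []openai.ChatCompletionMessageParamUnion{
            openai.UserMessage("Hello"),
        },
    })
    if err != nil {
        panic(err)
    }
    fmt.Println(resp.Choices[0].Message.Content)
}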
14 changes: 7 additions & 7 deletions pkg/llm-d-inference-sim/failures_test.go
@@ -25,7 +25,7 @@ import (

     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
+    "github.com/openai/openai-go/v3"

     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
@@ -134,7 +134,7 @@ var _ = Describe("Failures", func() {
     })

     It("should always return an error response for chat completions", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())

@@ -147,7 +147,7 @@ var _ = Describe("Failures", func() {
     })

     It("should always return an error response for text completions", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())

@@ -173,7 +173,7 @@ var _ = Describe("Failures", func() {
     })

     It("should return only rate limit errors", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())

@@ -199,7 +199,7 @@ var _ = Describe("Failures", func() {
     })

     It("should return only specified error types", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)

         // Make multiple requests to verify we get the expected error types
         for i := 0; i < 10; i++ {
@@ -230,7 +230,7 @@ var _ = Describe("Failures", func() {
     })

     It("should never return errors and behave like random mode", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         resp, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).ToNot(HaveOccurred())
         Expect(resp.Choices).To(HaveLen(1))
@@ -250,7 +250,7 @@ var _ = Describe("Failures", func() {
         }, nil)
         Expect(err).ToNot(HaveOccurred())

-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err = openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())
6 changes: 3 additions & 3 deletions pkg/llm-d-inference-sim/lora_test.go
@@ -23,8 +23,8 @@ import (

     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
-    "github.com/openai/openai-go/option"
+    "github.com/openai/openai-go/v3"
+    "github.com/openai/openai-go/v3/option"

     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api"
@@ -41,7 +41,7 @@ var _ = Describe("LoRAs", func() {
         Expect(err).NotTo(HaveOccurred())

         // Request to lora3
-        openaiclient, params := getOpenAIClentAndChatParams(client, "lora3", userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, "lora3", userMessage, false)
         resp, err := openaiclient.Chat.Completions.New(ctx, params)
         Expect(err).ToNot(HaveOccurred())
8 changes: 4 additions & 4 deletions pkg/llm-d-inference-sim/metrics_test.go
@@ -32,8 +32,8 @@ import (
     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
-    "github.com/openai/openai-go/option"
+    "github.com/openai/openai-go/v3"
+    "github.com/openai/openai-go/v3/option"
 )

 const (
@@ -73,7 +73,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
         client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, modelName, userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false)

         var wg sync.WaitGroup
         wg.Add(1)
@@ -316,7 +316,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
         client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, modelName, userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false)
         params.MaxTokens = openai.Int(5)

         var reqWg, metricsWg sync.WaitGroup
2 changes: 1 addition & 1 deletion pkg/llm-d-inference-sim/seed_test.go
@@ -22,7 +22,7 @@ import (
     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
+    "github.com/openai/openai-go/v3"
 )

 var _ = Describe("Simulator with seed", func() {
18 changes: 9 additions & 9 deletions pkg/llm-d-inference-sim/simulator_test.go
@@ -33,9 +33,9 @@ import (
     "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
-    "github.com/openai/openai-go/option"
-    "github.com/openai/openai-go/packages/param"
+    "github.com/openai/openai-go/v3"
+    "github.com/openai/openai-go/v3/option"
+    "github.com/openai/openai-go/v3/packages/param"
     "github.com/valyala/fasthttp/fasthttputil"
     "k8s.io/klog/v2"
 )
@@ -163,7 +163,7 @@ var _ = Describe("Simulator", func() {
         client, err := startServer(ctx, mode)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, model, userMessage, true)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, userMessage, true)
         stream := openaiclient.Chat.Completions.NewStreaming(ctx, params)
         defer func() {
             err := stream.Close()
@@ -264,7 +264,7 @@ var _ = Describe("Simulator", func() {
         client, err := startServer(ctx, mode)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         numTokens := 0
         // if maxTokens and maxCompletionTokens are passsed
         // maxCompletionTokens is used
@@ -539,7 +539,7 @@ var _ = Describe("Simulator", func() {
         Expect(string(body)).To(ContainSubstring("BadRequestError"))

         // Also test with OpenAI client to ensure it gets an error
-        openaiclient, params := getOpenAIClentAndChatParams(client, model, "This is a test message", false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, "This is a test message", false)
         params.MaxTokens = openai.Int(8)

         _, err = openaiclient.Chat.Completions.New(ctx, params)
@@ -556,7 +556,7 @@ var _ = Describe("Simulator", func() {
         client, err := startServerWithArgs(ctx, common.ModeEcho, args, nil)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, model, "Hello", false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, "Hello", false)
         params.MaxTokens = openai.Int(5)

         // Send a request within the context window
@@ -604,7 +604,7 @@ func sendSimpleChatRequest(envs map[string]string, streaming bool) *http.Response
     client, err := startServerWithArgs(ctx, common.ModeRandom, nil, envs)
     Expect(err).NotTo(HaveOccurred())

-    openaiclient, params := getOpenAIClentAndChatParams(client, model, userMessage, streaming)
+    openaiclient, params := getOpenAIClientAndChatParams(client, model, userMessage, streaming)
     var httpResp *http.Response
     resp, err := openaiclient.Chat.Completions.New(ctx, params, option.WithResponseInto(&httpResp))
     Expect(err).NotTo(HaveOccurred())
@@ -616,7 +616,7 @@ func sendSimpleChatRequest(envs map[string]string, streaming bool) *http.Response
     return httpResp
 }

-func getOpenAIClentAndChatParams(client option.HTTPClient, model string, message string,
+func getOpenAIClientAndChatParams(client option.HTTPClient, model string, message string,
     streaming bool) (openai.Client, openai.ChatCompletionNewParams) {
     openaiclient := openai.NewClient(
         option.WithBaseURL(baseURL),
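Note: the diff above truncates the body of the renamed helper. What follows is a plausible reconstruction of the remainder under the v3 API, for orientation only; the option.WithHTTPClient wiring, the params literal, and the streaming branch are assumptions consistent with the call sites in the tests, not code copied from this PR.

// Sketch of how getOpenAIClientAndChatParams likely continues past the lines
// shown in the diff. Everything after option.WithBaseURL is an assumption.
func getOpenAIClientAndChatParams(client option.HTTPClient, model string, message string,
    streaming bool) (openai.Client, openai.ChatCompletionNewParams) {
    openaiclient := openai.NewClient(
        option.WithBaseURL(baseURL),
        option.WithHTTPClient(client)) // route requests through the test's HTTP client

    params := openai.ChatCompletionNewParams{
        Model: openai.ChatModel(model),
        Messages: []openai.ChatCompletionMessageParamUnion{
            openai.UserMessage(message),
        },
    }
    if streaming {
        // Assumed: streaming tests also ask for usage in the final chunk.
        params.StreamOptions = openai.ChatCompletionStreamOptionsParam{
            IncludeUsage: openai.Bool(true),
        }
    }
    return openaiclient, params
}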