2 changes: 1 addition & 1 deletion go.mod
@@ -11,7 +11,7 @@ require (
     github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1
     github.com/onsi/ginkgo/v2 v2.23.4
     github.com/onsi/gomega v1.37.0
-    github.com/openai/openai-go v0.1.0-beta.10
+    github.com/openai/openai-go/v3 v3.6.1
     github.com/pebbe/zmq4 v1.4.0
     github.com/prometheus/client_golang v1.22.0
     github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
4 changes: 2 additions & 2 deletions go.sum
@@ -85,8 +85,8 @@ github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus
 github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
 github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
 github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
-github.com/openai/openai-go v0.1.0-beta.10 h1:CknhGXe8aXQMRuqg255PFnWzgRY9nEryMxoNIBBM9tU=
-github.com/openai/openai-go v0.1.0-beta.10/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
+github.com/openai/openai-go/v3 v3.6.1 h1:f8J6jhT9wkYnNvHTKR7bxHXSZrSvvcfpHGkmBra04tI=
+github.com/openai/openai-go/v3 v3.6.1/go.mod h1:UOpNxkqC9OdNXNUfpNByKOtB4jAL0EssQXq5p8gO0Xs=
 github.com/pebbe/zmq4 v1.4.0 h1:gO5P92Ayl8GXpPZdYcD62Cwbq0slSBVVQRIXwGSJ6eQ=
 github.com/pebbe/zmq4 v1.4.0/go.mod h1:nqnPueOapVhE2wItZ0uOErngczsJdLOGkebMxaO8r48=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
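Note on the dependency bump: moving from github.com/openai/openai-go v0.1.0-beta.10 to v3.6.1 crosses a major version, so under Go's semantic import versioning the module path gains a /v3 suffix and every import site must be updated to match. The test-file hunks below are exactly that mechanical rewrite, plus a typo fix renaming getOpenAIClentAndChatParams to getOpenAIClientAndChatParams. A minimal sketch of client code against the v3 import paths (the base URL and model name are illustrative placeholders, not values from this PR):

package main

import (
    "context"
    "fmt"

    "github.com/openai/openai-go/v3"        // was: github.com/openai/openai-go
    "github.com/openai/openai-go/v3/option" // was: github.com/openai/openai-go/option
)

func main() {
    // Placeholder endpoint; the tests in this PR instead wire the client to
    // the simulator through a custom HTTP client.
    client := openai.NewClient(option.WithBaseURL("http://localhost:8000/v1"))

    resp, err := client.Chat.Completions.New(context.Background(), openai.ChatCompletionNewParams{
        Model: "my-model", // placeholder model name
        Messages: []openai.ChatCompletionMessageParamUnion{
            openai.UserMessage("Hello"),
        },
    })
    if err != nil {
        panic(err)
    }
    fmt.Println(resp.Choices[0].Message.Content)
}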
14 changes: 7 additions & 7 deletions pkg/llm-d-inference-sim/failures_test.go
@@ -25,7 +25,7 @@ import (

     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
+    "github.com/openai/openai-go/v3"

     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
@@ -134,7 +134,7 @@ var _ = Describe("Failures", func() {
     })

     It("should always return an error response for chat completions", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())

@@ -147,7 +147,7 @@ var _ = Describe("Failures", func() {
     })

     It("should always return an error response for text completions", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())

@@ -173,7 +173,7 @@ var _ = Describe("Failures", func() {
     })

     It("should return only rate limit errors", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())

@@ -199,7 +199,7 @@ var _ = Describe("Failures", func() {
     })

     It("should return only specified error types", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)

         // Make multiple requests to verify we get the expected error types
         for i := 0; i < 10; i++ {
@@ -230,7 +230,7 @@ var _ = Describe("Failures", func() {
     })

     It("should never return errors and behave like random mode", func() {
-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         resp, err := openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).ToNot(HaveOccurred())
         Expect(resp.Choices).To(HaveLen(1))
@@ -250,7 +250,7 @@ var _ = Describe("Failures", func() {
         }, nil)
         Expect(err).ToNot(HaveOccurred())

-        openaiClient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiClient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         _, err = openaiClient.Chat.Completions.New(ctx, params)
         Expect(err).To(HaveOccurred())
6 changes: 3 additions & 3 deletions pkg/llm-d-inference-sim/lora_test.go
@@ -23,8 +23,8 @@ import (

     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
-    "github.com/openai/openai-go/option"
+    "github.com/openai/openai-go/v3"
+    "github.com/openai/openai-go/v3/option"

     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api"
@@ -41,7 +41,7 @@ var _ = Describe("LoRAs", func() {
         Expect(err).NotTo(HaveOccurred())

         // Request to lora3
-        openaiclient, params := getOpenAIClentAndChatParams(client, "lora3", userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, "lora3", userMessage, false)
         resp, err := openaiclient.Chat.Completions.New(ctx, params)
         Expect(err).ToNot(HaveOccurred())
8 changes: 4 additions & 4 deletions pkg/llm-d-inference-sim/metrics_test.go
@@ -32,8 +32,8 @@ import (
     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
-    "github.com/openai/openai-go/option"
+    "github.com/openai/openai-go/v3"
+    "github.com/openai/openai-go/v3/option"
 )

 const (
@@ -73,7 +73,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
         client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, modelName, userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false)

         var wg sync.WaitGroup
         wg.Add(1)
@@ -316,7 +316,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
         client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, modelName, userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false)
         params.MaxTokens = openai.Int(5)

         var reqWg, metricsWg sync.WaitGroup
2 changes: 1 addition & 1 deletion pkg/llm-d-inference-sim/seed_test.go
@@ -22,7 +22,7 @@ import (
     "github.com/llm-d/llm-d-inference-sim/pkg/common"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
+    "github.com/openai/openai-go/v3"
 )

 var _ = Describe("Simulator with seed", func() {
18 changes: 9 additions & 9 deletions pkg/llm-d-inference-sim/simulator_test.go
@@ -33,9 +33,9 @@ import (
     "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
-    "github.com/openai/openai-go"
-    "github.com/openai/openai-go/option"
-    "github.com/openai/openai-go/packages/param"
+    "github.com/openai/openai-go/v3"
+    "github.com/openai/openai-go/v3/option"
+    "github.com/openai/openai-go/v3/packages/param"
     "github.com/valyala/fasthttp/fasthttputil"
     "k8s.io/klog/v2"
 )
@@ -163,7 +163,7 @@ var _ = Describe("Simulator", func() {
         client, err := startServer(ctx, mode)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, model, userMessage, true)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, userMessage, true)
         stream := openaiclient.Chat.Completions.NewStreaming(ctx, params)
         defer func() {
             err := stream.Close()
@@ -264,7 +264,7 @@ var _ = Describe("Simulator", func() {
         client, err := startServer(ctx, mode)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, model, userMessage, false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, userMessage, false)
         numTokens := 0
         // if maxTokens and maxCompletionTokens are passsed
         // maxCompletionTokens is used
@@ -539,7 +539,7 @@ var _ = Describe("Simulator", func() {
         Expect(string(body)).To(ContainSubstring("BadRequestError"))

         // Also test with OpenAI client to ensure it gets an error
-        openaiclient, params := getOpenAIClentAndChatParams(client, model, "This is a test message", false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, "This is a test message", false)
         params.MaxTokens = openai.Int(8)

         _, err = openaiclient.Chat.Completions.New(ctx, params)
@@ -556,7 +556,7 @@ var _ = Describe("Simulator", func() {
         client, err := startServerWithArgs(ctx, common.ModeEcho, args, nil)
         Expect(err).NotTo(HaveOccurred())

-        openaiclient, params := getOpenAIClentAndChatParams(client, model, "Hello", false)
+        openaiclient, params := getOpenAIClientAndChatParams(client, model, "Hello", false)
         params.MaxTokens = openai.Int(5)

         // Send a request within the context window
@@ -604,7 +604,7 @@ func sendSimpleChatRequest(envs map[string]string, streaming bool) *http.Response
     client, err := startServerWithArgs(ctx, common.ModeRandom, nil, envs)
     Expect(err).NotTo(HaveOccurred())

-    openaiclient, params := getOpenAIClentAndChatParams(client, model, userMessage, streaming)
+    openaiclient, params := getOpenAIClientAndChatParams(client, model, userMessage, streaming)
     var httpResp *http.Response
     resp, err := openaiclient.Chat.Completions.New(ctx, params, option.WithResponseInto(&httpResp))
     Expect(err).NotTo(HaveOccurred())
@@ -616,7 +616,7 @@ func sendSimpleChatRequest(envs map[string]string, streaming bool) *http.Response
     return httpResp
 }

-func getOpenAIClentAndChatParams(client option.HTTPClient, model string, message string,
+func getOpenAIClientAndChatParams(client option.HTTPClient, model string, message string,
     streaming bool) (openai.Client, openai.ChatCompletionNewParams) {
     openaiclient := openai.NewClient(
         option.WithBaseURL(baseURL),
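Note: the diff above truncates the body of the renamed helper. What follows is a plausible reconstruction of the remainder under the v3 API, for orientation only; the option.WithHTTPClient wiring, the params literal, and the streaming branch are assumptions consistent with the call sites in the tests, not code copied from this PR.

// Sketch of how getOpenAIClientAndChatParams likely continues past the lines
// shown in the diff. Everything after option.WithBaseURL is an assumption.
func getOpenAIClientAndChatParams(client option.HTTPClient, model string, message string,
    streaming bool) (openai.Client, openai.ChatCompletionNewParams) {
    openaiclient := openai.NewClient(
        option.WithBaseURL(baseURL),
        option.WithHTTPClient(client)) // route requests through the test's HTTP client

    params := openai.ChatCompletionNewParams{
        Model: openai.ChatModel(model),
        Messages: []openai.ChatCompletionMessageParamUnion{
            openai.UserMessage(message),
        },
    }
    if streaming {
        // Assumed: streaming tests also ask for usage in the final chunk.
        params.StreamOptions = openai.ChatCompletionStreamOptionsParam{
            IncludeUsage: openai.Bool(true),
        }
    }
    return openaiclient, params
}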