Skip to content

Commit 3449fa9

Browse files
committed
fix conflict
Signed-off-by: googs1025 <[email protected]>
1 parent 8c622c6 commit 3449fa9

File tree

4 files changed

+66
-194
lines changed

4 files changed

+66
-194
lines changed

pkg/common/config.go

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ const (
4343
FailureTypeServerError = "server_error"
4444
FailureTypeInvalidRequest = "invalid_request"
4545
FailureTypeModelNotFound = "model_not_found"
46+
47+
StopFinishReason = "stop"
48+
LengthFinishReason = "length"
49+
ToolsFinishReason = "tool_calls"
50+
RemoteDecodeFinishReason = "remote_decode"
4651
)
4752

4853
type Configuration struct {
@@ -225,9 +230,9 @@ type Metrics struct {
225230
TPOTBucketValues []int `yaml:"tpot-buckets-values" json:"tpot-buckets-values"`
226231
// RequestPromptTokens, RequestGenerationTokens and RequestParamsMaxTokens are fake histogram arrays used for initialization.
227232
// Each value is the number of fake observations placed in the corresponding histogram bucket at start-up.
228-
RequestPromptTokens []float64 `yaml:"request-prompt-tokens" json:"request-prompt-tokens"` // prompt-length samples
229-
RequestGenerationTokens []float64 `yaml:"request-generation-tokens" json:"request-generation-tokens"` // generation-length samples
230-
RequestParamsMaxTokens []float64 `yaml:"request-params-max-tokens" json:"request-params-max-tokens"` // max_tokens parameter samples
233+
RequestPromptTokens []int `yaml:"request-prompt-tokens" json:"request-prompt-tokens"` // prompt-length samples
234+
RequestGenerationTokens []int `yaml:"request-generation-tokens" json:"request-generation-tokens"` // generation-length samples
235+
RequestParamsMaxTokens []int `yaml:"request-params-max-tokens" json:"request-params-max-tokens"` // max_tokens parameter samples
231236
// RequestSuccessTotal is the number of successful requests, key: finish-reason (stop, length, etc.).
232237
RequestSuccessTotal map[string]int64 `yaml:"request-success-total" json:"request-success-total"`
233238
}
@@ -508,6 +513,26 @@ func (c *Configuration) validate() error {
508513
if c.FakeMetrics.KVCacheUsagePercentage < 0 || c.FakeMetrics.KVCacheUsagePercentage > 1 {
509514
return errors.New("fake metrics KV cache usage must be between 0 ans 1")
510515
}
516+
// Validate the fake time-to-first-token bucket counts: the array may not be
// longer than len(TTFTBucketsBoundaries)+1 and every count must be non-negative.
if c.FakeMetrics.TTFTBucketValues != nil {
517+
if len(c.FakeMetrics.TTFTBucketValues) > len(TTFTBucketsBoundaries)+1 {
518+
return errors.New("fake time-to-first-token array is too long")
519+
}
520+
// A single-variable range yields the index, which is never negative;
// bind the element value so negative counts are actually rejected.
for _, v := range c.FakeMetrics.TTFTBucketValues {
521+
if v < 0 {
522+
return errors.New("time-to-first-token fake metrics should contain only non-negative values")
523+
}
524+
}
525+
}
526+
// Validate the fake time-per-output-token bucket counts: the array may not be
// longer than len(TPOTBucketsBoundaries)+1 and every count must be non-negative.
if c.FakeMetrics.TPOTBucketValues != nil {
527+
if len(c.FakeMetrics.TPOTBucketValues) > len(TPOTBucketsBoundaries)+1 {
528+
return errors.New("fake time-per-output-token array is too long")
529+
}
530+
// A single-variable range yields the index, which is never negative;
// bind the element value so negative counts are actually rejected.
for _, v := range c.FakeMetrics.TPOTBucketValues {
531+
if v < 0 {
532+
return errors.New("time-per-output-token fake metrics should contain only non-negative values")
533+
}
534+
}
535+
}
511536
if c.FakeMetrics.RequestSuccessTotal != nil {
512537
for reason, count := range c.FakeMetrics.RequestSuccessTotal {
513538
if count < 0 {
@@ -541,9 +566,27 @@ func (c *Configuration) validate() error {
541566
if c.DPSize < 1 || c.DPSize > 8 {
542567
return errors.New("data parallel size must be between 1 ans 8")
543568
}
569+
570+
if (c.SSLCertFile == "") != (c.SSLKeyFile == "") {
571+
return errors.New("both ssl-certfile and ssl-keyfile must be provided together")
572+
}
573+
574+
if c.SelfSignedCerts && (c.SSLCertFile != "" || c.SSLKeyFile != "") {
575+
return errors.New("cannot use both self-signed-certs and explicit ssl-certfile/ssl-keyfile")
576+
}
577+
578+
if c.DatasetPath == "" && c.DatasetURL != "" {
579+
return errors.New("dataset-path is required when dataset-url is set")
580+
}
581+
544582
return nil
545583
}
546584

585+
// SSLEnabled reports whether SSL is enabled: either SelfSignedCerts is set, or both SSLCertFile and SSLKeyFile are non-empty.
586+
func (c *Configuration) SSLEnabled() bool {
587+
return c.SelfSignedCerts || (c.SSLCertFile != "" && c.SSLKeyFile != "")
588+
}
589+
547590
func (c *Configuration) Copy() (*Configuration, error) {
548591
var dst Configuration
549592
data, err := json.Marshal(c)
@@ -628,6 +671,10 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
628671
f.Var(&dummyFailureTypes, "failure-types", failureTypesDescription)
629672
f.Lookup("failure-types").NoOptDefVal = dummy
630673

674+
f.StringVar(&config.SSLCertFile, "ssl-certfile", config.SSLCertFile, "Path to SSL certificate file for HTTPS (optional)")
675+
f.StringVar(&config.SSLKeyFile, "ssl-keyfile", config.SSLKeyFile, "Path to SSL private key file for HTTPS (optional)")
676+
f.BoolVar(&config.SelfSignedCerts, "self-signed-certs", config.SelfSignedCerts, "Enable automatic generation of self-signed certificates for HTTPS")
677+
631678
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
632679
var dummyString string
633680
f.StringVar(&dummyString, "config", "", "The path to a yaml configuration file. The command line values overwrite the configuration file values")

pkg/common/config_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,11 @@ var _ = Describe("Simulator configuration", func() {
203203
"{\"running\":\"lora1,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567}",
204204
"{\"running\":\"lora1,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}",
205205
},
206-
TTFTBucketValues: []int{10, 20, 30, 10},
207-
TPOTBucketValues: []int{0, 0, 10, 20, 30},
208-
RequestPromptTokens: []float64{10, 20, 30, 15},
209-
RequestGenerationTokens: []float64{50, 60, 40},
210-
RequestParamsMaxTokens: []float64{128, 256, 512},
206+
TTFTBucketValues: []int{10, 20, 30, 10},
207+
TPOTBucketValues: []int{0, 0, 10, 20, 30},
208+
RequestPromptTokens: []int{10, 20, 30, 15},
209+
RequestGenerationTokens: []int{50, 60, 40},
210+
RequestParamsMaxTokens: []int{128, 256, 512},
211211
RequestSuccessTotal: map[string]int64{
212212
StopFinishReason: 20,
213213
LengthFinishReason: 0,

pkg/llm-d-inference-sim/metrics.go

Lines changed: 8 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -194,42 +194,27 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
194194
nRunningReqs = float64(s.config.FakeMetrics.RunningRequests)
195195
nWaitingReqs = float64(s.config.FakeMetrics.WaitingRequests)
196196
kvCacheUsage = float64(s.config.FakeMetrics.KVCacheUsagePercentage)
197-
<<<<<<< HEAD
198-
199197
if s.config.FakeMetrics.TTFTBucketValues != nil {
200198
s.initFakeHistogram(s.ttft, common.TTFTBucketsBoundaries, s.config.FakeMetrics.TTFTBucketValues)
201199
}
202200

203201
if s.config.FakeMetrics.TPOTBucketValues != nil {
204202
s.initFakeHistogram(s.tpot, common.TPOTBucketsBoundaries, s.config.FakeMetrics.TPOTBucketValues)
205203
}
206-
for _, requestPromptToken := range s.config.FakeMetrics.RequestPromptTokens {
207-
s.requestPromptTokens.WithLabelValues(modelName).Observe(requestPromptToken)
204+
buckets := build125Buckets(s.config.MaxModelLen)
205+
if s.config.FakeMetrics.RequestPromptTokens != nil {
206+
s.initFakeHistogram(s.requestPromptTokens, buckets, s.config.FakeMetrics.RequestPromptTokens)
208207
}
209-
for _, requestGenerationToken := range s.config.FakeMetrics.RequestGenerationTokens {
210-
s.requestGenerationTokens.WithLabelValues(modelName).Observe(requestGenerationToken)
208+
// Seed each token histogram from its own fake-metrics array: generation-token
// counts go to requestGenerationTokens, max_tokens counts to requestParamsMaxTokens.
if s.config.FakeMetrics.RequestGenerationTokens != nil {
209+
s.initFakeHistogram(s.requestGenerationTokens, buckets, s.config.FakeMetrics.RequestGenerationTokens)
211210
}
212-
for _, requestParamsMaxToken := range s.config.FakeMetrics.RequestParamsMaxTokens {
213-
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(requestParamsMaxToken)
211+
if s.config.FakeMetrics.RequestParamsMaxTokens != nil {
212+
s.initFakeHistogram(s.requestParamsMaxTokens, buckets, s.config.FakeMetrics.RequestParamsMaxTokens)
214213
}
215-
=======
216-
>>>>>>> 02bc5c3 (fix review comment)
214+
217215
for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
218216
s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
219217
}
220-
buckets := build125Buckets(s.config.MaxModelLen)
221-
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestPromptTokens) {
222-
s.requestPromptTokens.WithLabelValues(modelName).Observe(sample)
223-
}
224-
225-
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestGenerationTokens) {
226-
s.requestGenerationTokens.WithLabelValues(modelName).Observe(sample)
227-
}
228-
229-
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestParamsMaxTokens) {
230-
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(sample)
231-
}
232-
233218
}
234219

235220
s.runningRequests.WithLabelValues(modelName).Set(nRunningReqs)
@@ -554,55 +539,3 @@ func build125Buckets(maxValue int) []float64 {
554539
}
555540
return buckets
556541
}
557-
558-
// padCountsToFull pads the counts slice to length len(boundaries)+1 by appending zeros.
559-
func padCountsToFull(boundaries []float64, counts []float64) []float64 {
560-
fullLen := len(boundaries) + 1
561-
if len(counts) > fullLen {
562-
return counts[:fullLen] // just return limit len
563-
}
564-
padded := make([]float64, fullLen)
565-
copy(padded, counts)
566-
// rest are zero by default
567-
return padded
568-
}
569-
570-
func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
571-
fullCounts := padCountsToFull(boundaries, counts)
572-
var samples []float64
573-
574-
for i, count := range fullCounts {
575-
if count == 0 {
576-
continue
577-
}
578-
579-
var val float64
580-
if len(boundaries) == 0 {
581-
// No boundaries → one bucket (-Inf, +Inf)
582-
val = 1.0
583-
} else if i == 0 {
584-
// Bucket: (-Inf, boundaries[0]]
585-
val = boundaries[0] - 1.0
586-
if val <= 0 { // avoid non-positive if boundary is small
587-
val = boundaries[0] * 0.5
588-
}
589-
} else if i < len(boundaries) {
590-
// Bucket: (boundaries[i-1], boundaries[i]]
591-
lower := boundaries[i-1]
592-
upper := boundaries[i]
593-
val = (lower + upper) / 2.0
594-
// Ensure it's strictly > lower and <= upper
595-
if val <= lower {
596-
val = upper - (upper-lower)*0.1
597-
}
598-
} else {
599-
// Last bucket: (boundaries[len-1], +Inf)
600-
val = boundaries[len(boundaries)-1] + 1.0
601-
}
602-
603-
for j := 0; j < int(count); j++ {
604-
samples = append(samples, val)
605-
}
606-
}
607-
return samples
608-
}

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 3 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ package llmdinferencesim
1919
import (
2020
"context"
2121
"errors"
22-
"fmt"
2322
"io"
24-
"math"
2523
"net/http"
2624
"os"
2725
"reflect"
@@ -662,9 +660,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
662660
ctx := context.TODO()
663661
args := []string{"cmd", "--model", model, "--mode", common.ModeRandom,
664662
"--fake-metrics",
665-
<<<<<<< HEAD
666-
"{\"running-requests\":10,\"waiting-requests\":30,\"kv-cache-usage\":0.4,\"loras\":[{\"running\":\"lora4,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567},{\"running\":\"lora4,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}],\"ttft-buckets-values\":[1, 2, 3],\"tpot-buckets-values\": [0, 0, 1, 2, 3]}",
667-
=======
668663
`{` +
669664
`"running-requests":10,` +
670665
`"waiting-requests":30,` +
@@ -678,6 +673,8 @@ var _ = Describe("Simulator metrics", Ordered, func() {
678673
`"request-prompt-tokens":[10,20,30],` +
679674
`"request-generation-tokens":[10,20,30],` +
680675
`"request-params-max-tokens":[10,20,30],` +
676+
`"ttft-buckets-values":[1,2,3],` +
677+
`"tpot-buckets-values":[0,0,1,2,3],` +
681678
`"loras":[` +
682679
`{` +
683680
`"running":"lora4,lora2",` +
@@ -691,7 +688,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
691688
`}` +
692689
`]` +
693690
`}`,
694-
>>>>>>> 02bc5c3 (fix review comment)
695691
}
696692

697693
client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
@@ -710,7 +706,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
710706
Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora2\",waiting_lora_adapters=\"lora3\"} 1.257894567e+09"))
711707
Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora3\",waiting_lora_adapters=\"\"} 1.257894569e+09"))
712708

713-
<<<<<<< HEAD
714709
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"0.001\"} 1"))
715710
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"0.005\"} 3"))
716711
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"0.01\"} 6"))
@@ -767,7 +762,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
767762
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"640\"} 0"))
768763
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"2560\"} 0"))
769764
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"+Inf\"} 1"))
770-
=======
765+
771766
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10`))
772767
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30`))
773768
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60`))
@@ -808,8 +803,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
808803
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0`))
809804
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20`))
810805
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0`))
811-
812-
>>>>>>> 02bc5c3 (fix review comment)
813806
})
814807
})
815808
})
@@ -993,104 +986,3 @@ func TestBuild125Buckets(t *testing.T) {
993986
})
994987
}
995988
}
996-
997-
func validateSamplesInBuckets(t *testing.T, samples []float64, boundaries []float64, counts []float64) {
998-
fullCounts := padCountsToFull(boundaries, counts)
999-
// Now validate using fullCounts
1000-
sortedSamples := make([]float64, len(samples))
1001-
copy(sortedSamples, samples)
1002-
sort.Float64s(sortedSamples)
1003-
1004-
actualCounts := make([]int, len(fullCounts))
1005-
sampleIndex := 0
1006-
1007-
for bucketIndex := range fullCounts {
1008-
var upperBound float64
1009-
if bucketIndex == len(boundaries) {
1010-
upperBound = math.Inf(+1)
1011-
} else {
1012-
upperBound = boundaries[bucketIndex]
1013-
}
1014-
1015-
for sampleIndex < len(sortedSamples) && sortedSamples[sampleIndex] <= upperBound {
1016-
actualCounts[bucketIndex]++
1017-
sampleIndex++
1018-
}
1019-
}
1020-
1021-
// Verify each bucket
1022-
for i, want := range fullCounts {
1023-
if actualCounts[i] != int(want) {
1024-
var lowerStr, upperStr string
1025-
if i == 0 {
1026-
lowerStr = "-Inf"
1027-
} else {
1028-
lowerStr = fmt.Sprintf("%.3f", boundaries[i-1])
1029-
}
1030-
if i == len(boundaries) {
1031-
upperStr = "+Inf"
1032-
} else {
1033-
upperStr = fmt.Sprintf("%.3f", boundaries[i])
1034-
}
1035-
t.Errorf("bucket[%d] (%s, %s]: want %d, got %d",
1036-
i, lowerStr, upperStr, int(want), actualCounts[i])
1037-
}
1038-
}
1039-
1040-
totalExpected := 0
1041-
for _, c := range fullCounts {
1042-
totalExpected += int(c)
1043-
}
1044-
if len(samples) != totalExpected {
1045-
t.Errorf("total samples: want %d, got %d", totalExpected, len(samples))
1046-
}
1047-
}
1048-
1049-
func TestGenerateSamplesFromBuckets(t *testing.T) {
1050-
tests := []struct {
1051-
name string
1052-
boundaries []float64
1053-
counts []float64
1054-
expectedSamples int
1055-
}{
1056-
{
1057-
name: "short counts with non-zero in middle",
1058-
boundaries: []float64{1, 2, 5, 10, 20, 50, 100, 200, 500, 1000},
1059-
counts: []float64{0, 0, 0, 5, 0, 5},
1060-
expectedSamples: 10,
1061-
},
1062-
{
1063-
name: "empty boundaries → 1 bucket",
1064-
boundaries: []float64{},
1065-
counts: []float64{7},
1066-
expectedSamples: 7,
1067-
},
1068-
{
1069-
name: "single boundary → 2 buckets, short counts",
1070-
boundaries: []float64{10.0},
1071-
counts: []float64{3},
1072-
expectedSamples: 3,
1073-
},
1074-
{
1075-
name: "full counts provided",
1076-
boundaries: []float64{1, 2, 5},
1077-
counts: []float64{1, 0, 2, 1},
1078-
expectedSamples: 4,
1079-
},
1080-
{
1081-
name: "all zeros (short)",
1082-
boundaries: []float64{1, 2, 5},
1083-
counts: []float64{},
1084-
expectedSamples: 0,
1085-
},
1086-
}
1087-
for _, tt := range tests {
1088-
t.Run(tt.name, func(t *testing.T) {
1089-
samples := generateSamplesFromBuckets(tt.boundaries, tt.counts)
1090-
if len(samples) != tt.expectedSamples {
1091-
t.Fatalf("sample count mismatch: want %d, got %d", tt.expectedSamples, len(samples))
1092-
}
1093-
validateSamplesInBuckets(t, samples, tt.boundaries, tt.counts)
1094-
})
1095-
}
1096-
}

0 commit comments

Comments
 (0)