fix conflict

googs1025 · googs1025 · commit 3449fa9b67f3 · 2025-10-20T14:52:44.000+08:00
Signed-off-by: googs1025 <googs1025@gmail.com>
diff --git a/pkg/common/config.go b/pkg/common/config.go
@@ -43,6 +43,11 @@ const (
 	FailureTypeServerError    = "server_error"
 	FailureTypeInvalidRequest = "invalid_request"
 	FailureTypeModelNotFound  = "model_not_found"
+
+	StopFinishReason         = "stop"
+	LengthFinishReason       = "length"
+	ToolsFinishReason        = "tool_calls"
+	RemoteDecodeFinishReason = "remote_decode"
 )
 
 type Configuration struct {
@@ -225,9 +230,9 @@ type Metrics struct {
 	TPOTBucketValues []int `yaml:"tpot-buckets-values" json:"tpot-buckets-values"`
 	// RequestPromptTokens RequestGenerationTokens RequestParamsMaxTokens Histogram fake-observation arrays for init.
 	// Each value will be passed to Observe() once at start-up.
-	RequestPromptTokens     []float64 `yaml:"request-prompt-tokens" json:"request-prompt-tokens"`         // prompt-length samples
-	RequestGenerationTokens []float64 `yaml:"request-generation-tokens" json:"request-generation-tokens"` // generation-length samples
-	RequestParamsMaxTokens  []float64 `yaml:"request-params-max-tokens" json:"request-params-max-tokens"` // max_tokens parameter samples
+	RequestPromptTokens     []int `yaml:"request-prompt-tokens" json:"request-prompt-tokens"`         // prompt-length samples
+	RequestGenerationTokens []int `yaml:"request-generation-tokens" json:"request-generation-tokens"` // generation-length samples
+	RequestParamsMaxTokens  []int `yaml:"request-params-max-tokens" json:"request-params-max-tokens"` // max_tokens parameter samples
 	// RequestSuccessTotal is the number of successful requests, key: finish-reason (stop, length, etc.).
 	RequestSuccessTotal map[string]int64 `yaml:"request-success-total" json:"request-success-total"`
 }
@@ -508,6 +513,26 @@ func (c *Configuration) validate() error {
 		if c.FakeMetrics.KVCacheUsagePercentage < 0 || c.FakeMetrics.KVCacheUsagePercentage > 1 {
 			return errors.New("fake metrics KV cache usage must be between 0 ans 1")
 		}
+		if c.FakeMetrics.TTFTBucketValues != nil {
+			if len(c.FakeMetrics.TTFTBucketValues) > len(TTFTBucketsBoundaries)+1 {
+				return errors.New("fake time-to-first-token array is too long")
+			}
+			for _, v := range c.FakeMetrics.TTFTBucketValues {
+				if v < 0 {
+					return errors.New("time-to-first-token fake metrics should contain only non-negative values")
+				}
+			}
+		}
+		if c.FakeMetrics.TPOTBucketValues != nil {
+			if len(c.FakeMetrics.TPOTBucketValues) > len(TPOTBucketsBoundaries)+1 {
+				return errors.New("fake time-per-output-token array is too long")
+			}
+			for _, v := range c.FakeMetrics.TPOTBucketValues {
+				if v < 0 {
+					return errors.New("time-per-output-token fake metrics should contain only non-negative values")
+				}
+			}
+		}
 		if c.FakeMetrics.RequestSuccessTotal != nil {
 			for reason, count := range c.FakeMetrics.RequestSuccessTotal {
 				if count < 0 {
@@ -541,9 +566,27 @@ func (c *Configuration) validate() error {
 	if c.DPSize < 1 || c.DPSize > 8 {
 		return errors.New("data parallel size must be between 1 ans 8")
 	}
+
+	if (c.SSLCertFile == "") != (c.SSLKeyFile == "") {
+		return errors.New("both ssl-certfile and ssl-keyfile must be provided together")
+	}
+
+	if c.SelfSignedCerts && (c.SSLCertFile != "" || c.SSLKeyFile != "") {
+		return errors.New("cannot use both self-signed-certs and explicit ssl-certfile/ssl-keyfile")
+	}
+
+	if c.DatasetPath == "" && c.DatasetURL != "" {
+		return errors.New("dataset-path is required when dataset-url is set")
+	}
+
 	return nil
 }
 
+// SSLEnabled returns true if SSL is enabled either via certificate files or self-signed certificates
+func (c *Configuration) SSLEnabled() bool {
+	return (c.SSLCertFile != "" && c.SSLKeyFile != "") || c.SelfSignedCerts
+}
+
 func (c *Configuration) Copy() (*Configuration, error) {
 	var dst Configuration
 	data, err := json.Marshal(c)
@@ -628,6 +671,10 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
 	f.Var(&dummyFailureTypes, "failure-types", failureTypesDescription)
 	f.Lookup("failure-types").NoOptDefVal = dummy
 
+	f.StringVar(&config.SSLCertFile, "ssl-certfile", config.SSLCertFile, "Path to SSL certificate file for HTTPS (optional)")
+	f.StringVar(&config.SSLKeyFile, "ssl-keyfile", config.SSLKeyFile, "Path to SSL private key file for HTTPS (optional)")
+	f.BoolVar(&config.SelfSignedCerts, "self-signed-certs", config.SelfSignedCerts, "Enable automatic generation of self-signed certificates for HTTPS")
+
 	// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
 	var dummyString string
 	f.StringVar(&dummyString, "config", "", "The path to a yaml configuration file. The command line values overwrite the configuration file values")
diff --git a/pkg/common/config_test.go b/pkg/common/config_test.go
@@ -203,11 +203,11 @@ var _ = Describe("Simulator configuration", func() {
 			"{\"running\":\"lora1,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567}",
 			"{\"running\":\"lora1,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}",
 		},
-		TTFTBucketValues: []int{10, 20, 30, 10},
-		TPOTBucketValues: []int{0, 0, 10, 20, 30},
-		RequestPromptTokens:     []float64{10, 20, 30, 15},
-		RequestGenerationTokens: []float64{50, 60, 40},
-		RequestParamsMaxTokens:  []float64{128, 256, 512},
+		TTFTBucketValues:        []int{10, 20, 30, 10},
+		TPOTBucketValues:        []int{0, 0, 10, 20, 30},
+		RequestPromptTokens:     []int{10, 20, 30, 15},
+		RequestGenerationTokens: []int{50, 60, 40},
+		RequestParamsMaxTokens:  []int{128, 256, 512},
 		RequestSuccessTotal: map[string]int64{
 			StopFinishReason:         20,
 			LengthFinishReason:       0,
diff --git a/pkg/llm-d-inference-sim/metrics.go b/pkg/llm-d-inference-sim/metrics.go
@@ -194,42 +194,27 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
 		nRunningReqs = float64(s.config.FakeMetrics.RunningRequests)
 		nWaitingReqs = float64(s.config.FakeMetrics.WaitingRequests)
 		kvCacheUsage = float64(s.config.FakeMetrics.KVCacheUsagePercentage)
-<<<<<<< HEAD
-
 		if s.config.FakeMetrics.TTFTBucketValues != nil {
 			s.initFakeHistogram(s.ttft, common.TTFTBucketsBoundaries, s.config.FakeMetrics.TTFTBucketValues)
 		}
 
 		if s.config.FakeMetrics.TPOTBucketValues != nil {
 			s.initFakeHistogram(s.tpot, common.TPOTBucketsBoundaries, s.config.FakeMetrics.TPOTBucketValues)
 		}
-		for _, requestPromptToken := range s.config.FakeMetrics.RequestPromptTokens {
-			s.requestPromptTokens.WithLabelValues(modelName).Observe(requestPromptToken)
+		buckets := build125Buckets(s.config.MaxModelLen)
+		if s.config.FakeMetrics.RequestPromptTokens != nil {
+			s.initFakeHistogram(s.requestPromptTokens, buckets, s.config.FakeMetrics.RequestPromptTokens)
 		}
-		for _, requestGenerationToken := range s.config.FakeMetrics.RequestGenerationTokens {
-			s.requestGenerationTokens.WithLabelValues(modelName).Observe(requestGenerationToken)
+		if s.config.FakeMetrics.RequestGenerationTokens != nil {
+			s.initFakeHistogram(s.requestGenerationTokens, buckets, s.config.FakeMetrics.RequestGenerationTokens)
 		}
-		for _, requestParamsMaxToken := range s.config.FakeMetrics.RequestParamsMaxTokens {
-			s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(requestParamsMaxToken)
+		if s.config.FakeMetrics.RequestParamsMaxTokens != nil {
+			s.initFakeHistogram(s.requestParamsMaxTokens, buckets, s.config.FakeMetrics.RequestParamsMaxTokens)
 		}
-=======
->>>>>>> 02bc5c3 (fix review comment)
+
 		for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
 			s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
 		}
-		buckets := build125Buckets(s.config.MaxModelLen)
-		for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestPromptTokens) {
-			s.requestPromptTokens.WithLabelValues(modelName).Observe(sample)
-		}
-
-		for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestGenerationTokens) {
-			s.requestGenerationTokens.WithLabelValues(modelName).Observe(sample)
-		}
-
-		for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestParamsMaxTokens) {
-			s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(sample)
-		}
-
 	}
 
 	s.runningRequests.WithLabelValues(modelName).Set(nRunningReqs)
@@ -554,55 +539,3 @@ func build125Buckets(maxValue int) []float64 {
 	}
 	return buckets
 }
-
-// padCountsToFull pads the counts slice to length len(boundaries)+1 by appending zeros.
-func padCountsToFull(boundaries []float64, counts []float64) []float64 {
-	fullLen := len(boundaries) + 1
-	if len(counts) > fullLen {
-		return counts[:fullLen] // just return limit len
-	}
-	padded := make([]float64, fullLen)
-	copy(padded, counts)
-	// rest are zero by default
-	return padded
-}
-
-func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
-	fullCounts := padCountsToFull(boundaries, counts)
-	var samples []float64
-
-	for i, count := range fullCounts {
-		if count == 0 {
-			continue
-		}
-
-		var val float64
-		if len(boundaries) == 0 {
-			// No boundaries → one bucket (-Inf, +Inf)
-			val = 1.0
-		} else if i == 0 {
-			// Bucket: (-Inf, boundaries[0]]
-			val = boundaries[0] - 1.0
-			if val <= 0 { // avoid non-positive if boundary is small
-				val = boundaries[0] * 0.5
-			}
-		} else if i < len(boundaries) {
-			// Bucket: (boundaries[i-1], boundaries[i]]
-			lower := boundaries[i-1]
-			upper := boundaries[i]
-			val = (lower + upper) / 2.0
-			// Ensure it's strictly > lower and <= upper
-			if val <= lower {
-				val = upper - (upper-lower)*0.1
-			}
-		} else {
-			// Last bucket: (boundaries[len-1], +Inf)
-			val = boundaries[len(boundaries)-1] + 1.0
-		}
-
-		for j := 0; j < int(count); j++ {
-			samples = append(samples, val)
-		}
-	}
-	return samples
-}
diff --git a/pkg/llm-d-inference-sim/metrics_test.go b/pkg/llm-d-inference-sim/metrics_test.go
@@ -19,9 +19,7 @@ package llmdinferencesim
 import (
 	"context"
 	"errors"
-	"fmt"
 	"io"
-	"math"
 	"net/http"
 	"os"
 	"reflect"
@@ -662,9 +660,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 			ctx := context.TODO()
 			args := []string{"cmd", "--model", model, "--mode", common.ModeRandom,
 				"--fake-metrics",
-<<<<<<< HEAD
-				"{\"running-requests\":10,\"waiting-requests\":30,\"kv-cache-usage\":0.4,\"loras\":[{\"running\":\"lora4,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567},{\"running\":\"lora4,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}],\"ttft-buckets-values\":[1, 2, 3],\"tpot-buckets-values\": [0, 0, 1, 2, 3]}",
-=======
 				`{` +
 					`"running-requests":10,` +
 					`"waiting-requests":30,` +
@@ -678,6 +673,8 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 					`"request-prompt-tokens":[10,20,30],` +
 					`"request-generation-tokens":[10,20,30],` +
 					`"request-params-max-tokens":[10,20,30],` +
+					`"ttft-buckets-values":[1,2,3],` +
+					`"tpot-buckets-values":[0,0,1,2,3],` +
 					`"loras":[` +
 					`{` +
 					`"running":"lora4,lora2",` +
@@ -691,7 +688,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 					`}` +
 					`]` +
 					`}`,
->>>>>>> 02bc5c3 (fix review comment)
 			}
 
 			client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
@@ -710,7 +706,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 			Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora2\",waiting_lora_adapters=\"lora3\"} 1.257894567e+09"))
 			Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora3\",waiting_lora_adapters=\"\"} 1.257894569e+09"))
 
-<<<<<<< HEAD
 			Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"0.001\"} 1"))
 			Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"0.005\"} 3"))
 			Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"0.01\"} 6"))
@@ -767,7 +762,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 			Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"640\"} 0"))
 			Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"2560\"} 0"))
 			Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"+Inf\"} 1"))
-=======
+
 			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10`))
 			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30`))
 			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60`))
@@ -808,8 +803,6 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0`))
 			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20`))
 			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0`))
-
->>>>>>> 02bc5c3 (fix review comment)
 		})
 	})
 })
@@ -993,104 +986,3 @@ func TestBuild125Buckets(t *testing.T) {
 		})
 	}
 }
-
-func validateSamplesInBuckets(t *testing.T, samples []float64, boundaries []float64, counts []float64) {
-	fullCounts := padCountsToFull(boundaries, counts)
-	// Now validate using fullCounts
-	sortedSamples := make([]float64, len(samples))
-	copy(sortedSamples, samples)
-	sort.Float64s(sortedSamples)
-
-	actualCounts := make([]int, len(fullCounts))
-	sampleIndex := 0
-
-	for bucketIndex := range fullCounts {
-		var upperBound float64
-		if bucketIndex == len(boundaries) {
-			upperBound = math.Inf(+1)
-		} else {
-			upperBound = boundaries[bucketIndex]
-		}
-
-		for sampleIndex < len(sortedSamples) && sortedSamples[sampleIndex] <= upperBound {
-			actualCounts[bucketIndex]++
-			sampleIndex++
-		}
-	}
-
-	// Verify each bucket
-	for i, want := range fullCounts {
-		if actualCounts[i] != int(want) {
-			var lowerStr, upperStr string
-			if i == 0 {
-				lowerStr = "-Inf"
-			} else {
-				lowerStr = fmt.Sprintf("%.3f", boundaries[i-1])
-			}
-			if i == len(boundaries) {
-				upperStr = "+Inf"
-			} else {
-				upperStr = fmt.Sprintf("%.3f", boundaries[i])
-			}
-			t.Errorf("bucket[%d] (%s, %s]: want %d, got %d",
-				i, lowerStr, upperStr, int(want), actualCounts[i])
-		}
-	}
-
-	totalExpected := 0
-	for _, c := range fullCounts {
-		totalExpected += int(c)
-	}
-	if len(samples) != totalExpected {
-		t.Errorf("total samples: want %d, got %d", totalExpected, len(samples))
-	}
-}
-
-func TestGenerateSamplesFromBuckets(t *testing.T) {
-	tests := []struct {
-		name            string
-		boundaries      []float64
-		counts          []float64
-		expectedSamples int
-	}{
-		{
-			name:            "short counts with non-zero in middle",
-			boundaries:      []float64{1, 2, 5, 10, 20, 50, 100, 200, 500, 1000},
-			counts:          []float64{0, 0, 0, 5, 0, 5},
-			expectedSamples: 10,
-		},
-		{
-			name:            "empty boundaries → 1 bucket",
-			boundaries:      []float64{},
-			counts:          []float64{7},
-			expectedSamples: 7,
-		},
-		{
-			name:            "single boundary → 2 buckets, short counts",
-			boundaries:      []float64{10.0},
-			counts:          []float64{3},
-			expectedSamples: 3,
-		},
-		{
-			name:            "full counts provided",
-			boundaries:      []float64{1, 2, 5},
-			counts:          []float64{1, 0, 2, 1},
-			expectedSamples: 4,
-		},
-		{
-			name:            "all zeros (short)",
-			boundaries:      []float64{1, 2, 5},
-			counts:          []float64{},
-			expectedSamples: 0,
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			samples := generateSamplesFromBuckets(tt.boundaries, tt.counts)
-			if len(samples) != tt.expectedSamples {
-				t.Fatalf("sample count mismatch: want %d, got %d", tt.expectedSamples, len(samples))
-			}
-			validateSamplesInBuckets(t, samples, tt.boundaries, tt.counts)
-		})
-	}
-}