Skip to content

Commit 6705df6

Browse files
committed
fix conflict
Signed-off-by: googs1025 <[email protected]>
1 parent 8c622c6 commit 6705df6

File tree

6 files changed

+143
-253
lines changed

6 files changed

+143
-253
lines changed

manifests/config_with_fake.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@ fake-metrics:
1313
kv-cache-usage: 0.3
1414
request-success-total:
1515
stop: 20
16-
length: 0
17-
tool_calls: 0
18-
remote_decode: 0
16+
1917
request-prompt-tokens: [ 10, 20, 30, 15 ]
2018
request-generation-tokens: [ 50, 60, 40 ]
2119
request-params-max-tokens: [ 128, 256, 512 ]

pkg/common/config.go

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,27 @@ const (
4343
FailureTypeServerError = "server_error"
4444
FailureTypeInvalidRequest = "invalid_request"
4545
FailureTypeModelNotFound = "model_not_found"
46+
47+
StopFinishReason = "stop"
48+
LengthFinishReason = "length"
49+
ToolsFinishReason = "tool_calls"
50+
RemoteDecodeFinishReason = "remote_decode"
51+
)
52+
53+
var (
54+
// requiredFinishReasons lists the finish reasons that validate() guarantees are
// present in FakeMetrics.RequestSuccessTotal; any that are missing are added with
// a count of 0. It is also printed as the list of valid reasons in error messages.
requiredFinishReasons = []string{
55+
StopFinishReason,
56+
LengthFinishReason,
57+
ToolsFinishReason,
58+
RemoteDecodeFinishReason,
59+
}
60+
61+
// validFinishReasons is the set of finish reasons accepted as keys of
// request-success-total; validate() rejects any key that is not in this set.
validFinishReasons = map[string]struct{}{
62+
StopFinishReason: {},
63+
LengthFinishReason: {},
64+
ToolsFinishReason: {},
65+
RemoteDecodeFinishReason: {},
66+
}
4667
)
4768

4869
type Configuration struct {
@@ -225,9 +246,9 @@ type Metrics struct {
225246
TPOTBucketValues []int `yaml:"tpot-buckets-values" json:"tpot-buckets-values"`
226247
// RequestPromptTokens RequestGenerationTokens RequestParamsMaxTokens Histogram fake-observation arrays for init.
227248
// Each value will be passed to Observe() once at start-up.
228-
RequestPromptTokens []float64 `yaml:"request-prompt-tokens" json:"request-prompt-tokens"` // prompt-length samples
229-
RequestGenerationTokens []float64 `yaml:"request-generation-tokens" json:"request-generation-tokens"` // generation-length samples
230-
RequestParamsMaxTokens []float64 `yaml:"request-params-max-tokens" json:"request-params-max-tokens"` // max_tokens parameter samples
249+
RequestPromptTokens []int `yaml:"request-prompt-tokens" json:"request-prompt-tokens"` // prompt-length samples
250+
RequestGenerationTokens []int `yaml:"request-generation-tokens" json:"request-generation-tokens"` // generation-length samples
251+
RequestParamsMaxTokens []int `yaml:"request-params-max-tokens" json:"request-params-max-tokens"` // max_tokens parameter samples
231252
// RequestSuccessTotal is the number of successful requests, key: finish-reason (stop, length, etc.).
232253
RequestSuccessTotal map[string]int64 `yaml:"request-success-total" json:"request-success-total"`
233254
}
@@ -508,16 +529,40 @@ func (c *Configuration) validate() error {
508529
if c.FakeMetrics.KVCacheUsagePercentage < 0 || c.FakeMetrics.KVCacheUsagePercentage > 1 {
509530
return errors.New("fake metrics KV cache usage must be between 0 and 1")
510531
}
532+
// Fake TTFT/TPOT bucket counts: at most len(boundaries)+1 entries, all non-negative.
if c.FakeMetrics.TTFTBucketValues != nil {
533+
if len(c.FakeMetrics.TTFTBucketValues) > len(TTFTBucketsBoundaries)+1 {
534+
return errors.New("fake time-to-first-token array is too long")
535+
}
536+
for _, v := range c.FakeMetrics.TTFTBucketValues { // v must be the element, not the index
537+
if v < 0 {
538+
return errors.New("time-to-first-token fake metrics should contain only non-negative values")
539+
}
540+
}
541+
}
542+
if c.FakeMetrics.TPOTBucketValues != nil {
543+
if len(c.FakeMetrics.TPOTBucketValues) > len(TPOTBucketsBoundaries)+1 {
544+
return errors.New("fake time-per-output-token array is too long")
545+
}
546+
for _, v := range c.FakeMetrics.TPOTBucketValues { // v must be the element, not the index
547+
if v < 0 {
548+
return errors.New("time-per-output-token fake metrics should contain only non-negative values")
549+
}
550+
}
551+
}
511552
if c.FakeMetrics.RequestSuccessTotal != nil {
512553
for reason, count := range c.FakeMetrics.RequestSuccessTotal {
513554
if count < 0 {
514-
return fmt.Errorf("fake metrics request-success-total.%s cannot be negative, got %d", reason, count)
555+
return fmt.Errorf("fake metrics request-success-total.%s "+
556+
"cannot be negative, got %d", reason, count)
557+
}
558+
if _, ok := validFinishReasons[reason]; !ok {
559+
return fmt.Errorf("invalid finish reason in request-success-total: "+
560+
"%s (valid reasons: %v)", reason, requiredFinishReasons)
515561
}
516562
}
517-
requiredReasons := []string{StopFinishReason, LengthFinishReason, ToolsFinishReason, RemoteDecodeFinishReason}
518-
for _, reason := range requiredReasons {
563+
for _, reason := range requiredFinishReasons {
519564
if _, exists := c.FakeMetrics.RequestSuccessTotal[reason]; !exists {
520-
return fmt.Errorf("missing required finish reason in request-success-total: %s", reason)
565+
c.FakeMetrics.RequestSuccessTotal[reason] = 0
521566
}
522567
}
523568
}
@@ -541,9 +586,27 @@ func (c *Configuration) validate() error {
541586
if c.DPSize < 1 || c.DPSize > 8 {
542587
return errors.New("data parallel size must be between 1 and 8")
543588
}
589+
590+
if (c.SSLCertFile == "") != (c.SSLKeyFile == "") {
591+
return errors.New("both ssl-certfile and ssl-keyfile must be provided together")
592+
}
593+
594+
if c.SelfSignedCerts && (c.SSLCertFile != "" || c.SSLKeyFile != "") {
595+
return errors.New("cannot use both self-signed-certs and explicit ssl-certfile/ssl-keyfile")
596+
}
597+
598+
if c.DatasetPath == "" && c.DatasetURL != "" {
599+
return errors.New("dataset-path is required when dataset-url is set")
600+
}
601+
544602
return nil
545603
}
546604

605+
// SSLEnabled reports whether SSL is enabled: it returns true when both SSLCertFile
// and SSLKeyFile are non-empty, or when SelfSignedCerts is set.
606+
func (c *Configuration) SSLEnabled() bool {
607+
return (c.SSLCertFile != "" && c.SSLKeyFile != "") || c.SelfSignedCerts
608+
}
609+
547610
func (c *Configuration) Copy() (*Configuration, error) {
548611
var dst Configuration
549612
data, err := json.Marshal(c)
@@ -628,6 +691,10 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
628691
f.Var(&dummyFailureTypes, "failure-types", failureTypesDescription)
629692
f.Lookup("failure-types").NoOptDefVal = dummy
630693

694+
f.StringVar(&config.SSLCertFile, "ssl-certfile", config.SSLCertFile, "Path to SSL certificate file for HTTPS (optional)")
695+
f.StringVar(&config.SSLKeyFile, "ssl-keyfile", config.SSLKeyFile, "Path to SSL private key file for HTTPS (optional)")
696+
f.BoolVar(&config.SelfSignedCerts, "self-signed-certs", config.SelfSignedCerts, "Enable automatic generation of self-signed certificates for HTTPS")
697+
631698
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
632699
var dummyString string
633700
f.StringVar(&dummyString, "config", "", "The path to a yaml configuration file. The command line values overwrite the configuration file values")

pkg/common/config_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,11 @@ var _ = Describe("Simulator configuration", func() {
203203
"{\"running\":\"lora1,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567}",
204204
"{\"running\":\"lora1,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}",
205205
},
206-
TTFTBucketValues: []int{10, 20, 30, 10},
207-
TPOTBucketValues: []int{0, 0, 10, 20, 30},
208-
RequestPromptTokens: []float64{10, 20, 30, 15},
209-
RequestGenerationTokens: []float64{50, 60, 40},
210-
RequestParamsMaxTokens: []float64{128, 256, 512},
206+
TTFTBucketValues: []int{10, 20, 30, 10},
207+
TPOTBucketValues: []int{0, 0, 10, 20, 30},
208+
RequestPromptTokens: []int{10, 20, 30, 15},
209+
RequestGenerationTokens: []int{50, 60, 40},
210+
RequestParamsMaxTokens: []int{128, 256, 512},
211211
RequestSuccessTotal: map[string]int64{
212212
StopFinishReason: 20,
213213
LengthFinishReason: 0,

pkg/llm-d-inference-sim/metrics.go

Lines changed: 8 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -194,42 +194,27 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
194194
nRunningReqs = float64(s.config.FakeMetrics.RunningRequests)
195195
nWaitingReqs = float64(s.config.FakeMetrics.WaitingRequests)
196196
kvCacheUsage = float64(s.config.FakeMetrics.KVCacheUsagePercentage)
197-
<<<<<<< HEAD
198-
199197
if s.config.FakeMetrics.TTFTBucketValues != nil {
200198
s.initFakeHistogram(s.ttft, common.TTFTBucketsBoundaries, s.config.FakeMetrics.TTFTBucketValues)
201199
}
202200

203201
if s.config.FakeMetrics.TPOTBucketValues != nil {
204202
s.initFakeHistogram(s.tpot, common.TPOTBucketsBoundaries, s.config.FakeMetrics.TPOTBucketValues)
205203
}
206-
for _, requestPromptToken := range s.config.FakeMetrics.RequestPromptTokens {
207-
s.requestPromptTokens.WithLabelValues(modelName).Observe(requestPromptToken)
204+
buckets := build125Buckets(s.config.MaxModelLen)
205+
if s.config.FakeMetrics.RequestPromptTokens != nil {
206+
s.initFakeHistogram(s.requestPromptTokens, buckets, s.config.FakeMetrics.RequestPromptTokens)
208207
}
209-
for _, requestGenerationToken := range s.config.FakeMetrics.RequestGenerationTokens {
210-
s.requestGenerationTokens.WithLabelValues(modelName).Observe(requestGenerationToken)
208+
// Each fake sample array seeds the histogram of the same name.
if s.config.FakeMetrics.RequestGenerationTokens != nil {
209+
s.initFakeHistogram(s.requestGenerationTokens, buckets, s.config.FakeMetrics.RequestGenerationTokens)
211210
}
212-
for _, requestParamsMaxToken := range s.config.FakeMetrics.RequestParamsMaxTokens {
213-
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(requestParamsMaxToken)
211+
if s.config.FakeMetrics.RequestParamsMaxTokens != nil {
212+
s.initFakeHistogram(s.requestParamsMaxTokens, buckets, s.config.FakeMetrics.RequestParamsMaxTokens)
214213
}
215-
=======
216-
>>>>>>> 02bc5c3 (fix review comment)
214+
217215
for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
218216
s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
219217
}
220-
buckets := build125Buckets(s.config.MaxModelLen)
221-
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestPromptTokens) {
222-
s.requestPromptTokens.WithLabelValues(modelName).Observe(sample)
223-
}
224-
225-
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestGenerationTokens) {
226-
s.requestGenerationTokens.WithLabelValues(modelName).Observe(sample)
227-
}
228-
229-
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestParamsMaxTokens) {
230-
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(sample)
231-
}
232-
233218
}
234219

235220
s.runningRequests.WithLabelValues(modelName).Set(nRunningReqs)
@@ -554,55 +539,3 @@ func build125Buckets(maxValue int) []float64 {
554539
}
555540
return buckets
556541
}
557-
558-
// padCountsToFull pads the counts slice to length len(boundaries)+1 by appending zeros.
559-
func padCountsToFull(boundaries []float64, counts []float64) []float64 {
560-
fullLen := len(boundaries) + 1
561-
if len(counts) > fullLen {
562-
return counts[:fullLen] // just return limit len
563-
}
564-
padded := make([]float64, fullLen)
565-
copy(padded, counts)
566-
// rest are zero by default
567-
return padded
568-
}
569-
570-
func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
571-
fullCounts := padCountsToFull(boundaries, counts)
572-
var samples []float64
573-
574-
for i, count := range fullCounts {
575-
if count == 0 {
576-
continue
577-
}
578-
579-
var val float64
580-
if len(boundaries) == 0 {
581-
// No boundaries → one bucket (-Inf, +Inf)
582-
val = 1.0
583-
} else if i == 0 {
584-
// Bucket: (-Inf, boundaries[0]]
585-
val = boundaries[0] - 1.0
586-
if val <= 0 { // avoid non-positive if boundary is small
587-
val = boundaries[0] * 0.5
588-
}
589-
} else if i < len(boundaries) {
590-
// Bucket: (boundaries[i-1], boundaries[i]]
591-
lower := boundaries[i-1]
592-
upper := boundaries[i]
593-
val = (lower + upper) / 2.0
594-
// Ensure it's strictly > lower and <= upper
595-
if val <= lower {
596-
val = upper - (upper-lower)*0.1
597-
}
598-
} else {
599-
// Last bucket: (boundaries[len-1], +Inf)
600-
val = boundaries[len(boundaries)-1] + 1.0
601-
}
602-
603-
for j := 0; j < int(count); j++ {
604-
samples = append(samples, val)
605-
}
606-
}
607-
return samples
608-
}

0 commit comments

Comments
 (0)