From 6a8116b362bf1fba15d2b9d818b6c5fa0cdf8582 Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Mon, 25 Aug 2025 13:54:29 +0300 Subject: [PATCH 1/6] when request contains max tokens, calculate length on the response based on a histogram - intial implementation Signed-off-by: Maya Barnea --- pkg/common/utils.go | 59 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 2cb4ad66..6c42b52d 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -39,6 +39,9 @@ const ( RemoteDecodeFinishReason = "remote_decode" ) +var randomValuesBuckets = []float64{0.2, 0.3, 0.2, 0.05, 0.1, 0.15} +var cumulativeBuckets []float64 + // list of responses to use in random mode for comepltion requests var chatCompletionFakeResponses = []string{ `Testing@, #testing 1$ ,2%,3^, [4&*5], 6~, 7-_ + (8 : 9) / \ < > .`, @@ -54,6 +57,16 @@ var chatCompletionFakeResponses = []string{ `Give a man a fish and you feed him for a day; teach a man to fish and you feed him for a lifetime`, } +func init() { + cumulativeBuckets = make([]float64, len(randomValuesBuckets)) + sum := 0.0 + + for i, val := range randomValuesBuckets { + sum += val + cumulativeBuckets[i] = sum + } +} + // returns the max tokens or error if incorrect func GetMaxTokens(maxCompletionTokens *int64, maxTokens *int64) (*int64, error) { var typeToken string @@ -154,7 +167,8 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { if maxCompletionTokens == nil { numOfTokens = GetRandomResponseLen() } else { - numOfTokens = int(*maxCompletionTokens) + // max tokens is defined - generate real length of the response based on it + numOfTokens = getResponseLengthByHistogram(int(*maxCompletionTokens)) finishReason = GetRandomFinishReason() } @@ -162,6 +176,49 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { return text, finishReason } +// length is distributed to 6 buckets: +// 15% - max tokens 
+// other values are divided to 5 additional buckets with the following probabilities starting from the bucket for one token +// 20%, 30%, 20%, 5%, 10% +func getResponseLengthByHistogram(maxTokens int) int { + if maxTokens <= 1 { + return maxTokens + } + if maxTokens <= len(cumulativeBuckets) { + res := RandomInt(1, maxTokens) + return res + } + + r := RandomFloat(0, 1) + + // probability to return maxToken + if r > cumulativeBuckets[len(cumulativeBuckets)-2] { + return maxTokens + } + + // determine which bucket to use + bucketIndex := 0 + for i, c := range cumulativeBuckets { + if r <= c { + bucketIndex = i + break + } + } + + // compute bucket ranges + nonMaxCount := maxTokens - 1 + bucketSize := float64(nonMaxCount) / 5.0 + + start := int(bucketSize*float64(bucketIndex)) + 1 + end := int(bucketSize * float64(bucketIndex+1)) + if end >= maxTokens { + end = maxTokens - 1 + } + + // Pick uniformly within the bucket’s range + return RandomInt(start, end) +} + // GetResponseText returns response text, from a given text // considering max completion tokens if it is not nil, and a finish reason (stop or length) func GetResponseText(maxCompletionTokens *int64, text string) (string, string) { From e8b0cbb29ea4c76b42bcfa8788a3d8c621776d0b Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Mon, 25 Aug 2025 15:44:11 +0300 Subject: [PATCH 2/6] - in case max_tokens is defined in the request, finish reason will not be randomly selected, instead it will be length when the response length is maxTokens, otherwise - stop - fix utils_tests Signed-off-by: Maya Barnea --- pkg/common/utils.go | 8 ++++++-- pkg/common/utils_test.go | 20 ++++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 6c42b52d..6c5b21db 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -167,9 +167,13 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { if maxCompletionTokens == nil { numOfTokens = 
GetRandomResponseLen() } else { + maxTokens := int(*maxCompletionTokens) // max tokens is defined - generate real length of the response based on it - numOfTokens = getResponseLengthByHistogram(int(*maxCompletionTokens)) - finishReason = GetRandomFinishReason() + numOfTokens = getResponseLengthByHistogram(maxTokens) + if numOfTokens == maxTokens { + // if response should be create with maximum number of tokens - finish reason will be 'length' + finishReason = LengthFinishReason + } } text := GetRandomText(numOfTokens) diff --git a/pkg/common/utils_test.go b/pkg/common/utils_test.go index dd6cadab..b8f3285e 100644 --- a/pkg/common/utils_test.go +++ b/pkg/common/utils_test.go @@ -38,16 +38,28 @@ var _ = Describe("Utils", Ordered, func() { It("should return short text", func() { maxCompletionTokens := int64(2) text, finishReason := GetRandomResponseText(&maxCompletionTokens) - Expect(int64(len(Tokenize(text)))).Should(Equal(maxCompletionTokens)) - Expect([]string{StopFinishReason, LengthFinishReason}).Should(ContainElement(finishReason)) + tokensCnt := int64(len(Tokenize(text))) + Expect(tokensCnt).Should(BeNumerically("<=", maxCompletionTokens)) + if tokensCnt == maxCompletionTokens { + Expect(finishReason).To(Equal(LengthFinishReason)) + } else { + Expect(tokensCnt).To(BeNumerically("<", maxCompletionTokens)) + Expect(finishReason).To(Equal(StopFinishReason)) + } }) It("should return long text", func() { // return required number of tokens although it is higher than ResponseLenMax maxCompletionTokens := int64(ResponseLenMax * 5) text, finishReason := GetRandomResponseText(&maxCompletionTokens) - Expect(int64(len(Tokenize(text)))).Should(Equal(maxCompletionTokens)) + tokensCnt := int64(len(Tokenize(text))) + Expect(tokensCnt).Should(BeNumerically("<=", maxCompletionTokens)) Expect(IsValidText(text)).To(BeTrue()) - Expect([]string{StopFinishReason, LengthFinishReason}).Should(ContainElement(finishReason)) + if tokensCnt == maxCompletionTokens { + 
Expect(finishReason).To(Equal(LengthFinishReason)) + } else { + Expect(tokensCnt).To(BeNumerically("<", maxCompletionTokens)) + Expect(finishReason).To(Equal(StopFinishReason)) + } }) }) From 1517f287fca72bc0b7616f23182a63256fd5305f Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Tue, 26 Aug 2025 11:01:07 +0300 Subject: [PATCH 3/6] rename variables + fix problem by PR comment Signed-off-by: Maya Barnea --- pkg/common/utils.go | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 6c5b21db..64be7f4d 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -39,8 +39,8 @@ const ( RemoteDecodeFinishReason = "remote_decode" ) -var randomValuesBuckets = []float64{0.2, 0.3, 0.2, 0.05, 0.1, 0.15} -var cumulativeBuckets []float64 +var respLenBucketsProbabilities = [...]float64{0.2, 0.3, 0.2, 0.05, 0.1, 0.15} +var cumulativeBucketsProbabilities []float64 // list of responses to use in random mode for comepltion requests var chatCompletionFakeResponses = []string{ @@ -58,12 +58,12 @@ var chatCompletionFakeResponses = []string{ } func init() { - cumulativeBuckets = make([]float64, len(randomValuesBuckets)) + cumulativeBucketsProbabilities = make([]float64, len(respLenBucketsProbabilities)) sum := 0.0 - for i, val := range randomValuesBuckets { + for i, val := range respLenBucketsProbabilities { sum += val - cumulativeBuckets[i] = sum + cumulativeBucketsProbabilities[i] = sum } } @@ -180,15 +180,15 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { return text, finishReason } -// length is distributed to 6 buckets: -// 15% - max tokens -// other values are divided to 5 additional buckets with the following probabilities starting from the bucket for one token -// 20%, 30%, 20%, 5%, 10% +// getResponseLengthByHistogram calculates length of the response based on the max tokens value and pre-defined buckets +// response length is distributed according the 
probabilities defined in respLenBucketsProbabilities +// the last element defines probability of reposnse with maxToken tokens +// other values define probabilities for equal sized buckets func getResponseLengthByHistogram(maxTokens int) int { if maxTokens <= 1 { return maxTokens } - if maxTokens <= len(cumulativeBuckets) { + if maxTokens <= len(cumulativeBucketsProbabilities) { res := RandomInt(1, maxTokens) return res } @@ -196,22 +196,21 @@ func getResponseLengthByHistogram(maxTokens int) int { r := RandomFloat(0, 1) // probability to return maxToken - if r > cumulativeBuckets[len(cumulativeBuckets)-2] { + if r > cumulativeBucketsProbabilities[len(cumulativeBucketsProbabilities)-2] { return maxTokens } // determine which bucket to use - bucketIndex := 0 - for i, c := range cumulativeBuckets { + bucketIndex := len(cumulativeBucketsProbabilities) - 1 + for i, c := range cumulativeBucketsProbabilities { if r <= c { bucketIndex = i break } } - // compute bucket ranges - nonMaxCount := maxTokens - 1 - bucketSize := float64(nonMaxCount) / 5.0 + // compute bucket ranges (maxToken is out of scope) + bucketSize := float64(maxTokens-1) / float64(len(cumulativeBucketsProbabilities)-1) start := int(bucketSize*float64(bucketIndex)) + 1 end := int(bucketSize * float64(bucketIndex+1)) From ea7fa7ba740205b0802e5f2c5521788e798c466d Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Wed, 27 Aug 2025 17:42:05 +0300 Subject: [PATCH 4/6] add more explanations to getResponseLengthByHistogram Signed-off-by: Maya Barnea --- pkg/common/utils.go | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 64be7f4d..5c06edbd 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -39,6 +39,7 @@ const ( RemoteDecodeFinishReason = "remote_decode" ) +// this array defines probabilities for buckets used for generation of number of tokens in response var respLenBucketsProbabilities = [...]float64{0.2, 0.3, 0.2, 
0.05, 0.1, 0.15} var cumulativeBucketsProbabilities []float64 @@ -180,14 +181,17 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { return text, finishReason } -// getResponseLengthByHistogram calculates length of the response based on the max tokens value and pre-defined buckets -// response length is distributed according the probabilities defined in respLenBucketsProbabilities -// the last element defines probability of reposnse with maxToken tokens -// other values define probabilities for equal sized buckets +// getResponseLengthByHistogram calculates number of tokens to be returned in a response based on the max tokens value and pre-defined buckets. +// the response length is distributed according the probabilities defined in respLenBucketsProbabilities +// the histogram contains equal sized buckets + the last special bucket with contains only maxTokens value +// the last element of respLenBucketsProbabilities defines probability of reposnse with maxToken tokens +// other values define probabilities for the equal sized buckets +// if maxToken is small (smaller than number of buckets) - the reponse length is randomly selected from the range [1, maxTokens] func getResponseLengthByHistogram(maxTokens int) int { if maxTokens <= 1 { return maxTokens } + // maxTokens is pretty small - no need to use the histogram of probabilities, just select a random value in the range [1, maxTokens] if maxTokens <= len(cumulativeBucketsProbabilities) { res := RandomInt(1, maxTokens) return res @@ -195,12 +199,13 @@ func getResponseLengthByHistogram(maxTokens int) int { r := RandomFloat(0, 1) - // probability to return maxToken + // check if r is in the last bucket - the maxToken should be returned if r > cumulativeBucketsProbabilities[len(cumulativeBucketsProbabilities)-2] { return maxTokens } - // determine which bucket to use + // determine which bucket to use, bucket with cumulative probability larger than r - means this is the bicket to use + // 
initialize bucketIndex with the last bucket for case (shouln'd happen) when probabilities sum is lower than 1 bucketIndex := len(cumulativeBucketsProbabilities) - 1 for i, c := range cumulativeBucketsProbabilities { if r <= c { @@ -209,16 +214,18 @@ func getResponseLengthByHistogram(maxTokens int) int { } } - // compute bucket ranges (maxToken is out of scope) + // calculate size of all buckets (except the special last bucket) bucketSize := float64(maxTokens-1) / float64(len(cumulativeBucketsProbabilities)-1) - + // start is the minimum number in the required bucket start := int(bucketSize*float64(bucketIndex)) + 1 + // end is the maximum number in the required bucket end := int(bucketSize * float64(bucketIndex+1)) + // sometimes end could be maxTokens because of rounding, change the value to maxToken-1 if end >= maxTokens { end = maxTokens - 1 } - // Pick uniformly within the bucket’s range + // pick uniformly within the bucket’s range return RandomInt(start, end) } From 94eda7440c67537d14e4738a586a40eeb16d2978 Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Thu, 28 Aug 2025 09:46:46 +0300 Subject: [PATCH 5/6] fix misspelling Signed-off-by: Maya Barnea --- pkg/common/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 5c06edbd..697c478b 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -186,7 +186,7 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { // the histogram contains equal sized buckets + the last special bucket with contains only maxTokens value // the last element of respLenBucketsProbabilities defines probability of reposnse with maxToken tokens // other values define probabilities for the equal sized buckets -// if maxToken is small (smaller than number of buckets) - the reponse length is randomly selected from the range [1, maxTokens] +// if maxToken is small (smaller than number of buckets) - the response length is randomly selected from the 
range [1, maxTokens] func getResponseLengthByHistogram(maxTokens int) int { if maxTokens <= 1 { return maxTokens From 6c14aa6e90b4ce4400cda0b1baf6c8c9bd7b8379 Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Thu, 28 Aug 2025 10:46:19 +0300 Subject: [PATCH 6/6] fixes in comments Signed-off-by: Maya Barnea --- pkg/common/utils.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/common/utils.go b/pkg/common/utils.go index 697c478b..d3ea5b44 100644 --- a/pkg/common/utils.go +++ b/pkg/common/utils.go @@ -39,7 +39,7 @@ const ( RemoteDecodeFinishReason = "remote_decode" ) -// this array defines probabilities for buckets used for generation of number of tokens in response +// this array defines the probabilities for the buckets to be used for the generation of number of tokens in response var respLenBucketsProbabilities = [...]float64{0.2, 0.3, 0.2, 0.05, 0.1, 0.15} var cumulativeBucketsProbabilities []float64 @@ -181,17 +181,17 @@ func GetRandomResponseText(maxCompletionTokens *int64) (string, string) { return text, finishReason } -// getResponseLengthByHistogram calculates number of tokens to be returned in a response based on the max tokens value and pre-defined buckets. -// the response length is distributed according the probabilities defined in respLenBucketsProbabilities -// the histogram contains equal sized buckets + the last special bucket with contains only maxTokens value -// the last element of respLenBucketsProbabilities defines probability of reposnse with maxToken tokens -// other values define probabilities for the equal sized buckets -// if maxToken is small (smaller than number of buckets) - the response length is randomly selected from the range [1, maxTokens] +// getResponseLengthByHistogram calculates the number of tokens to be returned in a response based on the max tokens value and the pre-defined buckets. 
+// The response length is distributed according to the probabilities, defined in respLenBucketsProbabilities. +// The histogram contains equally sized buckets and the last special bucket, which contains only the maxTokens value. +// The last element of respLenBucketsProbabilities defines the probability of a response with maxTokens tokens. +// Other values define probabilities for the equally sized buckets. +// If maxTokens is small (not larger than the number of buckets) - the response length is randomly selected from the range [1, maxTokens] func getResponseLengthByHistogram(maxTokens int) int { if maxTokens <= 1 { return maxTokens } - // maxTokens is pretty small - no need to use the histogram of probabilities, just select a random value in the range [1, maxTokens] + // maxTokens is small - no need to use the histogram of probabilities, just select a random value in the range [1, maxTokens] if maxTokens <= len(cumulativeBucketsProbabilities) { res := RandomInt(1, maxTokens) return res @@ -199,13 +199,13 @@ func getResponseLengthByHistogram(maxTokens int) int { r := RandomFloat(0, 1) - // check if r is in the last bucket - the maxToken should be returned + // check if r is in the last bucket, then maxTokens should be returned if r > cumulativeBucketsProbabilities[len(cumulativeBucketsProbabilities)-2] { return maxTokens } - // determine which bucket to use, bucket with cumulative probability larger than r - means this is the bicket to use - // initialize bucketIndex with the last bucket for case (shouln'd happen) when probabilities sum is lower than 1 + // determine which bucket to use, the bucket with a cumulative probability larger than r is the bucket to use + // initialize bucketIndex with the last bucket to handle the case (which should not happen) when the probabilities sum is less than 1 bucketIndex := len(cumulativeBucketsProbabilities) - 1 for i, c := range cumulativeBucketsProbabilities { if r <= c { @@ -214,7 +214,7 @@ func 
getResponseLengthByHistogram(maxTokens int) int { } } - // calculate size of all buckets (except the special last bucket) + // calculate the size of all of the buckets (except the special last bucket) bucketSize := float64(maxTokens-1) / float64(len(cumulativeBucketsProbabilities)-1) // start is the minimum number in the required bucket start := int(bucketSize*float64(bucketIndex)) + 1