Skip to content

Commit b7d5873

Browse files
authored
extend response length buckets calculation to have not necessarily equally sized buckets (#176)
* extend response length buckets calculation to have not necessarily equally sized buckets Signed-off-by: Maya Barnea <[email protected]> * remove commented out code Signed-off-by: Maya Barnea <[email protected]> --------- Signed-off-by: Maya Barnea <[email protected]>
1 parent b98882a commit b7d5873

File tree

2 files changed

+79
-7
lines changed

2 files changed

+79
-7
lines changed

pkg/common/utils.go

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ const (
4343
var respLenBucketsProbabilities = [...]float64{0.2, 0.3, 0.2, 0.05, 0.1, 0.15}
4444
var cumulativeBucketsProbabilities []float64
4545

46+
const (
47+
flexBucketIndex = 3
48+
maxFixedBucketSize = 20
49+
)
50+
4651
// list of responses to use in random mode for completion requests
4752
var chatCompletionFakeResponses = []string{
4853
`Testing@, #testing 1$ ,2%,3^, [4&*5], 6~, 7-_ + (8 : 9) / \ < > .`,
@@ -215,18 +220,59 @@ func getResponseLengthByHistogram(maxTokens int) int {
215220
}
216221

217222
// calculate the size of all of the buckets (except the special last bucket)
218-
bucketSize := float64(maxTokens-1) / float64(len(cumulativeBucketsProbabilities)-1)
219-
// start is the minimum number in the required bucket
220-
start := int(bucketSize*float64(bucketIndex)) + 1
221-
// end is the maximum number in the required bucket
222-
end := int(bucketSize * float64(bucketIndex+1))
223+
start, end := calcBucketBoundaries(maxTokens, bucketIndex)
224+
225+
// pick uniformly within the bucket’s range
226+
return RandomInt(start, end)
227+
}
228+
229+
// calcBucketBoundaries calculates boundaries of a bucket with the given index.
230+
// Maximum size for equally sized buckets is defined by maxFixedBucketSize.
231+
// [maxFixedBucketSize*(number-of-buckets-1)+1] is the value of maxTokens for which
232+
// division into equally sized buckets will give buckets with size maxFixedBucketSize.
233+
// If maxTokens is [maxFixedBucketSize*(number-of-buckets-1)+1] or less,
234+
// all buckets will be of equal size, except the last bucket, which contains only one value.
235+
// If maxTokens is higher than [maxFixedBucketSize*(number-of-buckets-1)+1],
236+
// and flexBucketIndex is valid (between 0 and number of buckets - 1), the bucket sizes will not be equal.
237+
// In this case, all buckets except the one at index flexBucketIndex will have size maxFixedBucketSize (and the last bucket has size 1),
238+
// and the bucket at index flexBucketIndex will 'stretch' to cover the remaining range.
239+
func calcBucketBoundaries(maxTokens int, bucketIndex int) (start int, end int) {
240+
maxEquallyBucketsSz := maxFixedBucketSize*(len(cumulativeBucketsProbabilities)-1) + 1
241+
242+
if maxTokens <= maxEquallyBucketsSz || flexBucketIndex < 0 || flexBucketIndex >= len(cumulativeBucketsProbabilities)-1 {
243+
// create equally sized buckets
244+
// calculate the size of all of the buckets (except the special last bucket)
245+
bucketSize := float64(maxTokens-1) / float64(len(cumulativeBucketsProbabilities)-1)
246+
start = int(bucketSize*float64(bucketIndex)) + 1
247+
end = int(bucketSize * float64(bucketIndex+1))
248+
} else {
249+
// create non-equally sized buckets and find boundaries of the required bucket
250+
if bucketIndex < flexBucketIndex {
251+
// the relevant bucket is before the flex bucket, all buckets are of the same size (maxFixedBucketSize)
252+
// start is the minimum number in the required bucket
253+
start = maxFixedBucketSize*bucketIndex + 1
254+
end = maxFixedBucketSize * (bucketIndex + 1)
255+
} else {
256+
flexBucketSize := maxTokens - (maxFixedBucketSize * (len(cumulativeBucketsProbabilities) - 2))
257+
258+
if bucketIndex == flexBucketIndex {
259+
// the relevant bucket is the flex bucket
260+
start = int(maxFixedBucketSize*float64(bucketIndex)) + 1
261+
end = maxFixedBucketSize*bucketIndex + flexBucketSize
262+
} else {
263+
// the relevant bucket is one of the buckets after the flex bucket
264+
start = int(maxFixedBucketSize*float64(bucketIndex-1)) + flexBucketSize + 1
265+
end = maxFixedBucketSize*bucketIndex + flexBucketSize
266+
}
267+
}
268+
}
269+
223270
// sometimes end could be maxTokens because of rounding, change the value to maxTokens-1
224271
if end >= maxTokens {
225272
end = maxTokens - 1
226273
}
227274

228-
// pick uniformly within the bucket’s range
229-
return RandomInt(start, end)
275+
return start, end
230276
}
231277

232278
// GetResponseText returns response text, from a given text

pkg/common/utils_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,4 +168,30 @@ var _ = Describe("Utils", Ordered, func() {
168168
}
169169
})
170170

171+
Context("validateBucketsBoundaries", func() {
172+
type bucketBoundaries struct {
173+
start int
174+
end int
175+
}
176+
type bucketTest struct {
177+
maxTokens int
178+
expectedBuckets []bucketBoundaries
179+
}
180+
181+
tests := []bucketTest{{500, []bucketBoundaries{{1, 20}, {21, 40}, {41, 60}, {61, 480}, {481, 499}}},
182+
{47, []bucketBoundaries{{1, 9}, {10, 18}, {19, 27}, {28, 36}, {37, 46}}},
183+
{50, []bucketBoundaries{{1, 9}, {10, 19}, {20, 29}, {30, 39}, {40, 49}}}}
184+
185+
for _, test := range tests {
186+
Expect(test.expectedBuckets).To(HaveLen(len(cumulativeBucketsProbabilities) - 1))
187+
188+
It(fmt.Sprintf("should return bucket boundaries for maxTokens %d", test.maxTokens), func() {
189+
for i := range len(cumulativeBucketsProbabilities) - 1 {
190+
start, end := calcBucketBoundaries(test.maxTokens, i)
191+
Expect(start).To(Equal(test.expectedBuckets[i].start))
192+
Expect(end).To(Equal(test.expectedBuckets[i].end))
193+
}
194+
})
195+
}
196+
})
171197
})

0 commit comments

Comments
 (0)