Skip to content

Commit 4bc5a3a

Browse files
committed
Refactor: abstract dataset and move response generation from common to dataset
Signed-off-by: Qifan Deng <[email protected]>
1 parent ef6506e commit 4bc5a3a

19 files changed

+633
-428
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ vendor
77
.DS_Store
88
*.test
99
manifests/dev-config.yaml
10-
pkg/common/.llm-d
10+
pkg/dataset/.llm-d
1111
pkg/llm-d-inference-sim/tests-tmp/

pkg/common/utils.go

Lines changed: 0 additions & 238 deletions
Original file line numberDiff line numberDiff line change
@@ -17,61 +17,13 @@ limitations under the License.
1717
package common
1818

1919
import (
20-
"math"
2120
"math/rand"
2221
"regexp"
23-
"strings"
2422
"sync"
2523

2624
"github.com/google/uuid"
2725
)
2826

29-
const (
30-
ResponseLenMax = 128
31-
responseLenMean = 40
32-
responseLenStddev = 20
33-
stopFinishReasonProbability = 0.8
34-
35-
StopFinishReason = "stop"
36-
LengthFinishReason = "length"
37-
ToolsFinishReason = "tool_calls"
38-
RemoteDecodeFinishReason = "remote_decode"
39-
)
40-
41-
// this array defines the probabilities for the buckets to be used for the generation of number of tokens in response
42-
var respLenBucketsProbabilities = [...]float64{0.2, 0.3, 0.2, 0.05, 0.1, 0.15}
43-
var cumulativeBucketsProbabilities []float64
44-
45-
const (
46-
flexBucketIndex = 3
47-
maxFixedBucketSize = 20
48-
)
49-
50-
// list of responses to use in random mode for completion requests
51-
var chatCompletionFakeResponses = []string{
52-
`Testing@, #testing 1$ ,2%,3^, [4&*5], 6~, 7-_ + (8 : 9) / \ < > .`,
53-
`Testing, testing 1,2,3.`,
54-
`I am fine, how are you today?`,
55-
`I am your AI assistant, how can I help you today?`,
56-
`Today is a nice sunny day.`,
57-
`The temperature here is twenty-five degrees centigrade.`,
58-
`Today it is partially cloudy and raining.`,
59-
`To be or not to be that is the question.`,
60-
`Alas, poor Yorick! I knew him, Horatio: A fellow of infinite jest`,
61-
`The rest is silence. `,
62-
`Give a man a fish and you feed him for a day; teach a man to fish and you feed him for a lifetime`,
63-
}
64-
65-
func init() {
66-
cumulativeBucketsProbabilities = make([]float64, len(respLenBucketsProbabilities))
67-
sum := 0.0
68-
69-
for i, val := range respLenBucketsProbabilities {
70-
sum += val
71-
cumulativeBucketsProbabilities[i] = sum
72-
}
73-
}
74-
7527
// ValidateContextWindow checks if the request fits within the model's context window
7628
// Returns validation result, actual completion tokens, and total tokens
7729
func ValidateContextWindow(promptTokens int, maxCompletionTokens *int64, maxModelLen int) (bool, int64, int64) {
@@ -86,196 +38,6 @@ func ValidateContextWindow(promptTokens int, maxCompletionTokens *int64, maxMode
8638
return isValid, completionTokens, totalTokens
8739
}
8840

89-
// GetRandomResponseLen returns an int in the range [1, ResponseLenMax]
90-
// numbers are chosen according to a Gaussian distribution with mean responseLenMean and standard deviation responseLenStddev
91-
func GetRandomResponseLen() int {
92-
for {
93-
val := rand.NormFloat64()*responseLenStddev + responseLenMean
94-
if val >= 1 && val <= ResponseLenMax {
95-
return int(math.Round(val))
96-
}
97-
// else reject and resample
98-
}
99-
}
100-
101-
// GetRandomFinishReason returns finish reason with the probability for 'stop' as defined by stopFinishReasonProbability
102-
func GetRandomFinishReason() string {
103-
if rand.Float64() < stopFinishReasonProbability {
104-
return StopFinishReason
105-
}
106-
return LengthFinishReason
107-
}
108-
109-
// GetRandomText generates random text for the required number of tokens,
110-
// select randomly a sentence from chatCompletionFakeResponses,
111-
// if number of tokens is lower than required - select another sentence,
112-
// continue until the required number of tokens is achieved
113-
func GetRandomText(numOfTokens int) string {
114-
allTokens := make([]string, 0)
115-
116-
for len(allTokens) < numOfTokens {
117-
index := RandomInt(0, len(chatCompletionFakeResponses)-1)
118-
// create tokens from text, splitting by spaces and special characters
119-
tokens := Tokenize(chatCompletionFakeResponses[index])
120-
remaining := numOfTokens - len(allTokens)
121-
122-
if len(tokens) > remaining {
123-
// there are too many tokens, append only the relevant part
124-
tokens = tokens[:remaining]
125-
}
126-
127-
if len(allTokens) > 0 {
128-
// for every sentence after the first, prepend a space to its first token to separate sentences without adding an additional token
129-
tokens[0] = " " + tokens[0]
130-
}
131-
132-
allTokens = append(allTokens, tokens...)
133-
}
134-
135-
// return all tokens as text
136-
return strings.Join(allTokens, "")
137-
}
138-
139-
// GetRandomTokens generates tokens to be returned in a response, and the finish reason (stop or length)
140-
// if maxCompletionTokens is defined
141-
// - currently, the generated number of words in the text will be equal to its value
142-
// - in the future - need to find statistics about the distribution of generated tokens and return fewer tokens in part of the requests
143-
// - finish reason will be chosen randomly from the collection (stop, length) with 80% for stop and 20% for length
144-
// if maxCompletionTokens is nil
145-
// - the response text's length is randomly chosen from the range [1, ResponseLenMax] according to additional parameters
146-
// - finish reason is stop
147-
// if ignore_eos is true - the response will be generated with exactly maxCompletionTokens tokens
148-
// - request was validated so that when ignore_eos is true, maxCompletionTokens must be defined
149-
func GetRandomTokens(maxCompletionTokens *int64, ignore_eos bool, dataset *Dataset) ([]string, string) {
150-
numOfTokens := 0
151-
finishReason := StopFinishReason
152-
153-
// no max completion tokens, return text with random length
154-
if maxCompletionTokens == nil {
155-
numOfTokens = GetRandomResponseLen()
156-
} else {
157-
maxTokens := int(*maxCompletionTokens)
158-
if ignore_eos {
159-
numOfTokens = maxTokens
160-
finishReason = LengthFinishReason
161-
} else {
162-
// max tokens is defined - generate real length of the response based on it
163-
numOfTokens = getResponseLengthByHistogram(maxTokens)
164-
if numOfTokens == maxTokens {
165-
// if the response should be created with the maximum number of tokens - finish reason will be 'length'
166-
finishReason = LengthFinishReason
167-
}
168-
}
169-
}
170-
171-
return Tokenize(GetRandomText(numOfTokens)), finishReason
172-
}
173-
174-
// getResponseLengthByHistogram calculates the number of tokens to be returned in a response based on the max tokens value and the pre-defined buckets.
175-
// The response length is distributed according to the probabilities, defined in respLenBucketsProbabilities.
176-
// The histogram contains equally sized buckets and the last special bucket, which contains only the maxTokens value.
177-
// The last element of respLenBucketsProbabilities defines the probability of a response with maxTokens tokens.
178-
// Other values define probabilities for the equally sized buckets.
179-
// If maxTokens is small (not larger than the number of buckets) - the response length is randomly selected from the range [1, maxTokens]
180-
func getResponseLengthByHistogram(maxTokens int) int {
181-
if maxTokens <= 1 {
182-
return maxTokens
183-
}
184-
// maxTokens is small - no need to use the histogram of probabilities, just select a random value in the range [1, maxTokens]
185-
if maxTokens <= len(cumulativeBucketsProbabilities) {
186-
res := RandomInt(1, maxTokens)
187-
return res
188-
}
189-
190-
r := RandomFloat(0, 1)
191-
192-
// check if r is in the last bucket, then maxTokens should be returned
193-
if r > cumulativeBucketsProbabilities[len(cumulativeBucketsProbabilities)-2] {
194-
return maxTokens
195-
}
196-
197-
// determine which bucket to use, the bucket with a cumulative probability larger than r is the bucket to use
198-
// initialize bucketIndex with the last bucket to handle the case (which should not happen) when the probabilities sum is less than 1
199-
bucketIndex := len(cumulativeBucketsProbabilities) - 1
200-
for i, c := range cumulativeBucketsProbabilities {
201-
if r <= c {
202-
bucketIndex = i
203-
break
204-
}
205-
}
206-
207-
// calculate the size of all of the buckets (except the special last bucket)
208-
start, end := calcBucketBoundaries(maxTokens, bucketIndex)
209-
210-
// pick uniformly within the bucket’s range
211-
return RandomInt(start, end)
212-
}
213-
214-
// calcBucketBoundaries calculates boundaries of a bucket with the given index.
215-
// Maximum size for equally sized buckets is defined by maxFixedBucketSize.
216-
// [maxFixedBucketSize*(number-of-buckets-1)+1] is the value of maxTokens for which
217-
// division into equally sized buckets will give buckets with size maxFixedBucketSize.
218-
// If maxTokens is [maxFixedBucketSize*(number-of-buckets-1)+1] or less,
219-
// all buckets will be of equal size, except the last bucket, which contains only one value.
220-
// If maxTokens is higher than [maxFixedBucketSize*(number-of-buckets-1)+1],
221-
// and flexBucketIndex is valid (between 0 and number of buckets - 1) the buckets sizes will not be equal.
222-
// In this case, all buckets except the one at flexBucketIndex index will have size 20 (and the last is with size 1),
223-
// and the bucket at flexBucketIndex index will 'stretch' to cover the remaining range.
224-
func calcBucketBoundaries(maxTokens int, bucketIndex int) (start int, end int) {
225-
maxEquallyBucketsSz := maxFixedBucketSize*(len(cumulativeBucketsProbabilities)-1) + 1
226-
227-
if maxTokens <= maxEquallyBucketsSz || flexBucketIndex < 0 || flexBucketIndex >= len(cumulativeBucketsProbabilities)-1 {
228-
// create equally sized buckets
229-
// calculate the size of all of the buckets (except the special last bucket)
230-
bucketSize := float64(maxTokens-1) / float64(len(cumulativeBucketsProbabilities)-1)
231-
start = int(bucketSize*float64(bucketIndex)) + 1
232-
end = int(bucketSize * float64(bucketIndex+1))
233-
} else {
234-
// create non-equally sized buckets and find boundaries of the required bucket
235-
if bucketIndex < flexBucketIndex {
236-
// the relevant bucket is before the flex bucket, all buckets are of the same size (maxFixedBucketSize)
237-
// start is the minimum number in the required bucket
238-
start = maxFixedBucketSize*bucketIndex + 1
239-
end = maxFixedBucketSize * (bucketIndex + 1)
240-
} else {
241-
flexBucketSize := maxTokens - (maxFixedBucketSize * (len(cumulativeBucketsProbabilities) - 2))
242-
243-
if bucketIndex == flexBucketIndex {
244-
// the relevant bucket is the flex bucket
245-
start = int(maxFixedBucketSize*float64(bucketIndex)) + 1
246-
end = maxFixedBucketSize*bucketIndex + flexBucketSize
247-
} else {
248-
// the relevant bucket is one of buckets after the flex bucket
249-
start = int(maxFixedBucketSize*float64(bucketIndex-1)) + flexBucketSize + 1
250-
end = maxFixedBucketSize*bucketIndex + flexBucketSize
251-
}
252-
}
253-
}
254-
255-
// sometimes end could be maxTokens because of rounding, change the value to maxTokens-1
256-
if end >= maxTokens {
257-
end = maxTokens - 1
258-
}
259-
260-
return start, end
261-
}
262-
263-
// EchoResponseTokens returns needed tokens, from a given text
264-
// considering max completion tokens if it is not nil, and a finish reason (stop or length)
265-
func EchoResponseTokens(maxCompletionTokens *int64, text string) ([]string, string) {
266-
tokens := Tokenize(text)
267-
// no max completion tokens, return entire text
268-
if maxCompletionTokens == nil {
269-
return tokens, StopFinishReason
270-
}
271-
272-
if *maxCompletionTokens >= int64(len(tokens)) {
273-
return tokens, StopFinishReason
274-
}
275-
// return truncated text
276-
return tokens[0:*maxCompletionTokens], LengthFinishReason
277-
}
278-
27941
func RandomNumericString(length int) string {
28042
digits := "0123456789"
28143
result := make([]byte, length)

0 commit comments

Comments
 (0)