undo utils test changes

npolshakova · npolshakova · commit 50f6940a2a0d · 2025-08-21T09:54:21.000-04:00
Signed-off-by: npolshakova <nina.polshakova@solo.io>
diff --git a/.golangci.yml b/.golangci.yml
@@ -31,7 +31,8 @@ linters:
   - prealloc
   - unparam
   - unused
-  settings:
+
+linters-settings:
     revive:
       rules:
         - name: dot-imports
diff --git a/pkg/common/test_helpers.go b/pkg/common/test_helpers.go
@@ -40,7 +40,7 @@ func IsValidText(text string) bool {
 					// during generation sentences are connected by space, skip it
 					// additional space at the end of the string is invalid
 					if text[charsTested] == ' ' && charsTested < len(text)-1 {
-						charsTested += 1
+						charsTested++
 						found = true
 					}
 					break
diff --git a/pkg/common/utils.go b/pkg/common/utils.go
@@ -200,11 +200,11 @@ func InitRandom(seed int64) {
 	uuid.SetRand(randomGenerator)
 }
 
-// Returns an integer between min and max (included)
-func RandomInt(min int, max int) int {
+// RandomInt returns an integer between minVal and maxVal (included)
+func RandomInt(minVal int, maxVal int) int {
 	randMutex.Lock()
 	defer randMutex.Unlock()
-	return randomGenerator.Intn(max-min+1) + min
+	return randomGenerator.Intn(maxVal-minVal+1) + minVal
 }
 
 // Returns true or false randomly
@@ -219,11 +219,11 @@ func RandomBool(probability int) bool {
 	return randomGenerator.Float64() < float64(probability)/100
 }
 
-// Returns a random float64 in the range [min, max)
-func RandomFloat(min float64, max float64) float64 {
+// RandomFloat returns a random float64 in the range [minVal, maxVal)
+func RandomFloat(minVal float64, maxVal float64) float64 {
 	randMutex.Lock()
 	defer randMutex.Unlock()
-	return randomGenerator.Float64()*(max-min) + min
+	return randomGenerator.Float64()*(maxVal-minVal) + minVal
 }
 
 // Returns a normally distributed float64
diff --git a/pkg/kv-cache/block_cache.go b/pkg/kv-cache/block_cache.go
@@ -72,11 +72,11 @@ func (b *blockCache) start(ctx context.Context) {
 }
 
 // startRequest adds a request with its associated block hashes to the cache
-func (bc *blockCache) startRequest(requestID string, blocks []uint64) error {
-	bc.mu.Lock()
-	defer bc.mu.Unlock()
+func (b *blockCache) startRequest(requestID string, blocks []uint64) error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
 
-	if _, exists := bc.requestToBlocks[requestID]; exists {
+	if _, exists := b.requestToBlocks[requestID]; exists {
 		// request with the same id already exists
 		return fmt.Errorf("request already exists for id %s", requestID)
 	}
@@ -93,67 +93,67 @@ func (bc *blockCache) startRequest(requestID string, blocks []uint64) error {
 	// count number of new blocks + number of blocks that are in the unused blocks
 	// don't update the data until we are sure that it's ok
 	for _, blockHash := range blocks {
-		if _, exists := bc.unusedBlocks[blockHash]; exists {
+		if _, exists := b.unusedBlocks[blockHash]; exists {
 			blockToMoveToUsed = append(blockToMoveToUsed, blockHash)
-		} else if _, exists := bc.usedBlocks[blockHash]; !exists {
+		} else if _, exists := b.usedBlocks[blockHash]; !exists {
 			blocksToAdd = append(blocksToAdd, blockHash)
 		} else {
 			blockAreadyInUse = append(blockAreadyInUse, blockHash)
 		}
 	}
 
-	if len(bc.usedBlocks)+len(blocksToAdd)+len(blockToMoveToUsed) > bc.maxBlocks {
+	if len(b.usedBlocks)+len(blocksToAdd)+len(blockToMoveToUsed) > b.maxBlocks {
 		return errors.New(capacityError)
 	}
 
 	// for blocks that are already in use - update the reference
 	for _, block := range blockAreadyInUse {
-		bc.usedBlocks[block] += 1
+		b.usedBlocks[block]++
 	}
 
 	// for block used in the past - move them to the used blocks collection
 	for _, block := range blockToMoveToUsed {
-		bc.usedBlocks[block] = 1
-		delete(bc.unusedBlocks, block)
+		b.usedBlocks[block] = 1
+		delete(b.unusedBlocks, block)
 	}
 
 	// for new block - add them, if there is no empty slots - evict the oldest block
 	for _, block := range blocksToAdd {
-		if len(bc.usedBlocks)+len(bc.unusedBlocks) == bc.maxBlocks {
+		if len(b.usedBlocks)+len(b.unusedBlocks) == b.maxBlocks {
 			// cache is full but contains unused blocks - evict the oldest
 			var oldestUnusedHash uint64
 			oldestUnusedTime := time.Now()
 
-			for hash, t := range bc.unusedBlocks {
+			for hash, t := range b.unusedBlocks {
 				if t.Before(oldestUnusedTime) {
 					oldestUnusedHash = hash
 					oldestUnusedTime = t
 				}
 			}
 
-			delete(bc.unusedBlocks, oldestUnusedHash)
-			bc.eventChan <- EventData{action: eventActionRemove, hashValues: []uint64{oldestUnusedHash}}
+			delete(b.unusedBlocks, oldestUnusedHash)
+			b.eventChan <- EventData{action: eventActionRemove, hashValues: []uint64{oldestUnusedHash}}
 		}
 
 		// Add the new block
-		bc.usedBlocks[block] = 1
-		bc.eventChan <- EventData{action: eventActionStore, hashValues: []uint64{block}}
+		b.usedBlocks[block] = 1
+		b.eventChan <- EventData{action: eventActionStore, hashValues: []uint64{block}}
 	}
 
 	// store the request mapping
-	bc.requestToBlocks[requestID] = make([]uint64, len(blocks))
-	copy(bc.requestToBlocks[requestID], blocks)
+	b.requestToBlocks[requestID] = make([]uint64, len(blocks))
+	copy(b.requestToBlocks[requestID], blocks)
 
 	return nil
 }
 
 // finishRequest processes the completion of a request, decreasing reference counts
-func (bc *blockCache) finishRequest(requestID string) error {
-	bc.mu.Lock()
-	defer bc.mu.Unlock()
+func (b *blockCache) finishRequest(requestID string) error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
 
 	// Get blocks associated with this request
-	blockHashes, exists := bc.requestToBlocks[requestID]
+	blockHashes, exists := b.requestToBlocks[requestID]
 	if !exists {
 		return errors.New("request not found")
 	}
@@ -163,27 +163,27 @@ func (bc *blockCache) finishRequest(requestID string) error {
 	// Decrease reference count for each block
 	errBlocks := make([]uint64, 0)
 	for _, blockHash := range blockHashes {
-		if refCount, exists := bc.usedBlocks[blockHash]; exists {
+		if refCount, exists := b.usedBlocks[blockHash]; exists {
 			if refCount > 1 {
 				// this block is in use by another request, just update reference count
-				bc.usedBlocks[blockHash] = refCount - 1
+				b.usedBlocks[blockHash] = refCount - 1
 			} else {
 				// this was the last block usage - move this block to unused
-				bc.unusedBlocks[blockHash] = now
-				delete(bc.usedBlocks, blockHash)
+				b.unusedBlocks[blockHash] = now
+				delete(b.usedBlocks, blockHash)
 			}
 		} else {
 			errBlocks = append(errBlocks, blockHash)
 		}
 	}
 
 	// Remove the request mapping
-	delete(bc.requestToBlocks, requestID)
+	delete(b.requestToBlocks, requestID)
 
 	if len(errBlocks) > 0 {
 		errMsg := "Not existing blocks "
-		for _, b := range errBlocks {
-			errMsg += fmt.Sprintf("%d, ", b)
+		for _, bl := range errBlocks {
+			errMsg += fmt.Sprintf("%d, ", bl)
 		}
 		return fmt.Errorf("%s for request %s", errMsg[:len(errMsg)-2], requestID)
 	}
@@ -192,26 +192,26 @@ func (bc *blockCache) finishRequest(requestID string) error {
 }
 
 // GetStats returns current cache statistics (for testing/debugging)
-func (bc *blockCache) getStats() (int, int, int) {
-	bc.mu.RLock()
-	defer bc.mu.RUnlock()
+func (b *blockCache) getStats() (int, int, int) {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
 
-	return len(bc.requestToBlocks), len(bc.usedBlocks) + len(bc.unusedBlocks), len(bc.unusedBlocks)
+	return len(b.requestToBlocks), len(b.usedBlocks) + len(b.unusedBlocks), len(b.unusedBlocks)
 }
 
 // getBlockInfo returns reference count and if it's in the cache for a specific block (for testing)
 // if block is in use by currently running requests the count will be positive, boolean is true
 // if block is in the unused list - count is 0, boolean is true
 // if block is not in both collections - count is 0, boolean is false
-func (bc *blockCache) getBlockInfo(blockHash uint64) (int, bool) {
-	bc.mu.RLock()
-	defer bc.mu.RUnlock()
+func (b *blockCache) getBlockInfo(blockHash uint64) (int, bool) {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
 
-	refCount, exists := bc.usedBlocks[blockHash]
+	refCount, exists := b.usedBlocks[blockHash]
 	if exists {
 		return refCount, true
 	}
-	_, exists = bc.unusedBlocks[blockHash]
+	_, exists = b.unusedBlocks[blockHash]
 	if exists {
 		return 0, true
 	}
diff --git a/pkg/kv-cache/kv_cache.go b/pkg/kv-cache/kv_cache.go
@@ -27,14 +27,14 @@ import (
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
 )
 
-type KVCacheHelper struct {
+type Helper struct {
 	tokenizer       tokenization.Tokenizer
 	tokensProcessor kvblock.TokenProcessor // turns tokens to kv block keys
 	logger          logr.Logger
 	blockCache      *blockCache
 }
 
-func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCacheHelper, error) {
+func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*Helper, error) {
 	tokenProcConfig := kvblock.DefaultTokenProcessorConfig()
 	tokenProcConfig.BlockSize = config.TokenBlockSize
 	if config.HashSeed != "" {
@@ -54,7 +54,7 @@ func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCach
 	if err != nil {
 		return nil, fmt.Errorf("failed to create block cache: %w", err)
 	}
-	return &KVCacheHelper{
+	return &Helper{
 		tokenizer:       tokenizer,
 		tokensProcessor: tokensProcessor,
 		blockCache:      blockCache,
@@ -63,11 +63,11 @@ func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCach
 }
 
 // Run starts the helper.
-func (h *KVCacheHelper) Run(ctx context.Context) {
+func (h *Helper) Run(ctx context.Context) {
 	h.blockCache.start(ctx)
 }
 
-func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest) error {
+func (h *Helper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest) error {
 	h.logger.Info("KV cache - process request")
 
 	prompt := vllmReq.GetPrompt()
@@ -93,6 +93,6 @@ func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest
 	return h.blockCache.startRequest(requestID, blockHashes)
 }
 
-func (h *KVCacheHelper) OnRequestEnd(vllmReq openaiserverapi.CompletionRequest) error {
+func (h *Helper) OnRequestEnd(vllmReq openaiserverapi.CompletionRequest) error {
 	return h.blockCache.finishRequest(vllmReq.GetRequestID())
 }
diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go
@@ -83,7 +83,7 @@ type VllmSimulator struct {
 	// schema validator for tools parameters
 	toolsValidator *openaiserverapi.Validator
 	// kv cache functionality
-	kvcacheHelper *kvcache.KVCacheHelper
+	kvcacheHelper *kvcache.Helper
 	// namespace where simulator is running
 	namespace string
 	// pod name of simulator
@@ -211,12 +211,12 @@ func (s *VllmSimulator) readRequest(ctx *fasthttp.RequestCtx, isChatCompletion b
 		}
 
 		for _, tool := range req.Tools {
-			toolJson, err := json.Marshal(tool.Function)
+			toolJSON, err := json.Marshal(tool.Function)
 			if err != nil {
 				s.logger.Error(err, "failed to marshal request tools")
 				return nil, err
 			}
-			err = s.toolsValidator.ValidateTool(toolJson)
+			err = s.toolsValidator.ValidateTool(toolJSON)
 			if err != nil {
 				s.logger.Error(err, "tool validation failed")
 				return nil, err
@@ -556,8 +556,8 @@ func (s *VllmSimulator) createCompletionResponse(isChatCompletion bool, respToke
 		baseResp.DoRemoteDecode = true
 		baseResp.DoRemotePrefill = false
 		// currently remote prefill information is hard-coded
-		baseResp.RemoteBlockIds = []string{"DUMMY_ID"}
-		baseResp.RemoteEngineId = "DUMMY_ID"
+		baseResp.RemoteBlockIDs = []string{"DUMMY_ID"}
+		baseResp.RemoteEngineID = "DUMMY_ID"
 		baseResp.RemoteHost = "DUMMY"
 		baseResp.RemotePort = 1234
 	}
diff --git a/pkg/openai-server-api/request.go b/pkg/openai-server-api/request.go
@@ -73,10 +73,10 @@ type baseCompletionRequest struct {
 	DoRemoteDecode bool `json:"do_remote_decode"`
 	// DoRemotePrefill boolean value, true when request's prefill was done on remote pod
 	DoRemotePrefill bool `json:"do_remote_prefill"`
-	// RemoteBlockIds is a list of block identifiers to process remotely for distributed decoding
-	RemoteBlockIds []string `json:"remote_block_ids"`
-	// RemoteEngineId is an identifier of the remote inference engine or backend to use for processing requests
-	RemoteEngineId string `json:"remote_engine_id"`
+	// RemoteBlockIDs is a list of block identifiers to process remotely for distributed decoding
+	RemoteBlockIDs []string `json:"remote_block_ids"`
+	// RemoteEngineID is an identifier of the remote inference engine or backend to use for processing requests
+	RemoteEngineID string `json:"remote_engine_id"`
 	// RemoteHost is a hostname or IP address of the remote server handling prefill
 	RemoteHost string `json:"remote_host"`
 	// RemotePort is a port of the remote server handling prefill
@@ -197,10 +197,10 @@ func (c *ChatCompletionRequest) GetMaxCompletionTokens() *int64 {
 
 // getLastUserMsg returns last message from this request's messages with user role,
 // if does not exist - returns an empty string
-func (req *ChatCompletionRequest) getLastUserMsg() string {
-	for i := len(req.Messages) - 1; i >= 0; i-- {
-		if req.Messages[i].Role == RoleUser {
-			return req.Messages[i].Content.PlainText()
+func (c *ChatCompletionRequest) getLastUserMsg() string {
+	for i := len(c.Messages) - 1; i >= 0; i-- {
+		if c.Messages[i].Role == RoleUser {
+			return c.Messages[i].Content.PlainText()
 		}
 	}
 
@@ -210,15 +210,15 @@ func (req *ChatCompletionRequest) getLastUserMsg() string {
 // CreateResponseText creates and returns response payload based on this request,
 // i.e., an array of generated tokens, the finish reason, and the number of created
 // tokens
-func (req ChatCompletionRequest) CreateResponseText(mode string) ([]string, string, int, error) {
-	maxTokens, err := common.GetMaxTokens(req.MaxCompletionTokens, req.MaxTokens)
+func (c ChatCompletionRequest) CreateResponseText(mode string) ([]string, string, int, error) {
+	maxTokens, err := common.GetMaxTokens(c.MaxCompletionTokens, c.MaxTokens)
 	if err != nil {
 		return nil, "", 0, err
 	}
 
 	var text, finishReason string
 	if mode == common.ModeEcho {
-		text, finishReason = common.GetResponseText(maxTokens, req.getLastUserMsg())
+		text, finishReason = common.GetResponseText(maxTokens, c.getLastUserMsg())
 	} else {
 		text, finishReason = common.GetRandomResponseText(maxTokens)
 	}
@@ -250,30 +250,30 @@ func (t *TextCompletionRequest) GetNumberOfPromptTokens() int {
 	return len(common.Tokenize(t.GetPrompt()))
 }
 
-func (c *TextCompletionRequest) GetTools() []Tool {
+func (t *TextCompletionRequest) GetTools() []Tool {
 	return nil
 }
 
-func (c *TextCompletionRequest) GetToolChoice() string {
+func (t *TextCompletionRequest) GetToolChoice() string {
 	return ""
 }
 
-func (c *TextCompletionRequest) GetMaxCompletionTokens() *int64 {
-	return c.MaxTokens
+func (t *TextCompletionRequest) GetMaxCompletionTokens() *int64 {
+	return t.MaxTokens
 }
 
 // CreateResponseText creates and returns response payload based on this request,
 // i.e., an array of generated tokens, the finish reason, and the number of created
 // tokens
-func (req TextCompletionRequest) CreateResponseText(mode string) ([]string, string, int, error) {
-	maxTokens, err := common.GetMaxTokens(nil, req.MaxTokens)
+func (t TextCompletionRequest) CreateResponseText(mode string) ([]string, string, int, error) {
+	maxTokens, err := common.GetMaxTokens(nil, t.MaxTokens)
 	if err != nil {
 		return nil, "", 0, err
 	}
 
 	var text, finishReason string
 	if mode == common.ModeEcho {
-		text, finishReason = common.GetResponseText(maxTokens, req.Prompt)
+		text, finishReason = common.GetResponseText(maxTokens, t.Prompt)
 	} else {
 		text, finishReason = common.GetRandomResponseText(maxTokens)
 	}
diff --git a/pkg/openai-server-api/response.go b/pkg/openai-server-api/response.go
diff --git a/pkg/openai-server-api/tools_utils.go b/pkg/openai-server-api/tools_utils.go

Original file line number	Diff line number	Diff line change
`@@ -40,7 +40,7 @@ func IsValidText(text string) bool {`
`40`	`40`	`// during generation sentences are connected by space, skip it`
`41`	`41`	`// additional space at the end of the string is invalid`
`42`	`42`	`if text[charsTested] == ' ' && charsTested < len(text)-1 {`
`43`		`- charsTested += 1`
	`43`	`+ charsTested++`
`44`	`44`	`found = true`
`45`	`45`	`}`
`46`	`46`	`break`
Original file line number	Diff line number	Diff line change
`@@ -27,14 +27,14 @@ import (`
`27`	`27`	`"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"`
`28`	`28`	`)`
`29`	`29`
`30`		`-type KVCacheHelper struct {`
	`30`	`+type Helper struct {`
`31`	`31`	`tokenizer tokenization.Tokenizer`
`32`	`32`	`tokensProcessor kvblock.TokenProcessor // turns tokens to kv block keys`
`33`	`33`	`logger logr.Logger`
`34`	`34`	`blockCache *blockCache`
`35`	`35`	`}`
`36`	`36`
`37`		`-func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCacheHelper, error) {`
	`37`	`+func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*Helper, error) {`
`38`	`38`	`tokenProcConfig := kvblock.DefaultTokenProcessorConfig()`
`39`	`39`	`tokenProcConfig.BlockSize = config.TokenBlockSize`
`40`	`40`	`if config.HashSeed != "" {`
`@@ -54,7 +54,7 @@ func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCach`
`54`	`54`	`if err != nil {`
`55`	`55`	`return nil, fmt.Errorf("failed to create block cache: %w", err)`
`56`	`56`	`}`
`57`		`- return &KVCacheHelper{`
	`57`	`+ return &Helper{`
`58`	`58`	`tokenizer: tokenizer,`
`59`	`59`	`tokensProcessor: tokensProcessor,`
`60`	`60`	`blockCache: blockCache,`
`@@ -63,11 +63,11 @@ func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCach`
`63`	`63`	`}`
`64`	`64`
`65`	`65`	`// Run starts the helper.`
`66`		`-func (h *KVCacheHelper) Run(ctx context.Context) {`
	`66`	`+func (h *Helper) Run(ctx context.Context) {`
`67`	`67`	`h.blockCache.start(ctx)`
`68`	`68`	`}`
`69`	`69`
`70`		`-func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest) error {`
	`70`	`+func (h *Helper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest) error {`
`71`	`71`	`h.logger.Info("KV cache - process request")`
`72`	`72`
`73`	`73`	`prompt := vllmReq.GetPrompt()`
`@@ -93,6 +93,6 @@ func (h *KVCacheHelper) OnRequestStart(vllmReq openaiserverapi.CompletionRequest`
`93`	`93`	`return h.blockCache.startRequest(requestID, blockHashes)`
`94`	`94`	`}`
`95`	`95`
`96`		`-func (h *KVCacheHelper) OnRequestEnd(vllmReq openaiserverapi.CompletionRequest) error {`
	`96`	`+func (h *Helper) OnRequestEnd(vllmReq openaiserverapi.CompletionRequest) error {`
`97`	`97`	`return h.blockCache.finishRequest(vllmReq.GetRequestID())`
`98`	`98`	`}`