diff --git a/go.mod b/go.mod index 32fe86a8..05284a9a 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/buaazp/fasthttprouter v0.1.1 github.com/go-logr/logr v1.4.2 github.com/google/uuid v1.6.0 - github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 + github.com/llm-d/llm-d-kv-cache-manager v0.3.2 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 github.com/openai/openai-go v0.1.0-beta.10 @@ -23,6 +23,11 @@ require ( k8s.io/klog/v2 v2.130.1 ) +require ( + github.com/dgraph-io/ristretto/v2 v2.3.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect +) + require ( github.com/andybalholm/brotli v1.1.1 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -64,7 +69,7 @@ require ( go.uber.org/automaxprocs v1.6.0 // indirect golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.32.0 // indirect + golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.30.0 // indirect golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.9.0 // indirect diff --git a/go.sum b/go.sum index dd46dc12..47133e0e 100644 --- a/go.sum +++ b/go.sum @@ -19,8 +19,14 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/ristretto/v2 v2.3.0 h1:qTQ38m7oIyd4GAed/QkUZyPFNMnvVWyazGXRwvOt5zk= +github.com/dgraph-io/ristretto/v2 v2.3.0/go.mod h1:gpoRV3VzrEY1a9dWAYV6T1U7YzfgttXdd/ZzL1s9OZM= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= @@ -68,8 +74,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 h1:SDLiNrcreDcA9m9wfXAumFARDHHXpjOjHTzshTiTGxk= -github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1/go.mod h1:tN80/D0Faf6pE2ocwFgTNoCxKPsqdsa2XnjQUqOaZ8Q= +github.com/llm-d/llm-d-kv-cache-manager v0.3.2 h1:omSTXtuII3ol37CaoI9h+2VxE0m8EoeVOor+CkQh99I= +github.com/llm-d/llm-d-kv-cache-manager v0.3.2/go.mod h1:q6u7LnzMxNcHHb5/LRdHNNeZzzGMSENFSP1NGfsJEmA= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= @@ -174,8 +180,8 @@ golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/kv-cache/block_cache.go b/pkg/kv-cache/block_cache.go index 79d21e50..cf4ad155 100644 --- a/pkg/kv-cache/block_cache.go +++ b/pkg/kv-cache/block_cache.go @@ -139,12 +139,12 @@ func (bc *blockCache) startRequest(requestID string, blocks []uint64) (int, erro } delete(bc.unusedBlocks, oldestUnusedHash) - bc.eventChan <- EventData{action: eventActionRemove, hashValues: []uint64{oldestUnusedHash}} + bc.eventChan <- EventData{action: eventActionRemove, hashValues: []any{oldestUnusedHash}} } // Add the new block bc.usedBlocks[block] = 1 - bc.eventChan <- EventData{action: eventActionStore, hashValues: []uint64{block}} + bc.eventChan <- EventData{action: eventActionStore, hashValues: []any{block}} } // store the request mapping diff --git a/pkg/kv-cache/kv_cache_sender.go b/pkg/kv-cache/kv_cache_sender.go index b0ddac3f..2a27f413 100644 --- a/pkg/kv-cache/kv_cache_sender.go +++ b/pkg/kv-cache/kv_cache_sender.go @@ -33,9 +33,11 @@ const ( eventActionRemove ) +var mediumGPU = string("GPU") + type EventData struct { action EventAction - hashValues []uint64 + hashValues []any } type KVEventSender struct { @@ -93,9 +95,9 @@ func (s *KVEventSender) Run(ctx context.Context) error { switch eventData.action { case eventActionStore: - payload, err = msgpack.Marshal(kvevents.BlockStored{BlockHashes: eventData.hashValues}.ToTaggedUnion()) + payload, err = msgpack.Marshal(kvevents.BlockStored{BlockHashes: eventData.hashValues, Medium: &mediumGPU}.ToTaggedUnion()) case eventActionRemove: - payload, err = msgpack.Marshal(kvevents.BlockRemoved{BlockHashes: eventData.hashValues}.ToTaggedUnion()) + payload, err = msgpack.Marshal(kvevents.BlockRemoved{BlockHashes: eventData.hashValues, Medium: &mediumGPU}.ToTaggedUnion()) default: return fmt.Errorf("invalid event action %d", eventData.action) }