diff --git a/examples/kv_cache_index/main.go b/examples/kv_cache_index/main.go index 61c5bea..2b7f80d 100644 --- a/examples/kv_cache_index/main.go +++ b/examples/kv_cache_index/main.go @@ -132,7 +132,7 @@ func runPrompts(ctx context.Context, kvCacheIndexer *kvcache.Indexer) error { ModelName: modelName, ChunkHash: h, } - }), []kvblock.PodEntry{{"pod1", "gpu"}}) + }), []kvblock.PodEntry{{PodIdentifier: "pod1", DeviceTier: "gpu"}}) // Sleep 3 secs time.Sleep(3 * time.Second) diff --git a/go.mod b/go.mod index badb8b8..4c541af 100644 --- a/go.mod +++ b/go.mod @@ -11,9 +11,14 @@ require ( github.com/pebbe/zmq4 v1.4.0 github.com/prometheus/client_golang v1.22.0 github.com/prometheus/client_model v0.6.1 + github.com/redis/go-redis/extra/redisotel/v9 v9.7.3 github.com/redis/go-redis/v9 v9.7.3 github.com/stretchr/testify v1.10.0 github.com/vmihailenco/msgpack/v5 v5.4.1 + go.opentelemetry.io/otel v1.36.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 + go.opentelemetry.io/otel/sdk v1.36.0 + go.opentelemetry.io/otel/trace v1.36.0 k8s.io/apimachinery v0.33.0 k8s.io/client-go v0.33.0 k8s.io/klog/v2 v2.130.1 @@ -22,10 +27,12 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.23.0 // indirect @@ -33,6 +40,7 @@ require ( github.com/google/gnostic-models v0.6.9 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect github.com/josharian/intern v1.0.0 // 
indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect @@ -43,15 +51,23 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect + github.com/redis/go-redis/extra/rediscmd/v9 v9.7.3 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/yuin/gopher-lua v1.1.1 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect + go.opentelemetry.io/otel/metric v1.36.0 // indirect + go.opentelemetry.io/proto/otlp v1.4.0 // indirect golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.31.0 // indirect + golang.org/x/sys v0.33.0 // indirect golang.org/x/term v0.30.0 // indirect golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.9.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect + google.golang.org/grpc v1.68.1 // indirect google.golang.org/protobuf v1.36.5 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index 56ba0a5..efa9c97 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= 
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -21,8 +23,11 @@ github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxER github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= @@ -35,6 +40,8 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= github.com/google/gnostic-models 
v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -45,6 +52,8 @@ github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgY github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 h1:TmHmbvxPmaegwhDubVz0lICL0J5Ka2vwTzhoePEXsGE= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0/go.mod h1:qztMSjm835F2bXf+5HKAPIS5qsmQDqZna/PgVt4rWtI= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -88,6 +97,10 @@ github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/redis/go-redis/extra/rediscmd/v9 v9.7.3 h1:1AXQZkJkFxGV3f78mSnUI70l0orO6FHnYoSmBos8SZM= +github.com/redis/go-redis/extra/rediscmd/v9 v9.7.3/go.mod h1:OgkpkwJYex1oyVAabK+VhVUKhUXw8uZUfewJYH1wG90= +github.com/redis/go-redis/extra/redisotel/v9 v9.7.3 h1:ICBA9xYh+SmZqMfBtjKpp1ohi/V5R1TEZglLZc8IxTc= +github.com/redis/go-redis/extra/redisotel/v9 v9.7.3/go.mod h1:DMzxd0CDyZ9VFw9sEPIVpIgKTAaubfGuaPQSUaS7/fo= github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM= github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA= github.com/rogpeppe/go-internal v1.13.1 
h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= @@ -115,6 +128,22 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= +go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= +go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= +go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= +go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= +go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= +go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= +go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= +go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= go.uber.org/goleak v1.3.0 
h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -136,8 +165,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -156,6 +185,12 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q= +google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 
h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
+google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0=
+google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw=
 google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
 google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/pkg/kvcache/indexer.go b/pkg/kvcache/indexer.go
index 0eec0e8..f0513cf 100644
--- a/pkg/kvcache/indexer.go
+++ b/pkg/kvcache/indexer.go
@@ -20,12 +20,14 @@ import (
 	"context"
 	"fmt"
 
+	"go.opentelemetry.io/otel/attribute"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/klog/v2"
 
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization/prefixstore"
+	"github.com/llm-d/llm-d-kv-cache-manager/pkg/tracing"
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/utils/logging"
 )
 
@@ -117,36 +119,86 @@ func (k *Indexer) KVBlockIndex() kvblock.Index {
 func (k *Indexer) GetPodScores(ctx context.Context, prompt, modelName string,
 	podIdentifiers []string,
 ) (map[string]int, error) {
+	ctx, span := tracing.StartSpan(ctx, "kv-cache-manager.GetPodScores", tracing.OperationGetPodScores)
+	defer span.End()
+
+	span.SetAttributes(
+		attribute.String(tracing.AttrGenAIRequestModel, modelName),
+		attribute.Int(tracing.AttrKVCachePodCount, len(podIdentifiers)),
+	)
+
 	traceLogger := klog.FromContext(ctx).V(logging.TRACE).WithName("kvcache.GetPodScores")
 	// 0. add to tokenizers pool
 	k.tokenizersPool.AddTask(prompt, modelName)
 
 	// 1. get available tokens of longest prefix
+	// The child span's context is deliberately discarded: FindTokens ends before
+	// the lookup and scoring steps, which must stay parented to the GetPodScores
+	// span carried by ctx rather than to an already-finished sibling.
+	_, findTokensSpan := tracing.StartSpan(ctx, "kv-cache-manager.FindTokens", tracing.OperationFindTokens)
 	tokens := k.tokensIndexer.FindLongestContainedTokens(prompt, modelName)
 	if len(tokens) == 0 {
+		span.SetAttributes(
+			attribute.Int(tracing.AttrKVCacheTokenCount, 0),
+			attribute.String(tracing.AttrOperationOutcome, tracing.OutcomeSuccess),
+		)
+		tracing.SetSpanError(findTokensSpan, nil)
+		findTokensSpan.End()
 		//nolint:nilnil // no need to return an error
 		return nil, nil
 	}
+	tracing.SetSpanError(findTokensSpan, nil)
+	findTokensSpan.End()
+
+	// Set token count attribute
+	span.SetAttributes(attribute.Int(tracing.AttrKVCacheTokenCount, len(tokens)))
 
 	// 2. get block keys
 	blockKeys := k.tokensProcessor.TokensToKVBlockKeys(tokens, modelName)
 	traceLogger.Info("found tokens", "tokens", tokens, "block-keys", blockKeys)
 
+	// Set block keys attribute
+	span.SetAttributes(attribute.Int(tracing.AttrKVCacheBlockKeys, len(blockKeys)))
+
 	// 3. query kvblock indexer for pods
 	keyToPods, err := k.kvBlockIndex.Lookup(ctx, blockKeys, sets.New(podIdentifiers...))
 	if err != nil {
+		tracing.SetSpanError(span, err)
 		return nil, fmt.Errorf("failed to query kvblock indexer: %w", err)
 	}
 	traceLogger.Info("found block keys", "block-keys", blockKeys,
 		"pods", podsPerKeyPrintHelper(keyToPods))
 
 	// 4. score pods
+	_, scorePodSpan := tracing.StartSpan(ctx, "kv-cache-manager.ScorePods", tracing.OperationScorePods)
 	podScores, err := k.kvBlockScorer.Score(blockKeys, keyToPods)
 	if err != nil {
+		tracing.SetSpanError(scorePodSpan, err)
+		scorePodSpan.End()
+		tracing.SetSpanError(span, err)
 		return nil, fmt.Errorf("failed to query kvblock scorer: %w", err)
 	}
+	tracing.SetSpanError(scorePodSpan, nil)
+	scorePodSpan.End()
 	traceLogger.Info("found pod scores", "pod-scores", podScores)
 
+	// Calculate hit ratio for observability
+	totalPods := len(podIdentifiers)
+	if totalPods == 0 {
+		// If no specific pods requested, use all pods with scores
+		totalPods = len(podScores)
+	}
+
+	var hitRatio float64
+	if totalPods > 0 {
+		hitRatio = float64(len(podScores)) / float64(totalPods)
+	}
+
+	span.SetAttributes(
+		attribute.Float64(tracing.AttrKVCacheHitRatio, hitRatio),
+		attribute.String(tracing.AttrOperationOutcome, tracing.OutcomeSuccess),
+	)
+
 	return podScores, nil
 }
 
diff --git a/pkg/kvcache/kvblock/index.go b/pkg/kvcache/kvblock/index.go
index 7930667..6451f21 100644
--- a/pkg/kvcache/kvblock/index.go
+++ b/pkg/kvcache/kvblock/index.go
@@ -46,6 +46,7 @@ type IndexConfig struct {
 func DefaultIndexConfig() *IndexConfig {
 	return &IndexConfig{
 		InMemoryConfig: DefaultInMemoryIndexConfig(),
+		RedisConfig:    DefaultRedisIndexConfig(),
 		EnableMetrics:  false,
 	}
 }
diff --git a/pkg/kvcache/kvblock/redis.go b/pkg/kvcache/kvblock/redis.go
index 41689c6..98c3c5c 100644
--- a/pkg/kvcache/kvblock/redis.go
+++ b/pkg/kvcache/kvblock/redis.go
@@ -23,6 +23,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/redis/go-redis/extra/redisotel/v9"
 	"github.com/redis/go-redis/v9"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/klog/v2"
@@ -57,6 +58,17 @@ func NewRedisIndex(config *RedisIndexConfig) (Index, error) {
 	}
 
 	redisClient := redis.NewClient(redisOpt)
+
+	// Enable automatic OpenTelemetry tracing for Redis operations
+	if err := redisotel.InstrumentTracing(redisClient); err != nil {
+		return nil,
fmt.Errorf("failed to instrument Redis tracing: %w", err)
+	}
+
+	// Enable automatic OpenTelemetry metrics for Redis operations
+	if err := redisotel.InstrumentMetrics(redisClient); err != nil {
+		return nil, fmt.Errorf("failed to instrument Redis metrics: %w", err)
+	}
+
 	if err := redisClient.Ping(context.Background()).Err(); err != nil {
 		return nil, fmt.Errorf("failed to connect to Redis: %w", err)
 	}
diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go
new file mode 100644
index 0000000..8d63e2d
--- /dev/null
+++ b/pkg/tracing/tracing.go
@@ -0,0 +1,212 @@
+/*
+Copyright 2025 The llm-d Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package tracing wires the KV-cache manager into OpenTelemetry and provides
+// small helpers for starting spans and recording their outcome.
+package tracing
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/propagation"
+	"go.opentelemetry.io/otel/sdk/resource"
+	"go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
+	otelTrace "go.opentelemetry.io/otel/trace"
+)
+
+const (
+	// ServiceName is the default service name and the name of the package tracer.
+	ServiceName = "llm-d-kv-cache-manager"
+
+	envOTELTracingEnabled   = "OTEL_TRACING_ENABLED"
+	envOTELExporterEndpoint = "OTEL_EXPORTER_OTLP_ENDPOINT"
+	envOTELSamplingRate     = "OTEL_SAMPLING_RATE"
+)
+
+// Config holds the tracing settings consumed by Initialize.
+type Config struct {
+	// Enabled turns span export on; when false Initialize installs no provider.
+	Enabled bool
+	// ExporterEndpoint is the OTLP/gRPC collector address, either "host:port"
+	// or a URL such as "http://host:4317".
+	ExporterEndpoint string
+	// SamplingRate is the ratio handed to the TraceIDRatioBased sampler.
+	SamplingRate float64
+	// ServiceName is reported as the service name resource attribute.
+	ServiceName string
+}
+
+// NewConfigFromEnv returns the default configuration overridden by the
+// OTEL_TRACING_ENABLED, OTEL_EXPORTER_OTLP_ENDPOINT and OTEL_SAMPLING_RATE
+// environment variables. Values that fail to parse are ignored and the
+// default is kept.
+func NewConfigFromEnv() *Config {
+	config := &Config{
+		Enabled:          false,
+		ExporterEndpoint: "http://localhost:4317",
+		SamplingRate:     0.1,
+		ServiceName:      ServiceName,
+	}
+
+	if enabled := os.Getenv(envOTELTracingEnabled); enabled != "" {
+		if enabledBool, err := strconv.ParseBool(enabled); err == nil {
+			config.Enabled = enabledBool
+		}
+	}
+
+	if endpoint := os.Getenv(envOTELExporterEndpoint); endpoint != "" {
+		config.ExporterEndpoint = endpoint
+	}
+
+	if samplingRateStr := os.Getenv(envOTELSamplingRate); samplingRateStr != "" {
+		if samplingRate, err := strconv.ParseFloat(samplingRateStr, 64); err == nil {
+			config.SamplingRate = samplingRate
+		}
+	}
+
+	return config
+}
+
+// Initialize installs the global propagator and, when config.Enabled is true,
+// a global tracer provider that batches spans to the OTLP/gRPC exporter.
+// It returns the provider's Shutdown function; the returned function is a
+// no-op when tracing is disabled or config is nil.
+func Initialize(ctx context.Context, config *Config) (func(context.Context) error, error) {
+	// Always set up context propagation, even when tracing is disabled
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	))
+
+	// If tracing is disabled (or no config was supplied), return a no-op shutdown function
+	if config == nil || !config.Enabled {
+		return func(context.Context) error { return nil }, nil
+	}
+
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceNameKey.String(config.ServiceName),
+			semconv.ServiceVersionKey.String("1.0.0"),
+		),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create resource: %w", err)
+	}
+
+	traceExporter, err := otlptracegrpc.New(ctx, exporterOptions(config.ExporterEndpoint)...)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create trace exporter: %w", err)
+	}
+
+	tracerProvider := trace.NewTracerProvider(
+		trace.WithBatcher(traceExporter),
+		trace.WithResource(res),
+		trace.WithSampler(trace.TraceIDRatioBased(config.SamplingRate)),
+	)
+
+	otel.SetTracerProvider(tracerProvider)
+
+	return tracerProvider.Shutdown, nil
+}
+
+// exporterOptions maps an endpoint string onto OTLP/gRPC exporter options.
+// WithEndpoint only accepts "host:port", so an endpoint carrying a scheme
+// (the package default and the usual form of OTEL_EXPORTER_OTLP_ENDPOINT) is
+// passed through WithEndpointURL, which derives transport security from the
+// scheme. Bare "host:port" values keep the plaintext connection.
+func exporterOptions(endpoint string) []otlptracegrpc.Option {
+	if strings.Contains(endpoint, "://") {
+		return []otlptracegrpc.Option{otlptracegrpc.WithEndpointURL(endpoint)}
+	}
+
+	return []otlptracegrpc.Option{
+		otlptracegrpc.WithEndpoint(endpoint),
+		otlptracegrpc.WithInsecure(),
+	}
+}
+
+// GetTracer returns the tracer registered under ServiceName on the global
+// tracer provider.
+func GetTracer() otelTrace.Tracer {
+	return otel.Tracer(ServiceName)
+}
+
+// Span attribute keys.
+const (
+	// KV Cache specific attributes.
+	AttrKVCacheHitRatio   = "llm_d.kv_cache.hit_ratio"
+	AttrKVCacheBlockKeys  = "llm_d.kv_cache.block_keys"
+	AttrKVCachePodCount   = "llm_d.kv_cache.pod_count"
+	AttrKVCacheTokenCount = "llm_d.kv_cache.token_count" //nolint:gosec // false positive - not credentials
+	AttrKVCachePromptHash = "llm_d.kv_cache.prompt_hash"
+
+	// GenAI request attributes.
+	AttrGenAIRequestModel         = "gen_ai.request.model"
+	AttrGenAIResponseFinishReason = "gen_ai.response.finish_reason"
+
+	// Operation attributes.
+	AttrOperationType    = "operation.type"
+	AttrOperationOutcome = "operation.outcome"
+)
+
+// Operation types for KV cache operations.
+const (
+	OperationGetPodScores = "get_pod_scores"
+	OperationFindTokens   = "find_tokens"
+	OperationScorePods    = "score_pods"
+)
+
+// Values recorded under AttrOperationOutcome.
+const (
+	OutcomeSuccess = "success"
+	OutcomeError   = "error"
+	OutcomeTimeout = "timeout"
+)
+
+// StartSpan starts a span named operationName on the package tracer, tags it
+// with operationType under AttrOperationType, and returns the span together
+// with a context carrying it. The caller must End the span.
+func StartSpan(ctx context.Context, operationName, operationType string) (context.Context, otelTrace.Span) {
+	tracer := GetTracer()
+	ctx, span := tracer.Start(ctx, operationName)
+
+	span.SetAttributes(
+		attribute.String(AttrOperationType, operationType),
+	)
+
+	return ctx, span
+}
+
+// SetSpanError records the outcome of an operation on span: a nil err marks
+// it OutcomeSuccess; otherwise it is marked OutcomeError and err is recorded
+// on the span via RecordError.
+func SetSpanError(span otelTrace.Span, err error) {
+	if err == nil {
+		span.SetAttributes(attribute.String(AttrOperationOutcome, OutcomeSuccess))
+		return
+	}
+
+	span.SetAttributes(attribute.String(AttrOperationOutcome, OutcomeError))
+	span.RecordError(err)
+}