Skip to content

Commit c58cf2d

Browse files
vMaroon and elevran authored
Enable prefix-cache awareness in active-active multi-replica scheduler deployments (#578)
* - active-active-ha support

  Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>

* Update docs/architecture.md

  Co-authored-by: Etai Lev Ran <elevran@gmail.com>
  Signed-off-by: Maroon Ayoub <Maroonay@gmail.com>

* lint

  Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>

---------

Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>
Signed-off-by: Maroon Ayoub <Maroonay@gmail.com>
Co-authored-by: Etai Lev Ran <elevran@gmail.com>
1 parent f0d12fb commit c58cf2d

File tree

3 files changed

+94
-7
lines changed

3 files changed

+94
-7
lines changed

docs/architecture.md

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,31 @@ plugins:
339339
huggingFaceToken: your_hf_token_here # automatically set by `HF_TOKEN` environment variable
340340
```
341341

342+
Example configuration for automatic pod discovery in active-active multi-replica scheduler deployments:
343+
```yaml
344+
- type: precise-prefix-cache-scorer
345+
parameters:
346+
tokenProcessorConfig:
347+
blockSize: 64
348+
hashSeed: "42"
349+
indexerConfig:
350+
tokenizersPoolConfig:
351+
modelName: "Qwen/Qwen3-32B"
352+
hf:
353+
tokenizersCacheDir: "/tmp/tokenizers"
354+
kvEventsConfig:
355+
topicFilter: "kv@"
356+
concurrency: 4
357+
discoverPods: true # enables automatic pod discovery for active-active HA
358+
podDiscoveryConfig:
359+
socketPort: 5556
360+
```
361+
362+
Where the vLLM engines are configured to emit KV-Events on port `5556` as follows:
363+
```yaml
364+
--kv-events-config "{\"enable_kv_cache_events\":true,\"publisher\":\"zmq\",\"endpoint\":\"tcp://*:5556\",\"topic\":\"kv@${POD_IP}@Qwen/Qwen3-32B\"}"
365+
```
366+
342367
Example configuration with all parameters set:
343368

344369
```yaml
@@ -349,9 +374,11 @@ plugins:
349374
blockSize: 16
350375
hashSeed: "12345"
351376
kvEventsConfig:
352-
zmqEndpoint: tcp://*:5557
353-
topicFilter: kv@
354-
concurrency: 8
377+
topicFilter: "kv@"
378+
concurrency: 4
379+
discoverPods: true # enables automatic pod discovery for active-active HA
380+
podDiscoveryConfig:
381+
socketPort: 5556
355382
indexerConfig:
356383
prefixStoreConfig:
357384
cacheSize: 500000

pkg/plugins/scorer/active_request.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ func (s *ActiveRequest) decrementPodCount(podName string) {
250250
}
251251
}
252252

253-
func cleanCachePeriodically(ctx context.Context, cache *ttlcache.Cache[string, *requestEntry], requestTimeout time.Duration) {
253+
func cleanCachePeriodically[K comparable, V any](ctx context.Context, cache *ttlcache.Cache[K, V], requestTimeout time.Duration) {
254254
ticker := time.NewTicker(requestTimeout)
255255
defer ticker.Stop()
256256

pkg/plugins/scorer/precise_prefix_cache.go

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import (
66
"errors"
77
"fmt"
88
"os"
9+
"time"
910

11+
"github.com/jellydator/ttlcache/v3"
1012
"github.com/llm-d/llm-d-kv-cache/pkg/kvcache"
1113
"github.com/llm-d/llm-d-kv-cache/pkg/kvcache/kvblock"
1214
"github.com/llm-d/llm-d-kv-cache/pkg/kvevents"
@@ -46,7 +48,6 @@ var _ framework.Scorer = &PrecisePrefixCacheScorer{}
4648
// a new instance of the PrecisePrefixCacheScorer.
4749
func PrecisePrefixCachePluginFactory(name string, rawParameters json.RawMessage,
4850
handle plugins.Handle) (plugins.Plugin, error) {
49-
5051
indexerConfig, err := kvcache.NewDefaultConfig()
5152
if err != nil {
5253
return nil, fmt.Errorf("failed to initialize indexer config: %w", err)
@@ -113,9 +114,39 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
113114
pool := kvevents.NewPool(config.KVEventsConfig, kvCacheIndexer.KVBlockIndex(), tokenProcessor)
114115
pool.Start(ctx)
115116

117+
subscribersManager := kvevents.NewSubscriberManager(pool)
118+
var subscribersCache *ttlcache.Cache[string, struct{}]
119+
120+
// initialize the subscribers cache only if pod discovery is enabled
121+
if config.KVEventsConfig.DiscoverPods {
122+
// initialize the subscribers TTL cache
123+
subscriptionTimeout := 10 * time.Minute
124+
subscribersCache = ttlcache.New[string, struct{}](
125+
ttlcache.WithTTL[string, struct{}](subscriptionTimeout),
126+
)
127+
subscribersCache.OnEviction(func(ctx context.Context, reason ttlcache.EvictionReason,
128+
item *ttlcache.Item[string, struct{}],
129+
) {
130+
if reason == ttlcache.EvictionReasonExpired {
131+
subscribersManager.RemoveSubscriber(ctx, item.Key())
132+
}
133+
})
134+
go cleanCachePeriodically(ctx, subscribersCache, subscriptionTimeout)
135+
}
136+
if config.KVEventsConfig.ZMQEndpoint != "" {
137+
// set up a local subscriber to support global socket mode
138+
if err := subscribersManager.EnsureSubscriber(ctx, "local-subscriber",
139+
config.KVEventsConfig.ZMQEndpoint, config.KVEventsConfig.TopicFilter, false); err != nil {
140+
return nil, fmt.Errorf("failed to create local subscriber for global socket mode: %w", err)
141+
}
142+
}
143+
116144
return &PrecisePrefixCacheScorer{
117-
typedName: plugins.TypedName{Type: PrecisePrefixCachePluginType},
118-
kvCacheIndexer: kvCacheIndexer,
145+
typedName: plugins.TypedName{Type: PrecisePrefixCachePluginType},
146+
kvCacheIndexer: kvCacheIndexer,
147+
subscribersCache: subscribersCache,
148+
subscribersManager: subscribersManager,
149+
kvEventsConfig: config.KVEventsConfig,
119150
}, nil
120151
}
121152

@@ -127,6 +158,15 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
127158
type PrecisePrefixCacheScorer struct {
128159
typedName plugins.TypedName
129160
kvCacheIndexer *kvcache.Indexer
161+
162+
// until the IGW data-layer is ready to provide endpoint events,
163+
// we maintain a TTL cache of known pods that are discovered through
164+
// the scoring process. If a pod is not in the received endpoints list
165+
// during scoring for a certain period, we consider it gone and
166+
// stop its KV events subscription.
167+
subscribersCache *ttlcache.Cache[string, struct{}]
168+
subscribersManager *kvevents.SubscriberManager
169+
kvEventsConfig *kvevents.Config
130170
}
131171

132172
// TypedName returns the typed name of the plugin.
@@ -146,6 +186,26 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, cycleState *types.
146186
logger := log.FromContext(ctx).WithName(s.typedName.String())
147187
debugLogger := logger.V(logutil.DEBUG)
148188

189+
if s.kvEventsConfig.DiscoverPods {
190+
// temporarily refresh KV-events subscribers here, until the IGW data-layer provides endpoint events
191+
for _, pod := range pods {
192+
podObj := pod.GetPod()
193+
if podObj == nil {
194+
continue
195+
}
196+
podKey := podObj.NamespacedName.String()
197+
s.subscribersCache.Set(podKey, struct{}{}, 0) // use default TTL
198+
199+
if err := s.subscribersManager.EnsureSubscriber(context.Background(), podKey, // dont use request ctx
200+
fmt.Sprintf("tcp://%s:%d", podObj.Address, s.kvEventsConfig.PodDiscoveryConfig.SocketPort),
201+
s.kvEventsConfig.TopicFilter, true); err != nil {
202+
logger.Error(err, "Failed to ensure KV-events subscriber for pod", "pod", podKey,
203+
"endpoint", podObj.Address)
204+
continue
205+
}
206+
}
207+
}
208+
149209
if request == nil {
150210
debugLogger.Info("Request is nil, skipping scoring")
151211
return nil

0 commit comments

Comments
 (0)