Skip to content

Commit 0b141c6

Browse files
authored
initial update to load plugins from file (#1151)
* initial update to load plugins from file Signed-off-by: Nir Rozenbaum <[email protected]> * linter Signed-off-by: Nir Rozenbaum <[email protected]> * updated config loader unit-test to use name plugin names Signed-off-by: Nir Rozenbaum <[email protected]> * renamed files in the config map to use the term plugins Signed-off-by: Nir Rozenbaum <[email protected]> * renamed prefix-cache to prefix-cache-scorer Signed-off-by: Nir Rozenbaum <[email protected]> --------- Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 72bf2d0 commit 0b141c6

File tree

18 files changed

+264
-114
lines changed

18 files changed

+264
-114
lines changed

cmd/epp/runner/runner.go

Lines changed: 5 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,6 @@ import (
4747
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
4848
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector"
4949
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
50-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
51-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
52-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker"
53-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile"
54-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/scorer"
5550
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
5651
envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
5752
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -144,8 +139,6 @@ var (
144139
setupLog = ctrl.Log.WithName("setup")
145140

146141
// Environment variables
147-
schedulerV2 = envutil.GetEnvBool("EXPERIMENTAL_USE_SCHEDULER_V2", false, setupLog)
148-
prefixCacheScheduling = envutil.GetEnvBool("ENABLE_PREFIX_CACHE_SCHEDULING", false, setupLog)
149142
reqHeaderBasedSchedulerForTesting = envutil.GetEnvBool("ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING", false, setupLog)
150143
)
151144

@@ -283,11 +276,7 @@ func (r *Runner) Run(ctx context.Context) error {
283276
}
284277

285278
// --- Initialize Core EPP Components ---
286-
scheduler, err := r.initializeScheduler()
287-
if err != nil {
288-
setupLog.Error(err, "Failed to create scheduler")
289-
return err
290-
}
279+
scheduler := r.initializeScheduler()
291280

292281
saturationDetector := saturationdetector.NewDetector(sdConfig, datastore, ctrl.Log)
293282

@@ -334,38 +323,19 @@ func (r *Runner) Run(ctx context.Context) error {
334323
return nil
335324
}
336325

337-
func (r *Runner) initializeScheduler() (*scheduling.Scheduler, error) {
326+
func (r *Runner) initializeScheduler() *scheduling.Scheduler {
338327
if r.schedulerConfig != nil {
339-
return scheduling.NewSchedulerWithConfig(r.schedulerConfig), nil
328+
return scheduling.NewSchedulerWithConfig(r.schedulerConfig)
340329
}
341330

342331
// otherwise, no one configured from outside scheduler config. use existing configuration
343332
scheduler := scheduling.NewScheduler()
344-
if schedulerV2 {
345-
queueScorerWeight := envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", scorer.DefaultQueueScorerWeight, setupLog)
346-
kvCacheScorerWeight := envutil.GetEnvInt("KV_CACHE_SCORE_WEIGHT", scorer.DefaultKVCacheScorerWeight, setupLog)
347-
348-
schedulerProfile := framework.NewSchedulerProfile().
349-
WithScorers(framework.NewWeightedScorer(scorer.NewQueueScorer(), queueScorerWeight),
350-
framework.NewWeightedScorer(scorer.NewKVCacheScorer(), kvCacheScorerWeight)).
351-
WithPicker(picker.NewMaxScorePicker(picker.DefaultMaxNumOfEndpoints))
352-
353-
if prefixCacheScheduling {
354-
prefixScorerWeight := envutil.GetEnvInt("PREFIX_CACHE_SCORE_WEIGHT", prefix.DefaultScorerWeight, setupLog)
355-
if err := schedulerProfile.AddPlugins(framework.NewWeightedScorer(prefix.New(loadPrefixCacheConfig()), prefixScorerWeight)); err != nil {
356-
return nil, fmt.Errorf("Failed to register scheduler plugins - %w", err)
357-
}
358-
}
359-
360-
schedulerConfig := scheduling.NewSchedulerConfig(profile.NewSingleProfileHandler(), map[string]*framework.SchedulerProfile{"schedulerv2": schedulerProfile})
361-
scheduler = scheduling.NewSchedulerWithConfig(schedulerConfig)
362-
}
363333

364334
if reqHeaderBasedSchedulerForTesting {
365335
scheduler = conformance_epp.NewReqHeaderBasedScheduler()
366336
}
367337

368-
return scheduler, nil
338+
return scheduler
369339
}
370340

371341
func (r *Runner) parseConfiguration(ctx context.Context) error {
@@ -398,6 +368,7 @@ func (r *Runner) parseConfiguration(ctx context.Context) error {
398368
// Add requestControl plugins
399369
r.requestControlConfig.AddPlugins(handle.GetAllPlugins()...)
400370

371+
log.FromContext(ctx).Info("loaded configuration from file/text successfully")
401372
return nil
402373
}
403374

@@ -419,16 +390,6 @@ func initLogging(opts *zap.Options) {
419390
ctrl.SetLogger(logger)
420391
}
421392

422-
func loadPrefixCacheConfig() prefix.Config {
423-
baseLogger := log.Log.WithName("env-config")
424-
425-
return prefix.Config{
426-
HashBlockSize: envutil.GetEnvInt("PREFIX_CACHE_HASH_BLOCK_SIZE", prefix.DefaultHashBlockSize, baseLogger),
427-
MaxPrefixBlocksToMatch: envutil.GetEnvInt("PREFIX_CACHE_MAX_PREFIX_BLOCKS", prefix.DefaultMaxPrefixBlocks, baseLogger),
428-
LRUCapacityPerServer: envutil.GetEnvInt("PREFIX_CACHE_LRU_CAPACITY_PER_SERVER", prefix.DefaultLRUCapacityPerServer, baseLogger),
429-
}
430-
}
431-
432393
// registerExtProcServer adds the ExtProcServerRunner as a Runnable to the manager.
433394
func registerExtProcServer(mgr manager.Manager, runner *runserver.ExtProcServerRunner, logger logr.Logger) error {
434395
if err := mgr.Add(runner.AsRunnable(logger)); err != nil {

config/manifests/inferencepool-resources.yaml

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ spec:
6363
- "9002"
6464
- -grpcHealthPort
6565
- "9003"
66+
- "-configFile"
67+
- "/config/default-plugins.yaml"
6668
ports:
6769
- containerPort: 9002
6870
- containerPort: 9003
@@ -80,6 +82,95 @@ spec:
8082
service: inference-extension
8183
initialDelaySeconds: 5
8284
periodSeconds: 10
85+
volumeMounts:
86+
- name: plugins-config-volume
87+
mountPath: "/config"
88+
volumes:
89+
- name: plugins-config-volume
90+
configMap:
91+
name: plugins-config
92+
---
93+
apiVersion: v1
94+
kind: ConfigMap
95+
metadata:
96+
name: plugins-config
97+
namespace: default
98+
data:
99+
default-plugins.yaml: |
100+
apiVersion: inference.networking.x-k8s.io/v1alpha1
101+
kind: EndpointPickerConfig
102+
plugins:
103+
- type: low-queue-filter
104+
parameters:
105+
threshold: 128
106+
- type: lora-affinity-filter
107+
parameters:
108+
threshold: 0.999
109+
- type: least-queue-filter
110+
- type: least-kv-cache-filter
111+
- type: decision-tree-filter
112+
name: low-latency-filter
113+
parameters:
114+
current:
115+
pluginRef: low-queue-filter
116+
nextOnSuccess:
117+
decisionTree:
118+
current:
119+
pluginRef: lora-affinity-filter
120+
nextOnSuccessOrFailure:
121+
decisionTree:
122+
current:
123+
pluginRef: least-queue-filter
124+
nextOnSuccessOrFailure:
125+
decisionTree:
126+
current:
127+
pluginRef: least-kv-cache-filter
128+
nextOnFailure:
129+
decisionTree:
130+
current:
131+
pluginRef: least-queue-filter
132+
nextOnSuccessOrFailure:
133+
decisionTree:
134+
current:
135+
pluginRef: lora-affinity-filter
136+
nextOnSuccessOrFailure:
137+
decisionTree:
138+
current:
139+
pluginRef: least-kv-cache-filter
140+
- type: random-picker
141+
parameters:
142+
maxNumOfEndpoints: 1
143+
- type: single-profile-handler
144+
schedulingProfiles:
145+
- name: default
146+
plugins:
147+
- pluginRef: low-latency-filter
148+
- pluginRef: random-picker
149+
plugins-v2.yaml: |
150+
apiVersion: inference.networking.x-k8s.io/v1alpha1
151+
kind: EndpointPickerConfig
152+
plugins:
153+
- type: queue-scorer
154+
- type: kv-cache-scorer
155+
- type: prefix-cache-scorer
156+
parameters:
157+
hashBlockSize: 64
158+
maxPrefixBlocksToMatch: 256
159+
lruCapacityPerServer: 31250
160+
- type: max-score-picker
161+
parameters:
162+
maxNumOfEndpoints: 1
163+
- type: single-profile-handler
164+
schedulingProfiles:
165+
- name: default
166+
plugins:
167+
- pluginRef: queue-scorer
168+
weight: 1
169+
- pluginRef: kv-cache-scorer
170+
weight: 1
171+
- pluginRef: prefix-cache-scorer
172+
weight: 1
173+
- pluginRef: max-score-picker
83174
---
84175
kind: ClusterRole
85176
apiVersion: rbac.authorization.k8s.io/v1

conformance/testing-epp/plugins/filter/request_header_based_filter.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,18 @@ var _ framework.Filter = &HeaderBasedTestingFilter{}
4040
// This should only be used for testing purposes.
4141
func NewHeaderBasedTestingFilter() *HeaderBasedTestingFilter {
4242
return &HeaderBasedTestingFilter{
43-
tn: plugins.TypedName{Type: "header-based-testing", Name: "header-based-testing-filter"},
43+
typedName: plugins.TypedName{Type: "header-based-testing", Name: "header-based-testing-filter"},
4444
}
4545
}
4646

4747
// HeaderBasedTestingFilter filters Pods based on an address specified in the "test-epp-endpoint-selection" request header.
4848
type HeaderBasedTestingFilter struct {
49-
tn plugins.TypedName
49+
typedName plugins.TypedName
5050
}
5151

5252
// TypedName returns the type and name tuple of this plugin instance.
5353
func (f *HeaderBasedTestingFilter) TypedName() plugins.TypedName {
54-
return f.tn
54+
return f.typedName
5555
}
5656

5757
// Filter selects pods that match the IP addresses specified in the request header.

pkg/epp/common/config/loader/configloader_test.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -653,25 +653,25 @@ const successSchedulerConfigText = `
653653
apiVersion: inference.networking.x-k8s.io/v1alpha1
654654
kind: EndpointPickerConfig
655655
plugins:
656-
- name: lowQueue
657-
type: low-queue
656+
- name: lowQueueFilter
657+
type: low-queue-filter
658658
parameters:
659659
threshold: 10
660-
- name: prefixCache
661-
type: prefix-cache
660+
- name: prefixCacheScorer
661+
type: prefix-cache-scorer
662662
parameters:
663663
hashBlockSize: 32
664-
- name: maxScore
665-
type: max-score
664+
- name: maxScorePicker
665+
type: max-score-picker
666666
- name: profileHandler
667-
type: single-profile
667+
type: single-profile-handler
668668
schedulingProfiles:
669669
- name: default
670670
plugins:
671-
- pluginRef: lowQueue
672-
- pluginRef: prefixCache
671+
- pluginRef: lowQueueFilter
672+
- pluginRef: prefixCacheScorer
673673
weight: 50
674-
- pluginRef: maxScore
674+
- pluginRef: maxScorePicker
675675
`
676676

677677
// invalid parameter configuration for plugin (string passed, in expected)
@@ -683,14 +683,14 @@ kind: EndpointPickerConfig
683683
plugins:
684684
- name:profileHandler
685685
type: single-profile
686-
- name: prefixCache
687-
type: prefix-cache
686+
- name: prefixCacheScorer
687+
type: prefix-cache-scorer
688688
parameters:
689689
hashBlockSize: asdf
690690
schedulingProfiles:
691691
- name: default
692692
plugins:
693-
- pluginRef: prefixCache
693+
- pluginRef: prefixCacheScorer
694694
weight: 50
695695
`
696696

@@ -703,14 +703,14 @@ kind: EndpointPickerConfig
703703
plugins:
704704
- name: profileHandler
705705
type: single-profile
706-
- name: prefixCache
707-
type: prefix-cache
706+
- name: prefixCacheScorer
707+
type: prefix-cache-scorer
708708
parameters:
709709
hashBlockSize: 32
710710
schedulingProfiles:
711711
- name: default
712712
plugins:
713-
- pluginRef: prefixCache
713+
- pluginRef: prefixCacheScorer
714714
`
715715

716716
// multiple pickers in scheduling profile
@@ -740,8 +740,8 @@ const errorConfigText = `
740740
apiVersion: inference.networking.x-k8s.io/v1alpha1
741741
kind: EndpointPickerConfig
742742
plugins:
743-
- name: lowQueue
744-
pluginName: low-queue
743+
- name: lowQueueFilter
744+
pluginName: low-queue-filter
745745
parameters:
746746
threshold: 10
747747
`

pkg/epp/scheduling/framework/plugins/filter/decision_tree_filter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import (
3030
)
3131

3232
const (
33-
DecisionTreeFilterType = "decision-tree"
33+
DecisionTreeFilterType = "decision-tree-filter"
3434
)
3535

3636
// compile-time type assertion

pkg/epp/scheduling/framework/plugins/filter/least_kvcache_filter.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
)
2828

2929
const (
30-
LeastKVCacheFilterType = "least-KV-cache"
30+
LeastKVCacheFilterType = "least-kv-cache-filter"
3131
)
3232

3333
// compile-time type validation
@@ -41,7 +41,7 @@ func LeastKVCacheFilterFactory(name string, _ json.RawMessage, _ plugins.Handle)
4141
// NewLeastKVCacheFilter initializes a new LeastKVCacheFilter and returns its pointer.
4242
func NewLeastKVCacheFilter() *LeastKVCacheFilter {
4343
return &LeastKVCacheFilter{
44-
tn: plugins.TypedName{Type: LeastKVCacheFilterType, Name: LeastKVCacheFilterType},
44+
typedName: plugins.TypedName{Type: LeastKVCacheFilterType, Name: LeastKVCacheFilterType},
4545
}
4646
}
4747

@@ -51,17 +51,17 @@ func NewLeastKVCacheFilter() *LeastKVCacheFilter {
5151
// should consider them all instead of the absolute minimum one. This worked better than picking the
5252
// least one as it gives more choices for the next filter, which on aggregate gave better results.
5353
type LeastKVCacheFilter struct {
54-
tn plugins.TypedName
54+
typedName plugins.TypedName
5555
}
5656

5757
// TypedName returns the type and name tuple of this plugin instance.
5858
func (f *LeastKVCacheFilter) TypedName() plugins.TypedName {
59-
return f.tn
59+
return f.typedName
6060
}
6161

6262
// WithName sets the name of the filter.
6363
func (f *LeastKVCacheFilter) WithName(name string) *LeastKVCacheFilter {
64-
f.tn.Name = name
64+
f.typedName.Name = name
6565
return f
6666
}
6767

pkg/epp/scheduling/framework/plugins/filter/least_queue_filter.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
)
2828

2929
const (
30-
LeastQueueFilterType = "least-queue"
30+
LeastQueueFilterType = "least-queue-filter"
3131
)
3232

3333
// compile-time type validation
@@ -41,7 +41,7 @@ func LeastQueueFilterFactory(name string, _ json.RawMessage, _ plugins.Handle) (
4141
// NewLeastQueueFilter initializes a new LeastQueueFilter and returns its pointer.
4242
func NewLeastQueueFilter() *LeastQueueFilter {
4343
return &LeastQueueFilter{
44-
tn: plugins.TypedName{Type: LeastQueueFilterType, Name: LeastQueueFilterType},
44+
typedName: plugins.TypedName{Type: LeastQueueFilterType, Name: LeastQueueFilterType},
4545
}
4646
}
4747

@@ -51,17 +51,17 @@ func NewLeastQueueFilter() *LeastQueueFilter {
5151
// we should consider them all instead of the absolute minimum one. This worked better than picking
5252
// the least one as it gives more choices for the next filter, which on aggregate gave better results.
5353
type LeastQueueFilter struct {
54-
tn plugins.TypedName
54+
typedName plugins.TypedName
5555
}
5656

5757
// TypedName returns the type and name tuple of this plugin instance.
5858
func (f *LeastQueueFilter) TypedName() plugins.TypedName {
59-
return f.tn
59+
return f.typedName
6060
}
6161

6262
// WithName sets the name of the filter.
6363
func (f *LeastQueueFilter) WithName(name string) *LeastQueueFilter {
64-
f.tn.Name = name
64+
f.typedName.Name = name
6565
return f
6666
}
6767

0 commit comments

Comments
 (0)