Skip to content

Commit 503c5d4

Browse files
nirrozenbaum authored and kfswain committed
cleanup of config from scheduling package (kubernetes-sigs#1263)
Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent a0e5cb2 commit 503c5d4

File tree

11 files changed, with 29 additions and 125 deletions.

cmd/epp/runner/runner.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ import (
4242

4343
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
4444
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
45-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/common/config/loader"
45+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/config/loader"
4646
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
4747
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
4848
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors"

pkg/epp/common/config/defaults.go

Lines changed: 0 additions & 32 deletions
This file was deleted.

pkg/epp/common/config/loader/configloader.go renamed to pkg/epp/config/loader/configloader.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,19 @@ import (
2626
"k8s.io/apimachinery/pkg/util/sets"
2727

2828
configapi "sigs.k8s.io/gateway-api-inference-extension/apix/config/v1alpha1"
29-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/common/config"
3029
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
3130
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
3231
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
3332
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker"
3433
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile"
3534
)
3635

36+
const (
37+
// DefaultScorerWeight is the weight used for scorers referenced in the
38+
// configuration without explicit weights.
39+
DefaultScorerWeight = 1
40+
)
41+
3742
var scheme = runtime.NewScheme()
3843

3944
func init() {
@@ -72,7 +77,7 @@ func LoadSchedulerConfig(configProfiles []configapi.SchedulingProfile, handle pl
7277
referencedPlugin := handle.Plugin(plugin.PluginRef)
7378
if scorer, ok := referencedPlugin.(framework.Scorer); ok {
7479
// Set default weight if one wasn't set in the configuration
75-
weight := config.DefaultScorerWeight
80+
weight := DefaultScorerWeight
7681
if plugin.Weight != nil {
7782
weight = *plugin.Weight
7883
}

pkg/epp/common/config/loader/configloader_test.go renamed to pkg/epp/config/loader/configloader_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func TestLoadConfiguration(t *testing.T) {
193193
{
194194
name: "successFromFile",
195195
configText: "",
196-
configFile: "../../../../../test/testdata/configloader_1_test.yaml",
196+
configFile: "../../../../test/testdata/configloader_1_test.yaml",
197197
want: goodConfig,
198198
wantErr: false,
199199
},

pkg/epp/saturationdetector/config.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ import (
2121

2222
"sigs.k8s.io/controller-runtime/pkg/log"
2323

24-
commonconfig "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/common/config"
2524
envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
2625
)
2726

2827
// Default configuration values
2928
const (
30-
DefaultQueueDepthThreshold = commonconfig.DefaultQueueThresholdCritical
31-
DefaultKVCacheUtilThreshold = commonconfig.DefaultKVCacheThreshold
29+
// DefaultQueueDepthThreshold is the default backend waiting queue size threshold.
30+
DefaultQueueDepthThreshold = 5
31+
// DefaultKVCacheUtilThreshold is the default KV cache utilization (0.0 to 1.0) threshold.
32+
DefaultKVCacheUtilThreshold = 0.8
3233
// DefaultMetricsStalenessThreshold defines how old metrics can be before they
3334
// are considered stale.
3435
// Given the pod metrics refresh interval is 50ms, a threshold slightly above

pkg/epp/scheduling/config/config.go

Lines changed: 0 additions & 58 deletions
This file was deleted.

pkg/epp/scheduling/framework/plugins/filter/filter_test.go

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
3030
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
3131
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
32-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
3332
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
3433
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/scorer"
3534
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
@@ -171,17 +170,8 @@ func TestLoRASoftAffinityDistribution(t *testing.T) {
171170
tolerancePercent = 5.0 // Allow 5% tolerance from expected distribution
172171
)
173172

174-
// Save original config value to restore later
175-
originalThreshold := config.Conf.LoraAffinityThreshold
176-
177173
// Set a specific test value for this test
178174
testThreshold := 0.75 // 75%
179-
config.Conf.LoraAffinityThreshold = testThreshold
180-
181-
// Ensure we restore the original threshold when test completes
182-
defer func() {
183-
config.Conf.LoraAffinityThreshold = originalThreshold
184-
}()
185175

186176
// Create a test request and pods
187177
req := &types.LLMRequest{
@@ -213,11 +203,11 @@ func TestLoRASoftAffinityDistribution(t *testing.T) {
213203
availableCount := 0
214204

215205
// Use the test threshold value
216-
expectedAffinityPercent := config.Conf.LoraAffinityThreshold * 100
206+
expectedAffinityPercent := testThreshold * 100
217207
expectedAvailabilityPercent := 100 - expectedAffinityPercent
218208

219209
// initialize LoraAffinityFilter
220-
LoraAffinityFilter := NewLoraAffinityFilter(config.Conf.LoraAffinityThreshold)
210+
LoraAffinityFilter := NewLoraAffinityFilter(testThreshold)
221211

222212
for range numIterations {
223213
result := LoraAffinityFilter.Filter(context.Background(), types.NewCycleState(), req, pods)
@@ -247,8 +237,8 @@ func TestLoRASoftAffinityDistribution(t *testing.T) {
247237
availableUpperBound := expectedAvailabilityPercent + tolerancePercent
248238

249239
t.Logf("Distribution results over %d iterations:", numIterations)
250-
t.Logf("Expected affinity percent: %.2f%% (threshold: %.2f)", expectedAffinityPercent, config.Conf.LoraAffinityThreshold)
251-
t.Logf("Expected availability percent: %.2f%% (threshold: %.2f)", expectedAvailabilityPercent, config.Conf.LoraAffinityThreshold)
240+
t.Logf("Expected affinity percent: %.2f%% (threshold: %.2f)", expectedAffinityPercent, testThreshold)
241+
t.Logf("Expected availability percent: %.2f%% (threshold: %.2f)", expectedAvailabilityPercent, testThreshold)
252242
t.Logf("Actual affinity percent: %.2f%% (%d out of %d)", actualAffinityPercent, affinityCount, numIterations)
253243
t.Logf("Actual available percent: %.2f%% (%d out of %d)", actualAvailablePercent, availableCount, numIterations)
254244

@@ -268,8 +258,8 @@ func TestDecisionTreeFilterFactory(t *testing.T) {
268258

269259
leastKvCacheFilter := NewLeastKVCacheFilter()
270260
leastQueueFilter := NewLeastQueueFilter()
271-
loraAffinityFilter := NewLoraAffinityFilter(config.Conf.LoraAffinityThreshold)
272-
lowQueueFilter := NewLowQueueFilter(config.Conf.QueueingThresholdLoRA)
261+
loraAffinityFilter := NewLoraAffinityFilter(DefaultLoraAffinityThreshold)
262+
lowQueueFilter := NewLowQueueFilter(DefaultQueueingThresholdLoRA)
273263

274264
kvCacheScorer := scorer.NewKVCacheUtilizationScorer()
275265

pkg/epp/scheduling/framework/plugins/filter/lora_affinity_filter.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ import (
2424
"time"
2525

2626
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
27-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
2827
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
2928
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
3029
)
3130

3231
const (
33-
LoraAffinityFilterType = "lora-affinity-filter"
32+
LoraAffinityFilterType = "lora-affinity-filter"
33+
DefaultLoraAffinityThreshold = 0.999
3434
)
3535

3636
type loraAffinityFilterParameters struct {
@@ -42,7 +42,7 @@ var _ framework.Filter = &LoraAffinityFilter{}
4242

4343
// LoraAffinityFilterFactory defines the factory function for LoraAffinityFilter.
4444
func LoraAffinityFilterFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
45-
parameters := loraAffinityFilterParameters{Threshold: config.DefaultLoraAffinityThreshold}
45+
parameters := loraAffinityFilterParameters{Threshold: DefaultLoraAffinityThreshold}
4646
if rawParameters != nil {
4747
if err := json.Unmarshal(rawParameters, &parameters); err != nil {
4848
return nil, fmt.Errorf("failed to parse the parameters of the '%s' filter - %w", LoraAffinityFilterType, err)

pkg/epp/scheduling/framework/plugins/filter/low_queue_filter.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ import (
2222
"fmt"
2323

2424
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
25-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
2625
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
2726
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
2827
)
2928

3029
const (
31-
LowQueueFilterType = "low-queue-filter"
30+
LowQueueFilterType = "low-queue-filter"
31+
DefaultQueueingThresholdLoRA = 128
3232
)
3333

3434
type lowQueueFilterParameters struct {
@@ -40,7 +40,7 @@ var _ framework.Filter = &LowQueueFilter{}
4040

4141
// LowQueueFilterFactory defines the factory function for LowQueueFilter.
4242
func LowQueueFilterFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
43-
parameters := lowQueueFilterParameters{Threshold: config.DefaultQueueingThresholdLoRA}
43+
parameters := lowQueueFilterParameters{Threshold: DefaultQueueingThresholdLoRA}
4444
if rawParameters != nil {
4545
if err := json.Unmarshal(rawParameters, &parameters); err != nil {
4646
return nil, fmt.Errorf("failed to parse the parameters of the '%s' filter - %w", LowQueueFilterType, err)

pkg/epp/scheduling/scheduler_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626

2727
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
2828
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" // Import config for thresholds
29-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
3029
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
3130
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/filter"
3231
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker"
@@ -36,12 +35,12 @@ import (
3635

3736
// Tests the default scheduler configuration and expected behavior.
3837
func TestSchedule(t *testing.T) {
39-
loraAffinityFilter := filter.NewLoraAffinityFilter(config.Conf.LoraAffinityThreshold)
38+
loraAffinityFilter := filter.NewLoraAffinityFilter(filter.DefaultLoraAffinityThreshold)
4039
leastQueueFilter := filter.NewLeastQueueFilter()
4140
leastKvCacheFilter := filter.NewLeastKVCacheFilter()
4241

4342
lowLatencyFilter := &filter.DecisionTreeFilter{
44-
Current: filter.NewLowQueueFilter(config.Conf.QueueingThresholdLoRA),
43+
Current: filter.NewLowQueueFilter(filter.DefaultQueueingThresholdLoRA),
4544
NextOnSuccess: &filter.DecisionTreeFilter{
4645
Current: loraAffinityFilter,
4746
NextOnSuccessOrFailure: &filter.DecisionTreeFilter{

0 commit comments

Comments
 (0)