Skip to content

Commit a7ac10b

Browse files
authored
Merge branch 'kubernetes-sigs:main' into update-helm-chart
2 parents a10dcec + d27a716 commit a7ac10b

File tree

95 files changed

+3331
-2387
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+3331
-2387
lines changed

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ RUN go mod download
1818

1919
# Sources
2020
COPY cmd/epp ./cmd/epp
21+
COPY pkg/common ./pkg/common
2122
COPY pkg/epp ./pkg/epp
2223
COPY internal ./internal
2324
COPY apix ./apix

Makefile

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ SHELL = /usr/bin/env bash -o pipefail
2424
GIT_COMMIT_SHA ?= "$(shell git rev-parse HEAD 2>/dev/null)"
2525
GIT_TAG ?= $(shell git describe --tags --dirty --always)
2626
PLATFORMS ?= linux/amd64
27-
PUBLISH_PLATFORMS ?= linux/amd64,linux/arm64
2827
DOCKER_BUILDX_CMD ?= docker buildx
2928
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
3029
IMAGE_BUILD_EXTRA_OPTS ?=
@@ -74,11 +73,6 @@ SYNCER_IMAGE_BUILD_EXTRA_OPTS += -t $(SYNCER_IMAGE_EXTRA_TAG)
7473
BBR_IMAGE_BUILD_EXTRA_OPTS += -t $(BBR_IMAGE_EXTRA_TAG)
7574
endif
7675

77-
# Allow `make MULTI=true ...` in CI to switch to multi-arch.
78-
ifdef MULTI
79-
PLATFORMS := $(PUBLISH_PLATFORMS)
80-
endif
81-
8276

8377
# The name of the kind cluster to use for the "kind-load" target.
8478
KIND_CLUSTER ?= kind

apix/config/v1alpha1/defaults.go

Lines changed: 0 additions & 47 deletions
This file was deleted.

apix/config/v1alpha1/endpointpickerconfig_types.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ package v1alpha1
1818

1919
import (
2020
"encoding/json"
21+
"fmt"
2122

2223
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2324
)
2425

25-
// +k8s:defaulter-gen=true
2626
// +kubebuilder:object:root=true
2727

2828
// EndpointPickerConfig is the Schema for the endpointpickerconfigs API
@@ -41,6 +41,14 @@ type EndpointPickerConfig struct {
4141
SchedulingProfiles []SchedulingProfile `json:"schedulingProfiles"`
4242
}
4343

44+
func (cfg EndpointPickerConfig) String() string {
45+
return fmt.Sprintf(
46+
"{Plugins: %v, SchedulingProfiles: %v}",
47+
cfg.Plugins,
48+
cfg.SchedulingProfiles,
49+
)
50+
}
51+
4452
// PluginSpec contains the information that describes a plugin that
4553
// will be instantiated.
4654
type PluginSpec struct {
@@ -61,6 +69,14 @@ type PluginSpec struct {
6169
Parameters json.RawMessage `json:"parameters"`
6270
}
6371

72+
func (ps PluginSpec) String() string {
73+
var parameters string
74+
if ps.Parameters != nil {
75+
parameters = fmt.Sprintf(", Parameters: %s", ps.Parameters)
76+
}
77+
return fmt.Sprintf("{%s/%s%s}", ps.Name, ps.Type, parameters)
78+
}
79+
6480
// SchedulingProfile contains the information to create a SchedulingProfile
6581
// entry to be used by the scheduler.
6682
type SchedulingProfile struct {
@@ -75,6 +91,10 @@ type SchedulingProfile struct {
7591
Plugins []SchedulingPlugin `json:"plugins"`
7692
}
7793

94+
func (sp SchedulingProfile) String() string {
95+
return fmt.Sprintf("{Name: %s, Plugins: %v}", sp.Name, sp.Plugins)
96+
}
97+
7898
// SchedulingPlugin describes a plugin that will be associated with a
7999
// SchedulingProfile entry.
80100
type SchedulingPlugin struct {
@@ -90,3 +110,11 @@ type SchedulingPlugin struct {
90110
// Weight is the weight to be used if this plugin is a Scorer.
91111
Weight *int `json:"weight"`
92112
}
113+
114+
func (sp SchedulingPlugin) String() string {
115+
var weight string
116+
if sp.Weight != nil {
117+
weight = fmt.Sprintf(", Weight: %d", *sp.Weight)
118+
}
119+
return fmt.Sprintf("{PluginRef: %s%s}", sp.PluginRef, weight)
120+
}

apix/config/v1alpha1/zz_generated.defaults.go

Lines changed: 0 additions & 38 deletions
This file was deleted.

apix/v1alpha2/inferenceobjective_types.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -63,20 +63,6 @@ type InferenceObjectiveList struct {
6363
// creation timestamp, will be selected to remain valid. In the event of a race
6464
// condition, one will be selected at random.
6565
type InferenceObjectiveSpec struct {
66-
// ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
67-
// ModelNames must be unique for a referencing InferencePool
68-
// (names can be reused for a different pool in the same cluster).
69-
// The modelName with the oldest creation timestamp is retained, and the incoming
70-
// InferenceObjective's Ready status is set to false with a corresponding reason.
71-
// In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
72-
// Names can be reserved without an underlying model configured in the pool.
73-
// This can be done by specifying a target model and setting the weight to zero,
74-
// an error will be returned specifying that no valid target model is found.
75-
//
76-
// +kubebuilder:validation:MaxLength=256
77-
// +kubebuilder:validation:Required
78-
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="modelName is immutable"
79-
ModelName string `json:"modelName"`
8066

8167
// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
8268
// Criticality impacts how traffic is handled in resource constrained situations. It handles this by

client-go/applyconfiguration/apix/v1alpha2/inferenceobjectivespec.go

Lines changed: 0 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cloudbuild.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ steps:
1313
- EXTRA_TAG=$_PULL_BASE_REF
1414
- DOCKER_BUILDX_CMD=/buildx-entrypoint
1515
- GIT_COMMIT_SHA=$_PULL_BASE_SHA
16-
- MULTI=true
1716
- name: gcr.io/k8s-staging-test-infra/gcb-docker-gcloud:v20240718-5ef92b5c36
1817
entrypoint: make
1918
args:

cmd/epp/runner/runner.go

Lines changed: 31 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"go.uber.org/zap/zapcore"
3333
"google.golang.org/grpc"
3434
healthPb "google.golang.org/grpc/health/grpc_health_v1"
35+
"k8s.io/apimachinery/pkg/runtime/schema"
3536
"k8s.io/apimachinery/pkg/types"
3637
ctrl "sigs.k8s.io/controller-runtime"
3738
"sigs.k8s.io/controller-runtime/pkg/log"
@@ -41,6 +42,7 @@ import (
4142
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
4243

4344
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
45+
"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
4446
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
4547
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/config/loader"
4648
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
@@ -50,7 +52,6 @@ import (
5052
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
5153
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector"
5254
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
53-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/filter"
5455
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
5556
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker"
5657
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile"
@@ -82,6 +83,10 @@ var (
8283
"pool-name",
8384
runserver.DefaultPoolName,
8485
"Name of the InferencePool this Endpoint Picker is associated with.")
86+
poolGroup = flag.String(
87+
"pool-group",
88+
runserver.DefaultPoolGroup,
89+
"group of the InferencePool this Endpoint Picker is associated with.")
8590
poolNamespace = flag.String(
8691
"pool-namespace",
8792
runserver.DefaultPoolNamespace,
@@ -104,20 +109,6 @@ var (
104109
"The path to the certificate for secure serving. The certificate and private key files "+
105110
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
106111
"then a self-signed certificate is used.")
107-
// header/metadata flags
108-
destinationEndpointHintKey = flag.String(
109-
"destination-endpoint-hint-key",
110-
runserver.DefaultDestinationEndpointHintKey,
111-
"Header and response metadata key used by Envoy to route to the appropriate pod. This must match Envoy configuration.")
112-
destinationEndpointHintMetadataNamespace = flag.String(
113-
"destination-endpoint-hint-metadata-namespace",
114-
runserver.DefaultDestinationEndpointHintMetadataNamespace,
115-
"The key for the outer namespace struct in the metadata field of the extproc response that is used to wrap the"+
116-
"target endpoint. If not set, then an outer namespace struct should not be created.")
117-
fairnessIDHeaderKey = flag.String(
118-
"fairness-id-header-key",
119-
runserver.DefaultFairnessIDHeaderKey,
120-
"The header key used to pass the fairness ID to be used in Flow Control.")
121112
// metric flags
122113
totalQueuedRequestsMetric = flag.String(
123114
"total-queued-requests-metric",
@@ -196,7 +187,6 @@ func bindEnvToFlags() {
196187
"MODEL_SERVER_METRICS_PATH": "model-server-metrics-path",
197188
"MODEL_SERVER_METRICS_SCHEME": "model-server-metrics-scheme",
198189
"MODEL_SERVER_METRICS_HTTPS_INSECURE_SKIP_VERIFY": "model-server-metrics-https-insecure-skip-verify",
199-
"DESTINATION_ENDPOINT_HINT_KEY": "destination-endpoint-hint-key",
200190
"POOL_NAME": "pool-name",
201191
"POOL_NAMESPACE": "pool-namespace",
202192
// durations & bools work too; flag.Set expects the *string* form
@@ -301,7 +291,15 @@ func (r *Runner) Run(ctx context.Context) error {
301291
Name: *poolName,
302292
Namespace: *poolNamespace,
303293
}
304-
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg, metricsServerOptions)
294+
poolGroupKind := schema.GroupKind{
295+
Group: *poolGroup,
296+
Kind: "InferencePool",
297+
}
298+
poolGKNN := common.GKNN{
299+
NamespacedName: poolNamespacedName,
300+
GroupKind: poolGroupKind,
301+
}
302+
mgr, err := runserver.NewDefaultManager(poolGKNN, cfg, metricsServerOptions)
305303
if err != nil {
306304
setupLog.Error(err, "Failed to create controller manager")
307305
return err
@@ -339,19 +337,17 @@ func (r *Runner) Run(ctx context.Context) error {
339337

340338
// --- Setup ExtProc Server Runner ---
341339
serverRunner := &runserver.ExtProcServerRunner{
342-
GrpcPort: *grpcPort,
343-
DestinationEndpointHintMetadataNamespace: *destinationEndpointHintMetadataNamespace,
344-
DestinationEndpointHintKey: *destinationEndpointHintKey,
345-
FairnessIDHeaderKey: *fairnessIDHeaderKey,
346-
PoolNamespacedName: poolNamespacedName,
347-
Datastore: datastore,
348-
SecureServing: *secureServing,
349-
HealthChecking: *healthChecking,
350-
CertPath: *certPath,
351-
RefreshPrometheusMetricsInterval: *refreshPrometheusMetricsInterval,
352-
MetricsStalenessThreshold: *metricsStalenessThreshold,
353-
Director: director,
354-
SaturationDetector: saturationDetector,
340+
GrpcPort: *grpcPort,
341+
PoolNamespacedName: poolNamespacedName,
342+
PoolGKNN: poolGKNN,
343+
Datastore: datastore,
344+
SecureServing: *secureServing,
345+
HealthChecking: *healthChecking,
346+
CertPath: *certPath,
347+
RefreshPrometheusMetricsInterval: *refreshPrometheusMetricsInterval,
348+
MetricsStalenessThreshold: *metricsStalenessThreshold,
349+
Director: director,
350+
SaturationDetector: saturationDetector,
355351
}
356352
if err := serverRunner.SetupWithManager(ctx, mgr); err != nil {
357353
setupLog.Error(err, "Failed to setup EPP controllers")
@@ -382,11 +378,6 @@ func (r *Runner) Run(ctx context.Context) error {
382378

383379
// registerInTreePlugins registers the factory functions of all known plugins
384380
func (r *Runner) registerInTreePlugins() {
385-
plugins.Register(filter.DecisionTreeFilterType, filter.DecisionTreeFilterFactory)
386-
plugins.Register(filter.LeastKVCacheFilterType, filter.LeastKVCacheFilterFactory)
387-
plugins.Register(filter.LeastQueueFilterType, filter.LeastQueueFilterFactory)
388-
plugins.Register(filter.LoraAffinityFilterType, filter.LoraAffinityFilterFactory)
389-
plugins.Register(filter.LowQueueFilterType, filter.LowQueueFilterFactory)
390381
plugins.Register(prefix.PrefixCachePluginType, prefix.PrefixCachePluginFactory)
391382
plugins.Register(picker.MaxScorePickerType, picker.MaxScorePickerFactory)
392383
plugins.Register(picker.RandomPickerType, picker.RandomPickerFactory)
@@ -403,6 +394,8 @@ func (r *Runner) parsePluginsConfiguration(ctx context.Context) error {
403394
return nil // configuring through code, not through file
404395
}
405396

397+
logger := log.FromContext(ctx)
398+
406399
var configBytes []byte
407400
if *configText != "" {
408401
configBytes = []byte(*configText)
@@ -416,20 +409,17 @@ func (r *Runner) parsePluginsConfiguration(ctx context.Context) error {
416409

417410
r.registerInTreePlugins()
418411
handle := plugins.NewEppHandle(ctx)
419-
config, err := loader.LoadConfig(configBytes, handle)
412+
config, err := loader.LoadConfig(configBytes, handle, logger)
420413
if err != nil {
421414
return fmt.Errorf("failed to load the configuration - %w", err)
422415
}
423416

424-
r.schedulerConfig, err = loader.LoadSchedulerConfig(config.SchedulingProfiles, handle)
425-
if err != nil {
426-
return fmt.Errorf("failed to create Scheduler configuration - %w", err)
427-
}
417+
r.schedulerConfig = config.SchedulerConfig
428418

429419
// Add requestControl plugins
430420
r.requestControlConfig.AddPlugins(handle.GetAllPlugins()...)
431421

432-
log.FromContext(ctx).Info("loaded configuration from file/text successfully")
422+
logger.Info("loaded configuration from file/text successfully")
433423
return nil
434424
}
435425

0 commit comments

Comments
 (0)