Skip to content

Commit 0fad789

Browse files
authored
Merge pull request kubernetes#127904 from towca/jtuznik/dra-autoscaling
DRA: allow Cluster Autoscaler to integrate with DRA scheduler plugin
2 parents ab4b869 + 3d22a7f commit 0fad789

File tree

12 files changed

+447
-169
lines changed

12 files changed

+447
-169
lines changed

pkg/scheduler/framework/autoscaler_contract/framework_contract_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ import (
3636
// frameworkContract pins the exact signatures of the scheduler framework
// methods covered by this contract test: running the PreFilter plugins, the
// Filter plugins, and the Reserve step of the Reserve plugins.
// NOTE(review): judging by the package path (autoscaler_contract), these are
// the framework entry points Cluster Autoscaler relies on — confirm.
type frameworkContract interface {
3737
RunPreFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status, sets.Set[string])
3838
RunFilterPlugins(context.Context, *framework.CycleState, *v1.Pod, *framework.NodeInfo) *framework.Status
39+
RunReservePluginsReserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status
3940
}
4041

4142
func TestFrameworkContract(t *testing.T) {

pkg/scheduler/framework/autoscaler_contract/lister_contract_test.go

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,20 @@ limitations under the License.
2121
package contract
2222

2323
import (
24+
resourceapi "k8s.io/api/resource/v1alpha3"
25+
"k8s.io/apimachinery/pkg/types"
26+
"k8s.io/apimachinery/pkg/util/sets"
27+
"k8s.io/dynamic-resource-allocation/structured"
2428
"k8s.io/kubernetes/pkg/scheduler/framework"
2529
)
2630

2731
// Compile-time interface assertions: each contract type must satisfy the
// named framework interface. Assigning to the blank identifier keeps nothing
// at runtime; the build simply fails if an interface's method set changes in
// a way the matching contract type does not follow.
var _ framework.NodeInfoLister = &nodeInfoListerContract{}
2832
var _ framework.StorageInfoLister = &storageInfoListerContract{}
2933
var _ framework.SharedLister = &shareListerContract{}
34+
var _ framework.ResourceSliceLister = &resourceSliceListerContract{}
35+
var _ framework.DeviceClassLister = &deviceClassListerContract{}
36+
var _ framework.ResourceClaimTracker = &resourceClaimTrackerContract{}
37+
var _ framework.SharedDRAManager = &sharedDRAManagerContract{}
3038

3139
// nodeInfoListerContract is a field-less type asserted in this file to
// satisfy framework.NodeInfoLister.
type nodeInfoListerContract struct{}
3240

@@ -61,3 +69,66 @@ func (c *shareListerContract) NodeInfos() framework.NodeInfoLister {
6169
// StorageInfos is a stub that always returns nil. It gives
// shareListerContract the StorageInfos method required by
// framework.SharedLister.
func (c *shareListerContract) StorageInfos() framework.StorageInfoLister {
6270
return nil
6371
}
72+
73+
// resourceSliceListerContract is a field-less type asserted in this file to
// satisfy framework.ResourceSliceLister.
type resourceSliceListerContract struct{}
74+
75+
// List is a stub: it returns a nil slice and a nil error.
func (c *resourceSliceListerContract) List() ([]*resourceapi.ResourceSlice, error) {
76+
return nil, nil
77+
}
78+
79+
// deviceClassListerContract is a field-less type asserted in this file to
// satisfy framework.DeviceClassLister.
type deviceClassListerContract struct{}
80+
81+
// List is a stub: it returns a nil slice and a nil error.
func (c *deviceClassListerContract) List() ([]*resourceapi.DeviceClass, error) {
82+
return nil, nil
83+
}
84+
85+
// Get is a stub: it ignores the class name and returns a nil DeviceClass and
// a nil error.
func (c *deviceClassListerContract) Get(_ string) (*resourceapi.DeviceClass, error) {
86+
return nil, nil
87+
}
88+
89+
// resourceClaimTrackerContract is a field-less type asserted in this file to
// satisfy framework.ResourceClaimTracker. Every method below is a stub that
// ignores its arguments and returns zero values.
type resourceClaimTrackerContract struct{}
90+
91+
// List returns a nil slice and a nil error.
func (r *resourceClaimTrackerContract) List() ([]*resourceapi.ResourceClaim, error) {
92+
return nil, nil
93+
}
94+
95+
// Get ignores both string arguments and returns a nil claim and a nil error.
func (r *resourceClaimTrackerContract) Get(_, _ string) (*resourceapi.ResourceClaim, error) {
96+
return nil, nil
97+
}
98+
99+
// ListAllAllocatedDevices returns a nil set and a nil error.
func (r *resourceClaimTrackerContract) ListAllAllocatedDevices() (sets.Set[structured.DeviceID], error) {
100+
return nil, nil
101+
}
102+
103+
// SignalClaimPendingAllocation ignores its arguments and returns a nil error.
func (r *resourceClaimTrackerContract) SignalClaimPendingAllocation(_ types.UID, _ *resourceapi.ResourceClaim) error {
104+
return nil
105+
}
106+
107+
// ClaimHasPendingAllocation ignores the UID and always reports false.
func (r *resourceClaimTrackerContract) ClaimHasPendingAllocation(_ types.UID) bool {
108+
return false
109+
}
110+
111+
// RemoveClaimPendingAllocation ignores the UID and always reports false.
func (r *resourceClaimTrackerContract) RemoveClaimPendingAllocation(_ types.UID) (deleted bool) {
112+
return false
113+
}
114+
115+
// AssumeClaimAfterAPICall ignores the claim and returns a nil error.
func (r *resourceClaimTrackerContract) AssumeClaimAfterAPICall(_ *resourceapi.ResourceClaim) error {
116+
return nil
117+
}
118+
119+
// AssumedClaimRestore ignores both string arguments and does nothing.
func (r *resourceClaimTrackerContract) AssumedClaimRestore(_, _ string) {
120+
}
121+
122+
// sharedDRAManagerContract is a field-less type asserted in this file to
// satisfy framework.SharedDRAManager. Each accessor below is a stub that
// returns nil.
type sharedDRAManagerContract struct{}
123+
124+
// ResourceClaims always returns a nil framework.ResourceClaimTracker.
func (s *sharedDRAManagerContract) ResourceClaims() framework.ResourceClaimTracker {
125+
return nil
126+
}
127+
128+
// ResourceSlices always returns a nil framework.ResourceSliceLister.
func (s *sharedDRAManagerContract) ResourceSlices() framework.ResourceSliceLister {
129+
return nil
130+
}
131+
132+
// DeviceClasses always returns a nil framework.DeviceClassLister.
func (s *sharedDRAManagerContract) DeviceClasses() framework.DeviceClassLister {
133+
return nil
134+
}

pkg/scheduler/framework/interface.go

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ import (
3939
"k8s.io/klog/v2"
4040
"k8s.io/kubernetes/pkg/scheduler/apis/config"
4141
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
42-
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
4342
)
4443

4544
// NodeScoreList declares a list of nodes and their scores.
@@ -820,10 +819,9 @@ type Handle interface {
820819

821820
SharedInformerFactory() informers.SharedInformerFactory
822821

823-
// ResourceClaimCache returns an assume cache of ResourceClaim objects
824-
// which gets populated by the shared informer factory and the dynamic resources
825-
// plugin.
826-
ResourceClaimCache() *assumecache.AssumeCache
822+
// SharedDRAManager can be used to obtain DRA objects, and track modifications to them in-memory - mainly by the DRA plugin.
823+
// A non-default implementation can be plugged into the framework to simulate the state of DRA objects.
824+
SharedDRAManager() SharedDRAManager
827825

828826
// RunFilterPluginsWithNominatedPods runs the set of configured filter plugins for nominated pod on the given node.
829827
RunFilterPluginsWithNominatedPods(ctx context.Context, state *CycleState, pod *v1.Pod, info *NodeInfo) *Status

pkg/scheduler/framework/listers.go

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,13 @@ limitations under the License.
1616

1717
package framework
1818

19+
import (
20+
resourceapi "k8s.io/api/resource/v1alpha3"
21+
"k8s.io/apimachinery/pkg/types"
22+
"k8s.io/apimachinery/pkg/util/sets"
23+
"k8s.io/dynamic-resource-allocation/structured"
24+
)
25+
1926
// NodeInfoLister interface represents anything that can list/get NodeInfo objects from node name.
2027
type NodeInfoLister interface {
2128
// List returns the list of NodeInfos.
@@ -40,3 +47,65 @@ type SharedLister interface {
4047
NodeInfos() NodeInfoLister
4148
StorageInfos() StorageInfoLister
4249
}
50+
51+
// ResourceSliceLister can be used to obtain ResourceSlices.
// It is list-only: no per-object lookup is part of this interface.
52+
type ResourceSliceLister interface {
53+
// List returns a list of all ResourceSlices.
// NOTE(review): whether callers may mutate the returned objects is not
// stated here — confirm with the implementation.
54+
List() ([]*resourceapi.ResourceSlice, error)
55+
}
56+
57+
// DeviceClassLister can be used to obtain DeviceClasses.
// It offers both a full listing and a lookup by class name.
58+
type DeviceClassLister interface {
59+
// List returns a list of all DeviceClasses.
60+
List() ([]*resourceapi.DeviceClass, error)
61+
// Get returns the DeviceClass with the given className.
// NOTE(review): the result for an unknown className (nil object vs.
// non-nil error) is not specified here — confirm with the implementation.
62+
Get(className string) (*resourceapi.DeviceClass, error)
63+
}
64+
65+
// ResourceClaimTracker can be used to obtain ResourceClaims, and track changes to ResourceClaims in-memory.
66+
//
67+
// If the claims are meant to be allocated in the API during the binding phase (when used by scheduler), the tracker helps avoid
68+
// race conditions between scheduling and binding phases (as well as between the binding phase and the informer cache update).
69+
//
70+
// If the binding phase is not run (e.g. when used by Cluster Autoscaler which only runs the scheduling phase, and simulates binding in-memory),
71+
// the tracker allows the framework user to obtain the claim allocations produced by the DRA plugin, and persist them outside of the API (e.g. in-memory).
72+
//
// The methods fall into three groups:
//   - read accessors: List, Get, ListAllAllocatedDevices;
//   - pending-allocation tracking: SignalClaimPendingAllocation,
//     ClaimHasPendingAllocation, RemoveClaimPendingAllocation;
//   - assumed-claim tracking: AssumeClaimAfterAPICall, AssumedClaimRestore.
type ResourceClaimTracker interface {
73+
// List lists ResourceClaims. The result is guaranteed to immediately include any changes made via AssumeClaimAfterAPICall(),
74+
// and SignalClaimPendingAllocation().
75+
List() ([]*resourceapi.ResourceClaim, error)
76+
// Get works like List(), but for a single claim.
// The claim is identified by its namespace and claimName.
77+
Get(namespace, claimName string) (*resourceapi.ResourceClaim, error)
78+
// ListAllAllocatedDevices lists all allocated Devices from allocated ResourceClaims. The result is guaranteed to immediately include
79+
// any changes made via AssumeClaimAfterAPICall(), and SignalClaimPendingAllocation().
80+
ListAllAllocatedDevices() (sets.Set[structured.DeviceID], error)
81+
82+
// SignalClaimPendingAllocation signals to the tracker that the given ResourceClaim will be allocated via an API call in the
83+
// binding phase. This change is immediately reflected in the result of List() and the other accessors.
// NOTE(review): presumably allocatedClaim is the claim object already carrying the allocation, and claimUID
// is that claim's UID — confirm with the implementation.
84+
SignalClaimPendingAllocation(claimUID types.UID, allocatedClaim *resourceapi.ResourceClaim) error
85+
// ClaimHasPendingAllocation answers whether a given claim has a pending allocation during the binding phase. It can be used to avoid
86+
// race conditions in subsequent scheduling phases.
87+
ClaimHasPendingAllocation(claimUID types.UID) bool
88+
// RemoveClaimPendingAllocation removes the pending allocation for the given ResourceClaim from the tracker if any was signaled via
89+
// SignalClaimPendingAllocation(). Returns whether there was a pending allocation to remove. List() and the other accessors immediately
90+
// stop reflecting the pending allocation in the results.
91+
RemoveClaimPendingAllocation(claimUID types.UID) (deleted bool)
92+
93+
// AssumeClaimAfterAPICall signals to the tracker that an API call modifying the given ResourceClaim was made in the binding phase, and the
94+
// changes should be reflected in informers very soon. This change is immediately reflected in the result of List() and the other accessors.
95+
// This mechanism can be used to avoid race conditions between the informer update and subsequent scheduling phases.
96+
AssumeClaimAfterAPICall(claim *resourceapi.ResourceClaim) error
97+
// AssumedClaimRestore signals to the tracker that something went wrong with the API call modifying the given ResourceClaim, and
98+
// the changes won't be reflected in informers after all. List() and the other accessors immediately stop reflecting the assumed change,
99+
// and go back to the informer version.
// The claim is identified by its namespace and claimName.
100+
AssumedClaimRestore(namespace, claimName string)
101+
}
102+
103+
// SharedDRAManager can be used to obtain DRA objects, and track modifications to them in-memory - mainly by the DRA plugin.
104+
// The plugin's default implementation obtains the objects from the API. A different implementation can be
105+
// plugged into the framework in order to simulate the state of DRA objects. For example, Cluster Autoscaler
106+
// can use this to provide the correct DRA object state to the DRA plugin when simulating scheduling changes in-memory.
107+
type SharedDRAManager interface {
108+
// ResourceClaims returns the tracker used to read ResourceClaims and to record in-memory changes to them.
ResourceClaims() ResourceClaimTracker
109+
// ResourceSlices returns the lister used to obtain ResourceSlices.
ResourceSlices() ResourceSliceLister
110+
// DeviceClasses returns the lister used to obtain DeviceClasses.
DeviceClasses() DeviceClassLister
111+
}

0 commit comments

Comments
 (0)