Skip to content

Commit 178d62b

Browse files
committed
setup new property checker for capacity scheduling
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent ffe239f commit 178d62b

File tree

17 files changed

+1382
-158
lines changed

17 files changed

+1382
-158
lines changed

apis/protos/azure/compute/v1/vmsizerecommender.pb.go

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/protos/azure/compute/v1/vmsizerecommender_grpc.pb.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
//
2+
//Copyright (c) Microsoft Corporation.
3+
//Licensed under the MIT license.
4+
5+
// Package azure provides property checkers for Azure-specific cluster requirements.
6+
// It checks whether the cluster can meet the requirement defined by the property selector.
7+
package azure
8+
9+
import (
10+
"context"
11+
"fmt"
12+
"strconv"
13+
14+
"k8s.io/klog/v2"
15+
16+
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
17+
placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
18+
computev1 "go.goms.io/fleet/apis/protos/azure/compute/v1"
19+
"go.goms.io/fleet/pkg/clients/azure/compute"
20+
"go.goms.io/fleet/pkg/utils/labels"
21+
)
22+
23+
const (
	// maxVMInstanceCapacity is the largest VM instance count that a SKU capacity requirement may ask for.
	// Capacity values are parsed as uint32, but the accepted ceiling is deliberately kept at 200 to guard
	// against oversized requests and quota problems. The ceiling is applied to the value as written in the
	// requirement, before any operator-specific adjustment.
	maxVMInstanceCapacity = 200

	// minVMInstanceCapacity is the smallest VM instance count that is meaningful for a
	// SKU capacity requirement.
	minVMInstanceCapacity = 1
)
34+
35+
// PropertyChecker provides Azure-specific property validation for member clusters.
36+
// It validates compute requirements to determine if clusters
37+
// can meet the specified property selector requirements.
38+
type PropertyChecker struct {
39+
// vmSizeRecommenderClient is the Azure compute client used to generate VM size recommendations
40+
// and validate SKU capacity requirements.
41+
vmSizeRecommenderClient compute.AttributeBasedVMSizeRecommenderClient
42+
}
43+
44+
// NewPropertyChecker creates a new PropertyChecker with the given client.
45+
// The vmSizeRecommenderClient is used to validate SKU capacity requirements.
46+
func NewPropertyChecker(vmSizeRecommenderClient compute.AttributeBasedVMSizeRecommenderClient) *PropertyChecker {
47+
return &PropertyChecker{
48+
vmSizeRecommenderClient: vmSizeRecommenderClient,
49+
}
50+
}
51+
52+
// CheckIfMeetSKUCapacityRequirement validates whether a member cluster can meet the specified
53+
// SKU capacity requirement. It extracts the required SKU and capacity from the property selector
54+
// requirement and checks to determine if the cluster's Azure subscription
55+
// and location can provision the requested VM instances.
56+
//
57+
// The cluster must have both Azure location and subscription ID labels configured.
58+
// Returns true if the SKU capacity requirement can be met, false otherwise.
59+
func (s *PropertyChecker) CheckIfMeetSKUCapacityRequirement(
60+
cluster *clusterv1beta1.MemberCluster,
61+
sku string,
62+
capacity uint32,
63+
) (bool, error) {
64+
location, err := labels.ExtractLabelFromMemberCluster(cluster, labels.AzureLocationLabel)
65+
if err != nil {
66+
return false, fmt.Errorf("failed to extract Azure location label from cluster %s: %w", cluster.Name, err)
67+
}
68+
69+
subID, err := labels.ExtractLabelFromMemberCluster(cluster, labels.AzureSubscriptionIDLabel)
70+
if err != nil {
71+
return false, fmt.Errorf("failed to extract Azure subscription ID label from cluster %s: %w", cluster.Name, err)
72+
}
73+
74+
// Request VM size recommendations to validate SKU availability and capacity.
75+
// The capacity is checked by ensuring the current allocatable capacity is greater than the requested capacity.
76+
request := &computev1.GenerateAttributeBasedRecommendationsRequest{
77+
SubscriptionId: subID,
78+
Location: location,
79+
RegularPriorityProfile: &computev1.RegularPriorityProfile{
80+
CapacityUnitType: computev1.CapacityUnitType_CAPACITY_UNIT_TYPE_VM_INSTANCE_COUNT,
81+
TargetCapacity: capacity, // CurrentAllocatableCapacity > RequestedCapacity
82+
},
83+
ResourceProperties: &computev1.ResourceProperties{
84+
VmAttributes: &computev1.VMAttributes{
85+
AllowedVmSizes: []string{sku},
86+
},
87+
},
88+
RecommendationProperties: &computev1.RecommendationProperties{
89+
RestrictionsFilter: computev1.RecommendationProperties_RESTRICTIONS_FILTER_QUOTA_AND_OFFER_RESTRICTIONS,
90+
},
91+
}
92+
93+
respObj, err := s.vmSizeRecommenderClient.GenerateAttributeBasedRecommendations(context.Background(), request)
94+
if err != nil {
95+
return false, fmt.Errorf("failed to generate VM size recommendations from Azure: %w", err)
96+
}
97+
98+
// This check is a defense mechanism; vmSizeRecommenderClient should return a VM size recommendation
99+
// if the SKU is available in the specified location and subscription.
100+
available := false
101+
for _, vm := range respObj.RecommendedVmSizes.RegularVmSizes {
102+
if vm.Name == sku {
103+
available = true
104+
klog.V(2).Infof("SKU %s is available in cluster %s", sku, cluster.Name)
105+
break
106+
}
107+
}
108+
109+
return available, nil
110+
}
111+
112+
// ExtractCapacityRequirements extracts the capacity value from a PropertySelectorRequirement.
113+
// This function is specifically designed for Azure SKU capacity properties that follow the pattern:
114+
// "kubernetes.azure.com/vm-sizes/{sku}/capacity"
115+
// Returns the capacity as an uint32 if the requirement is valid;
116+
// or an error if the requirement is invalid.
117+
func ExtractCapacityRequirements(req placementv1beta1.PropertySelectorRequirement) (uint32, error) {
118+
if req.Operator != placementv1beta1.PropertySelectorGreaterThan && req.Operator != placementv1beta1.PropertySelectorGreaterThanOrEqualTo {
119+
return 0, fmt.Errorf("unsupported operator %q for SKU capacity property, only GreaterThan (Gt) and GreaterThanOrEqualTo (Ge) are supported", req.Operator)
120+
}
121+
122+
// Validate that we have exactly one value.
123+
if len(req.Values) != 1 {
124+
return 0, fmt.Errorf("azure SKU capacity property must have exactly one value, got %d", len(req.Values))
125+
}
126+
127+
capacity, err := validateCapacity(req.Values[0], req.Operator)
128+
if err != nil {
129+
return 0, fmt.Errorf("failed to validate capacity value %q: %w", req.Values[0], err)
130+
}
131+
132+
// Safe conversion to uint32 - all validations passed
133+
return capacity, nil
134+
}
135+
136+
// validateCapacity checks if the provided capacity value is valid.
137+
// Returns the capacity as uint32 if valid, or a zero and an error if invalid.
138+
// If the operator is GreaterThanOrEqualTo, the returned capacity is adjusted by subtracting 1
139+
// to align with GreaterThan semantics.
140+
func validateCapacity(value string, operator placementv1beta1.PropertySelectorOperator) (uint32, error) {
141+
// Parse directly as uint32 to avoid integer overflow issues.
142+
valueUint, err := strconv.ParseUint(value, 10, 32)
143+
if err != nil {
144+
return 0, fmt.Errorf("invalid capacity value %q: %w", value, err)
145+
}
146+
capacity := uint32(valueUint)
147+
148+
// Validate capacity bounds (capacity is already >= 0 since it's parsed as uint).
149+
if capacity < minVMInstanceCapacity {
150+
// A capacity of zero is only valid for GreaterThan operator.
151+
if capacity == 0 && operator != placementv1beta1.PropertySelectorGreaterThan {
152+
return 0, fmt.Errorf("capacity value cannot be zero for operator %q", operator)
153+
}
154+
}
155+
156+
// Validate against maximum allowed capacity (exceed maxVMInstanceCapacity).
157+
if capacity > maxVMInstanceCapacity {
158+
return 0, fmt.Errorf("capacity value %d exceeds maximum allowed value of %d", capacity, maxVMInstanceCapacity)
159+
}
160+
161+
// A capacity equal to maxVMInstanceCapacity is only valid for GreaterThanOrEqualTo operator.
162+
if capacity == maxVMInstanceCapacity && operator == placementv1beta1.PropertySelectorGreaterThan {
163+
return 0, fmt.Errorf("capacity value %d with operator %q exceeds maximum allowed effective capacity of %d", capacity, operator, maxVMInstanceCapacity)
164+
}
165+
166+
if operator == placementv1beta1.PropertySelectorGreaterThanOrEqualTo {
167+
// For GreaterThanOrEqualTo, subtract 1 as client requests only support GreaterThan.
168+
capacity -= 1
169+
}
170+
return capacity, nil
171+
}

0 commit comments

Comments
 (0)