Skip to content

Commit 492ff3c

Browse files
authored
feat: setup property checker for capacity (#1210)
2 parents 18a6a7e + 0363ca3 commit 492ff3c

File tree

6 files changed

+833
-1
lines changed

6 files changed

+833
-1
lines changed
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//
2+
//Copyright (c) Microsoft Corporation.
3+
//Licensed under the MIT license.
4+
5+
// Package azure provides property checkers for Azure-specific cluster requirements.
6+
// It checks whether the cluster can meet the requirement defined by the property selector.
7+
package azure
8+
9+
import (
10+
"context"
11+
"fmt"
12+
"strconv"
13+
"strings"
14+
15+
"k8s.io/klog/v2"
16+
17+
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
18+
placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
19+
computev1 "go.goms.io/fleet/apis/protos/azure/compute/v1"
20+
"go.goms.io/fleet/pkg/clients/azure/compute"
21+
"go.goms.io/fleet/pkg/utils/labels"
22+
)
23+
24+
// maxVMInstanceCapacity is the upper bound applied to the VM instance count of a
// SKU capacity requirement. It exists to keep requests from asking for excessive
// resources and running into quota issues; 200 is treated as a reasonable ceiling
// for most production workloads.
//
// The bound is checked against the operator-adjusted capacity (see validateCapacity),
// not against the raw value written in the property selector.
const maxVMInstanceCapacity = 200
31+
32+
// PropertyChecker provides Azure-specific property validation for member clusters.
33+
// It validates compute requirements to determine if clusters
34+
// can meet the specified property selector requirements.
35+
type PropertyChecker struct {
36+
// vmSizeRecommenderClient is the Azure compute client used to generate VM size recommendations
37+
// and validate SKU capacity requirements.
38+
vmSizeRecommenderClient compute.AttributeBasedVMSizeRecommenderClient
39+
}
40+
41+
// NewPropertyChecker creates a new PropertyChecker with the given client.
42+
// The vmSizeRecommenderClient is used to validate SKU capacity requirements.
43+
func NewPropertyChecker(vmSizeRecommenderClient compute.AttributeBasedVMSizeRecommenderClient) *PropertyChecker {
44+
return &PropertyChecker{
45+
vmSizeRecommenderClient: vmSizeRecommenderClient,
46+
}
47+
}
48+
49+
// CheckIfMeetSKUCapacityRequirement validates whether a member cluster can meet the specified
50+
// SKU capacity requirement. It extracts the required SKU and capacity from the property selector
51+
// requirement and checks to determine if the cluster's Azure subscription
52+
// and location can provision the requested VM instances.
53+
//
54+
// The cluster must have both Azure location and subscription ID labels configured.
55+
// Returns true if the SKU capacity requirement can be met, false otherwise.
56+
func (s *PropertyChecker) CheckIfMeetSKUCapacityRequirement(
57+
cluster *clusterv1beta1.MemberCluster,
58+
req placementv1beta1.PropertySelectorRequirement,
59+
sku string,
60+
) (bool, error) {
61+
location, err := labels.ExtractLabelFromMemberCluster(cluster, labels.AzureLocationLabel)
62+
if err != nil {
63+
return false, fmt.Errorf("failed to extract Azure location label from cluster %s: %w", cluster.Name, err)
64+
}
65+
66+
subID, err := labels.ExtractLabelFromMemberCluster(cluster, labels.AzureSubscriptionIDLabel)
67+
if err != nil {
68+
return false, fmt.Errorf("failed to extract Azure subscription ID label from cluster %s: %w", cluster.Name, err)
69+
}
70+
71+
// Extract capacity requirements from the property selector requirement.
72+
capacity, err := extractCapacityRequirements(req)
73+
if err != nil {
74+
return false, fmt.Errorf("failed to extract capacity requirements from property selector requirement: %w", err)
75+
}
76+
77+
// Request VM size recommendations to validate SKU availability and capacity.
78+
// The capacity is checked by ensuring the current allocatable capacity is greater than the requested capacity.
79+
klog.V(2).Infof("Checking SKU %s with capacity %d in cluster %s", sku, capacity, cluster.Name)
80+
request := &computev1.GenerateAttributeBasedRecommendationsRequest{
81+
SubscriptionId: subID,
82+
Location: location,
83+
RegularPriorityProfile: &computev1.RegularPriorityProfile{
84+
CapacityUnitType: computev1.CapacityUnitType_CAPACITY_UNIT_TYPE_VM_INSTANCE_COUNT,
85+
TargetCapacity: capacity, // CurrentAllocatableCapacity > RequestedCapacity
86+
},
87+
ResourceProperties: &computev1.ResourceProperties{
88+
VmAttributes: &computev1.VMAttributes{
89+
AllowedVmSizes: []string{sku},
90+
},
91+
},
92+
RecommendationProperties: &computev1.RecommendationProperties{
93+
RestrictionsFilter: computev1.RecommendationProperties_RESTRICTIONS_FILTER_QUOTA_AND_OFFER_RESTRICTIONS,
94+
},
95+
}
96+
97+
respObj, err := s.vmSizeRecommenderClient.GenerateAttributeBasedRecommendations(context.Background(), request)
98+
if err != nil {
99+
return false, fmt.Errorf("failed to generate VM size recommendations from Azure: %w", err)
100+
}
101+
102+
// This check is a defense mechanism; vmSizeRecommenderClient should return a VM size recommendation
103+
// if the SKU is available in the specified location and subscription.
104+
available := false
105+
for _, vm := range respObj.RecommendedVmSizes.RegularVmSizes {
106+
if strings.EqualFold(vm.Name, sku) {
107+
available = true
108+
klog.V(2).Infof("SKU %s is available in cluster %s", sku, cluster.Name)
109+
break
110+
}
111+
}
112+
113+
return available, nil
114+
}
115+
116+
// extractCapacityRequirements extracts the capacity value from a PropertySelectorRequirement.
117+
// This function is specifically designed for Azure SKU capacity properties that follow the pattern:
118+
// "kubernetes.azure.com/vm-sizes/{sku}/capacity"
119+
// Returns the capacity if the requirement is valid;
120+
// The capacity will be updated based on the configured operator as VMSizeRecommender API
121+
// checks if the current allocatableCapacity > the requested capacity.
122+
func extractCapacityRequirements(req placementv1beta1.PropertySelectorRequirement) (uint32, error) {
123+
if req.Operator != placementv1beta1.PropertySelectorGreaterThan && req.Operator != placementv1beta1.PropertySelectorGreaterThanOrEqualTo {
124+
return 0, fmt.Errorf("unsupported operator %q for SKU capacity property, only GreaterThan (Gt) and GreaterThanOrEqualTo (Ge) are supported", req.Operator)
125+
}
126+
127+
// Validate that we have exactly one value.
128+
if len(req.Values) != 1 {
129+
return 0, fmt.Errorf("azure SKU capacity property must have exactly one value, got %d", len(req.Values))
130+
}
131+
132+
capacity, err := validateCapacity(req.Values[0], req.Operator)
133+
if err != nil {
134+
return 0, fmt.Errorf("failed to validate capacity value %q: %w", req.Values[0], err)
135+
}
136+
137+
// Safe conversion to uint32 - all validations passed
138+
return capacity, nil
139+
}
140+
141+
// validateCapacity checks if the provided capacity value is valid.
142+
// Returns the capacity as uint32 if valid, or a zero and an error if invalid.
143+
func validateCapacity(value string, operator placementv1beta1.PropertySelectorOperator) (uint32, error) {
144+
// Parse directly as uint32 to avoid integer overflow issues.
145+
valueUint, err := strconv.ParseUint(value, 10, 32)
146+
if err != nil {
147+
return 0, fmt.Errorf("invalid capacity value %q: %w", value, err)
148+
}
149+
150+
capacity := uint32(valueUint) // capacity is >= 0 since it's parsed as uint.
151+
152+
if operator == placementv1beta1.PropertySelectorGreaterThanOrEqualTo && capacity > 0 {
153+
capacity -= 1
154+
}
155+
156+
// Validate against maximum allowed capacity (exceed maxVMInstanceCapacity).
157+
if capacity >= maxVMInstanceCapacity {
158+
return 0, fmt.Errorf("capacity value %d exceeds maximum allowed value of %d", capacity, maxVMInstanceCapacity)
159+
}
160+
161+
// A capacity of zero is only valid for GreaterThan operator.
162+
if capacity == 0 && operator != placementv1beta1.PropertySelectorGreaterThan {
163+
return 0, fmt.Errorf("capacity value cannot be zero for operator %q", operator)
164+
}
165+
166+
return capacity, nil
167+
}

0 commit comments

Comments
 (0)