Skip to content

Commit 72bc933

Browse files
committed
start setting up capacity
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent a71b0f3 commit 72bc933

File tree

14 files changed

+605
-9
lines changed

14 files changed

+605
-9
lines changed

charts/hub-agent/templates/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ spec:
5454
- --cluster-unhealthy-threshold={{ .Values.clusterUnhealthyThreshold }}
5555
- --resource-snapshot-creation-minimum-interval={{ .Values.resourceSnapshotCreationMinimumInterval }}
5656
- --resource-changes-collection-duration={{ .Values.resourceChangesCollectionDuration }}
57+
- --compute-endpoint={{ .Values.computeEndpoint }}
5758
ports:
5859
- name: metrics
5960
containerPort: 8080

charts/hub-agent/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ enableEvictionAPIs: true
5454
enablePprof: true
5555
pprofPort: 6065
5656

57+
computeEndpoint: "http://localhost:9090"
58+
5759
hubAPIQPS: 250
5860
hubAPIBurst: 1000
5961
MaxConcurrentClusterPlacement: 100

cmd/hubagent/options/options.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ type Options struct {
104104
EnablePprof bool
105105
// PprofPort is the port for pprof profiling.
106106
PprofPort int
107+
// ComputeEndpoint is the endpoint for the compute service.
108+
ComputeEndpoint string
107109
// DenyModifyMemberClusterLabels indicates if the member cluster labels cannot be modified by groups (excluding system:masters)
108110
DenyModifyMemberClusterLabels bool
109111
// ResourceSnapshotCreationMinimumInterval is the minimum interval at which resource snapshots could be created.
@@ -131,6 +133,7 @@ func NewOptions() *Options {
131133
EnableResourcePlacement: true,
132134
EnablePprof: false,
133135
PprofPort: 6065,
136+
ComputeEndpoint: "http://localhost:9090",
134137
ResourceSnapshotCreationMinimumInterval: 30 * time.Second,
135138
ResourceChangesCollectionDuration: 15 * time.Second,
136139
}
@@ -179,6 +182,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) {
179182
flags.BoolVar(&o.EnableResourcePlacement, "enable-resource-placement", true, "If set, the agents will watch for the ResourcePlacement APIs.")
180183
flags.BoolVar(&o.EnablePprof, "enable-pprof", false, "If set, the pprof profiling is enabled.")
181184
flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.")
185+
flags.StringVar(&o.ComputeEndpoint, "compute-endpoint", "http://localhost:9090", "The endpoint for the compute service.")
182186
flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.")
183187
flags.DurationVar(&o.ResourceSnapshotCreationMinimumInterval, "resource-snapshot-creation-minimum-interval", 30*time.Second, "The minimum interval at which resource snapshots could be created.")
184188
flags.DurationVar(&o.ResourceChangesCollectionDuration, "resource-changes-collection-duration", 15*time.Second,

cmd/hubagent/workload/setup.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,9 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager,
361361

362362
// Set up the scheduler
363363
klog.Info("Setting up scheduler")
364-
defaultProfile := profile.NewDefaultProfile()
364+
defaultProfile := profile.NewDefaultProfileWithOptions(profile.ProfileOptions{
365+
Endpoint: opts.ComputeEndpoint,
366+
})
365367
defaultFramework := framework.NewFramework(defaultProfile, mgr)
366368
defaultSchedulingQueue := queue.NewSimplePlacementSchedulingQueue(
367369
queue.WithName(schedulerQueueName),

go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ require (
1010
github.com/evanphx/json-patch/v5 v5.9.11
1111
github.com/go-logr/logr v1.4.3
1212
github.com/google/go-cmp v0.7.0
13+
github.com/gorilla/mux v1.8.1
14+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0
1315
github.com/onsi/ginkgo/v2 v2.23.4
1416
github.com/onsi/gomega v1.37.0
1517
github.com/prometheus/client_golang v1.22.0
@@ -112,6 +114,9 @@ require (
112114
golang.org/x/term v0.32.0 // indirect
113115
golang.org/x/text v0.25.0 // indirect
114116
golang.org/x/tools v0.31.0 // indirect
117+
google.golang.org/genproto/googleapis/api v0.0.0-20241202173237-19429a94021a // indirect
118+
google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a // indirect
119+
google.golang.org/grpc v1.70.0 // indirect
115120
google.golang.org/protobuf v1.36.6 // indirect
116121
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
117122
gopkg.in/inf.v0 v0.9.1 // indirect

go.sum

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,10 @@ github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J
179179
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
180180
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
181181
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
182+
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
183+
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
184+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
185+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
182186
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
183187
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
184188
github.com/jongio/azidext/go/azidext v0.5.0 h1:uPInXD4NZ3J0k79FPwIA0YXknFn+WcqZqSgs3/jPgvQ=
@@ -358,6 +362,10 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
358362
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
359363
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
360364
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
365+
google.golang.org/genproto/googleapis/api v0.0.0-20241202173237-19429a94021a h1:OAiGFfOiA0v9MRYsSidp3ubZaBnteRUyn3xB2ZQ5G/E=
366+
google.golang.org/genproto/googleapis/api v0.0.0-20241202173237-19429a94021a/go.mod h1:jehYqy3+AhJU9ve55aNOaSml7wUXjF9x6z2LcCfpAhY=
367+
google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a h1:hgh8P4EuoxpsuKMXX/To36nOFD7vixReXgn8lPGnt+o=
368+
google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
361369
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
362370
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
363371
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package azure
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"net/http"
8+
"strings"
9+
"time"
10+
11+
"k8s.io/apimachinery/pkg/api/resource"
12+
13+
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
14+
placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
15+
)
16+
17+
// DefaultAzureCapacityService is the default implementation of AzureCapacityService.
18+
type DefaultAzureCapacityService struct {
19+
endpoint string
20+
client *http.Client
21+
}
22+
23+
// Compile-time check to ensure DefaultAzureCapacityService implements AzureCapacityService
24+
var _ AzureCapacityService = &DefaultAzureCapacityService{}
25+
26+
// NewAzureCapacityService creates a new default Azure capacity service with the given endpoint.
27+
func NewAzureCapacityService(endpoint string) *DefaultAzureCapacityService {
28+
return &DefaultAzureCapacityService{
29+
endpoint: endpoint,
30+
client: &http.Client{
31+
Timeout: 30 * time.Second,
32+
},
33+
}
34+
}
35+
36+
// ValidateCapacityRequirement validates a capacity requirement against Azure APIs.
37+
func (s *DefaultAzureCapacityService) ValidateCapacityRequirement(
38+
cluster *clusterv1beta1.MemberCluster,
39+
req placementv1beta1.PropertySelectorRequirement,
40+
) (bool, error) {
41+
ctx := context.Background()
42+
subID, location := extractAzureInfoFromLabels(cluster)
43+
if subID == "" || location == "" {
44+
return false, fmt.Errorf("cluster %s does not have required Azure labels", cluster.Name)
45+
}
46+
capacity, sku, err := extractCapacityRequirements(req)
47+
if err != nil {
48+
return false, fmt.Errorf("failed to extract capacity requirement: %w", err)
49+
}
50+
51+
url := fmt.Sprintf("%s/fleet/subscriptions/%s/providers/Microsoft.Compute/locations/%s/vmSizeRecommendations/vmAttributeBased/generate",
52+
s.endpoint, subID, location)
53+
payload := map[string]interface{}{
54+
"regular_priority_profile": map[string]interface{}{
55+
"capacity_unit_type": "CAPACITY_UNIT_TYPE_VM_INSTANCE_COUNT",
56+
"target_capacity": capacity.Value(),
57+
},
58+
"recommendation_properties": map[string]interface{}{
59+
"restrictions_filter": "RESTRICTIONS_FILTER_QUOTA_AND_OFFER_RESTRICTIONS",
60+
},
61+
"resource_properties": map[string]interface{}{},
62+
}
63+
body, err := json.Marshal(payload)
64+
if err != nil {
65+
return false, fmt.Errorf("failed to marshal request payload: %w", err)
66+
}
67+
68+
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, strings.NewReader(string(body)))
69+
if err != nil {
70+
return false, fmt.Errorf("failed to create HTTP request: %w", err)
71+
}
72+
httpReq.Header.Set("Content-Type", "application/json")
73+
74+
resp, err := s.client.Do(httpReq)
75+
if err != nil {
76+
return false, fmt.Errorf("failed to make HTTP request to Azure service: %w", err)
77+
}
78+
defer resp.Body.Close()
79+
80+
if resp.StatusCode != http.StatusOK {
81+
return false, fmt.Errorf("Azure service returned status %d", resp.StatusCode)
82+
}
83+
84+
var respObj struct {
85+
RecommendedVmSizes struct {
86+
RegularVmSizes []struct {
87+
Name string `json:"name"`
88+
// You can add other fields if needed
89+
} `json:"regularVmSizes"`
90+
} `json:"recommendedVmSizes"`
91+
}
92+
if err := json.NewDecoder(resp.Body).Decode(&respObj); err != nil {
93+
return false, fmt.Errorf("failed to decode Azure service response: %w", err)
94+
}
95+
96+
available := false
97+
for _, vm := range respObj.RecommendedVmSizes.RegularVmSizes {
98+
if vm.Name == sku {
99+
available = true
100+
break
101+
}
102+
}
103+
104+
return available, nil
105+
}
106+
107+
// extractAzureInfoFromLabels extracts subscription ID and location from MemberCluster labels.
108+
// Returns the subscription ID and location if found, empty strings otherwise.
109+
func extractAzureInfoFromLabels(cluster *clusterv1beta1.MemberCluster) (subscriptionID, location string) {
110+
if cluster == nil || cluster.Labels == nil {
111+
return "", ""
112+
}
113+
114+
subscriptionID = cluster.Labels[AzureSubscriptionIDLabelKey]
115+
location = cluster.Labels[AzureLocationLabelKey]
116+
117+
return subscriptionID, location
118+
}
119+
120+
// extractCapacityRequirements extracts the capacity value from a PropertySelectorRequirement.
121+
// This function is specifically designed for Azure SKU capacity properties that follow the pattern:
122+
// "kubernetes.azure.com/vm-size/{sku}/capacity"
123+
// Returns the capacity as a resource.Quantity and the SKU name if the requirement is valid,
124+
// or an error if the requirement is invalid or not a capacity property.
125+
func extractCapacityRequirements(req placementv1beta1.PropertySelectorRequirement) (*resource.Quantity, string, error) {
126+
// Extract SKU from the property name
127+
// Expected format: "kubernetes.azure.com/vm-size/{sku}/capacity"
128+
if !strings.HasSuffix(req.Name, "/capacity") {
129+
return nil, "", fmt.Errorf("invalid Azure SKU capacity property format: %q", req.Name)
130+
}
131+
132+
// Remove prefix and suffix to get the SKU
133+
sku := strings.TrimSuffix(strings.TrimPrefix(req.Name, SkuCapacityPropertyPrefix+"/"), "/capacity")
134+
if sku == "" {
135+
return nil, "", fmt.Errorf("cannot extract SKU from property name: %q", req.Name)
136+
}
137+
138+
// Validate that we have exactly one value
139+
if len(req.Values) != 1 {
140+
return nil, "", fmt.Errorf("Azure SKU capacity property must have exactly one value, got %d", len(req.Values))
141+
}
142+
143+
// Parse the capacity value
144+
capacity, err := resource.ParseQuantity(req.Values[0])
145+
if err != nil {
146+
return nil, "", fmt.Errorf("failed to parse capacity value %q: %w", req.Values[0], err)
147+
}
148+
149+
return &capacity, sku, nil
150+
}

0 commit comments

Comments
 (0)