Skip to content

Commit eae7731

Browse files
committed
Add IPI installation on AWS dedicated hosts
1 parent 0e43b96 commit eae7731

File tree

13 files changed

+755
-52
lines changed

13 files changed

+755
-52
lines changed

data/data/install.openshift.io_installconfigs.yaml

Lines changed: 216 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package aws
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"github.com/aws/aws-sdk-go/aws"
8+
"github.com/aws/aws-sdk-go/aws/session"
9+
"github.com/aws/aws-sdk-go/service/ec2"
10+
"github.com/sirupsen/logrus"
11+
)
12+
13+
// Host holds metadata for a dedicated host.
14+
type Host struct {
15+
// ID is the dedicated host's identifier; hosts are keyed by this value in lookup maps.
ID string
16+
// Zone is the availability zone reported for the dedicated host.
Zone string
17+
}
18+
19+
// dedicatedHosts retrieves a list of dedicated hosts for the given region and
20+
// returns them in a map keyed by the host ID.
21+
func dedicatedHosts(ctx context.Context, session *session.Session, region string) (map[string]Host, error) {
22+
hostsByID := map[string]Host{}
23+
24+
client := ec2.New(session, aws.NewConfig().WithRegion(region))
25+
input := &ec2.DescribeHostsInput{}
26+
27+
if err := client.DescribeHostsPagesWithContext(ctx, input, func(page *ec2.DescribeHostsOutput, lastPage bool) bool {
28+
for _, h := range page.Hosts {
29+
id := aws.StringValue(h.HostId)
30+
if id == "" {
31+
// Skip entries lacking an ID (should not happen)
32+
continue
33+
}
34+
35+
logrus.Debugf("Found dedicatd host: %s", id)
36+
hostsByID[id] = Host{
37+
ID: id,
38+
Zone: aws.StringValue(h.AvailabilityZone),
39+
}
40+
}
41+
return !lastPage
42+
}); err != nil {
43+
return nil, fmt.Errorf("fetching dedicated hosts: %w", err)
44+
}
45+
46+
return hostsByID, nil
47+
}

pkg/asset/installconfig/aws/metadata.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ type Metadata struct {
2727
vpc VPC
2828
instanceTypes map[string]InstanceType
2929

30+
Hosts map[string]Host
3031
Region string `json:"region,omitempty"`
3132
ProvidedSubnets []typesaws.Subnet `json:"subnets,omitempty"`
3233
Services []typesaws.ServiceEndpoint `json:"services,omitempty"`
@@ -390,3 +391,23 @@ func (m *Metadata) InstanceTypes(ctx context.Context) (map[string]InstanceType,
390391

391392
return m.instanceTypes, nil
392393
}
394+
395+
// DedicatedHosts retrieves all hosts available for use to verify against this installation for configured region.
396+
func (m *Metadata) DedicatedHosts(ctx context.Context) (map[string]Host, error) {
397+
m.mutex.Lock()
398+
defer m.mutex.Unlock()
399+
400+
if len(m.Hosts) == 0 {
401+
awsSession, err := m.unlockedSession(ctx)
402+
if err != nil {
403+
return nil, err
404+
}
405+
406+
m.Hosts, err = dedicatedHosts(ctx, awsSession, m.Region)
407+
if err != nil {
408+
return nil, fmt.Errorf("error listing dedicated hosts: %w", err)
409+
}
410+
}
411+
412+
return m.Hosts, nil
413+
}

pkg/asset/installconfig/aws/validation.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net"
88
"net/http"
99
"net/url"
10+
"slices"
1011
"sort"
1112

1213
ec2v2 "github.com/aws/aws-sdk-go-v2/service/ec2"
@@ -466,6 +467,8 @@ func validateMachinePool(ctx context.Context, meta *Metadata, fldPath *field.Pat
466467
}
467468
}
468469

470+
allErrs = append(allErrs, validateHostPlacement(ctx, meta, fldPath, pool)...)
471+
469472
return allErrs
470473
}
471474

@@ -484,6 +487,54 @@ func translateEC2Arches(arches []string) sets.Set[string] {
484487
return res
485488
}
486489

490+
func validateHostPlacement(ctx context.Context, meta *Metadata, fldPath *field.Path, pool *awstypes.MachinePool) field.ErrorList {
491+
allErrs := field.ErrorList{}
492+
493+
if pool.HostPlacement == nil {
494+
return allErrs
495+
}
496+
497+
if pool.HostPlacement.Affinity != nil && *pool.HostPlacement.Affinity == awstypes.HostAffinityDedicatedHost {
498+
placementPath := fldPath.Child("hostPlacement")
499+
if pool.HostPlacement.DedicatedHost != nil {
500+
configuredHosts := pool.HostPlacement.DedicatedHost
501+
502+
// Check to see if all configured hosts exist
503+
foundHosts, err := meta.DedicatedHosts(ctx)
504+
if err != nil {
505+
allErrs = append(allErrs, field.InternalError(placementPath.Child("dedicatedHost"), err))
506+
} else {
507+
// Check the returned configured hosts to see if the dedicated hosts defined in install-config exists.
508+
for idx, host := range configuredHosts {
509+
dhPath := placementPath.Child("dedicatedHost").Index(idx)
510+
511+
// Is host in AWS?
512+
foundHost, ok := foundHosts[host.ID]
513+
if !ok {
514+
errMsg := fmt.Sprintf("dedicated host %s not found", host.ID)
515+
allErrs = append(allErrs, field.Invalid(dhPath, host, errMsg))
516+
continue
517+
}
518+
519+
// Is host valid for pools region and zone config?
520+
if !slices.Contains(pool.Zones, foundHost.Zone) {
521+
errMsg := fmt.Sprintf("dedicated host %s is not available in pool's zone list", host.ID)
522+
allErrs = append(allErrs, field.Invalid(dhPath, host, errMsg))
523+
}
524+
525+
// If user configured the zone for the dedicated host, let's check to make sure its correct
526+
if host.Zone != "" && host.Zone != foundHost.Zone {
527+
errMsg := fmt.Sprintf("dedicated host was configured with zone %v but expected zone %v", host.Zone, foundHost.Zone)
528+
allErrs = append(allErrs, field.Invalid(dhPath.Child("zone"), host, errMsg))
529+
}
530+
}
531+
}
532+
}
533+
}
534+
535+
return allErrs
536+
}
537+
487538
func validateSecurityGroupIDs(ctx context.Context, meta *Metadata, fldPath *field.Path, platform *awstypes.Platform, pool *awstypes.MachinePool) field.ErrorList {
488539
allErrs := field.ErrorList{}
489540

pkg/asset/installconfig/aws/validation_test.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ func TestValidate(t *testing.T) {
7474
subnetsInVPC *SubnetGroups
7575
vpcTags Tags
7676
instanceTypes map[string]InstanceType
77+
hosts map[string]Host
7778
proxy string
7879
publicOnly bool
7980
expectErr string
@@ -1200,6 +1201,57 @@ func TestValidate(t *testing.T) {
12001201
},
12011202
expectErr: `^\Qplatform.aws.vpc.subnets: Forbidden: subnet subnet-valid-public-a1 is owned by other clusters [another-cluster] and cannot be used for new installations, another subnet must be created separately\E$`,
12021203
},
1204+
{
1205+
name: "valid dedicated host placement on compute",
1206+
installConfig: icBuild.build(
1207+
icBuild.withComputePlatformZones([]string{"a"}, true, 0),
1208+
icBuild.withComputeHostPlacement([]string{"h-1234567890abcdef0"}, 0),
1209+
),
1210+
availRegions: validAvailRegions(),
1211+
availZones: validAvailZones(),
1212+
hosts: map[string]Host{
1213+
"h-1234567890abcdef0": {ID: "h-1234567890abcdef0", Zone: "a"},
1214+
},
1215+
},
1216+
{
1217+
name: "invalid dedicated host not found",
1218+
installConfig: icBuild.build(
1219+
icBuild.withComputePlatformZones([]string{"a"}, true, 0),
1220+
icBuild.withComputeHostPlacement([]string{"h-aaaaaaaaaaaaaaaaa"}, 0),
1221+
),
1222+
availRegions: validAvailRegions(),
1223+
availZones: validAvailZones(),
1224+
hosts: map[string]Host{
1225+
"h-1234567890abcdef0": {ID: "h-1234567890abcdef0", Zone: "a"},
1226+
},
1227+
expectErr: "dedicated host h-aaaaaaaaaaaaaaaaa not found",
1228+
},
1229+
{
1230+
name: "invalid dedicated host zone not in pool zones",
1231+
installConfig: icBuild.build(
1232+
icBuild.withComputePlatformZones([]string{"a"}, true, 0),
1233+
icBuild.withComputeHostPlacement([]string{"h-bbbbbbbbbbbbbbbbb"}, 0),
1234+
),
1235+
availRegions: validAvailRegions(),
1236+
availZones: validAvailZones(),
1237+
hosts: map[string]Host{
1238+
"h-bbbbbbbbbbbbbbbbb": {ID: "h-bbbbbbbbbbbbbbbbb", Zone: "b"},
1239+
},
1240+
expectErr: "is not available in pool's zone list",
1241+
},
1242+
{
1243+
name: "dedicated host placement on compute but for a zone that pool is not using",
1244+
installConfig: icBuild.build(
1245+
icBuild.withComputePlatformZones([]string{"b"}, true, 0),
1246+
icBuild.withComputeHostPlacementAndZone([]string{"h-1234567890abcdef0"}, "b", 0),
1247+
),
1248+
availRegions: validAvailRegions(),
1249+
availZones: validAvailZones(),
1250+
hosts: map[string]Host{
1251+
"h-1234567890abcdef0": {ID: "h-1234567890abcdef0", Zone: "a"},
1252+
},
1253+
expectErr: "dedicated host was configured with zone b but expected zone a",
1254+
},
12031255
}
12041256

12051257
// Register mock http(s) responses for tests.
@@ -1232,6 +1284,7 @@ func TestValidate(t *testing.T) {
12321284
Tags: test.vpcTags,
12331285
},
12341286
instanceTypes: test.instanceTypes,
1287+
Hosts: test.hosts,
12351288
ProvidedSubnets: test.installConfig.Platform.AWS.VPC.Subnets,
12361289
}
12371290

@@ -1952,6 +2005,34 @@ func (icBuild icBuildForAWS) withComputePlatformZones(zones []string, overwrite
19522005
}
19532006
}
19542007

2008+
func (icBuild icBuildForAWS) withComputeHostPlacement(hostIDs []string, index int) icOption {
2009+
return func(ic *types.InstallConfig) {
2010+
aff := aws.HostAffinityDedicatedHost
2011+
dhs := make([]aws.DedicatedHost, 0, len(hostIDs))
2012+
for _, id := range hostIDs {
2013+
dhs = append(dhs, aws.DedicatedHost{ID: id})
2014+
}
2015+
ic.Compute[index].Platform.AWS.HostPlacement = &aws.HostPlacement{
2016+
Affinity: &aff,
2017+
DedicatedHost: dhs,
2018+
}
2019+
}
2020+
}
2021+
2022+
func (icBuild icBuildForAWS) withComputeHostPlacementAndZone(hostIDs []string, zone string, index int) icOption {
2023+
return func(ic *types.InstallConfig) {
2024+
aff := aws.HostAffinityDedicatedHost
2025+
dhs := make([]aws.DedicatedHost, 0, len(hostIDs))
2026+
for _, id := range hostIDs {
2027+
dhs = append(dhs, aws.DedicatedHost{ID: id, Zone: zone})
2028+
}
2029+
ic.Compute[index].Platform.AWS.HostPlacement = &aws.HostPlacement{
2030+
Affinity: &aff,
2031+
DedicatedHost: dhs,
2032+
}
2033+
}
2034+
}
2035+
19552036
func (icBuild icBuildForAWS) withControlPlanePlatformAMI(amiID string) icOption {
19562037
return func(ic *types.InstallConfig) {
19572038
ic.ControlPlane.Platform.AWS.AMIID = amiID

pkg/asset/machines/aws/machines.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ type machineProviderInput struct {
3737
publicSubnet bool
3838
securityGroupIDs []string
3939
cpuOptions *awstypes.CPUOptions
40+
dedicatedHost string
4041
}
4142

4243
// Machines returns a list of machines for a machinepool.
@@ -304,6 +305,15 @@ func provider(in *machineProviderInput) (*machineapi.AWSMachineProviderConfig, e
304305
config.CPUOptions = &cpuOptions
305306
}
306307

308+
if in.dedicatedHost != "" {
309+
config.HostPlacement = &machineapi.HostPlacement{
310+
Affinity: ptr.To(machineapi.HostAffinityDedicatedHost),
311+
DedicatedHost: &machineapi.DedicatedHost{
312+
ID: in.dedicatedHost,
313+
},
314+
}
315+
}
316+
307317
return config, nil
308318
}
309319

@@ -353,3 +363,18 @@ func ConfigMasters(machines []machineapi.Machine, controlPlane *machinev1.Contro
353363
providerSpec := controlPlane.Spec.Template.OpenShiftMachineV1Beta1Machine.Spec.ProviderSpec.Value.Object.(*machineapi.AWSMachineProviderConfig)
354364
providerSpec.LoadBalancers = lbrefs
355365
}
366+
367+
// DedicatedHost sets dedicated hosts for the specified zone.
368+
func DedicatedHost(hosts map[string]aws.Host, placement *awstypes.HostPlacement, zone string) string {
369+
// If install-config has HostPlacements configured, lets check the DedicatedHosts to see if one matches our region & zone.
370+
if placement != nil {
371+
// We only support one host ID currently for an instance. Need to also get host that matches the zone the machines will be put into.
372+
for _, host := range placement.DedicatedHost {
373+
hostDetails, found := hosts[host.ID]
374+
if found && hostDetails.Zone == zone {
375+
return hostDetails.ID
376+
}
377+
}
378+
}
379+
return ""
380+
}

pkg/asset/machines/aws/machinesets.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ type MachineSetInput struct {
2525
Pool *types.MachinePool
2626
Role string
2727
UserDataSecret string
28+
Hosts map[string]icaws.Host
2829
}
2930

3031
// MachineSets returns a list of machinesets for a machinepool.
@@ -87,6 +88,8 @@ func MachineSets(in *MachineSetInput) ([]*machineapi.MachineSet, error) {
8788
instanceProfile = fmt.Sprintf("%s-worker-profile", in.ClusterID)
8889
}
8990

91+
dedicatedHost := DedicatedHost(in.Hosts, mpool.HostPlacement, az)
92+
9093
provider, err := provider(&machineProviderInput{
9194
clusterID: in.ClusterID,
9295
region: in.InstallConfigPlatformAWS.Region,
@@ -103,12 +106,21 @@ func MachineSets(in *MachineSetInput) ([]*machineapi.MachineSet, error) {
103106
publicSubnet: publicSubnet,
104107
securityGroupIDs: in.Pool.Platform.AWS.AdditionalSecurityGroupIDs,
105108
cpuOptions: mpool.CPUOptions,
109+
dedicatedHost: dedicatedHost,
106110
})
107111
if err != nil {
108112
return nil, errors.Wrap(err, "failed to create provider")
109113
}
114+
115+
// If we are using any feature that is only available via CAPI, we must set the authoritativeAPI = ClusterAPI
116+
authoritativeAPI := machineapi.MachineAuthorityMachineAPI
117+
if isAuthoritativeClusterAPIRequired(provider) {
118+
authoritativeAPI = machineapi.MachineAuthorityClusterAPI
119+
}
120+
110121
name := fmt.Sprintf("%s-%s-%s", in.ClusterID, in.Pool.Name, az)
111122
spec := machineapi.MachineSpec{
123+
AuthoritativeAPI: authoritativeAPI,
112124
ProviderSpec: machineapi.ProviderSpec{
113125
Value: &runtime.RawExtension{Object: provider},
114126
},
@@ -131,7 +143,8 @@ func MachineSets(in *MachineSetInput) ([]*machineapi.MachineSet, error) {
131143
},
132144
},
133145
Spec: machineapi.MachineSetSpec{
134-
Replicas: &replicas,
146+
AuthoritativeAPI: authoritativeAPI,
147+
Replicas: &replicas,
135148
Selector: metav1.LabelSelector{
136149
MatchLabels: map[string]string{
137150
"machine.openshift.io/cluster-api-machineset": name,
@@ -152,8 +165,17 @@ func MachineSets(in *MachineSetInput) ([]*machineapi.MachineSet, error) {
152165
},
153166
},
154167
}
168+
155169
machinesets = append(machinesets, mset)
156170
}
157171

158172
return machinesets, nil
159173
}
174+
175+
// isAuthoritativeClusterAPIRequired is called to determine if the machine spec should have the AuthoritativeAPI set to ClusterAPI.
176+
func isAuthoritativeClusterAPIRequired(provider *machineapi.AWSMachineProviderConfig) bool {
177+
if provider.HostPlacement != nil && *provider.HostPlacement.Affinity != machineapi.HostAffinityAnyAvailable {
178+
return true
179+
}
180+
return false
181+
}

pkg/asset/machines/worker.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,14 @@ func (w *Worker) Generate(ctx context.Context, dependencies asset.Parents) error
534534
}
535535
}
536536

537+
dHosts := map[string]icaws.Host{}
538+
if pool.Platform.AWS.HostPlacement != nil {
539+
dHosts, err = installConfig.AWS.DedicatedHosts(ctx)
540+
if err != nil {
541+
return fmt.Errorf("failed to retrieve dedicated hosts for compute pool: %w", err)
542+
}
543+
}
544+
537545
pool.Platform.AWS = &mpool
538546
sets, err := aws.MachineSets(&aws.MachineSetInput{
539547
ClusterID: clusterID.InfraID,
@@ -544,6 +552,7 @@ func (w *Worker) Generate(ctx context.Context, dependencies asset.Parents) error
544552
Pool: &pool,
545553
Role: pool.Name,
546554
UserDataSecret: workerUserDataSecretName,
555+
Hosts: dHosts,
547556
})
548557
if err != nil {
549558
return errors.Wrap(err, "failed to create worker machine objects")

0 commit comments

Comments
 (0)