Skip to content

Commit b77e660

Browse files
committed
Fix Port Leaks
When an instance fails to come up, there's a bug where the port doesn't get cleaned up, and eventually you will exhaust your quota. The code does attempt to garbage collect, but I suspect at some point in the past ports were suffixed with an index, but the garbage collection code wasn't made aware of this, so lists nothing. Replace the APi "filtering" that doesn't work with a better algorithm that uses the instance creation code to generate the expected ports, then generate the names and delete if the port exists. Also protected with some nice unit tests.
1 parent f938384 commit b77e660

File tree

6 files changed

+133
-27
lines changed

6 files changed

+133
-27
lines changed

controllers/openstackcluster_controller.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,9 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
237237
}
238238
}
239239

240-
rootVolume := openStackCluster.Spec.Bastion.Instance.RootVolume
241-
if err = computeService.DeleteInstance(openStackCluster, instanceStatus, instanceName, rootVolume); err != nil {
240+
instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)
241+
242+
if err = computeService.DeleteInstance(openStackCluster, openStackCluster, instanceStatus, instanceSpec); err != nil {
242243
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete bastion: %w", err))
243244
return fmt.Errorf("failed to delete bastion: %w", err)
244245
}

controllers/openstackmachine_controller.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,9 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster
273273
}
274274
}
275275

276-
if err := computeService.DeleteInstance(openStackMachine, instanceStatus, openStackMachine.Name, openStackMachine.Spec.RootVolume); err != nil {
276+
instanceSpec := machineToInstanceSpec(openStackCluster, machine, openStackMachine, "")
277+
278+
if err := computeService.DeleteInstance(openStackCluster, openStackMachine, instanceStatus, instanceSpec); err != nil {
277279
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeleteFailedReason, clusterv1.ConditionSeverityError, "Deleting instance failed: %v", err)
278280
return ctrl.Result{}, fmt.Errorf("delete instance: %w", err)
279281
}

pkg/cloud/services/compute/instance.go

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
283283
if len(instanceSpec.Tags) > 0 {
284284
iTags = instanceSpec.Tags
285285
}
286-
portName := getPortName(instanceSpec.Name, portOpts, i)
286+
portName := networking.GetPortName(instanceSpec.Name, portOpts, i)
287287
port, err := networkingService.GetOrCreatePort(eventObject, clusterName, portName, portOpts, securityGroups, iTags)
288288
if err != nil {
289289
return nil, err
@@ -380,14 +380,6 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
380380
return createdInstance, nil
381381
}
382382

383-
// getPortName appends a suffix to an instance name in order to try and get a unique name per port.
384-
func getPortName(instanceName string, opts *infrav1.PortOpts, netIndex int) string {
385-
if opts != nil && opts.NameSuffix != "" {
386-
return fmt.Sprintf("%s-%s", instanceName, opts.NameSuffix)
387-
}
388-
return fmt.Sprintf("%s-%d", instanceName, netIndex)
389-
}
390-
391383
func rootVolumeName(instanceName string) string {
392384
return fmt.Sprintf("%s-root", instanceName)
393385
}
@@ -550,7 +542,7 @@ func (s *Service) GetManagementPort(openStackCluster *infrav1.OpenStackCluster,
550542
return &allPorts[0], nil
551543
}
552544

553-
func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *InstanceStatus, instanceName string, rootVolume *infrav1.RootVolume) error {
545+
func (s *Service) DeleteInstance(openStackCluster *infrav1.OpenStackCluster, eventObject runtime.Object, instanceStatus *InstanceStatus, instanceSpec *InstanceSpec) error {
554546
if instanceStatus == nil {
555547
/*
556548
We create a boot-from-volume instance in 2 steps:
@@ -570,8 +562,8 @@ func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *Ins
570562
Note that we don't need to separately delete the root volume when deleting the instance because
571563
DeleteOnTermination will ensure it is deleted in that case.
572564
*/
573-
if hasRootVolume(rootVolume) {
574-
name := rootVolumeName(instanceName)
565+
if hasRootVolume(instanceSpec.RootVolume) {
566+
name := rootVolumeName(instanceSpec.Name)
575567
volume, err := s.getVolumeByName(name)
576568
if err != nil {
577569
return err
@@ -620,7 +612,12 @@ func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *Ins
620612

621613
// delete port of error instance
622614
if instanceStatus.State() == infrav1.InstanceStateError {
623-
if err := networkingService.GarbageCollectErrorInstancesPort(eventObject, instanceStatus.Name()); err != nil {
615+
portOpts, err := s.constructPorts(openStackCluster, instanceSpec)
616+
if err != nil {
617+
return err
618+
}
619+
620+
if err := networkingService.GarbageCollectErrorInstancesPort(eventObject, instanceSpec.Name, portOpts); err != nil {
624621
return err
625622
}
626623
}

pkg/cloud/services/compute/instance_test.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha7"
4949
"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients"
5050
"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients/mock"
51+
"sigs.k8s.io/cluster-api-provider-openstack/pkg/cloud/services/networking"
5152
"sigs.k8s.io/cluster-api-provider-openstack/pkg/scope"
5253
)
5354

@@ -110,7 +111,7 @@ func Test_getPortName(t *testing.T) {
110111
}
111112
for _, tt := range tests {
112113
t.Run(tt.name, func(t *testing.T) {
113-
if got := getPortName(tt.args.instanceName, tt.args.opts, tt.args.netIndex); got != tt.want {
114+
if got := networking.GetPortName(tt.args.instanceName, tt.args.opts, tt.args.netIndex); got != tt.want {
114115
t.Errorf("getPortName() = %v, want %v", got, tt.want)
115116
}
116117
})
@@ -819,7 +820,13 @@ func TestService_DeleteInstance(t *testing.T) {
819820
if err != nil {
820821
t.Fatalf("Failed to create service: %v", err)
821822
}
822-
if err := s.DeleteInstance(tt.eventObject, tt.instanceStatus(), openStackMachineName, tt.rootVolume); (err != nil) != tt.wantErr {
823+
824+
instanceSpec := &InstanceSpec{
825+
Name: openStackMachineName,
826+
RootVolume: tt.rootVolume,
827+
}
828+
829+
if err := s.DeleteInstance(&infrav1.OpenStackCluster{}, tt.eventObject, tt.instanceStatus(), instanceSpec); (err != nil) != tt.wantErr {
823830
t.Errorf("Service.DeleteInstance() error = %v, wantErr %v", err, tt.wantErr)
824831
}
825832
})

pkg/cloud/services/networking/port.go

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -289,18 +289,38 @@ func (s *Service) DeletePorts(openStackCluster *infrav1.OpenStackCluster) error
289289
return nil
290290
}
291291

292-
func (s *Service) GarbageCollectErrorInstancesPort(eventObject runtime.Object, instanceName string) error {
293-
portList, err := s.client.ListPort(ports.ListOpts{
294-
Name: instanceName,
295-
})
296-
if err != nil {
297-
return err
298-
}
299-
for _, p := range portList {
300-
if err := s.DeletePort(eventObject, p.ID); err != nil {
292+
func (s *Service) GarbageCollectErrorInstancesPort(eventObject runtime.Object, instanceName string, portOpts []infrav1.PortOpts) error {
293+
for i := range portOpts {
294+
portOpt := &portOpts[i]
295+
296+
portName := GetPortName(instanceName, portOpt, i)
297+
298+
// TODO: whould be nice if gophercloud could be persuaded to accept multiple
299+
// names as is allowed by the API in order to reduce API traffic.
300+
portList, err := s.client.ListPort(ports.ListOpts{Name: portName})
301+
if err != nil {
302+
return err
303+
}
304+
305+
// NOTE: https://github.com/kubernetes-sigs/cluster-api-provider-openstack/issues/1476
306+
// It is up to the end user to specify a UNIQUE cluster name when provisioning in the
307+
// same project, otherwise things will alias and we could delete more than we should.
308+
if len(portList) > 1 {
309+
return fmt.Errorf("garbage collection of port %s failed, found %d ports with the same name", portName, len(portList))
310+
}
311+
312+
if err := s.DeletePort(eventObject, portList[0].ID); err != nil {
301313
return err
302314
}
303315
}
304316

305317
return nil
306318
}
319+
320+
// GetPortName appends a suffix to an instance name in order to try and get a unique name per port.
321+
func GetPortName(instanceName string, opts *infrav1.PortOpts, netIndex int) string {
322+
if opts != nil && opts.NameSuffix != "" {
323+
return fmt.Sprintf("%s-%s", instanceName, opts.NameSuffix)
324+
}
325+
return fmt.Sprintf("%s-%d", instanceName, netIndex)
326+
}

pkg/cloud/services/networking/port_test.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,85 @@ func Test_GetOrCreatePort(t *testing.T) {
535535
}
536536
}
537537

538+
func Test_GarbageCollectErrorInstancesPort(t *testing.T) {
539+
mockCtrl := gomock.NewController(t)
540+
defer mockCtrl.Finish()
541+
542+
instanceName := "foo"
543+
portID1 := "dc6e0ae3-dad6-4240-a9cb-e541916f20d3"
544+
portID2 := "a38ab1cb-c2cc-4c1b-9d1d-696ec73356d2"
545+
portName1 := GetPortName(instanceName, nil, 0)
546+
portName2 := GetPortName(instanceName, nil, 1)
547+
548+
tests := []struct {
549+
// man is the name of the test.
550+
name string
551+
// expect allows definition of any expected calls to the mock.
552+
expect func(m *mock.MockNetworkClientMockRecorder)
553+
// portOpts defines the instance ports as defined in the OSM spec.
554+
portOpts []infrav1.PortOpts
555+
// wantErr defines whether the test is supposed to fail.
556+
wantErr bool
557+
}{
558+
{
559+
name: "garbage collects all ports for an instance",
560+
expect: func(m *mock.MockNetworkClientMockRecorder) {
561+
o1 := ports.ListOpts{
562+
Name: portName1,
563+
}
564+
p1 := []ports.Port{
565+
{
566+
ID: portID1,
567+
Name: portName1,
568+
},
569+
}
570+
m.ListPort(o1).Return(p1, nil)
571+
m.DeletePort(portID1)
572+
o2 := ports.ListOpts{
573+
Name: portName2,
574+
}
575+
p2 := []ports.Port{
576+
{
577+
ID: portID2,
578+
Name: portName2,
579+
},
580+
}
581+
582+
m.ListPort(o2).Return(p2, nil)
583+
m.DeletePort(portID2)
584+
},
585+
portOpts: []infrav1.PortOpts{
586+
{},
587+
{},
588+
},
589+
wantErr: false,
590+
},
591+
}
592+
593+
eventObject := &infrav1.OpenStackMachine{}
594+
595+
for _, tt := range tests {
596+
t.Run(tt.name, func(t *testing.T) {
597+
g := NewWithT(t)
598+
mockClient := mock.NewMockNetworkClient(mockCtrl)
599+
tt.expect(mockClient.EXPECT())
600+
s := Service{
601+
client: mockClient,
602+
}
603+
err := s.GarbageCollectErrorInstancesPort(
604+
eventObject,
605+
instanceName,
606+
tt.portOpts,
607+
)
608+
if tt.wantErr {
609+
g.Expect(err).To(HaveOccurred())
610+
} else {
611+
g.Expect(err).NotTo(HaveOccurred())
612+
}
613+
})
614+
}
615+
}
616+
538617
func pointerTo(b bool) *bool {
539618
return &b
540619
}

0 commit comments

Comments
 (0)