Skip to content

Commit ca1ef98

Browse files
committed
fix/byosg: remove managed SG on BYOSG scenario for CLB
Fix the managed (controller-owned) security group leak when user provided security group is added to an existing Service type-loadBalancer CLB. fix/byosg/tests: unit tests to handle managed SG removal on BYOSG. Introduce unit tests for functions added to validate Service update to BYO Security Group annotations from a managed SG state.
1 parent 60b22c0 commit ca1ef98

File tree

5 files changed

+569
-35
lines changed

5 files changed

+569
-35
lines changed

pkg/providers/v1/aws.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2940,19 +2940,9 @@ func (c *Cloud) updateInstanceSecurityGroupsForLoadBalancer(ctx context.Context,
29402940
loadBalancerSecurityGroupID := lbSecurityGroupIDs[0]
29412941

29422942
// Get the actual list of groups that allow ingress from the load-balancer
2943-
actualGroups := make(map[*ec2types.SecurityGroup]bool)
2944-
{
2945-
describeRequest := &ec2.DescribeSecurityGroupsInput{}
2946-
describeRequest.Filters = []ec2types.Filter{
2947-
newEc2Filter("ip-permission.group-id", loadBalancerSecurityGroupID),
2948-
}
2949-
response, err := c.ec2.DescribeSecurityGroups(ctx, describeRequest)
2950-
if err != nil {
2951-
return fmt.Errorf("error querying security groups for ELB: %q", err)
2952-
}
2953-
for _, sg := range response {
2954-
actualGroups[&sg] = c.tagging.hasClusterTag(sg.Tags)
2955-
}
2943+
actualGroups, _, err := c.buildSecurityGroupRuleReferences(ctx, loadBalancerSecurityGroupID)
2944+
if err != nil {
2945+
return fmt.Errorf("error building security group rule references: %w", err)
29562946
}
29572947

29582948
// Open the firewall from the load balancer to the instance

pkg/providers/v1/aws_loadbalancer.go

Lines changed: 125 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"time"
3030

3131
"github.com/aws/aws-sdk-go-v2/aws"
32+
"github.com/aws/aws-sdk-go-v2/service/ec2"
3233
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
3334

3435
elb "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing"
@@ -1251,24 +1252,25 @@ func (c *Cloud) ensureLoadBalancer(ctx context.Context, namespacedName types.Nam
12511252

12521253
{
12531254
// Sync security groups
1254-
expected := sets.New[string](securityGroupIDs...)
1255-
actual := stringSetFromList(loadBalancer.SecurityGroups)
1255+
expected := sets.New(securityGroupIDs...)
1256+
actual := sets.New(loadBalancer.SecurityGroups...)
12561257

12571258
if !expected.Equal(actual) {
12581259
// This call just replaces the security groups, unlike e.g. subnets (!)
1259-
request := &elb.ApplySecurityGroupsToLoadBalancerInput{}
1260-
request.LoadBalancerName = aws.String(loadBalancerName)
1261-
if securityGroupIDs == nil {
1262-
request.SecurityGroups = nil
1263-
} else {
1264-
request.SecurityGroups = securityGroupIDs
1265-
}
1266-
klog.V(2).Info("Applying updated security groups to load balancer")
1267-
_, err := c.elb.ApplySecurityGroupsToLoadBalancer(ctx, request)
1268-
if err != nil {
1260+
klog.V(2).Infof("Applying updated security groups to load balancer %q", loadBalancerName)
1261+
if _, err := c.elb.ApplySecurityGroupsToLoadBalancer(ctx, &elb.ApplySecurityGroupsToLoadBalancerInput{
1262+
LoadBalancerName: aws.String(loadBalancerName),
1263+
SecurityGroups: securityGroupIDs,
1264+
}); err != nil {
12691265
return nil, fmt.Errorf("error applying AWS loadbalancer security groups: %q", err)
12701266
}
12711267
dirty = true
1268+
1269+
// Ensure the replaced security groups are removed from AWS when owned by the controller.
1270+
// Pass the old/actual security groups (not the new expected ones) to clean up what was replaced.
1271+
if errs := c.removeOwnedSecurityGroups(ctx, loadBalancerName, actual.UnsortedList()); len(errs) > 0 {
1272+
return nil, fmt.Errorf("error removing owned security groups: %v", errs)
1273+
}
12721274
}
12731275
}
12741276

@@ -1879,3 +1881,114 @@ func ValidateHealthCheck(s *elbtypes.HealthCheck) error {
18791881

18801882
return nil
18811883
}
1884+
1885+
// buildSecurityGroupRuleReferences finds all security groups that have ingress rules
1886+
// referencing the specified security group ID, and categorizes them based on cluster tagging.
1887+
// This is used to identify dependencies before removing a security group.
1888+
//
1889+
// Parameters:
1890+
// - ctx: The context for the request.
1891+
// - sgID: The ID of the security group to find references for.
1892+
//
1893+
// Returns:
1894+
// - map[*ec2types.SecurityGroup]bool: All security groups with ingress rules referencing sgID, mapped to their cluster tag status (true/false).
1895+
// - map[*ec2types.SecurityGroup]IPPermissionSet: Only cluster-tagged security groups mapped to their ingress rules that reference sgID.
1896+
// - error: An error if the AWS DescribeSecurityGroups API call fails.
1897+
func (c *Cloud) buildSecurityGroupRuleReferences(ctx context.Context, sgID string) (map[*ec2types.SecurityGroup]bool, map[*ec2types.SecurityGroup]IPPermissionSet, error) {
1898+
groupsHasTags := make(map[*ec2types.SecurityGroup]bool)
1899+
groupsLinkedPermissions := make(map[*ec2types.SecurityGroup]IPPermissionSet)
1900+
sgsOut, err := c.ec2.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
1901+
Filters: []ec2types.Filter{
1902+
newEc2Filter("ip-permission.group-id", sgID),
1903+
},
1904+
})
1905+
if err != nil {
1906+
return groupsHasTags, groupsLinkedPermissions, fmt.Errorf("error querying security groups for ELB: %q", err)
1907+
}
1908+
1909+
for _, sg := range sgsOut {
1910+
groupsHasTags[&sg] = c.tagging.hasClusterTag(sg.Tags)
1911+
1912+
groupsLinkedPermissions[&sg] = NewIPPermissionSet()
1913+
for _, rule := range sg.IpPermissions {
1914+
if rule.UserIdGroupPairs != nil {
1915+
for _, pair := range rule.UserIdGroupPairs {
1916+
if pair.GroupId != nil && aws.ToString(pair.GroupId) == sgID {
1917+
groupsLinkedPermissions[&sg].Insert(rule)
1918+
}
1919+
}
1920+
}
1921+
}
1922+
1923+
}
1924+
return groupsHasTags, groupsLinkedPermissions, nil
1925+
}
1926+
1927+
// removeOwnedSecurityGroups removes the CLB owned/managed security groups from AWS.
1928+
// It revokes ingress rules that reference the security groups to be removed,
1929+
// then deletes the security groups that are owned by the controller.
1930+
// This is used when updating load balancer security groups to clean up orphaned ones.
1931+
//
1932+
// Parameters:
1933+
// - `ctx`: The context for the operation.
1934+
// - `loadBalancerName`: The name of the load balancer (used for logging and deletion operations).
1935+
// - `securityGroups`: The list of security group IDs to process for removal.
1936+
//
1937+
// Returns:
1938+
// - `[]error`: Collection of all errors encountered during the removal process.
1939+
func (c *Cloud) removeOwnedSecurityGroups(ctx context.Context, loadBalancerName string, securityGroups []string) []error {
1940+
allErrs := []error{}
1941+
sgMap := make(map[string]struct{})
1942+
1943+
// Validate each security group reference, building a list to be deleted.
1944+
for _, sg := range securityGroups {
1945+
isOwned, err := c.isOwnedSecurityGroup(ctx, sg)
1946+
if err != nil {
1947+
allErrs = append(allErrs, fmt.Errorf("unable to validate if security group %q is owned by the controller: %w", sg, err))
1948+
continue
1949+
}
1950+
1951+
groupsWithClusterTag, groupsLinkedPermissions, err := c.buildSecurityGroupRuleReferences(ctx, sg)
1952+
if err != nil {
1953+
allErrs = append(allErrs, fmt.Errorf("error building security group rule references for %q: %w", sg, err))
1954+
continue
1955+
}
1956+
1957+
// Revoke ingress rules referencing the security group to be deleted
1958+
// from cluster-tagged security groups, when the referenced security
1959+
// group has no cluster tag, skip the revoke assuming it is user-managed.
1960+
for sgTarget, sgPerms := range groupsLinkedPermissions {
1961+
if !groupsWithClusterTag[sgTarget] {
1962+
klog.Warningf("security group %q has no cluster tag, skipping remove lifecycle after update", sg)
1963+
continue
1964+
}
1965+
1966+
klog.V(2).Infof("revoking security group ingress references of %q from %q", sg, aws.ToString(sgTarget.GroupId))
1967+
if _, err := c.ec2.RevokeSecurityGroupIngress(ctx, &ec2.RevokeSecurityGroupIngressInput{
1968+
GroupId: sgTarget.GroupId,
1969+
IpPermissions: sgPerms.List(),
1970+
}); err != nil {
1971+
allErrs = append(allErrs, fmt.Errorf("error revoking security group ingress rules from %q: %w", aws.ToString(sgTarget.GroupId), err))
1972+
continue
1973+
}
1974+
}
1975+
1976+
// Skip security group removal when the security group is not owned by the controller.
1977+
if !isOwned {
1978+
klog.Warningf("security group %q is not owned by the controller, skipping remove lifecycle after update", sg)
1979+
continue
1980+
}
1981+
1982+
klog.V(2).Infof("making loadbalancer owned security group %q ready for deletion", sg)
1983+
sgMap[sg] = struct{}{}
1984+
}
1985+
if len(sgMap) == 0 {
1986+
return allErrs
1987+
}
1988+
1989+
if err := c.deleteSecurityGroupsWithBackoff(ctx, loadBalancerName, sgMap); err != nil {
1990+
return append(allErrs, fmt.Errorf("error deleting security groups %v: %v", sgMap, err))
1991+
}
1992+
klog.V(2).Infof("loadbalancer owned security groups deleted")
1993+
return nil
1994+
}

0 commit comments

Comments
 (0)