Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions api/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,23 @@
// +optional
ControlPlaneOutboundLB *LoadBalancerSpec `json:"controlPlaneOutboundLB,omitempty"`

// AdditionalAPIServerLBPorts specifies extra inbound ports for the APIServer load balancer.
// Each port specified (e.g., 9345) creates an inbound rule where the frontend port and the backend port are the same.
// +optional
AdditionalAPIServerLBPorts []LoadBalancerPort `json:"additionalAPIServerLBPorts,omitempty"`

NetworkClassSpec `json:",inline"`
}

// LoadBalancerPort specifies additional port for the API server load balancer.
type LoadBalancerPort struct {
// Name for the additional port within LB definition
Name string `json:"name"`

// Port for the LB definition
Port int32 `json:"port"`
}

// VnetSpec configures an Azure virtual network.
type VnetSpec struct {
// ResourceGroup is the name of the resource group of the existing virtual network
Expand Down Expand Up @@ -893,6 +907,17 @@
return false
}

// GetSecurityRuleByDestination returns security group rule, which matches provided destination ports.
func (s SubnetSpec) GetSecurityRuleByDestination(port string) *SecurityRule {
for _, rule := range s.SecurityGroup.SecurityRules {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably for the long term, we should change s.SecurityGroup.SecurityRules to a dictionary type for faster processing.

if rule.DestinationPorts != nil && *rule.DestinationPorts == port {
return &rule
}

Check warning on line 915 in api/v1beta1/types.go

View check run for this annotation

Codecov / codecov/patch

api/v1beta1/types.go#L911-L915

Added lines #L911 - L915 were not covered by tests
}

return nil

Check warning on line 918 in api/v1beta1/types.go

View check run for this annotation

Codecov / codecov/patch

api/v1beta1/types.go#L918

Added line #L918 was not covered by tests
}

// SecurityProfile specifies the Security profile settings for a
// virtual machine or virtual machine scale set.
type SecurityProfile struct {
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta1/types_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ type NetworkTemplateSpec struct {
// This is different from APIServerLB, and is used only in private clusters (optionally) for enabling outbound traffic.
// +optional
ControlPlaneOutboundLB *LoadBalancerClassSpec `json:"controlPlaneOutboundLB,omitempty"`

// AdditionalAPIServerLBPorts is the configuration for the additional inbound control-plane load balancer ports
// Each port specified (e.g., 9345) creates an inbound rule where the frontend port and the backend port are the same.
// +optional
AdditionalAPIServerLBPorts []LoadBalancerPort `json:"additionalAPIServerLBPorts,omitempty"`
}

// GetSubnetTemplate returns the subnet template based on the subnet role.
Expand Down
25 changes: 25 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 35 additions & 17 deletions azure/scope/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter {
BackendPoolName: s.APIServerLB().BackendPool.Name,
IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes,
AdditionalTags: s.AdditionalTags(),
AdditionalPorts: s.AdditionalAPIServerLBPorts(),
}

if s.APIServerLB().FrontendIPs != nil {
Expand Down Expand Up @@ -299,6 +300,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter {
BackendPoolName: s.APIServerLB().BackendPool.Name + "-internal",
IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes,
AdditionalTags: s.AdditionalTags(),
AdditionalPorts: s.AdditionalAPIServerLBPorts(),
}

privateIPFound := false
Expand Down Expand Up @@ -771,6 +773,11 @@ func (s *ClusterScope) ControlPlaneOutboundLB() *infrav1.LoadBalancerSpec {
return s.AzureCluster.Spec.NetworkSpec.ControlPlaneOutboundLB
}

// AdditionalAPIServerLBPorts returns the additional API server ports list.
func (s *ClusterScope) AdditionalAPIServerLBPorts() []infrav1.LoadBalancerPort {
return s.AzureCluster.Spec.NetworkSpec.AdditionalAPIServerLBPorts
}

// APIServerLBName returns the API Server LB name.
func (s *ClusterScope) APIServerLBName() string {
apiServerLB := s.APIServerLB()
Expand Down Expand Up @@ -1020,9 +1027,12 @@ func (s *ClusterScope) SetControlPlaneSecurityRules() {
if !s.ControlPlaneEnabled() {
return
}
if s.ControlPlaneSubnet().SecurityGroup.SecurityRules == nil {
subnet := s.ControlPlaneSubnet()
subnet.SecurityGroup.SecurityRules = infrav1.SecurityRules{

subnet := s.ControlPlaneSubnet()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in this flow where are we adding any user-provided security rules? (for example if a user specifies TCP 9345)

or is that elsewhere and the purpose of this change is to filter out 22 and apiserver port if it's not included?

Copy link
Member Author

@Danil-Grigorev Danil-Grigorev Mar 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This flow performs only defaulting on top of the user-provided set of rules, which may not be empty. This way having a field populated does not always need to specify all allowed ports, only additional ones or overrides.

I’m currently validating how this works in tandem with ClusterClass definitions we have. The desired goal is to permit specifying registrationMethod: control-plane-endpoint and allow RKE2 to register on the allowed port. But ideally this should follow internal name resolution, so that the traffic wouldn’t go through external load balancer.

If it doesn’t work, maybe it would still require additional LB rules.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems reasonable to have the additional ports of the APIServer LB opened-up/have an opinion about.

Could you please elaborate on why the security rules should not be updated too ?

Copy link
Member Author

@Danil-Grigorev Danil-Grigorev Apr 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, maybe I’m not fully understanding. I will try to give my thoughts on this:

I was attempting to describe the flow of how the rules are processed. We only apply defaults on top of what the user provides, so the user can still specify ports like TCP 9345 explicitly in security rules. We’re not overriding or stripping anything out, just adding fallback values when needed. LB rules were needed as well, since CP endpoint is external from the cluster perspective.

Current set of changes is what was required to make cluster healthy, and added e2e test displays that. My previous comment was a set of assumptions, or “paths to explore later” if it doesn’t work.


missingSSH := subnet.GetSecurityRuleByDestination("22") == nil
if missingSSH {
subnet.SecurityGroup.SecurityRules = append(subnet.SecurityGroup.SecurityRules,
infrav1.SecurityRule{
Name: "allow_ssh",
Description: "Allow SSH",
Expand All @@ -1034,20 +1044,28 @@ func (s *ClusterScope) SetControlPlaneSecurityRules() {
Destination: ptr.To("*"),
DestinationPorts: ptr.To("22"),
Action: infrav1.SecurityRuleActionAllow,
},
infrav1.SecurityRule{
Name: "allow_apiserver",
Description: "Allow K8s API Server",
Priority: 2201,
Protocol: infrav1.SecurityGroupProtocolTCP,
Direction: infrav1.SecurityRuleDirectionInbound,
Source: ptr.To("*"),
SourcePorts: ptr.To("*"),
Destination: ptr.To("*"),
DestinationPorts: ptr.To(strconv.Itoa(int(s.APIServerPort()))),
Action: infrav1.SecurityRuleActionAllow,
},
}
})
}

port := strconv.Itoa(int(s.APIServerPort()))

missingAPIPort := subnet.GetSecurityRuleByDestination(port) == nil
if missingAPIPort {
subnet.SecurityGroup.SecurityRules = append(subnet.SecurityGroup.SecurityRules, infrav1.SecurityRule{
Name: "allow_apiserver",
Description: "Allow K8s API Server",
Priority: 2201,
Protocol: infrav1.SecurityGroupProtocolTCP,
Direction: infrav1.SecurityRuleDirectionInbound,
Source: ptr.To("*"),
SourcePorts: ptr.To("*"),
Destination: ptr.To("*"),
DestinationPorts: ptr.To(port),
Action: infrav1.SecurityRuleActionAllow,
})
}

if missingSSH || missingAPIPort {
s.AzureCluster.Spec.NetworkSpec.UpdateControlPlaneSubnet(subnet)
}
}
Expand Down
179 changes: 147 additions & 32 deletions azure/scope/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,50 +242,165 @@ func TestAPIServerHost(t *testing.T) {
}

func TestGettingSecurityRules(t *testing.T) {
g := NewWithT(t)

cluster := &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-cluster",
Namespace: "default",
tests := []struct {
name string
cluster *clusterv1.Cluster
azureCluster *infrav1.AzureCluster
expectedRuleCount int
Comment on lines +246 to +249
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for reformatting this! 🙌🏼

}{
{
name: "default control plane subnet with no rules should have 2 security rules defaulted",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-cluster",
Namespace: "default",
},
},
azureCluster: &infrav1.AzureCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-azure-cluster",
},
Spec: infrav1.AzureClusterSpec{
AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
SubscriptionID: "123",
IdentityRef: &corev1.ObjectReference{
Kind: infrav1.AzureClusterIdentityKind,
},
},
ControlPlaneEnabled: true,
NetworkSpec: infrav1.NetworkSpec{
Subnets: infrav1.Subnets{
{
SubnetClassSpec: infrav1.SubnetClassSpec{
Role: infrav1.SubnetNode,
Name: "node",
},
},
},
},
},
},
expectedRuleCount: 2,
},
}

azureCluster := &infrav1.AzureCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-azure-cluster",
{
name: "additional rules are preserved",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-cluster",
Namespace: "default",
},
},
azureCluster: &infrav1.AzureCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-azure-cluster",
},
Spec: infrav1.AzureClusterSpec{
AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
SubscriptionID: "123",
IdentityRef: &corev1.ObjectReference{
Kind: infrav1.AzureClusterIdentityKind,
},
},
ControlPlaneEnabled: true,
NetworkSpec: infrav1.NetworkSpec{
Subnets: infrav1.Subnets{
{
SecurityGroup: infrav1.SecurityGroup{
SecurityGroupClass: infrav1.SecurityGroupClass{
SecurityRules: []infrav1.SecurityRule{{
Name: "allow_9345",
Description: "Allow port 9345",
Priority: 2200,
Protocol: infrav1.SecurityGroupProtocolTCP,
Direction: infrav1.SecurityRuleDirectionInbound,
Source: ptr.To("*"),
SourcePorts: ptr.To("*"),
Destination: ptr.To("*"),
DestinationPorts: ptr.To("9345"),
Action: infrav1.SecurityRuleActionAllow,
}},
},
},
SubnetClassSpec: infrav1.SubnetClassSpec{
Role: infrav1.SubnetControlPlane,
Name: string(infrav1.SubnetControlPlane),
},
},
},
},
},
},
expectedRuleCount: 3,
},
Spec: infrav1.AzureClusterSpec{
AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
SubscriptionID: "123",
IdentityRef: &corev1.ObjectReference{
Kind: infrav1.AzureClusterIdentityKind,
{
name: "override rules are accepted",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-cluster",
Namespace: "default",
},
},
ControlPlaneEnabled: true,
NetworkSpec: infrav1.NetworkSpec{
Subnets: infrav1.Subnets{
{
SubnetClassSpec: infrav1.SubnetClassSpec{
Role: infrav1.SubnetNode,
Name: "node",
azureCluster: &infrav1.AzureCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-azure-cluster",
},
Spec: infrav1.AzureClusterSpec{
AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
SubscriptionID: "123",
IdentityRef: &corev1.ObjectReference{
Kind: infrav1.AzureClusterIdentityKind,
},
},
ControlPlaneEnabled: true,
NetworkSpec: infrav1.NetworkSpec{
Subnets: infrav1.Subnets{
{
SecurityGroup: infrav1.SecurityGroup{
SecurityGroupClass: infrav1.SecurityGroupClass{
SecurityRules: []infrav1.SecurityRule{{
Name: "deny_ssh",
Description: "Deny SSH",
Priority: 2200,
Protocol: infrav1.SecurityGroupProtocolTCP,
Direction: infrav1.SecurityRuleDirectionInbound,
Source: ptr.To("*"),
SourcePorts: ptr.To("*"),
Destination: ptr.To("*"),
DestinationPorts: ptr.To("22"),
Action: infrav1.SecurityRuleActionDeny,
}},
},
},
SubnetClassSpec: infrav1.SubnetClassSpec{
Role: infrav1.SubnetControlPlane,
Name: string(infrav1.SubnetControlPlane),
},
},
},
},
},
},
expectedRuleCount: 2,
},
}
azureCluster.Default()

clusterScope := &ClusterScope{
Cluster: cluster,
AzureCluster: azureCluster,
}
clusterScope.SetControlPlaneSecurityRules()
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

subnet, err := clusterScope.AzureCluster.Spec.NetworkSpec.GetControlPlaneSubnet()
g.Expect(err).NotTo(HaveOccurred())
g.Expect(subnet.SecurityGroup.SecurityRules).To(HaveLen(2))
tt.azureCluster.Default()

clusterScope := &ClusterScope{
Cluster: tt.cluster,
AzureCluster: tt.azureCluster,
}
clusterScope.SetControlPlaneSecurityRules()

subnet, err := clusterScope.AzureCluster.Spec.NetworkSpec.GetControlPlaneSubnet()
g.Expect(err).NotTo(HaveOccurred())
g.Expect(subnet.SecurityGroup.SecurityRules).To(HaveLen(tt.expectedRuleCount))
})
}
}

func TestPublicIPSpecs(t *testing.T) {
Expand Down
Loading
Loading