Skip to content

Commit c12bdf0

Browse files
authored
Merge pull request #679 from mmamczur/multi-subnet
L4 ILB - skip nodes from the non default network if multi subnet is t…
2 parents 0fc8f79 + 543f0b6 commit c12bdf0

File tree

3 files changed

+281
-0
lines changed

3 files changed

+281
-0
lines changed

providers/gce/gce_loadbalancer_internal.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ const (
5050
maxInstancesPerInstanceGroup = 1000
5151
// maxL4ILBPorts is the maximum number of ports that can be specified in an L4 ILB Forwarding Rule. Beyond this, "AllPorts" field should be used.
5252
maxL4ILBPorts = 5
53+
// labelGKESubnetworkName is the key of the label that contains the subnet name the node is connected to.
54+
labelGKESubnetworkName = "cloud.google.com/gke-node-pool-subnet"
5355
)
5456

5557
func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, existingFwdRule *compute.ForwardingRule, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) {
@@ -254,6 +256,54 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
254256
return status, nil
255257
}
256258

259+
func removeNodesInNonDefaultNetworks(nodes []*v1.Node, defaultSubnetName string) []*v1.Node {
260+
var newList []*v1.Node
261+
var skippedNodes []string
262+
for _, node := range nodes {
263+
subnetLabel, ok := node.Labels[labelGKESubnetworkName]
264+
// nodes that have no label and no PodCIDR should be filtered out.
265+
// This translates to: if node doesn't have the label but has PodCIDR then it is assumed to be in the default network.
266+
// This check is a safeguard for situations when the cluster might be running older node controller that does not know multi-subnet or multi-subnet feature misbehaves.
267+
if !ok && node.Spec.PodCIDR == "" {
268+
skippedNodes = append(skippedNodes, node.Name)
269+
continue
270+
}
271+
// For clusters that become multi-subnet the label on existing nodes from the default network can be present with an emtpy value.
272+
if ok && subnetLabel != "" && subnetLabel != defaultSubnetName {
273+
skippedNodes = append(skippedNodes, node.Name)
274+
continue
275+
}
276+
newList = append(newList, node)
277+
}
278+
countOfSkippedNodes := len(skippedNodes)
279+
if len(skippedNodes) > 0 {
280+
klog.V(2).Infof("Skipped %d nodes from non default subnetworks. First skipped nodes: %v", countOfSkippedNodes, truncateList(skippedNodes, 10))
281+
}
282+
return newList
283+
}
284+
285+
// Extract the subnet name from the URL.
286+
// example: for `https://www.googleapis.com/compute/v1/projects/project/regions/us-central1/subnetworks/defaultSubnet`
287+
// this will return `defaultSubnet`.
288+
func subnetNameFromURL(url string) (string, error) {
289+
resource, err := cloud.ParseResourceURL(url)
290+
if err != nil {
291+
return "", err
292+
}
293+
if resource.Key == nil {
294+
return "", fmt.Errorf("subnet URL %s is missing the name", url)
295+
}
296+
return resource.Key.Name, nil
297+
}
298+
299+
// truncates a list - will return a sublist of at most max elements.
300+
func truncateList[T any](l []T, max int) []T {
301+
if len(l) <= max {
302+
return l
303+
}
304+
return l[:max]
305+
}
306+
257307
func (g *Cloud) clearPreviousInternalResources(svc *v1.Service, loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) {
258308
// If a new backend service was created, delete the old one.
259309
if existingBackendService.Name != expectedBSName {
@@ -626,6 +676,17 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node)
626676
// ensureInternalInstanceGroups generates an unmanaged instance group for every zone
627677
// where a K8s node exists. It also ensures that each node belongs to an instance group
628678
func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]string, error) {
679+
defaultSubnetName, err := subnetNameFromURL(g.SubnetworkURL())
680+
// Perform node filtering only if the subnet URL is valid. Do not stop execution in case some clusters have invalid SubnetworkURL configured.
681+
if err == nil {
682+
// Filter out any node that is not from the default network. This is required for multi-subnet feature.
683+
// This should not change behavior for nodes that are in the default network.
684+
// This can't be done earlier when listing node since the code is shared between internal and external LBs.
685+
nodes = removeNodesInNonDefaultNetworks(nodes, defaultSubnetName)
686+
} else {
687+
klog.Errorf("invalid subnetwork URL configured for the controller, assuming all nodes are in the default subnetwork %s, err: %v", g.SubnetworkURL(), err)
688+
}
689+
629690
zonedNodes := splitNodesByZone(nodes)
630691
klog.V(2).Infof("ensureInternalInstanceGroups(%v): %d nodes over %d zones in region %v", name, len(nodes), len(zonedNodes), g.region)
631692
var igLinks []string

providers/gce/gce_loadbalancer_internal_test.go

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,177 @@ func TestEnsureMultipleInstanceGroups(t *testing.T) {
199199
}
200200
}
201201

202+
func TestEnsureInstanceGroupFromDefaultNetworkMultiSubnetClusterMode(t *testing.T) {
203+
t.Parallel()
204+
205+
vals := DefaultTestClusterValues()
206+
vals.SubnetworkURL = "https://www.googleapis.com/compute/v1/projects/project/regions/us-central1/subnetworks/defaultSubnet"
207+
gce, err := fakeGCECloud(vals)
208+
require.NoError(t, err)
209+
210+
nodes, err := createAndInsertNodes(gce, []string{"n1", "n2", "n3", "n4", "n5"}, vals.ZoneName)
211+
require.NoError(t, err)
212+
// node with a matching subnet
213+
nodes[0].Labels[labelGKESubnetworkName] = "defaultSubnet"
214+
// node with a label of a non-matching subnet
215+
nodes[1].Labels[labelGKESubnetworkName] = "anotherSubnet"
216+
// node with no label but a PodCIDR
217+
nodes[2].Spec.PodCIDR = "10.0.5.0/24"
218+
// node[3] has no label nor PodCIDR
219+
nodes[3].Spec.PodCIDR = ""
220+
// node[4] has a label that contains an empty string (this indicates the default network).
221+
nodes[4].Spec.PodCIDR = ""
222+
nodes[4].Labels[labelGKESubnetworkName] = ""
223+
224+
baseName := makeInstanceGroupName(vals.ClusterID)
225+
226+
igsFromCloud, err := gce.ensureInternalInstanceGroups(baseName, nodes)
227+
require.NoError(t, err)
228+
229+
url, err := cloud.ParseResourceURL(igsFromCloud[0])
230+
require.NoError(t, err)
231+
instances, err := gce.ListInstancesInInstanceGroup(url.Key.Name, url.Key.Zone, "ALL")
232+
require.NoError(t, err)
233+
assert.Len(t, instances, 3, "Incorrect number of Instances in the group")
234+
var instanceURLs []string
235+
for _, inst := range instances {
236+
instanceURLs = append(instanceURLs, inst.Instance)
237+
}
238+
if !hasInstanceForNode(instances, nodes[0]) {
239+
t.Errorf("expected n1 to be in instances but it contained %+v", instanceURLs)
240+
}
241+
if hasInstanceForNode(instances, nodes[1]) {
242+
t.Errorf("expected n2 to NOT be in instances but it was included %+v", instanceURLs)
243+
}
244+
if !hasInstanceForNode(instances, nodes[2]) {
245+
t.Errorf("expected n3 to be in instances but it contained %+v", instanceURLs)
246+
}
247+
if hasInstanceForNode(instances, nodes[3]) {
248+
t.Errorf("expected n4 to NOT be in instances but it was included %+v", instanceURLs)
249+
}
250+
if !hasInstanceForNode(instances, nodes[4]) {
251+
t.Errorf("expected n5 to be in instances but it contained %+v", instanceURLs)
252+
}
253+
}
254+
255+
// Test that the node filtering will not be done if the cluster subnetwork value is not set properly.
256+
func TestEnsureInstanceGroupFromDefaultNetworkMultiSubnetClusterModeIfSubnetworkValueIsInvalid(t *testing.T) {
257+
t.Parallel()
258+
259+
vals := DefaultTestClusterValues()
260+
vals.SubnetworkURL = "invalid"
261+
gce, err := fakeGCECloud(vals)
262+
require.NoError(t, err)
263+
264+
nodes, err := createAndInsertNodes(gce, []string{"n1", "n2"}, vals.ZoneName)
265+
require.NoError(t, err)
266+
// node with a matching subnet
267+
nodes[0].Labels[labelGKESubnetworkName] = "defaultSubnet"
268+
// node with a label of a non-matching subnet
269+
nodes[1].Labels[labelGKESubnetworkName] = "anotherSubnet"
270+
271+
baseName := makeInstanceGroupName(vals.ClusterID)
272+
273+
igsFromCloud, err := gce.ensureInternalInstanceGroups(baseName, nodes)
274+
require.NoError(t, err)
275+
276+
url, err := cloud.ParseResourceURL(igsFromCloud[0])
277+
require.NoError(t, err)
278+
instances, err := gce.ListInstancesInInstanceGroup(url.Key.Name, url.Key.Zone, "ALL")
279+
require.NoError(t, err)
280+
assert.Len(t, instances, 2, "Incorrect number of Instances in the group")
281+
var instanceURLs []string
282+
for _, inst := range instances {
283+
instanceURLs = append(instanceURLs, inst.Instance)
284+
}
285+
if !hasInstanceForNode(instances, nodes[0]) {
286+
t.Errorf("expected n1 to be in instances but it contained %+v", instanceURLs)
287+
}
288+
if !hasInstanceForNode(instances, nodes[1]) {
289+
t.Errorf("expected n2 to be in instances but it contained %+v", instanceURLs)
290+
}
291+
}
292+
293+
func hasInstanceForNode(instances []*compute.InstanceWithNamedPorts, node *v1.Node) bool {
294+
for _, instance := range instances {
295+
if strings.HasSuffix(instance.Instance, node.Name) {
296+
return true
297+
}
298+
}
299+
return false
300+
}
301+
302+
func TestRemoveNodesInNonDefaultNetworks(t *testing.T) {
303+
t.Parallel()
304+
305+
testInput := []struct {
306+
node *v1.Node
307+
shouldBeInDefaultSubnet bool
308+
}{
309+
{
310+
node: &v1.Node{
311+
ObjectMeta: metav1.ObjectMeta{
312+
Name: "defaultSubnetNodeWithLabel",
313+
Labels: map[string]string{labelGKESubnetworkName: "defaultSubnet"},
314+
},
315+
},
316+
shouldBeInDefaultSubnet: true,
317+
},
318+
{
319+
node: &v1.Node{
320+
ObjectMeta: metav1.ObjectMeta{
321+
Name: "nonDefaultSubnetNode",
322+
Labels: map[string]string{labelGKESubnetworkName: "secondarySubnet"},
323+
},
324+
},
325+
shouldBeInDefaultSubnet: false,
326+
},
327+
{
328+
node: &v1.Node{
329+
ObjectMeta: metav1.ObjectMeta{
330+
Name: "defaultSubnetNodeWithEmptyLabel",
331+
Labels: map[string]string{labelGKESubnetworkName: ""},
332+
},
333+
},
334+
shouldBeInDefaultSubnet: true,
335+
},
336+
{
337+
node: &v1.Node{
338+
ObjectMeta: metav1.ObjectMeta{
339+
Name: "defaultSubnetNodeWithoutLabel",
340+
},
341+
Spec: v1.NodeSpec{PodCIDR: "10.0.0.0/28"},
342+
},
343+
shouldBeInDefaultSubnet: true,
344+
},
345+
{
346+
node: &v1.Node{
347+
ObjectMeta: metav1.ObjectMeta{
348+
Name: "nodeInUnknownSubnet",
349+
},
350+
},
351+
shouldBeInDefaultSubnet: false,
352+
},
353+
}
354+
var nodes []*v1.Node
355+
for _, testNode := range testInput {
356+
nodes = append(nodes, testNode.node)
357+
}
358+
359+
onlyDefaultSubnetNodes := removeNodesInNonDefaultNetworks(nodes, "defaultSubnet")
360+
361+
defaultSubnetNodesSet := make(map[string]struct{})
362+
for _, node := range onlyDefaultSubnetNodes {
363+
defaultSubnetNodesSet[node.Name] = struct{}{}
364+
}
365+
for _, testNode := range testInput {
366+
_, hasNode := defaultSubnetNodesSet[testNode.node.Name]
367+
if testNode.shouldBeInDefaultSubnet != hasNode {
368+
t.Errorf("Node %s should not be in the default subnet but it was present in %v", testNode.node.Name, defaultSubnetNodesSet)
369+
}
370+
}
371+
}
372+
202373
func TestEnsureInternalLoadBalancer(t *testing.T) {
203374
t.Parallel()
204375

@@ -2017,3 +2188,48 @@ func TestEnsureInternalLoadBalancerAllPorts(t *testing.T) {
20172188
}
20182189
assertInternalLbResourcesDeleted(t, gce, svc, vals, true)
20192190
}
2191+
2192+
func TestSubnetNameFromURL(t *testing.T) {
2193+
cases := []struct {
2194+
desc string
2195+
url string
2196+
wantName string
2197+
wantErr bool
2198+
}{
2199+
{
2200+
desc: "full URL",
2201+
url: "https://www.googleapis.com/compute/v1/projects/project/regions/us-central1/subnetworks/defaultSubnet",
2202+
wantName: "defaultSubnet",
2203+
},
2204+
{
2205+
desc: "project path",
2206+
url: "projects/project/regions/us-central1/subnetworks/defaultSubnet",
2207+
wantName: "defaultSubnet",
2208+
},
2209+
{
2210+
desc: "missing name",
2211+
url: "projects/project/regions/us-central1/subnetworks",
2212+
wantErr: true,
2213+
},
2214+
{
2215+
desc: "invalid",
2216+
url: "invalid",
2217+
wantErr: true,
2218+
},
2219+
}
2220+
for _, tc := range cases {
2221+
t.Run(tc.desc, func(t *testing.T) {
2222+
subnetName, err := subnetNameFromURL(tc.url)
2223+
if err != nil && !tc.wantErr {
2224+
t.Errorf("unexpected error %v", err)
2225+
}
2226+
if err == nil && tc.wantErr {
2227+
t.Errorf("wanted an error but got none")
2228+
}
2229+
if !tc.wantErr && subnetName != tc.wantName {
2230+
t.Errorf("invalid name extracted from URL %s, want=%s, got=%s", tc.url, tc.wantName, subnetName)
2231+
}
2232+
})
2233+
}
2234+
2235+
}

providers/gce/gce_loadbalancer_utils_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ func createAndInsertNodes(gce *Cloud, nodeNames []string, zoneName string) ([]*v
114114
v1.LabelTopologyZone: zoneName,
115115
},
116116
},
117+
Spec: v1.NodeSpec{
118+
// Add PodCIDR for multi subnet purpose. Nodes need to have PodCIDR to be regarded as nodes from the default subnet (unless they have the subnet label).
119+
PodCIDR: "192.168.0.0/0",
120+
},
117121
},
118122
)
119123

0 commit comments

Comments
 (0)