Skip to content

Commit cd53a54

Browse files
committed
gcp-controller: don't make assumptions on PodCIDRs
The gcp-controller, to avoid issues with preemtible VMs, detect when a VM has changed and delete all Pods associated to avoid Kubernetes controllers think this was a restart, when it actually was a completely replacement of the VM. The heuristic for detecting a Node change are based on: - the instance ID, that is unique for each instance - the pod.Spec.PodCIDRs value The logic for the PodCIDR value is not easy to generalize and current implementation is based on a specific implementation and configuration. In addition, the pod.Spec.PodCIDRs field is inmutable once set, so it can only change if the VM is new, ie. the instanceID has changed, so no need to duplicate the logic. Change-Id: Ic39996f473682973c9ac6c81bfa10a14dd473b77 Signed-off-by: Antonio Ojea <[email protected]>
1 parent 32ba3c7 commit cd53a54

File tree

2 files changed

+9
-75
lines changed

2 files changed

+9
-75
lines changed

cmd/gcp-controller-manager/node_csr_approver.go

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -963,27 +963,6 @@ func shouldDeleteNode(ctx *controllerContext, node *v1.Node, getInstance func(*c
963963
return true, nil
964964
}
965965
}
966-
// Newly created node might not have pod CIDR allocated yet.
967-
if node.Spec.PodCIDR == "" {
968-
klog.V(2).Infof("Node %q has empty podCIDR.", node.Name)
969-
return false, nil
970-
}
971-
var unmatchedRanges []string
972-
for _, networkInterface := range inst.NetworkInterfaces {
973-
for _, r := range networkInterface.AliasIpRanges {
974-
if node.Spec.PodCIDR == r.IpCidrRange {
975-
klog.V(2).Infof("Instance %q has alias range that matches node's podCIDR.", inst.Name)
976-
return false, nil
977-
}
978-
unmatchedRanges = append(unmatchedRanges, r.IpCidrRange)
979-
}
980-
}
981-
if len(unmatchedRanges) != 0 {
982-
klog.Warningf("Instance %q has alias range(s) %v and none of them match node's podCIDR %s, will trigger node deletion.", inst.Name, unmatchedRanges, node.Spec.PodCIDR)
983-
return true, nil
984-
}
985-
// Instance with no alias range is route based, for which node object deletion is unnecessary.
986-
klog.V(2).Infof("Instance %q has no alias range.", inst.Name)
987966
return false, nil
988967
}
989968

cmd/gcp-controller-manager/node_csr_approver_test.go

Lines changed: 9 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,53 +1252,6 @@ func TestShouldDeleteNode(t *testing.T) {
12521252
getInstanceErr error
12531253
expectedErr error
12541254
}{
1255-
{
1256-
desc: "instance with 1 alias range and matches podCIDR",
1257-
ctx: &controllerContext{},
1258-
node: &v1.Node{
1259-
ObjectMeta: metav1.ObjectMeta{
1260-
Name: "node-test",
1261-
},
1262-
Spec: v1.NodeSpec{
1263-
PodCIDR: "10.0.0.1/24",
1264-
},
1265-
},
1266-
instance: &compute.Instance{
1267-
NetworkInterfaces: []*compute.NetworkInterface{
1268-
{
1269-
AliasIpRanges: []*compute.AliasIpRange{
1270-
{
1271-
IpCidrRange: "10.0.0.1/24",
1272-
},
1273-
},
1274-
},
1275-
},
1276-
},
1277-
},
1278-
{
1279-
desc: "instance with 1 alias range doesn't match podCIDR",
1280-
ctx: &controllerContext{},
1281-
node: &v1.Node{
1282-
ObjectMeta: metav1.ObjectMeta{
1283-
Name: "node-test",
1284-
},
1285-
Spec: v1.NodeSpec{
1286-
PodCIDR: "10.0.0.1/24",
1287-
},
1288-
},
1289-
instance: &compute.Instance{
1290-
NetworkInterfaces: []*compute.NetworkInterface{
1291-
{
1292-
AliasIpRanges: []*compute.AliasIpRange{
1293-
{
1294-
IpCidrRange: "10.0.0.2/24",
1295-
},
1296-
},
1297-
},
1298-
},
1299-
},
1300-
shouldDelete: true,
1301-
},
13021255
{
13031256
desc: "instance with 2 alias range and 1 matches podCIDR",
13041257
ctx: &controllerContext{},
@@ -1399,13 +1352,15 @@ func TestShouldDeleteNode(t *testing.T) {
13991352
},
14001353
}
14011354
for _, c := range cases {
1402-
fakeGetInstance := func(_ *controllerContext, _ string) (*compute.Instance, error) {
1403-
return c.instance, c.getInstanceErr
1404-
}
1405-
shouldDelete, err := shouldDeleteNode(c.ctx, c.node, fakeGetInstance)
1406-
if err != c.expectedErr || shouldDelete != c.shouldDelete {
1407-
t.Errorf("%s: shouldDeleteNode=(%v, %v), want (%v, %v)", c.desc, shouldDelete, err, c.shouldDelete, c.expectedErr)
1408-
}
1355+
t.Run(c.desc, func(t *testing.T) {
1356+
fakeGetInstance := func(_ *controllerContext, _ string) (*compute.Instance, error) {
1357+
return c.instance, c.getInstanceErr
1358+
}
1359+
shouldDelete, err := shouldDeleteNode(c.ctx, c.node, fakeGetInstance)
1360+
if err != c.expectedErr || shouldDelete != c.shouldDelete {
1361+
t.Errorf("%s: shouldDeleteNode=(%v, %v), want (%v, %v)", c.desc, shouldDelete, err, c.shouldDelete, c.expectedErr)
1362+
}
1363+
})
14091364
}
14101365
}
14111366

0 commit comments

Comments
 (0)