Skip to content

Commit a492546

Browse files
committed
Fix getHostNamespaceAddressesForNode error wrapping
Incorrect wrapping of errors in getHostNamespaceAddressesForNode prevented callers to assert the type of error returned. In this specific case the caller delIPFromHostNetworkNamespaceAddrSet could not appropriately check that the error returned was AnnotationNotSetError and was failing a deletion flow that should be noop instead. This was triggered by [1] changes that cause the deletion flow to depend on annotations set by different controllers instead of annotations set atomically by a single controller, making it possible that some annotations were set but not others and opening up a new error handling flow that wasn't possible before. The scenario is a node that is added and deleted quickly enough for some controller in cluster manager to be able to annotate the node and some not. [1] ovn-kubernetes/ovn-kubernetes#5396 Signed-off-by: Jaime Caamaño Ruiz <[email protected]>
1 parent d3e489a commit a492546

File tree

2 files changed

+64
-46
lines changed

2 files changed

+64
-46
lines changed

go-controller/pkg/ovn/master_test.go

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,45 +1566,62 @@ var _ = ginkgo.Describe("Default network controller operations", func() {
15661566
gomega.Expect(err).NotTo(gomega.HaveOccurred())
15671567
})
15681568

1569-
ginkgo.It("doesn't retry deleting a node that doesn't have a node-subnet annotation", func() {
1570-
app.Action = func(ctx *cli.Context) error {
1571-
_, err := config.InitConfig(ctx, nil, nil)
1569+
ginkgo.DescribeTable("doesn't retry deleting a node that is missing annotation",
1570+
func(node *corev1.Node) {
1571+
app.Action = func(ctx *cli.Context) error {
1572+
_, err := config.InitConfig(ctx, nil, nil)
1573+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1574+
startFakeController(oc, wg)
1575+
ginkgo.By("create new node with no annotation defined and ensure there's a retry entry")
1576+
_, err = kubeFakeClient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
1577+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1578+
retry.CheckRetryObjectMultipleFieldsEventually(
1579+
node.Name,
1580+
oc.retryNodes,
1581+
gomega.BeNil(), // oldObj should be nil
1582+
gomega.Not(gomega.BeNil()), // newObj should not be nil
1583+
)
1584+
keyExists := true
1585+
retry.CheckRetryObjectEventually(node.Name, keyExists, oc.retryNodes)
1586+
ginkgo.By("delete node and check that there are no retries for the deleted node")
1587+
err = kubeFakeClient.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{})
1588+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1589+
retry.CheckRetryObjectEventually(node.Name, !keyExists, oc.retryNodes)
1590+
ginkgo.By("ensure failures for sync host network address or adding nodes are cleared")
1591+
gomega.Eventually(func() bool {
1592+
_, foundSyncHostNetAddrSetFailed := oc.syncHostNetAddrSetFailed.Load(node.Name)
1593+
_, foundAddNodeFailed := oc.addNodeFailed.Load(node.Name)
1594+
return foundSyncHostNetAddrSetFailed || foundAddNodeFailed
1595+
}).Should(gomega.BeFalse())
1596+
return nil
1597+
}
1598+
err := app.Run([]string{
1599+
app.Name,
1600+
})
15721601
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1573-
startFakeController(oc, wg)
1574-
newNode := &corev1.Node{
1602+
1603+
},
1604+
ginkgo.Entry("k8s.ovn.org/node-subnets",
1605+
&corev1.Node{
15751606
ObjectMeta: metav1.ObjectMeta{
1576-
Name: "newNode",
1577-
Annotations: map[string]string{},
1607+
Name: "newNode",
1608+
Annotations: map[string]string{
1609+
"k8s.ovn.org/node-id": "2",
1610+
},
15781611
},
1579-
}
1580-
ginkgo.By("create new node with no host-subnet annotation defined and ensure theres a retry entry")
1581-
_, err = kubeFakeClient.CoreV1().Nodes().Create(context.TODO(), newNode, metav1.CreateOptions{})
1582-
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1583-
retry.CheckRetryObjectMultipleFieldsEventually(
1584-
newNode.Name,
1585-
oc.retryNodes,
1586-
gomega.BeNil(), // oldObj should be nil
1587-
gomega.Not(gomega.BeNil()), // newObj should not be nil
1588-
)
1589-
keyExists := true
1590-
retry.CheckRetryObjectEventually(newNode.Name, keyExists, oc.retryNodes)
1591-
ginkgo.By("delete node and check that there are no retries for the deleted node")
1592-
err = kubeFakeClient.CoreV1().Nodes().Delete(context.TODO(), newNode.Name, metav1.DeleteOptions{})
1593-
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1594-
retry.CheckRetryObjectEventually(newNode.Name, !keyExists, oc.retryNodes)
1595-
ginkgo.By("ensure failures for sync host network address or adding nodes are cleared")
1596-
gomega.Eventually(func() bool {
1597-
_, foundSyncHostNetAddrSetFailed := oc.syncHostNetAddrSetFailed.Load(newNode.Name)
1598-
_, foundAddNodeFailed := oc.addNodeFailed.Load(newNode.Name)
1599-
return foundSyncHostNetAddrSetFailed || foundAddNodeFailed
1600-
}).Should(gomega.BeFalse())
1601-
return nil
1602-
}
1603-
err := app.Run([]string{
1604-
app.Name,
1605-
})
1606-
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1607-
})
1612+
},
1613+
),
1614+
ginkgo.Entry("k8s.ovn.org/node-id",
1615+
&corev1.Node{
1616+
ObjectMeta: metav1.ObjectMeta{
1617+
Name: "newNode",
1618+
Annotations: map[string]string{
1619+
"k8s.ovn.org/node-subnets": "{\"default\": [\"10.130.0.0/23\", \"fd01:0:0:2::/64\"]}",
1620+
},
1621+
},
1622+
},
1623+
),
1624+
)
16081625

16091626
ginkgo.It("delete a partially constructed node", func() {
16101627
app.Action = func(ctx *cli.Context) error {

go-controller/pkg/ovn/namespace.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -376,16 +376,17 @@ func (oc *DefaultNetworkController) getHostNamespaceAddressesForNode(node *corev
376376
}
377377
// for shared gateway mode we will use LRP IPs to SNAT host network traffic
378378
// so add these to the address set.
379-
lrpIPs, err := udn.GetGWRouterIPs(node, oc.GetNetInfo())
380-
if err != nil {
381-
if util.IsAnnotationNotSetError(err) {
382-
// FIXME(tssurya): This is present for backwards compatibility
383-
// Remove me a few months from now
384-
var err1 error
385-
lrpIPs, err1 = util.ParseNodeGatewayRouterLRPAddrs(node)
386-
if err1 != nil {
387-
return nil, fmt.Errorf("failed to get join switch port IP address for node %s: %v/%v", node.Name, err, err1)
388-
}
379+
lrpIPs, gwIPsErr := udn.GetGWRouterIPs(node, oc.GetNetInfo())
380+
if gwIPsErr != nil {
381+
if !util.IsAnnotationNotSetError(gwIPsErr) {
382+
return nil, gwIPsErr
383+
}
384+
// FIXME(tssurya): This is present for backwards compatibility
385+
// Remove me a few months from now
386+
var lrpAddrsErr error
387+
lrpIPs, lrpAddrsErr = util.ParseNodeGatewayRouterLRPAddrs(node)
388+
if lrpAddrsErr != nil {
389+
return nil, fmt.Errorf("failed to fallback to annotations after error %q: %w", gwIPsErr, lrpAddrsErr)
389390
}
390391
}
391392

0 commit comments

Comments
 (0)