Skip to content

Commit 7608e0c

Browse files
authored
fix: Avoid race when removing interfaces via NNCP (#2347) (#2354)
Removing an interface that was created using an NNCP is done by editing the same NNCP. This sometimes resulted in a race in which the NNCP success status actually reflected the previous status, leading to the NNCP being deleted before the configuration was completed. That left hanging interfaces on the cluster nodes, with node-native interfaces still occupied as ports of these test-created interfaces. A recent PR made this failing flow occur every time. This PR aims to ensure that the timestamp of the AVAILABLE status belongs to the most recent change (the interface removal) and not to the previous change (setup or modification). This PR is based on the fix that was presented in RedHatQE/openshift-virtualization-tests#430.
1 parent b11c120 commit 7608e0c

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ fcn_exclude_functions =
5858
click,
5959
ast,
6060
filecmp,
61+
datetime,
6162

6263
enable-extensions =
6364
FCN,

ocp_resources/node_network_configuration_policy.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
import re
2+
from datetime import datetime
23

34
from kubernetes.dynamic.exceptions import ConflictError
45

5-
from ocp_resources.utils.constants import TIMEOUT_4MINUTES
6+
from ocp_resources.utils.constants import TIMEOUT_1MINUTE, TIMEOUT_4MINUTES, TIMEOUT_5SEC
67
from ocp_resources.exceptions import NNCPConfigurationFailed
78
from ocp_resources.node import Node
89
from ocp_resources.node_network_configuration_enactment import (
910
NodeNetworkConfigurationEnactment,
1011
)
1112
from ocp_resources.node_network_state import NodeNetworkState
1213
from ocp_resources.resource import Resource, ResourceEditor
13-
from timeout_sampler import TimeoutExpiredError, TimeoutSampler, TimeoutWatch
14+
from timeout_sampler import TimeoutExpiredError, TimeoutSampler, TimeoutWatch, retry
1415

1516
IPV4_STR = "ipv4"
1617
IPV6_STR = "ipv6"
@@ -322,10 +323,43 @@ def _absent_interface(self):
322323
if self.ports:
323324
self.add_ports()
324325

326+
# The current time-stamp of the NNCP's available status will change after the NNCP is updated, therefore
327+
# it must be fetched and stored before the update, and compared with the new time-stamp after.
328+
initial_success_status_time = self._get_last_successful_transition_time()
325329
ResourceEditor(
326330
patches={self: {"spec": {"desiredState": {"interfaces": self.desired_state["interfaces"]}}}}
327331
).update()
328332

333+
# If the NNCP failed on setup, then its tear-down AVAILABLE status will necessarily be the first.
334+
if initial_success_status_time:
335+
self._wait_for_nncp_status_update(initial_transition_time=initial_success_status_time)
336+
337+
def _get_last_successful_transition_time(self) -> str | None:
    """Return the `lastTransitionTime` of the successful AVAILABLE condition.

    Scans the conditions listed in the resource status for one whose type is
    AVAILABLE, whose status is TRUE and whose reason is
    SUCCESSFULLY_CONFIGURED.

    Returns:
        str | None: The `lastTransitionTime` value of the first matching
            condition, or None when no condition matches (including when
            the resource has no status or no conditions).
    """
    # Read status/conditions defensively, the same way
    # _wait_for_nncp_status_update does, so a resource without a populated
    # status yields None instead of raising while iterating a missing value.
    conditions = self.instance.get("status", {}).get("conditions", [])
    for condition in conditions:
        if (
            condition["type"] == self.Conditions.Type.AVAILABLE
            and condition["status"] == Resource.Condition.Status.TRUE
            # A condition without a reason simply does not match.
            and condition.get("reason") == self.Conditions.Reason.SUCCESSFULLY_CONFIGURED
        ):
            return condition["lastTransitionTime"]
    return None
346+
347+
@retry(wait_timeout=TIMEOUT_1MINUTE, sleep=TIMEOUT_5SEC)
def _wait_for_nncp_status_update(self, initial_transition_time: str) -> bool:
    """Check whether the AVAILABLE condition is newer than a given time-stamp.

    Wrapped by `retry` with a one-minute timeout and a five-second sleep;
    presumably it is re-invoked until it returns a truthy value (confirm
    against the `timeout_sampler.retry` documentation).

    Args:
        initial_transition_time: Time-stamp string in the
            "%Y-%m-%dT%H:%M:%SZ" format, captured before the NNCP update.

    Returns:
        bool: True when some condition has type AVAILABLE, status TRUE and
            a `lastTransitionTime` strictly later than
            `initial_transition_time`; False otherwise.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    baseline = datetime.strptime(initial_transition_time, timestamp_format)
    current_conditions = self.instance.get("status", {}).get("conditions", [])
    # Clauses are kept in the same order so the time-stamp is only parsed
    # for conditions that already match on type and status.
    return any(
        entry["type"] == self.Conditions.Type.AVAILABLE
        and entry["status"] == Resource.Condition.Status.TRUE
        and datetime.strptime(entry["lastTransitionTime"], timestamp_format) > baseline
        for entry in current_conditions
    )
362+
329363
@property
330364
def status(self):
331365
for condition in self.instance.status.conditions:

0 commit comments

Comments
 (0)