Skip to content

Commit 5719743

Browse files
feat: add retries to delete_node
1 parent 9b6a512 commit 5719743

File tree

4 files changed

+54
-25
lines changed

4 files changed

+54
-25
lines changed

integration-tests/robot/tests/shared/lib/ZookeeperLibrary.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
# limitations under the License.
1414

1515
import os
16+
import time
1617

1718
from kazoo.client import KazooClient
1819
from kazoo.security import make_digest_acl, make_acl
20+
from kazoo.exceptions import ConnectionLoss
1921
from robot.api import logger
2022
from robot.libraries.BuiltIn import BuiltIn
2123

@@ -156,21 +158,34 @@ def set_access_control_lists(self, zk, node_path, acls):
156158
"""
157159
zk.set_acls(node_path, acls)
158160

159-
def create_node(self, zk, node_path, data=None):
161+
def create_node(self, zk, node_path, data=None, retries=3, delay=5):
160162
"""
161163
Creates ZooKeeper node.
162164
*Args:*\n
163165
_zk_ (KazooClient) - ZooKeeper client;\n
164166
_node_path_ (str) - path of the node;\n
165167
_data_ (str) - initial bytes value of node;\n
168+
_retries_ (int, optional) - maximum number of attempts (default: 3);\n
169+
_delay_ (int, optional) - delay between attempts in seconds (default: 5);\n
166170
*Example:*\n
167-
| Create Node | zk | /zookeeper_crud | Creation data |
171+
| Create Node | zk | /zookeeper_crud | Creation data | retries=3 | delay=1 |
168172
"""
169-
if data:
170-
zk.create(node_path, value=data.encode())
171-
else:
172-
zk.create(node_path)
173-
logger.debug('Node "{}" is created.'.format(node_path))
173+
for attempt in range(1, retries + 1):
174+
try:
175+
if data:
176+
zk.create(node_path, value=data.encode())
177+
else:
178+
zk.create(node_path)
179+
logger.debug('Node "{}" is created.'.format(node_path))
180+
return
181+
182+
except ConnectionLoss as e:
183+
msg = (f'Attempt {attempt}/{retries}: cannot create node "{node_path}" '
184+
f'due to ConnectionLoss')
185+
logger.warn(msg)
186+
if attempt == retries:
187+
self.builtin.fail(f'Failed to create node "{node_path}" after {retries} attempts: {e}')
188+
time.sleep(delay)
174189

175190
def create_node_with_children(self, zk, node_path, children_number: int, data):
176191
zk.create(node_path)
@@ -264,17 +279,35 @@ def update_node_value(self, zk, node_path, new_value):
264279
stat = zk.set(node_path, new_value.encode())
265280
logger.debug('Node "{}" is updated: {}'.format(node_path, stat))
266281

267-
def delete_node(self, zk, node_path):
282+
def delete_node(self, zk, node_path, retries=3, delay=5):
268283
"""
269284
Delete the node.
270285
*Args:*\n
271286
_zk_ (KazooClient) - ZooKeeper client;\n
272287
_node_path_ (str) - path of the node;\n
288+
_retries_ (int, optional) - maximum number of attempts (default: 3);\n
289+
_delay_ (int, optional) - delay between attempts in seconds (default: 5);\n
290+
273291
*Example:*\n
274-
| Delete Node | zk | /zookeeper_crud/tests |
292+
| Delete Node | zk | /zookeeper_crud/tests | retries=3 | delay=1 |
275293
"""
276-
zk.delete(node_path, recursive=True)
277-
logger.debug('Node "{}" is deleted.'.format(node_path))
294+
for attempt in range(1, retries + 1):
295+
try:
296+
if zk.exists(node_path):
297+
zk.delete(node_path, recursive=True)
298+
logger.debug(f'Node "{node_path}" is deleted.')
299+
return
300+
else:
301+
logger.debug(f'Node "{node_path}" does not exist.')
302+
return
303+
304+
except ConnectionLoss as e:
305+
msg = (f'Attempt {attempt}/{retries}: cannot delete node "{node_path}" '
306+
f'due to ConnectionLoss')
307+
logger.warn(msg)
308+
if attempt == retries:
309+
self.builtin.fail(f'Failed to delete node "{node_path}" after {retries} attempts: {e}')
310+
time.sleep(delay)
278311

279312
def find_minimum(self, first, second):
280313
"""

integration-tests/robot/tests/zookeeper/acl_tests/acl_tests.robot

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,14 @@ ${ADMIN_PASSWORD} admin
66
${USERNAME} user123
77
${PASSWORD} pwd123
88
${ACL_VALUE} ACL
9-
${SUITE_RETRY_TIME} 2min
10-
${SUITE_RETRY_INTERVAL} 10s
119

1210
*** Settings ***
1311
Library String
1412
Library Collections
1513
Library RetryFailed
1614
Resource ../../shared/keywords.robot
17-
Suite Setup Wait Until Keyword Succeeds ${SUITE_RETRY_TIME} ${SUITE_RETRY_INTERVAL} Setup
18-
Suite Teardown Wait Until Keyword Succeeds ${SUITE_RETRY_TIME} ${SUITE_RETRY_INTERVAL} Cleanup
15+
Suite Setup Setup
16+
Suite Teardown Cleanup
1917
Test Teardown Run Keyword If Test Failed Sleep 5s
2018

2119
*** Keywords ***

integration-tests/robot/tests/zookeeper/sasl/sasl_tests.robot

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,14 @@ ${ZOOKEEPER_ADMIN_PASSWORD} %{ZOOKEEPER_ADMIN_PASSWORD}
66
${ZOOKEEPER_CLIENT_USERNAME} %{ZOOKEEPER_CLIENT_USERNAME}
77
${ZOOKEEPER_CLIENT_PASSWORD} %{ZOOKEEPER_CLIENT_PASSWORD}
88
${ACL_VALUE} ACL
9-
${SUITE_RETRY_TIME} 2min
10-
${SUITE_RETRY_INTERVAL} 10s
119

1210
*** Settings ***
1311
Library String
1412
Library Collections
1513
Library RetryFailed
1614
Resource ../../shared/keywords.robot
17-
Suite Setup Wait Until Keyword Succeeds ${SUITE_RETRY_TIME} ${SUITE_RETRY_INTERVAL} Setup
18-
Suite Teardown Wait Until Keyword Succeeds ${SUITE_RETRY_TIME} ${SUITE_RETRY_INTERVAL} Cleanup
15+
Suite Setup Setup
16+
Suite Teardown Cleanup
1917
Test Teardown Run Keyword If Test Failed Sleep 5s
2018

2119
*** Keywords ***

monitoring/exec-scripts/health_metric.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,15 @@ def get_server_state(server_address):
112112
logger.info(f"Server {zookeeper_server} state: {state}")
113113
return state
114114
else:
115-
logger.warning(f"Unexpected output format from {zookeeper_server}: {stdout}")
115+
logger.warn(f"Unexpected output format from {zookeeper_server}: {stdout}")
116116
else:
117-
logger.warning(f"Failed to retrieve state information from {zookeeper_server} (return code: {process.returncode})")
117+
logger.warn(f"Failed to retrieve state information from {zookeeper_server} (return code: {process.returncode})")
118118
except subprocess.TimeoutExpired:
119-
logger.warning(f"Timeout while connecting to {zookeeper_server}")
119+
logger.warn(f"Timeout while connecting to {zookeeper_server}")
120120
except Exception as e:
121121
logger.exception(f"Error retrieving state information from {zookeeper_server}: {e}")
122122

123-
logger.warning(f"Returning 'NA' for server {zookeeper_server}")
123+
logger.warn(f"Returning 'NA' for server {zookeeper_server}")
124124
return "NA"
125125

126126
def get_leader_node(zookeeper_hosts):
@@ -143,9 +143,9 @@ def get_leader_node(zookeeper_hosts):
143143
logger.info(f"Unique leader found: {leaders[0]}")
144144
return leaders[0] # Return just the hostname without port
145145
elif len(leaders) > 1:
146-
logger.warning(f"Multiple leaders found: {leaders}. This is unexpected.")
146+
logger.warn(f"Multiple leaders found: {leaders}. This is unexpected.")
147147
else:
148-
logger.warning("No leader found among all hosts")
148+
logger.warn("No leader found among all hosts")
149149

150150
return "NA"
151151

0 commit comments

Comments
 (0)