Skip to content

Commit b96d29c

Browse files
Fix automation scenario maint notification (#3788)
* Fix migration issues. * Handle the case where all nodes already hold shards. * Address review issues and add linter fixes. --------- Co-authored-by: petyaslavova <[email protected]>
1 parent 028245c commit b96d29c

File tree

3 files changed

+44
-38
lines changed

3 files changed

+44
-38
lines changed

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ filterwarnings = [
8080
# Ignore a coverage warning when COVERAGE_CORE=sysmon for Pythons < 3.12.
8181
"ignore:sys.monitoring isn't available:coverage.exceptions.CoverageWarning",
8282
]
83+
log_cli_level = "INFO"
84+
log_cli_date_format = "%H:%M:%S:%f"
85+
log_cli = false
86+
log_cli_format = "%(asctime)s %(levelname)s %(threadName)s: %(message)s"
87+
log_level = "INFO"
88+
capture = "yes"
8389

8490
[tool.ruff]
8591
target-version = "py39"

tests/test_scenario/maint_notifications_helpers.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ def find_target_node_and_empty_node(
116116

117117
# Get all node IDs from CLUSTER NODES section
118118
all_nodes = set()
119-
nodes_with_shards = set()
120-
master_nodes = set()
119+
nodes_with_any_shards = set() # Nodes with shards from ANY database
120+
nodes_with_target_db_shards = set() # Nodes with shards from target database
121+
master_nodes = set() # Master nodes for target database only
121122

122123
for line in lines:
123124
line = line.strip()
@@ -146,31 +147,45 @@ def find_target_node_and_empty_node(
146147
# Parse shard line: db:1 m-standard redis:1 node:2 master 0-8191 1.4MB OK
147148
parts = line.split()
148149
if len(parts) >= 5:
150+
db_id = parts[0] # db:1, db:2, etc.
149151
node_id = parts[3] # node:2
150152
shard_role = parts[4] # master/slave - this is what matters
151153

152-
nodes_with_shards.add(node_id)
153-
if shard_role == "master":
154-
master_nodes.add(node_id)
154+
# Track ALL nodes with shards (for finding truly empty nodes)
155+
nodes_with_any_shards.add(node_id)
156+
157+
# Only track master nodes for the specific database we're testing
158+
bdb_id = endpoint_config.get("bdb_id")
159+
if db_id == f"db:{bdb_id}":
160+
nodes_with_target_db_shards.add(node_id)
161+
if shard_role == "master":
162+
master_nodes.add(node_id)
155163
elif line.startswith("ENDPOINTS:") or not line:
156164
shards_section_started = False
157165

158-
# Find empty node (node with no shards)
159-
empty_nodes = all_nodes - nodes_with_shards
166+
# Find nodes with no shards from the target database
167+
nodes_with_no_shards_target_bdb = all_nodes - nodes_with_target_db_shards
160168

161169
logging.debug(f"All nodes: {all_nodes}")
162-
logging.debug(f"Nodes with shards: {nodes_with_shards}")
163-
logging.debug(f"Master nodes: {master_nodes}")
164-
logging.debug(f"Empty nodes: {empty_nodes}")
170+
logging.debug(f"Nodes with shards from any database: {nodes_with_any_shards}")
171+
logging.debug(
172+
f"Nodes with target database shards: {nodes_with_target_db_shards}"
173+
)
174+
logging.debug(f"Master nodes (target database only): {master_nodes}")
175+
logging.debug(
176+
f"Nodes with no shards from target database: {nodes_with_no_shards_target_bdb}"
177+
)
165178

166-
if not empty_nodes:
167-
raise ValueError("No empty nodes (nodes without shards) found")
179+
if not nodes_with_no_shards_target_bdb:
180+
raise ValueError("All nodes have shards from target database")
168181

169182
if not master_nodes:
170-
raise ValueError("No nodes with master shards found")
183+
raise ValueError("No nodes with master shards from target database found")
171184

172185
# Return the first available empty node and master node (numeric part only)
173-
empty_node = next(iter(empty_nodes)).split(":")[1] # node:1 -> 1
186+
empty_node = next(iter(nodes_with_no_shards_target_bdb)).split(":")[
187+
1
188+
] # node:1 -> 1
174189
target_node = next(iter(master_nodes)).split(":")[1] # node:2 -> 2
175190

176191
return target_node, empty_node

tests/test_scenario/test_maint_notifications.py

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
datefmt="%Y-%m-%d %H:%M:%S:%f",
3737
)
3838

39-
BIND_TIMEOUT = 30
39+
BIND_TIMEOUT = 60
4040
MIGRATE_TIMEOUT = 60
4141
FAILOVER_TIMEOUT = 15
4242

@@ -109,29 +109,6 @@ def setup_and_cleanup(
109109
except Exception as e:
110110
logging.error(f"Failed to revert failover: {e}")
111111

112-
if self._migration_executed:
113-
try:
114-
if self.target_node and self.empty_node:
115-
self._execute_migration(
116-
fault_injector_client=fault_injector_client,
117-
endpoints_config=endpoints_config,
118-
target_node=self.empty_node,
119-
empty_node=self.target_node,
120-
)
121-
logging.info("Migration cleanup completed")
122-
except Exception as e:
123-
logging.error(f"Failed to revert migration: {e}")
124-
125-
if self._bind_executed:
126-
try:
127-
if self.endpoint_id:
128-
self._execute_bind(
129-
fault_injector_client, endpoints_config, self.endpoint_id
130-
)
131-
logging.info("Bind cleanup completed")
132-
except Exception as e:
133-
logging.error(f"Failed to revert bind endpoint: {e}")
134-
135112
logging.info("Cleanup finished")
136113

137114
def _execute_failover(
@@ -916,7 +893,15 @@ def test_new_connections_receive_migrating(
916893
)
917894

918895
migrate_thread.join()
896+
logging.info("Executing rladmin bind endpoint command for cleanup...")
919897

898+
bind_thread = Thread(
899+
target=self._execute_bind,
900+
name="bind_thread",
901+
args=(fault_injector_client, endpoints_config, self.endpoint_id),
902+
)
903+
bind_thread.start()
904+
bind_thread.join()
920905
client_maint_notifications.connection_pool.release(first_conn)
921906
client_maint_notifications.connection_pool.release(second_connection)
922907

0 commit comments

Comments
 (0)