Skip to content

Commit f655ec1

Browse files
authored
fix: accumulate failed scenarios across all scenario types instead of overwriting (#1178)
Signed-off-by: Arpit Raj <vrxn.arp1traj@gmail.com>
1 parent dfc350a commit f655ec1

File tree

2 files changed

+155
-1
lines changed

run_kraken.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,11 +369,12 @@ def main(options, command: Optional[str]) -> int:
369369
)
370370
sys.exit(-1)
371371

372-
failed_post_scenarios, scenario_telemetries = (
372+
failed_scenarios_current, scenario_telemetries = (
373373
scenario_plugin.run_scenarios(
374374
run_uuid, scenarios_list, config, telemetry_ocp
375375
)
376376
)
377+
failed_post_scenarios.extend(failed_scenarios_current)
377378
chaos_telemetry.scenarios.extend(scenario_telemetries)
378379

379380
post_critical_alerts = 0
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Test to verify that failed scenarios are accumulated across all scenario types
5+
and not silently overwritten when iterating through multiple scenario types.
6+
7+
Regression test for: https://github.com/krkn-chaos/krkn/issues/1777
8+
9+
Usage:
10+
python -m coverage run -a -m unittest tests/test_failed_scenarios_accumulation.py -v
11+
"""
12+
13+
import unittest
14+
from unittest.mock import MagicMock
15+
from krkn_lib.models.telemetry import ScenarioTelemetry
16+
17+
18+
class TestFailedScenariosAccumulation(unittest.TestCase):
19+
20+
def _simulate_scenario_loop(self, chaos_scenarios):
21+
"""
22+
Simulates the core scenario loop from run_kraken.py (lines 345-380)
23+
to test that failed_post_scenarios accumulates correctly across
24+
multiple scenario types.
25+
"""
26+
failed_post_scenarios = []
27+
28+
for scenario in chaos_scenarios:
29+
scenario_type = list(scenario.keys())[0]
30+
scenarios_list = scenario[scenario_type]
31+
if scenarios_list:
32+
# Simulate what create_plugin + run_scenarios returns
33+
mock_plugin = MagicMock()
34+
mock_plugin.run_scenarios.return_value = (
35+
scenario["_mock_failures"],
36+
scenario["_mock_telemetries"],
37+
)
38+
39+
failed_scenarios_current, scenario_telemetries = (
40+
mock_plugin.run_scenarios(
41+
"test-uuid", scenarios_list, {}, None
42+
)
43+
)
44+
# This is the fix — .extend() instead of = (overwrite)
45+
failed_post_scenarios.extend(failed_scenarios_current)
46+
47+
return failed_post_scenarios
48+
49+
def test_failures_from_earlier_scenarios_are_preserved(self):
50+
"""
51+
When an earlier scenario type fails but a later one succeeds,
52+
the earlier failures must still be present in failed_post_scenarios.
53+
"""
54+
chaos_scenarios = [
55+
{
56+
"pod_disruption_scenarios": ["scenarios/etcd.yml"],
57+
"_mock_failures": ["scenarios/etcd.yml"],
58+
"_mock_telemetries": [ScenarioTelemetry()],
59+
},
60+
{
61+
"hog_scenarios": ["scenarios/cpu-hog.yml"],
62+
"_mock_failures": [],
63+
"_mock_telemetries": [ScenarioTelemetry()],
64+
},
65+
]
66+
67+
failed = self._simulate_scenario_loop(chaos_scenarios)
68+
69+
self.assertEqual(len(failed), 1)
70+
self.assertIn("scenarios/etcd.yml", failed)
71+
72+
def test_failures_from_multiple_scenarios_are_accumulated(self):
73+
"""
74+
When multiple scenario types fail, all failures must be collected.
75+
"""
76+
chaos_scenarios = [
77+
{
78+
"pod_disruption_scenarios": ["scenarios/etcd.yml"],
79+
"_mock_failures": ["scenarios/etcd.yml"],
80+
"_mock_telemetries": [ScenarioTelemetry()],
81+
},
82+
{
83+
"hog_scenarios": ["scenarios/cpu-hog.yml"],
84+
"_mock_failures": ["scenarios/cpu-hog.yml"],
85+
"_mock_telemetries": [ScenarioTelemetry()],
86+
},
87+
{
88+
"node_scenarios": ["scenarios/node.yml"],
89+
"_mock_failures": [],
90+
"_mock_telemetries": [ScenarioTelemetry()],
91+
},
92+
]
93+
94+
failed = self._simulate_scenario_loop(chaos_scenarios)
95+
96+
self.assertEqual(len(failed), 2)
97+
self.assertIn("scenarios/etcd.yml", failed)
98+
self.assertIn("scenarios/cpu-hog.yml", failed)
99+
100+
def test_no_failures_returns_empty_list(self):
101+
"""
102+
When all scenarios pass, failed_post_scenarios should be empty.
103+
"""
104+
chaos_scenarios = [
105+
{
106+
"pod_disruption_scenarios": ["scenarios/etcd.yml"],
107+
"_mock_failures": [],
108+
"_mock_telemetries": [ScenarioTelemetry()],
109+
},
110+
{
111+
"hog_scenarios": ["scenarios/cpu-hog.yml"],
112+
"_mock_failures": [],
113+
"_mock_telemetries": [ScenarioTelemetry()],
114+
},
115+
]
116+
117+
failed = self._simulate_scenario_loop(chaos_scenarios)
118+
119+
self.assertEqual(len(failed), 0)
120+
121+
def test_last_scenario_failure_is_not_only_one_kept(self):
122+
"""
123+
Regression: before the fix, only the last scenario type's failures
124+
survived. This test ensures that's no longer the case.
125+
"""
126+
chaos_scenarios = [
127+
{
128+
"pod_disruption_scenarios": ["scenarios/etcd.yml"],
129+
"_mock_failures": ["scenarios/etcd.yml"],
130+
"_mock_telemetries": [ScenarioTelemetry()],
131+
},
132+
{
133+
"hog_scenarios": ["scenarios/cpu-hog.yml"],
134+
"_mock_failures": [],
135+
"_mock_telemetries": [ScenarioTelemetry()],
136+
},
137+
{
138+
"node_scenarios": ["scenarios/node.yml"],
139+
"_mock_failures": ["scenarios/node.yml"],
140+
"_mock_telemetries": [ScenarioTelemetry()],
141+
},
142+
]
143+
144+
failed = self._simulate_scenario_loop(chaos_scenarios)
145+
146+
# Before the fix, only ["scenarios/node.yml"] would survive
147+
self.assertEqual(len(failed), 2)
148+
self.assertIn("scenarios/etcd.yml", failed)
149+
self.assertIn("scenarios/node.yml", failed)
150+
151+
152+
# Allow running this file directly, outside of a test runner.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)