Skip to content

Commit e7fa6bd

Browse files
authored
checking chunk error in ci tests (#937)
Signed-off-by: Paige Patton <prubenda@redhat.com>
1 parent c3f6b1a commit e7fa6bd

File tree

8 files changed

+57
-52
lines changed

8 files changed

+57
-52
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ jobs:
102102
echo "test_pod_network_filter" >> ./CI/tests/functional_tests
103103
echo "test_pod_server" >> ./CI/tests/functional_tests
104104
echo "test_node" >> ./CI/tests/functional_tests
105-
echo "test_pvc" >> ./CI/tests/functional_tests
105+
# echo "test_pvc" >> ./CI/tests/functional_tests
106106
107107
# Push on main only steps + all other functional to collect coverage
108108
# for the badge
@@ -140,7 +140,7 @@ jobs:
140140
echo "test_pod_network_filter" >> ./CI/tests/functional_tests
141141
echo "test_pod_server" >> ./CI/tests/functional_tests
142142
echo "test_node" >> ./CI/tests/functional_tests
143-
echo "test_pvc" >> ./CI/tests/functional_tests
143+
# echo "test_pvc" >> ./CI/tests/functional_tests
144144
# Final common steps
145145
- name: Run Functional tests
146146
env:

CI/tests/test_container.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ function functional_test_container_crash {
1616
export post_config=""
1717
envsubst < CI/config/common_test_config.yaml > CI/config/container_config.yaml
1818

19-
python3 -m coverage run -a run_kraken.py -c CI/config/container_config.yaml
19+
python3 -m coverage run -a run_kraken.py -c CI/config/container_config.yaml -d True
2020
echo "Container scenario test: Success"
21+
22+
kubectl get pods -n kube-system -l component=etcd
2123
}
2224

2325
functional_test_container_crash

CI/tests/test_customapp_pod.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ function functional_test_customapp_pod_node_selector {
1111
export post_config=""
1212
envsubst < CI/config/common_test_config.yaml > CI/config/customapp_pod_config.yaml
1313

14-
python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml
14+
python3 -m coverage run -a run_kraken.py -c CI/config/customapp_pod_config.yaml -d True
1515
echo "Pod disruption with node_label_selector test: Success"
1616
}
1717

CI/tests/test_pod.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ function functional_test_pod_crash {
1010
export scenario_file="scenarios/kind/pod_etcd.yml"
1111
export post_config=""
1212
envsubst < CI/config/common_test_config.yaml > CI/config/pod_config.yaml
13-
cat CI/config/pod_config.yaml
13+
1414
python3 -m coverage run -a run_kraken.py -c CI/config/pod_config.yaml
1515
echo "Pod disruption scenario test: Success"
16+
date
17+
kubectl get pods -n kube-system -l component=etcd -o yaml
1618
}
1719

1820
functional_test_pod_crash

krkn/scenario_plugins/container/container_scenario_plugin.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import random
33
import time
4+
import traceback
45
from asyncio import Future
56
import yaml
67
from krkn_lib.k8s import KrknKubernetes
@@ -41,6 +42,7 @@ def run(
4142
logging.info("ContainerScenarioPlugin failed with unrecovered containers")
4243
return 1
4344
except (RuntimeError, Exception) as e:
45+
logging.error("Stack trace:\n%s", traceback.format_exc())
4446
logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e)
4547
return 1
4648
else:
@@ -50,7 +52,6 @@ def get_scenario_types(self) -> list[str]:
5052
return ["container_scenarios"]
5153

5254
def start_monitoring(self, kill_scenario: dict, lib_telemetry: KrknTelemetryOpenshift) -> Future:
53-
5455
namespace_pattern = f"^{kill_scenario['namespace']}$"
5556
label_selector = kill_scenario["label_selector"]
5657
recovery_time = kill_scenario["expected_recovery_time"]
@@ -232,4 +233,5 @@ def check_failed_containers(
232233
timer += 5
233234
logging.info("Waiting 5 seconds for containers to become ready")
234235
time.sleep(5)
236+
235237
return killed_container_list

krkn/scenario_plugins/pod_disruption/pod_disruption_scenario_plugin.py

Lines changed: 43 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import random
33
import time
44
from asyncio import Future
5-
5+
import traceback
66
import yaml
77
from krkn_lib.k8s import KrknKubernetes
88
from krkn_lib.k8s.pod_monitor import select_and_monitor_by_namespace_pattern_and_label, \
@@ -74,6 +74,7 @@ def run(
7474
return 1
7575

7676
except (RuntimeError, Exception) as e:
77+
logging.error("Stack trace:\n%s", traceback.format_exc())
7778
logging.error("PodDisruptionScenariosPlugin exiting due to Exception %s" % e)
7879
return 1
7980
else:
@@ -150,7 +151,7 @@ def _select_pods_with_field_selector(self, name_pattern, label_selector, namespa
150151
field_selector=combined_field_selector
151152
)
152153

153-
def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None, quiet: bool = False):
154+
def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKubernetes, field_selector: str = None, node_label_selector: str = None, node_names: list = None):
154155
if label_selector and name_pattern:
155156
logging.error('Only, one of name pattern or label pattern can be specified')
156157
return []
@@ -161,8 +162,7 @@ def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKuberne
161162

162163
# If specific node names are provided, make multiple calls with field selector
163164
if node_names:
164-
if not quiet:
165-
logging.info(f"Targeting pods on {len(node_names)} specific nodes")
165+
logging.debug(f"Targeting pods on {len(node_names)} specific nodes")
166166
all_pods = []
167167
for node_name in node_names:
168168
pods = self._select_pods_with_field_selector(
@@ -172,20 +172,18 @@ def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKuberne
172172
if pods:
173173
all_pods.extend(pods)
174174

175-
if not quiet:
176-
logging.info(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
175+
logging.debug(f"Found {len(all_pods)} target pods across {len(node_names)} nodes")
177176
return all_pods
178177

179178
# Node label selector approach - use field selectors
180179
if node_label_selector:
181180
# Get nodes matching the label selector first
182181
nodes_with_label = kubecli.list_nodes(label_selector=node_label_selector)
183182
if not nodes_with_label:
184-
logging.info(f"No nodes found with label selector: {node_label_selector}")
183+
logging.debug(f"No nodes found with label selector: {node_label_selector}")
185184
return []
186185

187-
if not quiet:
188-
logging.info(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
186+
logging.debug(f"Targeting pods on {len(nodes_with_label)} nodes with label: {node_label_selector}")
189187
# Use field selector for each node
190188
all_pods = []
191189
for node_name in nodes_with_label:
@@ -196,8 +194,7 @@ def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKuberne
196194
if pods:
197195
all_pods.extend(pods)
198196

199-
if not quiet:
200-
logging.info(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
197+
logging.debug(f"Found {len(all_pods)} target pods across {len(nodes_with_label)} nodes")
201198
return all_pods
202199

203200
# Standard pod selection (no node targeting)
@@ -207,37 +204,40 @@ def get_pods(self, name_pattern, label_selector, namespace, kubecli: KrknKuberne
207204

208205
def killing_pods(self, config: InputParams, kubecli: KrknKubernetes):
209206
# region Select target pods
210-
211-
namespace = config.namespace_pattern
212-
if not namespace:
213-
logging.error('Namespace pattern must be specified')
214-
return 2
207+
try:
208+
namespace = config.namespace_pattern
209+
if not namespace:
210+
logging.error('Namespace pattern must be specified')
215211

216-
pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
217-
exclude_pods = set()
218-
if config.exclude_label:
219-
_exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
220-
for pod in _exclude_pods:
221-
exclude_pods.add(pod[0])
212+
pods = self.get_pods(config.name_pattern,config.label_selector,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
213+
exclude_pods = set()
214+
if config.exclude_label:
215+
_exclude_pods = self.get_pods("",config.exclude_label,config.namespace_pattern, kubecli, field_selector="status.phase=Running", node_label_selector=config.node_label_selector, node_names=config.node_names)
216+
for pod in _exclude_pods:
217+
exclude_pods.add(pod[0])
222218

223-
pods_count = len(pods)
224-
if len(pods) < config.kill:
225-
logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
226-
config.kill, len(pods)))
227-
return 2
228-
229-
random.shuffle(pods)
230-
for i in range(config.kill):
231-
pod = pods[i]
232-
logging.info(pod)
233-
if pod[0] in exclude_pods:
234-
logging.info(f"Excluding {pod[0]} from chaos")
235-
else:
236-
logging.info(f'Deleting pod {pod[0]}')
237-
kubecli.delete_pod(pod[0], pod[1])
238-
239-
ret = self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
240-
return ret
219+
220+
pods_count = len(pods)
221+
if len(pods) < config.kill:
222+
logging.error("Not enough pods match the criteria, expected {} but found only {} pods".format(
223+
config.kill, len(pods)))
224+
return 1
225+
226+
random.shuffle(pods)
227+
for i in range(config.kill):
228+
pod = pods[i]
229+
logging.info(pod)
230+
if pod[0] in exclude_pods:
231+
logging.info(f"Excluding {pod[0]} from chaos")
232+
else:
233+
logging.info(f'Deleting pod {pod[0]}')
234+
kubecli.delete_pod(pod[0], pod[1])
235+
236+
return_val = self.wait_for_pods(config.label_selector,config.name_pattern,config.namespace_pattern, pods_count, config.duration, config.timeout, kubecli, config.node_label_selector, config.node_names)
237+
except Exception as e:
238+
raise(e)
239+
240+
return return_val
241241

242242
def wait_for_pods(
243243
self, label_selector, pod_name, namespace, pod_count, duration, wait_timeout, kubecli: KrknKubernetes, node_label_selector, node_names
@@ -246,10 +246,10 @@ def wait_for_pods(
246246
start_time = datetime.now()
247247

248248
while not timeout:
249-
pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names, quiet=True)
249+
pods = self.get_pods(name_pattern=pod_name, label_selector=label_selector,namespace=namespace, field_selector="status.phase=Running", kubecli=kubecli, node_label_selector=node_label_selector, node_names=node_names)
250250
if pod_count == len(pods):
251251
return 0
252-
252+
253253
time.sleep(duration)
254254

255255
now_time = datetime.now()
@@ -258,6 +258,5 @@ def wait_for_pods(
258258
if time_diff.seconds > wait_timeout:
259259
logging.error("timeout while waiting for pods to come up")
260260
return 1
261-
262-
# should never get to this return
261+
263262
return 0

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ google-cloud-compute==1.22.0
1616
ibm_cloud_sdk_core==3.18.0
1717
ibm_vpc==0.20.0
1818
jinja2==3.1.6
19-
krkn-lib==5.1.12
19+
krkn-lib==5.1.13
2020
lxml==5.1.0
2121
kubernetes==34.1.0
2222
numpy==1.26.4

scenarios/kind/pvc_scenario.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ pvc_scenario:
22
pvc_name: kraken-test-pvc # Name of the target PVC
33
pod_name: kraken-test-pod # Name of the pod where the PVC is mounted, it will be ignored if the pvc_name is defined
44
namespace: kraken # Namespace where the PVC is
5-
fill_percentage: 38 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
5+
fill_percentage: 98 # Target percentage to fill up the cluster, value must be higher than current percentage, valid values are between 0 and 99
66
duration: 10 # Duration in seconds for the fault
77
block_size: 102400 # used only by dd if fallocate not present in the container

0 commit comments

Comments (0)