@@ -46,7 +46,7 @@ def run(
4646 try :
4747 with open (scenario , "r" ) as f :
4848 scenario_config = yaml .full_load (f )
49-
49+
5050 self .init_clients (lib_telemetry .get_lib_kubernetes ())
5151 pods_status = PodsStatus ()
5252 for config in scenario_config ["scenarios" ]:
@@ -71,75 +71,14 @@ def init_clients(self, k8s_client: KrknKubernetes):
7171 self .custom_object_client = k8s_client .custom_object_client
7272 logging .info ("Successfully initialized Kubernetes client for KubeVirt operations" )
7373
74- def get_vmi (self , name : str , namespace : str ) -> Optional [Dict ]:
75- """
76- Get a Virtual Machine Instance by name and namespace.
77-
78- :param name: Name of the VMI to retrieve
79- :param namespace: Namespace of the VMI
80- :return: The VMI object if found, None otherwise
81- """
82- try :
83- vmi = self .custom_object_client .get_namespaced_custom_object (
84- group = "kubevirt.io" ,
85- version = "v1" ,
86- namespace = namespace ,
87- plural = "virtualmachineinstances" ,
88- name = name
89- )
90- return vmi
91- except ApiException as e :
92- if e .status == 404 :
93- logging .warning (f"VMI { name } not found in namespace { namespace } " )
94- return None
95- else :
96- logging .error (f"Error getting VMI { name } : { e } " )
97- raise
98- except Exception as e :
99- logging .error (f"Unexpected error getting VMI { name } : { e } " )
100- raise
101-
102- def get_vmis (self , regex_name : str , namespace : str ) -> Optional [Dict ]:
103- """
104- Get a Virtual Machine Instance by name and namespace.
105-
106- :param name: Name of the VMI to retrieve
107- :param namespace: Namespace of the VMI
108- :return: The VMI object if found, None otherwise
109- """
110- try :
111- namespaces = self .k8s_client .list_namespaces_by_regex (namespace )
112- for namespace in namespaces :
113- vmis = self .custom_object_client .list_namespaced_custom_object (
114- group = "kubevirt.io" ,
115- version = "v1" ,
116- namespace = namespace ,
117- plural = "virtualmachineinstances" ,
118- )
119-
120- for vmi in vmis .get ("items" ):
121- vmi_name = vmi .get ("metadata" ,{}).get ("name" )
122- match = re .match (regex_name , vmi_name )
123- if match :
124- self .vmis_list .append (vmi )
125- except ApiException as e :
126- if e .status == 404 :
127- logging .warning (f"VMI { regex_name } not found in namespace { namespace } " )
128- return []
129- else :
130- logging .error (f"Error getting VMI { regex_name } : { e } " )
131- raise
132- except Exception as e :
133- logging .error (f"Unexpected error getting VMI { regex_name } : { e } " )
134- raise
13574
136- def execute_scenario (self , config : Dict [str , Any ], scenario_telemetry : ScenarioTelemetry ) -> int :
75+ def execute_scenario (self , config : Dict [str , Any ], scenario_telemetry : ScenarioTelemetry ) -> PodsStatus :
13776 """
13877 Execute a KubeVirt VM outage scenario based on the provided configuration.
139-
78+
14079 :param config: The scenario configuration
14180 :param scenario_telemetry: The telemetry object for recording metrics
142- :return: 0 for success, 1 for failure
81+ :return: PodsStatus object containing recovered and unrecovered pods
14382 """
14483 self .pods_status = PodsStatus ()
14584 try :
@@ -149,12 +88,12 @@ def execute_scenario(self, config: Dict[str, Any], scenario_telemetry: ScenarioT
14988 timeout = params .get ("timeout" , 60 )
15089 kill_count = params .get ("kill_count" , 1 )
15190 disable_auto_restart = params .get ("disable_auto_restart" , False )
152-
91+
15392 if not vm_name :
15493 logging .error ("vm_name parameter is required" )
155- return 1
94+ return self . pods_status
15695 self .pods_status = PodsStatus ()
157- self .get_vmis (vm_name ,namespace )
96+ self .vmis_list = self . k8s_client . get_vmis (vm_name ,namespace )
15897 for _ in range (kill_count ):
15998
16099 rand_int = random .randint (0 , len (self .vmis_list ) - 1 )
@@ -163,17 +102,22 @@ def execute_scenario(self, config: Dict[str, Any], scenario_telemetry: ScenarioT
163102 logging .info (f"Starting KubeVirt VM outage scenario for VM: { vm_name } in namespace: { namespace } " )
164103 vmi_name = vmi .get ("metadata" ).get ("name" )
165104 vmi_namespace = vmi .get ("metadata" ).get ("namespace" )
166- if not self .validate_environment (vmi_name , vmi_namespace ):
167- return 1
168-
169- vmi = self .get_vmi (vmi_name , vmi_namespace )
105+
106+ # Create affected_pod early so we can track failures
170107 self .affected_pod = AffectedPod (
171108 pod_name = vmi_name ,
172109 namespace = vmi_namespace ,
173110 )
111+
112+ if not self .validate_environment (vmi_name , vmi_namespace ):
113+ self .pods_status .unrecovered .append (self .affected_pod )
114+ continue
115+
116+ vmi = self .k8s_client .get_vmi (vmi_name , vmi_namespace )
174117 if not vmi :
175118 logging .error (f"VMI { vm_name } not found in namespace { namespace } " )
176- return 1
119+ self .pods_status .unrecovered .append (self .affected_pod )
120+ continue
177121
178122 self .original_vmi = vmi
179123 logging .info (f"Captured initial state of VMI: { vm_name } " )
@@ -212,15 +156,13 @@ def validate_environment(self, vm_name: str, namespace: str) -> bool:
212156 """
213157 try :
214158 # Check if KubeVirt CRDs exist
215- crd_list = self .custom_object_client .list_namespaced_custom_object ("kubevirt.io" ,"v1" ,namespace ,"virtualmachines" )
216- kubevirt_crds = [crd for crd in crd_list .items () ]
217-
159+ kubevirt_crds = self .k8s_client .get_vms (vm_name , namespace )
218160 if not kubevirt_crds :
219161 logging .error ("KubeVirt CRDs not found. Ensure KubeVirt/CNV is installed in the cluster" )
220162 return False
221163
222164 # Check if VMI exists
223- vmi = self .get_vmi (vm_name , namespace )
165+ vmi = self .k8s_client . get_vmi (vm_name , namespace )
224166 if not vmi :
225167 logging .error (f"VMI { vm_name } not found in namespace { namespace } " )
226168 return False
@@ -243,28 +185,15 @@ def patch_vm_spec(self, vm_name: str, namespace: str, running: bool) -> bool:
243185 """
244186 try :
245187 # Get the VM object first to get its current spec
246- vm = self .custom_object_client .get_namespaced_custom_object (
247- group = "kubevirt.io" ,
248- version = "v1" ,
249- namespace = namespace ,
250- plural = "virtualmachines" ,
251- name = vm_name
252- )
188+ vm = self .k8s_client .get_vm (vm_name , namespace )
253189
254190 # Update the running state
255191 if 'spec' not in vm :
256192 vm ['spec' ] = {}
257193 vm ['spec' ]['running' ] = running
258194
259195 # Apply the patch
260- self .custom_object_client .patch_namespaced_custom_object (
261- group = "kubevirt.io" ,
262- version = "v1" ,
263- namespace = namespace ,
264- plural = "virtualmachines" ,
265- name = vm_name ,
266- body = vm
267- )
196+ self .k8s_client .patch_vm (vm_name ,namespace ,vm )
268197 return True
269198
270199 except ApiException as e :
@@ -293,26 +222,12 @@ def delete_vmi(self, vm_name: str, namespace: str, disable_auto_restart: bool =
293222 " - proceeding with deletion but VM may auto-restart" )
294223 start_creation_time = self .original_vmi .get ('metadata' , {}).get ('creationTimestamp' )
295224 start_time = time .time ()
296- try :
297- self .custom_object_client .delete_namespaced_custom_object (
298- group = "kubevirt.io" ,
299- version = "v1" ,
300- namespace = namespace ,
301- plural = "virtualmachineinstances" ,
302- name = vm_name
303- )
304- except ApiException as e :
305- if e .status == 404 :
306- logging .warning (f"VMI { vm_name } not found during deletion" )
307- return 1
308- else :
309- logging .error (f"API error during VMI deletion: { e } " )
310- return 1
225+ self .k8s_client .delete_vmi (vm_name , namespace )
311226
312227 # Wait for the VMI to be deleted
313228
314229 while time .time () - start_time < timeout :
315- deleted_vmi = self .get_vmi (vm_name , namespace )
230+ deleted_vmi = self .k8s_client . get_vmi (vm_name , namespace )
316231 if deleted_vmi :
317232 if start_creation_time != deleted_vmi .get ('metadata' , {}).get ('creationTimestamp' ):
318233 logging .info (f"VMI { vm_name } successfully recreated" )
@@ -337,7 +252,7 @@ def wait_for_running(self, vm_name: str, namespace: str, timeout: int = 120) ->
337252 while time .time () - start_time < timeout :
338253
339254 # Check current state once since we've already waited for the duration
340- vmi = self .get_vmi (vm_name , namespace )
255+ vmi = self .k8s_client . get_vmi (vm_name , namespace )
341256
342257 if vmi :
343258 if vmi .get ('status' , {}).get ('phase' ) == "Running" :
@@ -378,13 +293,7 @@ def recover(self, vm_name: str, namespace: str, disable_auto_restart: bool = Fal
378293 del metadata [field ]
379294
380295 # Create the VMI
381- self .custom_object_client .create_namespaced_custom_object (
382- group = "kubevirt.io" ,
383- version = "v1" ,
384- namespace = namespace ,
385- plural = "virtualmachineinstances" ,
386- body = vmi_dict
387- )
296+ self .k8s_client .create_vmi (vm_name , namespace , vmi_dict )
388297 logging .info (f"Successfully recreated VMI { vm_name } " )
389298
390299 # Wait for VMI to start running
0 commit comments