@@ -1161,9 +1161,7 @@ def _create_pod(
11611161 # Delete the existing Pod first, then create a new one.
11621162 with watch (
11631163 core_api .list_namespaced_pod ,
1164- resource_version = (
1165- None if envs .GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ else "0"
1166- ),
1164+ resource_version = _get_quorum_read_resource_version (),
11671165 namespace = workload .namespace ,
11681166 ) as es :
11691167 core_api .delete_namespaced_pod (
@@ -1537,9 +1535,7 @@ def _get(
15371535
15381536 list_options = {
15391537 "label_selector" : f"{ _LABEL_WORKLOAD } ={ name } " ,
1540- "resource_version" : (
1541- None if envs .GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ else "0"
1542- ),
1538+ "resource_version" : _get_quorum_read_resource_version (),
15431539 }
15441540
15451541 core_api = kubernetes .client .CoreV1Api (self ._client )
@@ -1604,40 +1600,94 @@ def _delete(
16041600 if not workload :
16051601 return None
16061602
1603+ resource_version = _get_quorum_read_resource_version ()
1604+ label_selector = f"{ _LABEL_WORKLOAD } ={ name } "
1605+ propagation_policy = envs .GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY
1606+
16071607 core_api = kubernetes .client .CoreV1Api (self ._client )
16081608
16091609 # Remove all Pods with the workload label.
16101610 try :
16111611 core_api .delete_collection_namespaced_pod (
16121612 namespace = namespace ,
1613- label_selector = f" { _LABEL_WORKLOAD } = { name } " ,
1614- propagation_policy = envs . GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY ,
1613+ label_selector = label_selector ,
1614+ propagation_policy = propagation_policy ,
16151615 )
16161616 except kubernetes .client .exceptions .ApiException as e :
1617- msg = f"Failed to delete pod of workload { name } { _detail_api_call_error (e )} "
1618- raise OperationError (msg ) from e
1617+ if e .status != 405 :
1618+ msg = f"Failed to delete pod of workload { name } { _detail_api_call_error (e )} "
1619+ raise OperationError (msg ) from e
1620+ try :
1621+ pods = core_api .list_namespaced_pod (
1622+ namespace = namespace ,
1623+ label_selector = label_selector ,
1624+ resource_version = resource_version ,
1625+ )
1626+ for pod in pods .items or []:
1627+ core_api .delete_namespaced_pod (
1628+ name = pod .metadata .name ,
1629+ namespace = namespace ,
1630+ propagation_policy = propagation_policy ,
1631+ )
1632+ except kubernetes .client .exceptions .ApiException as e2 :
1633+ msg = f"Failed to delete pod of workload { name } { _detail_api_call_error (e2 )} "
1634+ raise OperationError (msg ) from e2
16191635
16201636 # Remove all Services with the workload label.
16211637 try :
16221638 core_api .delete_collection_namespaced_service (
16231639 namespace = namespace ,
1624- label_selector = f" { _LABEL_WORKLOAD } = { name } " ,
1625- propagation_policy = envs . GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY ,
1640+ label_selector = label_selector ,
1641+ propagation_policy = propagation_policy ,
16261642 )
16271643 except kubernetes .client .exceptions .ApiException as e :
1628- msg = f"Failed to delete service of workload { name } { _detail_api_call_error (e )} "
1629- raise OperationError (msg ) from e
1644+ # If collection deletion is rejected with 405 (Method Not Allowed),
1645+ # list the Services with the workload label and delete them one by one.
1646+ if e .status != 405 :
1647+ msg = f"Failed to delete service of workload { name } { _detail_api_call_error (e )} "
1648+ raise OperationError (msg ) from e
1649+ try :
1650+ services = core_api .list_namespaced_service (
1651+ namespace = namespace ,
1652+ label_selector = label_selector ,
1653+ resource_version = resource_version ,
1654+ )
1655+ for svc in services .items or []:
1656+ core_api .delete_namespaced_service (
1657+ name = svc .metadata .name ,
1658+ namespace = namespace ,
1659+ propagation_policy = propagation_policy ,
1660+ )
1661+ except kubernetes .client .exceptions .ApiException as e2 :
1662+ msg = f"Failed to delete service of workload { name } { _detail_api_call_error (e2 )} "
1663+ raise OperationError (msg ) from e2
16301664
16311665 # Remove all ConfigMaps with the workload label.
16321666 try :
16331667 core_api .delete_collection_namespaced_config_map (
16341668 namespace = namespace ,
1635- label_selector = f" { _LABEL_WORKLOAD } = { name } " ,
1636- propagation_policy = envs . GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY ,
1669+ label_selector = label_selector ,
1670+ propagation_policy = propagation_policy ,
16371671 )
16381672 except kubernetes .client .exceptions .ApiException as e :
1639- msg = f"Failed to delete configmap of workload { name } { _detail_api_call_error (e )} "
1640- raise OperationError (msg ) from e
1673+ if e .status != 405 :
1674+ msg = f"Failed to delete configmap of workload { name } { _detail_api_call_error (e )} "
1675+ raise OperationError (msg ) from e
1676+ try :
1677+ configmaps = core_api .list_namespaced_config_map (
1678+ namespace = namespace ,
1679+ label_selector = label_selector ,
1680+ resource_version = resource_version ,
1681+ )
1682+ for cm in configmaps .items or []:
1683+ core_api .delete_namespaced_config_map (
1684+ name = cm .metadata .name ,
1685+ namespace = namespace ,
1686+ propagation_policy = propagation_policy ,
1687+ )
1688+ except kubernetes .client .exceptions .ApiException as e2 :
1689+ msg = f"Failed to delete configmap of workload { name } { _detail_api_call_error (e2 )} "
1690+ raise OperationError (msg ) from e2
16411691
16421692 return workload
16431693
@@ -1679,9 +1729,7 @@ def _list(
16791729 _LABEL_WORKLOAD ,
16801730 ],
16811731 ),
1682- "resource_version" : (
1683- None if envs .GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ else "0"
1684- ),
1732+ "resource_version" : _get_quorum_read_resource_version (),
16851733 }
16861734
16871735 core_api = kubernetes .client .CoreV1Api (self ._client )
@@ -2136,3 +2184,11 @@ def _detail_api_call_error(err: kubernetes.client.exceptions.ApiException) -> st
21362184 msg += f": status code { err .status } "
21372185
21382186 return msg
2187+
2188+
def _get_quorum_read_resource_version() -> str | None:
    """
    Choose the `resource_version` argument used for Kubernetes read requests.

    Returns:
        None when `envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ` is truthy,
        which leaves the resource version unset; otherwise the string "0".

    """
    # Per the Kubernetes API conventions, an unset resource version requests
    # the most recent data (a quorum read), whereas "0" accepts data at any
    # version and may be answered from the API server's cache.
    if envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ:
        return None
    return "0"
0 commit comments