Skip to content

Commit 30f3e75

Browse files
committed
refactor: k8s processing
Signed-off-by: thxCode <[email protected]>
1 parent a193e16 commit 30f3e75

File tree

2 files changed

+78
-22
lines changed

2 files changed

+78
-22
lines changed

gpustack_runtime/deployer/kuberentes.py

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,9 +1161,7 @@ def _create_pod(
11611161
# Delete the existing Pod first, then create a new one.
11621162
with watch(
11631163
core_api.list_namespaced_pod,
1164-
resource_version=(
1165-
None if envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ else "0"
1166-
),
1164+
resource_version=_get_quorum_read_resource_version(),
11671165
namespace=workload.namespace,
11681166
) as es:
11691167
core_api.delete_namespaced_pod(
@@ -1537,9 +1535,7 @@ def _get(
15371535

15381536
list_options = {
15391537
"label_selector": f"{_LABEL_WORKLOAD}={name}",
1540-
"resource_version": (
1541-
None if envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ else "0"
1542-
),
1538+
"resource_version": _get_quorum_read_resource_version(),
15431539
}
15441540

15451541
core_api = kubernetes.client.CoreV1Api(self._client)
@@ -1604,40 +1600,94 @@ def _delete(
16041600
if not workload:
16051601
return None
16061602

1603+
resource_version = _get_quorum_read_resource_version()
1604+
label_selector = f"{_LABEL_WORKLOAD}={name}"
1605+
propagation_policy = envs.GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY
1606+
16071607
core_api = kubernetes.client.CoreV1Api(self._client)
16081608

16091609
# Remove all Pods with the workload label.
16101610
try:
16111611
core_api.delete_collection_namespaced_pod(
16121612
namespace=namespace,
1613-
label_selector=f"{_LABEL_WORKLOAD}={name}",
1614-
propagation_policy=envs.GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY,
1613+
label_selector=label_selector,
1614+
propagation_policy=propagation_policy,
16151615
)
16161616
except kubernetes.client.exceptions.ApiException as e:
1617-
msg = f"Failed to delete pod of workload {name}{_detail_api_call_error(e)}"
1618-
raise OperationError(msg) from e
1617+
if e.status != 405:
1618+
msg = f"Failed to delete pod of workload {name}{_detail_api_call_error(e)}"
1619+
raise OperationError(msg) from e
1620+
try:
1621+
pods = core_api.list_namespaced_pod(
1622+
namespace=namespace,
1623+
label_selector=label_selector,
1624+
resource_version=resource_version,
1625+
)
1626+
for pod in pods.items or []:
1627+
core_api.delete_namespaced_pod(
1628+
name=pod.metadata.name,
1629+
namespace=namespace,
1630+
propagation_policy=propagation_policy,
1631+
)
1632+
except kubernetes.client.exceptions.ApiException as e2:
1633+
msg = f"Failed to delete pod of workload {name}{_detail_api_call_error(e2)}"
1634+
raise OperationError(msg) from e2
16191635

16201636
# Remove all Services with the workload label.
16211637
try:
16221638
core_api.delete_collection_namespaced_service(
16231639
namespace=namespace,
1624-
label_selector=f"{_LABEL_WORKLOAD}={name}",
1625-
propagation_policy=envs.GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY,
1640+
label_selector=label_selector,
1641+
propagation_policy=propagation_policy,
16261642
)
16271643
except kubernetes.client.exceptions.ApiException as e:
1628-
msg = f"Failed to delete service of workload {name}{_detail_api_call_error(e)}"
1629-
raise OperationError(msg) from e
1644+
# If method not allowed(405),
1645+
# list services with the label and delete them one by one.
1646+
if e.status != 405:
1647+
msg = f"Failed to delete service of workload {name}{_detail_api_call_error(e)}"
1648+
raise OperationError(msg) from e
1649+
try:
1650+
services = core_api.list_namespaced_service(
1651+
namespace=namespace,
1652+
label_selector=label_selector,
1653+
resource_version=resource_version,
1654+
)
1655+
for svc in services.items or []:
1656+
core_api.delete_namespaced_service(
1657+
name=svc.metadata.name,
1658+
namespace=namespace,
1659+
propagation_policy=propagation_policy,
1660+
)
1661+
except kubernetes.client.exceptions.ApiException as e2:
1662+
msg = f"Failed to delete service of workload {name}{_detail_api_call_error(e2)}"
1663+
raise OperationError(msg) from e2
16301664

16311665
# Remove all ConfigMaps with the workload label.
16321666
try:
16331667
core_api.delete_collection_namespaced_config_map(
16341668
namespace=namespace,
1635-
label_selector=f"{_LABEL_WORKLOAD}={name}",
1636-
propagation_policy=envs.GPUSTACK_RUNTIME_KUBERNETES_DELETE_PROPAGATION_POLICY,
1669+
label_selector=label_selector,
1670+
propagation_policy=propagation_policy,
16371671
)
16381672
except kubernetes.client.exceptions.ApiException as e:
1639-
msg = f"Failed to delete configmap of workload {name}{_detail_api_call_error(e)}"
1640-
raise OperationError(msg) from e
1673+
if e.status != 405:
1674+
msg = f"Failed to delete configmap of workload {name}{_detail_api_call_error(e)}"
1675+
raise OperationError(msg) from e
1676+
try:
1677+
configmaps = core_api.list_namespaced_config_map(
1678+
namespace=namespace,
1679+
label_selector=label_selector,
1680+
resource_version=resource_version,
1681+
)
1682+
for cm in configmaps.items or []:
1683+
core_api.delete_namespaced_config_map(
1684+
name=cm.metadata.name,
1685+
namespace=namespace,
1686+
propagation_policy=propagation_policy,
1687+
)
1688+
except kubernetes.client.exceptions.ApiException as e2:
1689+
msg = f"Failed to delete configmap of workload {name}{_detail_api_call_error(e2)}"
1690+
raise OperationError(msg) from e2
16411691

16421692
return workload
16431693

@@ -1679,9 +1729,7 @@ def _list(
16791729
_LABEL_WORKLOAD,
16801730
],
16811731
),
1682-
"resource_version": (
1683-
None if envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ else "0"
1684-
),
1732+
"resource_version": _get_quorum_read_resource_version(),
16851733
}
16861734

16871735
core_api = kubernetes.client.CoreV1Api(self._client)
@@ -2136,3 +2184,11 @@ def _detail_api_call_error(err: kubernetes.client.exceptions.ApiException) -> st
21362184
msg += f": status code {err.status}"
21372185

21382186
return msg
2187+
2188+
2189+
def _get_quorum_read_resource_version() -> str | None:
    """
    Resolve the ``resource_version`` value to pass to Kubernetes list/watch calls.

    The result depends on ``envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ``.

    Returns:
        ``None`` when quorum read is enabled, the string ``"0"`` otherwise.

    """
    if envs.GPUSTACK_RUNTIME_KUBERNETES_QUORUM_READ:
        # An unset resource version asks the API server for the most
        # recent data (a quorum read against the backing store).
        return None
    # "0" accepts data at any resource version, which lets the API server
    # answer from its cache instead of performing a quorum read.
    return "0"

gpustack_runtime/envs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@
469469
"Foreground",
470470
),
471471
options=["Foreground", "Background", "Orphan"],
472-
default="Background",
472+
default="Foreground",
473473
),
474474
}
475475

0 commit comments

Comments
 (0)