Skip to content

Commit 985764b

Browse files
authored
Kubernetes: check namespace in backend config and kubeconfig (#3858)
This is the first step towards deprecating and removing the `namespace` property of the Kubernetes backend config. The property is still the only source of truth. The description in the docs is updated to explain the current behavior and the planned deprecation. If the property does not match the namespace from the current kubeconfig context, a warning is shown instructing the user to update the kubeconfig.

Migration guide:

* If the property is not set or set to `default` in the backend config, and the namespace is not set or set to `default` in the kubeconfig, `default` is used; no action is required.
* If the namespace is set to `ns-a` in both the backend config and the kubeconfig, `ns-a` is used; no action is required.
* If the property is set to `ns-a` in the backend config, and the namespace is not set or set to `ns-b` in the kubeconfig, `ns-a` is used; set the namespace to `ns-a` in the kubeconfig to prepare for future versions.
* In general, it is only safe to remove the property from the backend config if it is equal to `default`, which is the default value.

Part-of: #3857
1 parent d2fdd17 commit 985764b

5 files changed

Lines changed: 111 additions & 28 deletions

File tree

src/dstack/_internal/core/backends/kubernetes/compute.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@
6060
)
6161
from dstack._internal.core.backends.kubernetes.utils import (
6262
call_api_method,
63-
get_api_from_config_data,
63+
get_api_from_kubeconfig_dict,
64+
kubeconfig_data_to_kubeconfig_dict,
65+
kubeconfig_dict_to_kubeconfig,
6466
)
6567
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
6668
from dstack._internal.core.errors import ComputeError, ProvisioningError
@@ -127,7 +129,29 @@ def __init__(self, config: KubernetesConfig):
127129
if proxy_jump is None:
128130
proxy_jump = KubernetesProxyJumpConfig()
129131
self.proxy_jump = proxy_jump
130-
self.api = get_api_from_config_data(config.kubeconfig.data)
132+
kubeconfig_dict = kubeconfig_data_to_kubeconfig_dict(config.kubeconfig.data)
133+
self.api = get_api_from_kubeconfig_dict(kubeconfig_dict)
134+
kubeconfig = kubeconfig_dict_to_kubeconfig(kubeconfig_dict)
135+
current_context = kubeconfig.get_context()
136+
if current_context.namespace != config.namespace:
137+
logger.warning(
138+
(
139+
"Namespace mismatch: kubeconfig -> '%s', backend config -> '%s'."
140+
" The current dstack version ignores kubeconfig"
141+
" and uses deprecated namespace property from backend config."
142+
" Future versions will use namespace from kubeconfig."
143+
" To keep using '%s' namespace in future versions and suppress this warning,"
144+
" set namespace to '%s' in kubeconfig context '%s'"
145+
),
146+
current_context.namespace,
147+
config.namespace,
148+
config.namespace,
149+
config.namespace,
150+
kubeconfig.current_context,
151+
)
152+
# TODO: switch to current_context.namespace
153+
self.namespace = config.namespace
154+
logger.debug("Using namespace '%s'", self.namespace)
131155

132156
def get_offers_by_requirements(
133157
self, requirements: Requirements
@@ -156,7 +180,7 @@ def run_job(
156180
jump_pod_service_name = _get_pod_service_name(jump_pod_name)
157181
_create_jump_pod_service_if_not_exists(
158182
api=self.api,
159-
namespace=self.config.namespace,
183+
namespace=self.namespace,
160184
jump_pod_name=jump_pod_name,
161185
jump_pod_service_name=jump_pod_service_name,
162186
jump_pod_port=self.proxy_jump.port,
@@ -177,7 +201,7 @@ def run_job(
177201
string_data={".dockerconfigjson": dockerconfigjson},
178202
)
179203
self.api.create_namespaced_secret(
180-
namespace=self.config.namespace,
204+
namespace=self.namespace,
181205
body=registry_auth_secret,
182206
)
183207
image_pull_secrets = [client.V1LocalObjectReference(name=registry_auth_secret_name)]
@@ -342,11 +366,11 @@ def run_job(
342366
),
343367
)
344368
self.api.create_namespaced_pod(
345-
namespace=self.config.namespace,
369+
namespace=self.namespace,
346370
body=pod,
347371
)
348372
self.api.create_namespaced_service(
349-
namespace=self.config.namespace,
373+
namespace=self.namespace,
350374
body=client.V1Service(
351375
metadata=client.V1ObjectMeta(name=_get_pod_service_name(instance_name)),
352376
spec=client.V1ServiceSpec(
@@ -395,7 +419,7 @@ def update_provisioning_data(
395419
backend_data = KubernetesBackendData.load(provisioning_data.backend_data)
396420
ssh_proxy = _check_and_configure_jump_pod_service(
397421
api=self.api,
398-
namespace=self.config.namespace,
422+
namespace=self.namespace,
399423
jump_pod_name=backend_data.jump_pod_name,
400424
jump_pod_service_name=backend_data.jump_pod_service_name,
401425
jump_pod_hostname=self.proxy_jump.hostname,
@@ -412,7 +436,7 @@ def update_provisioning_data(
412436

413437
pod = self.api.read_namespaced_pod(
414438
name=provisioning_data.instance_id,
415-
namespace=self.config.namespace,
439+
namespace=self.namespace,
416440
)
417441
if pod.status is None:
418442
return
@@ -422,7 +446,7 @@ def update_provisioning_data(
422446
provisioning_data.internal_ip = pod_ip
423447
service = self.api.read_namespaced_service(
424448
name=_get_pod_service_name(provisioning_data.instance_id),
425-
namespace=self.config.namespace,
449+
namespace=self.namespace,
426450
)
427451
service_spec = get_or_error(service.spec)
428452
provisioning_data.hostname = get_or_error(service_spec.cluster_ip)
@@ -450,21 +474,21 @@ def terminate_instance(
450474
self.api.delete_namespaced_service,
451475
expected=404,
452476
name=_get_pod_service_name(instance_id),
453-
namespace=self.config.namespace,
477+
namespace=self.namespace,
454478
body=client.V1DeleteOptions(),
455479
)
456480
call_api_method(
457481
self.api.delete_namespaced_pod,
458482
expected=404,
459483
name=instance_id,
460-
namespace=self.config.namespace,
484+
namespace=self.namespace,
461485
body=client.V1DeleteOptions(),
462486
)
463487
call_api_method(
464488
self.api.delete_namespaced_secret,
465489
expected=404,
466490
name=_get_registry_auth_secret_name(instance_id),
467-
namespace=self.config.namespace,
491+
namespace=self.namespace,
468492
body=client.V1DeleteOptions(),
469493
)
470494

@@ -520,7 +544,7 @@ def create_gateway(
520544
),
521545
)
522546
self.api.create_namespaced_pod(
523-
namespace=self.config.namespace,
547+
namespace=self.namespace,
524548
body=pod,
525549
)
526550
service = client.V1Service(
@@ -550,13 +574,13 @@ def create_gateway(
550574
),
551575
)
552576
self.api.create_namespaced_service(
553-
namespace=self.config.namespace,
577+
namespace=self.namespace,
554578
body=service,
555579
)
556580
# address is either a domain name or an IP address
557581
address = _wait_for_load_balancer_address(
558582
api=self.api,
559-
namespace=self.config.namespace,
583+
namespace=self.namespace,
560584
service_name=_get_pod_service_name(instance_name),
561585
)
562586
if address is None:
@@ -591,7 +615,7 @@ def register_volume(self, volume: Volume) -> VolumeProvisioningData:
591615
pvc = call_api_method(
592616
self.api.read_namespaced_persistent_volume_claim,
593617
expected=404,
594-
namespace=self.config.namespace,
618+
namespace=self.namespace,
595619
name=pvc_name,
596620
)
597621
if pvc is None:
@@ -650,7 +674,7 @@ def create_volume(self, volume: Volume) -> VolumeProvisioningData:
650674
),
651675
)
652676
self.api.create_namespaced_persistent_volume_claim(
653-
namespace=self.config.namespace,
677+
namespace=self.namespace,
654678
body=pvc,
655679
)
656680
logger.debug("Created PVC %s for volume %s", pvc_name, volume.name)
@@ -671,7 +695,7 @@ def delete_volume(self, volume: Volume):
671695
pvc = call_api_method(
672696
self.api.delete_namespaced_persistent_volume_claim,
673697
expected=404,
674-
namespace=self.config.namespace,
698+
namespace=self.namespace,
675699
name=pvc_name,
676700
)
677701
if pvc is None:

src/dstack/_internal/core/backends/kubernetes/configurator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def validate_config(
3030
self, config: KubernetesBackendConfigWithCreds, default_creds_enabled: bool
3131
):
3232
try:
33-
api = kubernetes_utils.get_api_from_config_data(config.kubeconfig.data)
33+
api = kubernetes_utils.get_api_from_kubeconfig_data(config.kubeconfig.data)
3434
api.list_node()
3535
except Exception as e:
3636
logger.debug("Invalid kubeconfig: %s", str(e))

src/dstack/_internal/core/backends/kubernetes/models.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,20 @@ class KubernetesBackendConfig(CoreModel):
2828
Optional[KubernetesProxyJumpConfig], Field(description="The SSH proxy jump configuration")
2929
] = None
3030
namespace: Annotated[
31-
str, Field(description="The namespace for resources managed by `dstack`")
31+
str,
32+
Field(
33+
description=(
34+
"The namespace for resources managed by `dstack`."
35+
" Always overrides the namespace set in the kubeconfig, even if not set. "
36+
" Deprecated and will be eventually removed in futute versions, but"
37+
" in the current version must be set unless equals to `default`."
38+
" Future versions will use the namespace from the kubeconfig instead."
39+
" To prepare for future versions, set the same value in the kubeconfig"
40+
)
41+
),
3242
] = DEFAULT_NAMESPACE
43+
"""`namespace` is formally deprecated since 0.20.20 but still used. Future versions will switch
44+
to namespace from kubeconfig context, which is currently ignored"""
3345

3446

3547
class KubernetesBackendConfigWithCreds(KubernetesBackendConfig):

src/dstack/_internal/core/backends/kubernetes/utils.py

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Callable, Optional, TypeVar, Union
1+
from typing import Annotated, Callable, Optional, TypeVar, Union
22

33
import yaml
44
from kubernetes.client import CoreV1Api
@@ -7,19 +7,66 @@
77
# XXX: This function is missing in the stubs package
88
new_client_from_config_dict, # pyright: ignore[reportAttributeAccessIssue]
99
)
10+
from pydantic import Field
1011
from typing_extensions import ParamSpec
1112

13+
from dstack._internal.core.models.common import CoreModel
14+
1215
T = TypeVar("T")
1316
P = ParamSpec("P")
1417

1518

16-
def get_api_from_config_data(kubeconfig_data: str) -> CoreV1Api:
17-
config_dict = yaml.load(kubeconfig_data, yaml.FullLoader)
18-
return get_api_from_config_dict(config_dict)
19+
class KubeconfigContext(CoreModel):
20+
namespace: str = "default"
21+
22+
23+
class KubeconfigNamedContext(CoreModel):
24+
name: str
25+
context: KubeconfigContext
26+
27+
28+
class Kubeconfig(CoreModel):
29+
"""
30+
`Kubeconfig` model only includes fields used by `dstack`.
31+
Reference: https://kubernetes.io/docs/reference/config-api/kubeconfig.v1/
32+
"""
33+
34+
contexts: list[KubeconfigNamedContext] = []
35+
current_context: Annotated[Optional[str], Field(alias="current-context")] = None
36+
37+
def get_context(self, name: Optional[str] = None) -> KubeconfigContext:
38+
if name is None:
39+
name = self.current_context
40+
if name is None:
41+
raise ValueError("current-context is not set")
42+
for named_context in self.contexts:
43+
if named_context.name == name:
44+
return named_context.context
45+
raise ValueError(f"context {name} not found")
46+
47+
48+
def kubeconfig_data_to_kubeconfig_dict(kubeconfig_data: str) -> dict:
49+
kubeconfig_dict = yaml.load(kubeconfig_data, yaml.FullLoader)
50+
if not isinstance(kubeconfig_dict, dict):
51+
raise TypeError(f"Unexpected kubeconfig_data type: {kubeconfig_dict.__class__.__name__}")
52+
return kubeconfig_dict
53+
54+
55+
def kubeconfig_dict_to_kubeconfig(kubeconfig_dict: dict) -> Kubeconfig:
56+
return Kubeconfig.__response__.parse_obj(kubeconfig_dict)
57+
58+
59+
def get_api_from_kubeconfig_data(
60+
kubeconfig_data: str, *, context: Optional[str] = None
61+
) -> CoreV1Api:
62+
kubeconfig_dict = kubeconfig_data_to_kubeconfig_dict(kubeconfig_data)
63+
return get_api_from_kubeconfig_dict(kubeconfig_dict, context=context)
1964

2065

21-
def get_api_from_config_dict(kubeconfig: dict) -> CoreV1Api:
22-
api_client = new_client_from_config_dict(config_dict=kubeconfig)
66+
def get_api_from_kubeconfig_dict(
67+
kubeconfig_dict: dict, *, context: Optional[str] = None
68+
) -> CoreV1Api:
69+
api_client = new_client_from_config_dict(config_dict=kubeconfig_dict, context=context)
2370
return CoreV1Api(api_client=api_client)
2471

2572

src/tests/_internal/core/backends/kubernetes/test_configurator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_validate_config_valid(self):
2020
proxy_jump=KubernetesProxyJumpConfig(hostname=None, port=None),
2121
)
2222
with patch(
23-
"dstack._internal.core.backends.kubernetes.utils.get_api_from_config_data"
23+
"dstack._internal.core.backends.kubernetes.utils.get_api_from_kubeconfig_data"
2424
) as get_api_mock:
2525
api_mock = Mock()
2626
api_mock.list_node.return_value = Mock()
@@ -34,7 +34,7 @@ def test_validate_config_invalid_config(self):
3434
)
3535
with (
3636
patch(
37-
"dstack._internal.core.backends.kubernetes.utils.get_api_from_config_data"
37+
"dstack._internal.core.backends.kubernetes.utils.get_api_from_kubeconfig_data"
3838
) as get_api_mock,
3939
pytest.raises(BackendInvalidCredentialsError) as exc_info,
4040
):

0 commit comments

Comments
 (0)