Skip to content

Commit a758bd3

Browse files
authored
Kubernetes: support IP based load balancers (#3283)
Extends Kubernetes backend gateway support to providers that provide IPs, not domain names, for externally-accessible Services. Tested on Nebius (mk8s) and Google Cloud (GKE). Part-of: #3126
1 parent da4943d commit a758bd3

File tree

3 files changed

+22
-14
lines changed

3 files changed

+22
-14
lines changed

gateway/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ requires-python = ">=3.10"
1111
dynamic = ["version"]
1212
dependencies = [
1313
# release builds of dstack-gateway depend on a PyPI version of dstack instead
14-
"dstack[gateway] @ git+https://github.com/dstackai/dstack.git@master",
14+
"dstack[gateway] @ https://github.com/dstackai/dstack/archive/refs/heads/master.tar.gz",
1515
]
1616

1717
[tool.setuptools.package-data]

src/dstack/_internal/core/backends/kubernetes/compute.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,6 @@ def create_gateway(
367367
# If the cluster does not support Load Balancer, the service will be provisioned but
368368
# the external IP/hostname will never be allocated.
369369

370-
# TODO: This implementation is only tested on EKS. Test other managed Kubernetes.
371-
372370
# TODO: By default EKS creates a Classic Load Balancer for Load Balancer services.
373371
# Consider deploying an NLB. It seems it requires some extra configuration on the cluster:
374372
# https://docs.aws.amazon.com/eks/latest/userguide/network-load-balancing.html
@@ -397,6 +395,10 @@ def create_gateway(
397395
container_port=443,
398396
),
399397
],
398+
security_context=client.V1SecurityContext(
399+
run_as_user=0,
400+
run_as_group=0,
401+
),
400402
)
401403
]
402404
),
@@ -435,21 +437,22 @@ def create_gateway(
435437
namespace=self.config.namespace,
436438
body=service,
437439
)
438-
hostname = _wait_for_load_balancer_hostname(
440+
# address is either a domain name or an IP address
441+
address = _wait_for_load_balancer_address(
439442
api=self.api,
440443
namespace=self.config.namespace,
441444
service_name=_get_pod_service_name(instance_name),
442445
)
443446
region = DUMMY_REGION
444-
if hostname is None:
447+
if address is None:
445448
self.terminate_instance(instance_name, region=region)
446449
raise ComputeError(
447450
"Failed to get gateway hostname. "
448451
"Ensure the Kubernetes cluster supports Load Balancer services."
449452
)
450453
return GatewayProvisioningData(
451454
instance_id=instance_name,
452-
ip_address=hostname,
455+
ip_address=address,
453456
region=region,
454457
)
455458

@@ -927,7 +930,7 @@ def _wait_for_pod_ready(
927930
time.sleep(1)
928931

929932

930-
def _wait_for_load_balancer_hostname(
933+
def _wait_for_load_balancer_address(
931934
api: client.CoreV1Api,
932935
namespace: str,
933936
service_name: str,
@@ -945,10 +948,16 @@ def _wait_for_load_balancer_hostname(
945948
service is not None
946949
and (service_status := service.status) is not None
947950
and (lb_status := service_status.load_balancer) is not None
948-
and (ingresses := lb_status.ingress)
949-
and (hostname := ingresses[0].hostname) is not None
951+
and (ingress_points := lb_status.ingress)
950952
):
951-
return hostname
953+
ingress_point = ingress_points[0]
954+
# > Hostname is set for load-balancer ingress points that are DNS based (typically
955+
# > AWS load-balancers)
956+
# > IP is set for load-balancer ingress points that are IP based (typically GCE or
957+
# > OpenStack load-balancers)
958+
address = ingress_point.hostname or ingress_point.ip
959+
if address is not None:
960+
return address
952961
elapsed_time = time.time() - start_time
953962
if elapsed_time >= timeout_seconds:
954963
logger.warning("Timeout waiting for load balancer %s to get ip", service_name)
@@ -982,16 +991,15 @@ def _get_gateway_commands(authorized_keys: list[str]) -> list[str]:
982991
"apt-get update && apt-get install -y sudo wget openssh-server nginx python3.10-venv libaugeas0",
983992
# install docker-systemctl-replacement
984993
"wget https://raw.githubusercontent.com/gdraheim/docker-systemctl-replacement/b18d67e521f0d1cf1d705dbb8e0416bef23e377c/files/docker/systemctl3.py -O /usr/bin/systemctl",
985-
"chmod + /usr/bin/systemctl",
994+
"chmod a+rx /usr/bin/systemctl",
986995
# install certbot
987996
"python3 -m venv /root/certbotvenv/",
988997
"/root/certbotvenv/bin/pip install certbot-nginx",
989998
"ln -s /root/certbotvenv/bin/certbot /usr/bin/certbot",
990999
# prohibit password authentication
9911000
'sed -i "s/.*PasswordAuthentication.*/PasswordAuthentication no/g" /etc/ssh/sshd_config',
9921001
# set up ubuntu user
993-
"adduser ubuntu",
994-
"usermod -aG sudo ubuntu",
1002+
"useradd -mUG sudo ubuntu",
9951003
"echo 'ubuntu ALL=(ALL:ALL) NOPASSWD: ALL' | tee /etc/sudoers.d/ubuntu",
9961004
# create ssh dirs and add public key
9971005
"mkdir -p /run/sshd /home/ubuntu/.ssh",

src/dstack/_internal/server/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
DSTACK_DIR_PATH = Path("~/.dstack/").expanduser()
1616

17-
SERVER_DIR_PATH = Path(os.getenv("DSTACK_SERVER_DIR", DSTACK_DIR_PATH / "server"))
17+
SERVER_DIR_PATH = Path(os.getenv("DSTACK_SERVER_DIR", DSTACK_DIR_PATH / "server")).resolve()
1818

1919
SERVER_CONFIG_FILE_PATH = SERVER_DIR_PATH / "config.yml"
2020

0 commit comments

Comments
 (0)