Skip to content

Commit 8cff2cd

Browse files
Add v25.11.0 (#86)
* Prep for v25.10.0 * Switch to Flex LBs and update OKE module version * Add custom wildcard domain for Grafana * Add support to use image URI * Add support for credential-provider for OCIR * update schema.yaml * updated oke-cluster.tf --------- Co-authored-by: Andrei Ilas <[email protected]>
1 parent 781eaba commit 8cff2cd

File tree

8 files changed

+218
-24
lines changed

8 files changed

+218
-24
lines changed

files/oke-ubuntu-cloud-init.sh

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,38 @@ EOF
3030
fi
3131
}
3232

33+
# Install the OKE credential provider for OCIR.
#
# Downloads the credential-provider binary matching the host CPU architecture
# to /usr/local/bin/credential-provider-oke and its kubelet configuration to
# /etc/kubernetes/credential-provider-config.yaml.
#
# Both artifacts are first fetched into a private temporary directory and are
# only installed once each download succeeded and produced a non-empty file.
# `wget -O` creates/truncates its target before transferring, so downloading
# straight to the final path would leave an empty file behind on failure and
# a plain existence check would wrongly report success.
#
# Arguments: none
# Returns:   0 when both files were downloaded and installed,
#            1 on an unsupported architecture or any download/install failure.
download_oke_credential_provider_for_ocir() {
  local release_url="https://github.com/oracle-devrel/oke-credential-provider-for-ocir/releases/latest/download"
  local bin_path="/usr/local/bin/credential-provider-oke"
  local config_dir="/etc/kubernetes"
  local config_name="credential-provider-config.yaml"
  local arch tmp_dir
  local status=1

  # Map the kernel architecture name to the suffix used by the release assets.
  arch=$(uname -m)
  case "$arch" in
    x86_64)
      arch="amd64"
      ;;
    aarch64 | arm64)
      arch="arm64"
      ;;
    *)
      echo "Unsupported architecture for OKE credential provider: $arch" >&2
      return 1
      ;;
  esac

  tmp_dir=$(mktemp -d) || return 1

  # Nothing is written to the final locations unless every step before it
  # succeeded, so a failed run never leaves a half-installed provider.
  if wget --tries=5 --waitretry=3 --retry-connrefused \
      -O "${tmp_dir}/credential-provider-oke" \
      "${release_url}/oke-credential-provider-for-ocir-linux-${arch}" &&
    [[ -s "${tmp_dir}/credential-provider-oke" ]] &&
    wget --tries=5 --waitretry=3 --retry-connrefused \
      -O "${tmp_dir}/${config_name}" \
      "${release_url}/${config_name}" &&
    [[ -s "${tmp_dir}/${config_name}" ]] &&
    mkdir -p "$config_dir" &&
    install -m 0755 "${tmp_dir}/credential-provider-oke" "$bin_path" &&
    install -m 0644 "${tmp_dir}/${config_name}" "${config_dir}/${config_name}"; then
    status=0
  fi

  rm -rf -- "${tmp_dir:?}"
  return "$status"
}
64+
3365
# Disable nvidia-imex.service for GB200 and GB300 shapes for Dynamic Resource Allocation (DRA) compatibility
3466
SHAPE=$(curl -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/shape 2>/dev/null) || true
3567
if [[ -z "$SHAPE" ]]; then
@@ -43,17 +75,28 @@ elif [[ "$SHAPE" == BM.GPU.GB200* ]] || [[ "$SHAPE" == BM.GPU.GB300* ]]; then
4375
fi
4476
fi
4577

78+
# Record whether the OCIR credential provider is ready for kubelet.
#
# Sets the global credential_provider_done to 0 when setup was requested and
# the download function succeeded, and to 1 otherwise. The bootstrap steps
# later in this script add the kubelet credential-provider flags only when
# the value is 0.
#
# The function's exit status is tested directly. Capturing it with $(...)
# would store the function's stdout (normally empty) rather than its status,
# and an empty string compares equal to 0 under `[[ ... -eq 0 ]]`, so a failed
# download would still be treated as success.
#
# Arguments: $1 - "true" to attempt the credential provider setup
set_credential_provider_done() {
  credential_provider_done=1
  if [[ "${1:-false}" == "true" ]] && download_oke_credential_provider_for_ocir; then
    credential_provider_done=0
  fi
}

# Script arguments:
#   $1 - Kubernetes version (may be empty)
#   $2 - "true" to set up the OKE credential provider for OCIR (default: false)
kubernetes_version="${1-}"
setup_credential_provider="${2:-false}"

set_credential_provider_done "$setup_credential_provider"
86+
4687
case "$ID" in
4788
ubuntu)
4889
echo "Detected Ubuntu"
4990
if command -v oke >/dev/null 2>&1; then
5091
echo "[Ubuntu] oke binary already present, running bootstrap only"
51-
kubernetes_version="${1-}"
5292
configure_crio_defaults "$kubernetes_version"
53-
oke bootstrap
93+
if [[ "$credential_provider_done" -eq 0 ]]; then
94+
oke bootstrap --kubelet-extra-args "--image-credential-provider-bin-dir=/usr/local/bin/ --image-credential-provider-config=/etc/kubernetes/credential-provider-config.yaml"
95+
else
96+
oke bootstrap
97+
fi
5498
else
5599
echo "[Ubuntu] oke binary not found, installing package"
56-
kubernetes_version="${1-}"
57100
oke_package_version="${kubernetes_version:1}"
58101
oke_package_repo_version="${oke_package_version:0:4}"
59102
oke_package_name="oci-oke-node-all-$oke_package_version"
@@ -78,26 +121,38 @@ EOF
78121

79122
echo "[Ubuntu] Running bootstrap"
80123
configure_crio_defaults "$kubernetes_version"
81-
oke bootstrap
124+
if [[ "$credential_provider_done" -eq 0 ]]; then
125+
oke bootstrap --kubelet-extra-args "--image-credential-provider-bin-dir=/usr/local/bin/ --image-credential-provider-config=/etc/kubernetes/credential-provider-config.yaml"
126+
else
127+
oke bootstrap
128+
fi
82129
fi
83130
;;
84131
ol)
85132
echo "Detected Oracle Linux"
86133
if command -v oke >/dev/null 2>&1; then
87134
echo "[Oracle Linux] oke binary already present, running bootstrap only"
88-
kubernetes_version="${1-}"
135+
89136
configure_crio_defaults "$kubernetes_version"
90-
oke bootstrap
137+
if [[ "$credential_provider_done" -eq 0 ]]; then
138+
oke bootstrap --kubelet-extra-args "--image-credential-provider-bin-dir=/usr/local/bin/ --image-credential-provider-config=/etc/kubernetes/credential-provider-config.yaml"
139+
else
140+
oke bootstrap
141+
fi
91142
else
92143
echo "[Oracle Linux] oke binary not found, fetching init script"
93144
curl --fail -H "Authorization: Bearer Oracle" \
94145
-L0 http://169.254.169.254/opc/v2/instance/metadata/oke_init_script \
95146
| base64 --decode >/var/run/oke-init.sh
96147

97148
echo "[Oracle Linux] Running init script"
98-
kubernetes_version="${1-}"
99149
configure_crio_defaults "$kubernetes_version"
100-
bash /var/run/oke-init.sh
150+
if [[ "$credential_provider_done" -eq 0 ]]; then
151+
bash /var/run/oke-init.sh --kubelet-extra-args "--image-credential-provider-bin-dir=/usr/local/bin/ --image-credential-provider-config=/etc/kubernetes/credential-provider-config.yaml"
152+
else
153+
bash /var/run/oke-init.sh
154+
fi
155+
101156
fi
102157
;;
103158
*)
@@ -106,4 +161,4 @@ EOF
106161
;;
107162
esac
108163

109-
echo "OKE setup completed successfully."
164+
echo "OKE setup completed successfully."

terraform/iam.tf

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ locals {
2121
compartment_matches = format("instance.compartment.id = '%v'", var.compartment_ocid)
2222
compartment_rule = format("ANY {%v}", join(", ", [local.compartment_matches]))
2323

24-
rule_templates = [
24+
rule_templates = compact([
2525
"Allow dynamic-group %v to manage cluster-node-pools in compartment id %v",
2626
"Allow dynamic-group %v to manage cluster-family in compartment id %v",
2727
"Allow dynamic-group %v to manage file-family in compartment id %v",
@@ -37,8 +37,9 @@ locals {
3737
"Allow dynamic-group %v to {CLUSTER_JOIN} in compartment id %v",
3838
"Allow dynamic-group %v to read metrics in compartment id %v",
3939
"Allow dynamic-group %v to use metrics in compartment id %v where target.metrics.namespace='gpu_infrastructure_health'",
40-
"Allow dynamic-group %v to use metrics in compartment id %v where target.metrics.namespace='rdma_infrastructure_health'"
41-
]
40+
"Allow dynamic-group %v to use metrics in compartment id %v where target.metrics.namespace='rdma_infrastructure_health'",
41+
var.setup_credential_provider_for_ocir ? "Allow dynamic-group %v to read repos in compartment id %v" : ""
42+
])
4243

4344
wris_template = [
4445
"request.principal.type = 'workload'",

terraform/image.tf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2025 Oracle Corporation and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl
3+
4+
locals {
  # Non-empty custom image URIs configured for the worker pools, de-duplicated
  # so that a URI shared by several pools is listed once.
  unique_image_urls = distinct(compact([
    var.worker_ops_image_custom_uri,
    var.worker_cpu_image_custom_uri,
    var.worker_gpu_image_custom_uri,
    var.worker_rdma_image_custom_uri,
  ]))
}
7+
8+
# One imported image per unique custom image URI; instances are keyed by the URI.
resource "oci_core_image" "imported_image" {
  for_each = toset(local.unique_image_urls)

  compartment_id = var.compartment_ocid

  # Display name is the last "/"-separated segment of the URI followed by the state ID.
  display_name = format("%v-%v", reverse(split("/", each.value))[0], local.state_id)

  image_source_details {
    source_type = "objectStorageUri"
    source_uri  = each.value
  }
}

terraform/oke-cluster.tf

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ locals {
152152
module "oke" {
153153
source = "oracle-terraform-modules/oke/oci"
154154
version = "5.3.3"
155-
155+
156156
providers = { oci.home = oci.home }
157157

158158
region = var.region
@@ -196,11 +196,14 @@ module "oke" {
196196
}
197197
]
198198
}
199+
},
200+
var.install_monitoring && var.install_node_problem_detector_kube_prometheus_stack ?
201+
{
199202
"KubernetesMetricsServer" = {
200203
remove_addon_resources_on_delete = true
201204
override_existing = true
202205
}
203-
},
206+
} : {},
204207
var.install_monitoring && var.install_node_problem_detector_kube_prometheus_stack && var.preferred_kubernetes_services == "public" ?
205208
{
206209
"CertManager" = {

terraform/oke-workers.tf

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ locals {
77
trimspace(local.ssh_public_key),
88
])
99

10-
worker_ops_image_id = coalesce(var.worker_ops_image_custom_id, "none")
10+
worker_ops_image_id = var.worker_ops_image_use_uri ? lookup(lookup(oci_core_image.imported_image, var.worker_ops_image_custom_uri, {}), "id", null) : coalesce(var.worker_ops_image_custom_id, "none")
1111
worker_cpu_image_type = contains(["platform", "custom"], lower(var.worker_cpu_image_type)) ? "custom" : "oke"
12-
worker_cpu_image_id = coalesce(var.worker_cpu_image_custom_id, var.worker_cpu_image_platform_id, "none")
12+
worker_cpu_image_id = var.worker_cpu_image_use_uri ? lookup(lookup(oci_core_image.imported_image, var.worker_cpu_image_custom_uri, {}), "id", null) : coalesce(var.worker_cpu_image_custom_id, var.worker_cpu_image_platform_id, "none")
1313
worker_gpu_image_type = contains(["platform", "custom"], lower(var.worker_gpu_image_type)) ? "custom" : "oke"
14-
worker_gpu_image_id = coalesce(var.worker_gpu_image_custom_id, var.worker_gpu_image_platform_id, "none")
14+
# Imported images are keyed by their source URI, so the lookup key must be the custom URI (not the boolean use_uri toggle).
worker_gpu_image_id = var.worker_gpu_image_use_uri ? lookup(lookup(oci_core_image.imported_image, var.worker_gpu_image_custom_uri, {}), "id", null) : coalesce(var.worker_gpu_image_custom_id, var.worker_gpu_image_platform_id, "none")
1515
worker_rdma_image_type = contains(["platform", "custom"], lower(var.worker_rdma_image_type)) ? "custom" : "oke"
16-
worker_rdma_image_id = coalesce(var.worker_rdma_image_custom_id, var.worker_rdma_image_platform_id, "none")
16+
worker_rdma_image_id = var.worker_rdma_image_use_uri ? lookup(lookup(oci_core_image.imported_image, var.worker_rdma_image_custom_uri, {}), "id", null) : coalesce(var.worker_rdma_image_custom_id, var.worker_rdma_image_platform_id, "none")
1717

1818
runcmd_bootstrap = local.create_workers ? format(
19-
"curl -sL -o /var/run/oke-ubuntu-cloud-init.sh https://raw.githubusercontent.com/oracle-quickstart/oci-hpc-oke/refs/heads/main/files/oke-ubuntu-cloud-init.sh && (bash /var/run/oke-ubuntu-cloud-init.sh '%v' '%v' || echo 'Error bootstrapping OKE' >&2)",
20-
var.kubernetes_version, var.override_hostnames,
19+
"curl -sL -o /var/run/oke-ubuntu-cloud-init.sh https://raw.githubusercontent.com/oracle-quickstart/oci-hpc-oke/refs/heads/main/files/oke-ubuntu-cloud-init.sh && (bash /var/run/oke-ubuntu-cloud-init.sh '%v' '%v' '%v' || echo 'Error bootstrapping OKE' >&2)",
20+
var.kubernetes_version, var.setup_credential_provider_for_ocir, var.override_hostnames
2121
) : ""
2222

2323
runcmd_nvme_raid = var.nvme_raid_enabled ? format(
@@ -86,7 +86,6 @@ locals {
8686
boot_volume_size = var.worker_gpu_boot_volume_size
8787
image_type = "custom"
8888
image_id = local.worker_gpu_image_id
89-
cloud_init = [{ content_type = "text/cloud-config", content = yamlencode(local.cloud_init) }]
9089
node_labels = { "oci.oraclecloud.com/disable-gpu-device-plugin" : var.disable_gpu_device_plugin ? "true" : "false" },
9190
cloud_init = [{ content_type = "text/cloud-config", content = yamlencode(local.cloud_init) }]
9291
}

0 commit comments

Comments
 (0)