diff --git a/soperator/installations/example/.envrc b/soperator/installations/example/.envrc index b228e046..6a384e18 100644 --- a/soperator/installations/example/.envrc +++ b/soperator/installations/example/.envrc @@ -2,10 +2,6 @@ NEBIUS_TENANT_ID="$NEBIUS_TENANT_ID" # ='tenant-...' NEBIUS_PROJECT_ID="$NEBIUS_PROJECT_ID" # ='project-...' NEBIUS_REGION="${NEBIUS_REGION:-eu-north1}" -# O11y setup. -NEBIUS_OLLY_PROFILE="${NEBIUS_OLLY_PROFILE:-soperator-telemetry}" -NEBIUS_OLLY_TENANT_ID="${NEBIUS_OLLY_TENANT_ID:-tenant-e00vyb5y1x5vqkzw5e}" # ='tenant-...' - if [ -z "${NEBIUS_TENANT_ID}" ]; then echo "Error: NEBIUS_TENANT_ID is not set" return 1 @@ -16,37 +12,6 @@ if [ -z "${NEBIUS_PROJECT_ID}" ]; then return 1 fi -# region IAM token - -unset NEBIUS_IAM_TOKEN -nebius iam whoami > /dev/null -nebius iam get-access-token > /dev/null -NEBIUS_IAM_TOKEN=$(nebius iam get-access-token) -export NEBIUS_IAM_TOKEN - -if [ -f "$HOME/.nebius/credentials.yaml" ]; then - IAM_TOKEN_EXPIRES_AT=$(yq '.tokens[].expires_at' "$HOME/.nebius/credentials.yaml" 2>/dev/null) - if [ -n "$IAM_TOKEN_EXPIRES_AT" ]; then - if [[ "$(uname)" == "Darwin" ]]; then - echo "IAM token expires at: $(date -r "$IAM_TOKEN_EXPIRES_AT")" - else - echo "IAM token expires at: $(date -d @"$IAM_TOKEN_EXPIRES_AT")" - fi - fi -fi - -# endregion IAM token - -# region VPC subnet - -NEBIUS_VPC_SUBNET_ID=$(nebius vpc subnet list \ - --parent-id "${NEBIUS_PROJECT_ID}" \ - --format json \ - | jq -r '.items[0].metadata.id') -export NEBIUS_VPC_SUBNET_ID - -# endregion VPC subnet - # region Remote state # region Service account @@ -209,12 +174,8 @@ EOF # region TF variables export TF_VAR_region="${NEBIUS_REGION}" -export TF_VAR_iam_token="${NEBIUS_IAM_TOKEN}" export TF_VAR_iam_tenant_id="${NEBIUS_TENANT_ID}" export TF_VAR_iam_project_id="${NEBIUS_PROJECT_ID}" -export TF_VAR_o11y_iam_tenant_id="${NEBIUS_OLLY_TENANT_ID}" -export TF_VAR_o11y_profile="${NEBIUS_OLLY_PROFILE}" -export TF_VAR_vpc_subnet_id="${NEBIUS_VPC_SUBNET_ID}" export TF_VAR_aws_access_key_id="${AWS_ACCESS_KEY_ID}" export TF_VAR_aws_secret_access_key="${AWS_SECRET_ACCESS_KEY}" export TFE_PARALLELISM=20 @@ -223,9 +184,6 @@ echo "Exported variables:" echo "TF_VAR_region: ${TF_VAR_region}" echo "TF_VAR_iam_tenant_id: ${TF_VAR_iam_tenant_id}" echo "TF_VAR_iam_project_id: ${TF_VAR_iam_project_id}" -echo "TF_VAR_o11y_iam_tenant_id: ${TF_VAR_o11y_iam_tenant_id}" -echo "TF_VAR_o11y_profile: ${TF_VAR_o11y_profile}" -echo "TF_VAR_vpc_subnet_id: ${TF_VAR_vpc_subnet_id}" echo "TF_VAR_aws_access_key_id: ${TF_VAR_aws_access_key_id}" echo "TFE_PARALLELISM: ${TFE_PARALLELISM}" diff --git a/soperator/installations/example/main.tf b/soperator/installations/example/main.tf index abf34d87..104f64d6 100644 --- a/soperator/installations/example/main.tf +++ b/soperator/installations/example/main.tf @@ -164,7 +164,7 @@ module "nfs-server" { module "cleanup" { source = "../../modules/cleanup" - iam_project_id = var.iam_project_id + iam_project_id = local.iam_project_id } module "k8s_cleanup" { @@ -291,9 +291,9 @@ module "o11y" { source = "../../modules/o11y" - iam_project_id = var.iam_project_id - o11y_iam_tenant_id = var.o11y_iam_tenant_id - o11y_profile = var.o11y_profile + iam_project_id = local.iam_project_id + o11y_iam_tenant_id = local.o11y_iam_tenant_id + o11y_profile = local.o11y_profile k8s_cluster_context = module.k8s.cluster_context company_name = var.company_name } @@ -310,9 +310,9 @@ module "slurm" { active_checks_scope = var.active_checks_scope - region = var.region - iam_tenant_id = var.iam_tenant_id - iam_project_id = var.iam_project_id + region = local.region + iam_tenant_id = data.nebius_iam_v1_tenant.this.id + iam_project_id = data.nebius_iam_v1_project.this.id cluster_name = var.company_name name = local.slurm_cluster_name k8s_cluster_context = module.k8s.cluster_context @@ -525,7 +525,7 @@ module "backups_store" { source = "../../modules/backups_store" - iam_project_id = var.iam_project_id + iam_project_id = local.iam_project_id instance_name = local.k8s_cluster_name cleanup_bucket_on_destroy = var.cleanup_bucket_on_destroy @@ -544,8 +544,8 @@ module "backups" { k8s_cluster_context = module.k8s.cluster_context k8s_cluster_id = module.k8s.cluster_id - iam_project_id = var.iam_project_id - iam_tenant_id = var.iam_tenant_id + iam_project_id = data.nebius_iam_v1_project.this.id + iam_tenant_id = data.nebius_iam_v1_tenant.this.id instance_name = local.k8s_cluster_name soperator_namespace = local.slurm_cluster_name backups_password = var.backups_password diff --git a/soperator/installations/example/terraform.tf b/soperator/installations/example/terraform.tf index 52914e1f..bb02a8c2 100644 --- a/soperator/installations/example/terraform.tf +++ b/soperator/installations/example/terraform.tf @@ -30,11 +30,17 @@ terraform { source = "hashicorp/helm" version = "<3.0.0" } + + external = { + source = "hashicorp/external" + version = ">= 2.3.0" + } } } provider "nebius" { - domain = "api.eu.nebius.cloud:443" + domain = "api.eu.nebius.cloud:443" + profile = {} } provider "units" {} @@ -44,14 +50,36 @@ provider "string-functions" {} provider "kubernetes" { host = module.k8s.control_plane.public_endpoint cluster_ca_certificate = module.k8s.control_plane.cluster_ca_certificate - token = var.iam_token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "nebius" + args = [ + "mk8s", + "v1", + "cluster", + "get-token", + "--format", + "json", + ] + } } provider "flux" { kubernetes = { host = module.k8s.control_plane.public_endpoint cluster_ca_certificate = module.k8s.control_plane.cluster_ca_certificate - token = var.iam_token + exec = { + api_version = "client.authentication.k8s.io/v1beta1" + command = "nebius" + args = [ + "mk8s", + "v1", + "cluster", + "get-token", + "--format", + "json", + ] + } } } @@ -59,7 +87,18 @@ provider "helm" { kubernetes { host = module.k8s.control_plane.public_endpoint cluster_ca_certificate = module.k8s.control_plane.cluster_ca_certificate - token = var.iam_token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "nebius" + args = [ + "mk8s", + "v1", + "cluster", + "get-token", + "--format", + "json", + ] + } } } diff --git a/soperator/installations/example/variables.tf b/soperator/installations/example/variables.tf index 7d476c3c..cf4d5d99 100644 --- a/soperator/installations/example/variables.tf +++ b/soperator/installations/example/variables.tf @@ -1,62 +1,54 @@ # region Cloud +data "external" "env" { + program = ["jq", "--null-input", "env | { NEBIUS_PROJECT_ID, NEBIUS_OLLY_PROFILE, NEBIUS_OLLY_TENANT_ID }"] +} +locals { + region = coalesce(var.region, data.nebius_iam_v1_project.this.region) + iam_project_id = coalesce(var.iam_project_id, data.external.env.result.NEBIUS_PROJECT_ID) + o11y_profile = coalesce(var.o11y_profile, data.external.env.result.NEBIUS_OLLY_PROFILE, "soperator-telemetry") + o11y_iam_tenant_id = coalesce(var.o11y_iam_tenant_id, data.external.env.result.NEBIUS_OLLY_TENANT_ID, "tenant-e00vyb5y1x5vqkzw5e") +} + variable "region" { description = "Region of the project." type = string - nullable = false + default = null } resource "terraform_data" "check_region" { lifecycle { precondition { - condition = contains(module.resources.regions, var.region) - error_message = "Unknown region '${var.region}'. See https://docs.nebius.com/overview/regions" + condition = var.region == null || contains(module.resources.regions, var.region) + error_message = "Unknown region '${var.region != null ? var.region : ""}'. See https://docs.nebius.com/overview/regions" } } } -variable "iam_token" { - description = "IAM token used for communicating with Nebius services." - type = string - nullable = false - sensitive = true -} - variable "iam_project_id" { description = "ID of the IAM project." type = string - nullable = false + default = null validation { - condition = startswith(var.iam_project_id, "project-") + condition = var.iam_project_id == null || startswith(var.iam_project_id, "project-") error_message = "ID of the IAM project must start with `project-`." } } data "nebius_iam_v1_project" "this" { - id = var.iam_project_id -} - -variable "iam_tenant_id" { - description = "ID of the IAM tenant." - type = string - nullable = false - - validation { - condition = startswith(var.iam_tenant_id, "tenant-") - error_message = "ID of the IAM tenant must start with `tenant-`." - } + id = local.iam_project_id } data "nebius_iam_v1_tenant" "this" { - id = var.iam_tenant_id + id = data.nebius_iam_v1_project.this.parent_id } variable "o11y_iam_tenant_id" { description = "ID of the IAM tenant for O11y." type = string - nullable = false + default = null validation { - condition = startswith(var.o11y_iam_tenant_id, "tenant-") + condition = var.o11y_iam_tenant_id == null || startswith(var.o11y_iam_tenant_id, "tenant-") error_message = "ID of the IAM tenant must start with `tenant-`." } } @@ -64,10 +56,10 @@ variable "o11y_iam_tenant_id" { variable "o11y_profile" { description = "Profile for nebius CLI for public o11y." type = string - nullable = false + default = null validation { - condition = ( + condition = var.o11y_profile == null || ( (length(var.o11y_profile) >= 1 && var.public_o11y_enabled) || !var.public_o11y_enabled ) @@ -97,14 +89,24 @@ If you provision a NON-PRODUCTION cluster, set "production" variable to false. variable "vpc_subnet_id" { description = "ID of VPC subnet." type = string + nullable = true + default = null validation { - condition = startswith(var.vpc_subnet_id, "vpcsubnet-") + condition = var.vpc_subnet_id == null || startswith(var.vpc_subnet_id, "vpcsubnet-") error_message = "The ID of the VPC subnet must start with `vpcsubnet-`." } } + +data "external" "default_vpc_subnet" { + program = ["bash", "-euo", "pipefail", "-c", <<-BASH + nebius vpc subnet list --parent-id "$0" --format json | jq -r '.items[0].metadata | { id }' + BASH + , data.nebius_iam_v1_project.this.id] +} + data "nebius_vpc_v1_subnet" "this" { - id = var.vpc_subnet_id + id = var.vpc_subnet_id != null ? var.vpc_subnet_id : data.external.default_vpc_subnet.result.id } variable "slurm_login_public_ip" { @@ -420,10 +422,10 @@ resource "terraform_data" "check_nfs" { precondition { condition = (var.nfs.enabled - ? contains(module.resources.platform_regions[var.nfs.resource.platform], var.region) + ? contains(module.resources.platform_regions[var.nfs.resource.platform], local.region) : true ) - error_message = "Unsupported platform '${var.nfs.resource.platform}' in region '${var.region}'. See https://docs.nebius.com/compute/virtual-machines/types" + error_message = "Unsupported platform '${var.nfs.resource.platform}' in region '${local.region}'. See https://docs.nebius.com/compute/virtual-machines/types" } } } @@ -938,8 +940,8 @@ resource "terraform_data" "check_slurm_nodeset" { } precondition { - condition = contains(module.resources.platform_regions[each.value.resource.platform], var.region) - error_message = "Unsupported platform '${each.value.resource.platform}' in region '${var.region}'. See https://docs.nebius.com/compute/virtual-machines/types" + condition = contains(module.resources.platform_regions[each.value.resource.platform], local.region) + error_message = "Unsupported platform '${each.value.resource.platform}' in region '${local.region}'. See https://docs.nebius.com/compute/virtual-machines/types" } # TODO: precondition for total node group count @@ -957,7 +959,7 @@ resource "terraform_data" "check_local_nvme" { alltrue([ for worker in var.slurm_nodeset_workers : !try(worker.local_nvme.enabled, false) || ( - try(module.resources.local_nvme_supported_by_region_platform_preset[var.region][worker.resource.platform][worker.resource.preset], false) + try(module.resources.local_nvme_supported_by_region_platform_preset[local.region][worker.resource.platform][worker.resource.preset], false) ) ]) )