From c93d9be6c1cd74f25294e6f04eb61992420f4344 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 22 Jan 2025 15:38:03 +0000 Subject: [PATCH 1/7] support multiple networks for control node --- environments/.stackhpc/tofu/LEAFCLOUD.tfvars | 8 +++- environments/.stackhpc/tofu/main.tf | 13 +++--- .../tofu/compute.tf | 8 ++-- .../tofu/control.tf | 21 ++++++---- .../tofu/login.tf | 8 ++-- .../tofu/network.tf | 9 +++- .../tofu/variables.tf | 41 +++++++++++-------- 7 files changed, 64 insertions(+), 44 deletions(-) diff --git a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars index 5e73896c8..135aadc64 100644 --- a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars +++ b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars @@ -1,5 +1,9 @@ -cluster_net = "slurmapp-ci" -cluster_subnet = "slurmapp-ci" +cluster_networks = [ + { + network = "slurmapp-ci" + subnet = "slurmapp-ci" + } +] control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment other_node_flavor = "en1.xsmall" state_volume_type = "unencrypted" diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf index e1aae6d89..cdf3e2f72 100644 --- a/environments/.stackhpc/tofu/main.tf +++ b/environments/.stackhpc/tofu/main.tf @@ -30,12 +30,10 @@ variable "cluster_image" { type = map(string) } -variable "cluster_net" {} +variable "cluster_networks" {} -variable "cluster_subnet" {} - -variable "vnic_type" { - default = "normal" +variable "vnic_types" { + default = {} } variable "state_volume_type"{ @@ -63,9 +61,8 @@ module "cluster" { source = "../../skeleton/{{cookiecutter.environment}}/tofu/" cluster_name = var.cluster_name - cluster_net = var.cluster_net - cluster_subnet = var.cluster_subnet - vnic_type = var.vnic_type + cluster_networks = var.cluster_networks + vnic_types = var.vnic_types key_pair = "slurm-app-ci" cluster_image_id = data.openstack_images_image_v2.cluster.id control_node_flavor = 
var.control_node_flavor diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf index 53e941517..15b05fd14 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf @@ -9,13 +9,13 @@ module "compute" { cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - cluster_net_id = data.openstack_networking_network_v2.cluster_net.id - cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet.id + cluster_net_id = data.openstack_networking_network_v2.cluster_net[var.cluster_networks[0].network].id + cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet[var.cluster_networks[0].network].id # can be set for group, defaults to top-level value: image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_type = lookup(each.value, "vnic_type", var.vnic_type) - vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) + #vnic_type = lookup(each.value, "vnic_type", var.vnic_type) + #vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) extra_volumes = lookup(each.value, "extra_volumes", {}) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf index b00889fab..ff5a3ec00 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf @@ -4,19 +4,21 @@ locals { resource "openstack_networking_port_v2" "control" { - name = "${var.cluster_name}-control" - network_id = data.openstack_networking_network_v2.cluster_net.id + for_each = {for net in 
var.cluster_networks: net.network => net} + + name = "${var.cluster_name}-control-${each.key}" + network_id = data.openstack_networking_network_v2.cluster_net[each.key].id admin_state_up = "true" fixed_ip { - subnet_id = data.openstack_networking_subnet_v2.cluster_subnet.id + subnet_id = data.openstack_networking_subnet_v2.cluster_subnet[each.key].id } security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] binding { - vnic_type = var.vnic_type - profile = var.vnic_profile + vnic_type = lookup(var.vnic_types, each.key, "normal") + profile = lookup(var.vnic_profiles, each.key, "{}") } } @@ -49,9 +51,12 @@ resource "openstack_compute_instance_v2" "control" { } } - network { - port = openstack_networking_port_v2.control.id - access_network = true + dynamic "network" { + for_each = {for net in var.cluster_networks: net.network => net} + content { + port = openstack_networking_port_v2.control[network.key].id + access_network = length(var.cluster_networks) == 1 ? true : lookup(each.value, "access_network", false) + } } metadata = { diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf index 030b36e0b..13e62c4b8 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf @@ -9,13 +9,13 @@ module "login" { cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - cluster_net_id = data.openstack_networking_network_v2.cluster_net.id - cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet.id + cluster_net_id = data.openstack_networking_network_v2.cluster_net[var.cluster_networks[0].network].id + cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet[var.cluster_networks[0].network].id # can be set for group, defaults to top-level value: image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_type = 
lookup(each.value, "vnic_type", var.vnic_type) - vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) + #vnic_type = lookup(each.value, "vnic_type", var.vnic_type) + #vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) extra_volumes = lookup(each.value, "extra_volumes", {}) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf index d78e3a114..eb33fb42f 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf @@ -1,11 +1,16 @@ data "openstack_networking_network_v2" "cluster_net" { - name = var.cluster_net + + for_each = {for net in var.cluster_networks: net.network => net} + + name = each.value.network } data "openstack_networking_subnet_v2" "cluster_subnet" { - name = var.cluster_subnet + for_each = {for net in var.cluster_networks: net.network => net} + + name = each.value.subnet } data "openstack_networking_secgroup_v2" "login" { diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf index 8fda3f76b..5bb3ae529 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf @@ -9,14 +9,16 @@ variable "cluster_domain_suffix" { default = "internal" } -variable "cluster_net" { - type = string - description = "Name of existing cluster network" -} - -variable "cluster_subnet" { - type = string - description = "Name of existing cluster subnet" +variable "cluster_networks" { + type = list(map(string)) + description = <<-EOT + List of mappings defining networks. 
Mapping key/values: + network: Name of existing network + subnet: Name of existing subnet + access_network: Bool defining whether to use network for Ansible and + K3s. This network must be present on all nodes. + Defaults to true if only one network is specified. + EOT } variable "key_pair" { @@ -124,16 +126,23 @@ variable "home_volume_type" { description = "Type of home volume, if not default type" } -variable "vnic_type" { - type = string - description = "Default VNIC type, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type" - default = "normal" +variable "vnic_types" { + type = map(string) + description = <<-EOT + Default VNIC types, keyed by network name. See https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type + If not given this defaults to the "normal" type. + EOT + default = {} } -variable "vnic_profile" { - type = string - description = "Default VNIC binding profile as json string, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#profile." - default = "{}" +variable "vnic_profiles" { + type = map(string) + description = <<-EOT + Default VNIC binding profiles, keyed by network name. Values are json strings. + See https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#profile. 
+ If not given this defaults to "{}" + EOT + default = {} } variable "login_security_groups" { From ad54226a18c9ecaad456c469c55d915f4cc8dc46 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 22 Jan 2025 16:28:31 +0000 Subject: [PATCH 2/7] support multiple networks for all nodes w/ inventory output --- .../tofu/compute.tf | 16 +++++----- .../tofu/control.tf | 2 +- .../tofu/inventory.tpl | 3 ++ .../tofu/login.tf | 9 +++--- .../tofu/node_group/network.tf | 14 +++++++++ .../tofu/node_group/nodes.tf | 30 ++++++++++++------- .../tofu/node_group/variables.tf | 29 ++++++++---------- 7 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf index 15b05fd14..dac8a7d5c 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf @@ -7,24 +7,24 @@ module "compute" { nodes = each.value.nodes flavor = each.value.flavor + # always taken from top-level value: cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - cluster_net_id = data.openstack_networking_network_v2.cluster_net[var.cluster_networks[0].network].id - cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet[var.cluster_networks[0].network].id - + key_pair = var.key_pair + environment_root = var.environment_root + # can be set for group, defaults to top-level value: + networks = lookup(each.value, "networks", var.cluster_networks) image_id = lookup(each.value, "image_id", var.cluster_image_id) - #vnic_type = lookup(each.value, "vnic_type", var.vnic_type) - #vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + vnic_profiles = lookup(each.value, "vnic_profiles", 
var.vnic_profiles) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) extra_volumes = lookup(each.value, "extra_volumes", {}) - compute_init_enable = lookup(each.value, "compute_init_enable", []) ignore_image_changes = lookup(each.value, "ignore_image_changes", false) - key_pair = var.key_pair - environment_root = var.environment_root + # computed k3s_token = local.k3s_token control_address = [for n in openstack_compute_instance_v2.control["control"].network: n.fixed_ip_v4 if n.access_network][0] security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf index ff5a3ec00..c497ed9fd 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf @@ -55,7 +55,7 @@ resource "openstack_compute_instance_v2" "control" { for_each = {for net in var.cluster_networks: net.network => net} content { port = openstack_networking_port_v2.control[network.key].id - access_network = length(var.cluster_networks) == 1 ? true : lookup(each.value, "access_network", false) + access_network = length(var.cluster_networks) == 1 ? 
true : lookup(network.value, "access_network", false) } } diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl index 3a6fe2485..436a10da9 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl @@ -9,6 +9,7 @@ control: ${ control.name }: ansible_host: ${[for n in control.network: n.fixed_ip_v4 if n.access_network][0]} instance_id: ${ control.id } + networks: ${jsonencode({for n in control.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} %{ endfor ~} vars: appliances_state_dir: ${state_dir} # NB needs to be set on group not host otherwise it is ignored in packer build! @@ -22,6 +23,7 @@ ${cluster_name}_${group_name}: ansible_host: ${node.access_ip_v4} instance_id: ${ node.id } image_id: ${ node.image_id } + networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} %{ endfor ~} %{ endfor ~} @@ -39,6 +41,7 @@ ${cluster_name}_${group_name}: ansible_host: ${node.access_ip_v4} instance_id: ${ node.id } image_id: ${ node.image_id } + networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} %{ endfor ~} %{ endfor ~} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf index 13e62c4b8..19b0a76ed 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf @@ -9,13 +9,12 @@ module "login" { cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - cluster_net_id = data.openstack_networking_network_v2.cluster_net[var.cluster_networks[0].network].id - cluster_subnet_id = 
data.openstack_networking_subnet_v2.cluster_subnet[var.cluster_networks[0].network].id - + # can be set for group, defaults to top-level value: + networks = lookup(each.value, "networks", var.cluster_networks) image_id = lookup(each.value, "image_id", var.cluster_image_id) - #vnic_type = lookup(each.value, "vnic_type", var.vnic_type) - #vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + vnic_profiles = lookup(each.value, "vnic_profiles", var.vnic_profiles) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) extra_volumes = lookup(each.value, "extra_volumes", {}) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf new file mode 100644 index 000000000..f5763b97b --- /dev/null +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf @@ -0,0 +1,14 @@ + +data "openstack_networking_network_v2" "network" { + + for_each = {for net in var.networks: net.network => net} + + name = each.value.network +} + +data "openstack_networking_subnet_v2" "subnet" { + + for_each = {for net in var.networks: net.network => net} + + name = each.value.subnet +} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf index 07b9dfe65..c16900818 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf @@ -33,21 +33,23 @@ resource "openstack_compute_volume_attach_v2" "compute" { resource "openstack_networking_port_v2" "compute" { - for_each = toset(var.nodes) + for_each = {for item in setproduct(var.nodes, var.networks): + 
"${item[0]}-${item[1].network}" => item[1] + } name = "${var.cluster_name}-${each.key}" - network_id = var.cluster_net_id + network_id = data.openstack_networking_network_v2.network[each.value.network].id admin_state_up = "true" fixed_ip { - subnet_id = var.cluster_subnet_id + subnet_id = data.openstack_networking_subnet_v2.subnet[each.value.network].id } security_group_ids = var.security_group_ids binding { - vnic_type = var.vnic_type - profile = var.vnic_profile + vnic_type = lookup(var.vnic_types, each.value.network, "normal") + profile = lookup(var.vnic_profiles, each.value.network, "{}") } } @@ -72,9 +74,12 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { } } - network { - port = openstack_networking_port_v2.compute[each.key].id - access_network = true + dynamic "network" { + for_each = {for net in var.networks: net.network => net} + content { + port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id + access_network = length(var.networks) == 1 ? true : lookup(network.value, "access_network", false) + } } metadata = merge( @@ -120,9 +125,12 @@ resource "openstack_compute_instance_v2" "compute" { } } - network { - port = openstack_networking_port_v2.compute[each.key].id - access_network = true + dynamic "network" { + for_each = {for net in var.networks: net.network => net} + content { + port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id + access_network = length(var.networks) == 1 ? 
true : lookup(network.value, "access_network", false) + } } metadata = merge( diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf index fbb2c73ce..4214753b2 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf @@ -17,14 +17,6 @@ variable "cluster_domain_suffix" { default = "invalid" } -variable "cluster_net_id" { - type = string -} - -variable "cluster_subnet_id" { - type = string -} - variable "key_pair" { type = string description = "Name of an existing keypair in OpenStack" @@ -40,16 +32,14 @@ variable "environment_root" { description = "Path to environment root, automatically set by activate script" } -variable "vnic_type" { - type = string - description = "VNIC type, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type" - default = "normal" +variable "vnic_types" { + type = map(string) + default = {} } -variable "vnic_profile" { - type = string - description = "VNIC binding profile as json string, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#profile." 
- default = "{}" +variable "vnic_profiles" { + type = map(string) + default = {} } variable "volume_backed_instances" { @@ -99,4 +89,9 @@ variable "ignore_image_changes" { type = bool description = "Whether to ignore changes to the image_id parameter" default = false -} \ No newline at end of file +} + +variable "networks" { + type = list(map(string)) + default = [] +} From 652edb1e075b97994e2900fec193c6e06c679d0e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 22 Jan 2025 16:43:14 +0000 Subject: [PATCH 3/7] simplify control node definition and access IP --- .../skeleton/{{cookiecutter.environment}}/tofu/compute.tf | 2 +- .../skeleton/{{cookiecutter.environment}}/tofu/control.tf | 7 +++---- .../{{cookiecutter.environment}}/tofu/inventory.tf | 2 +- .../{{cookiecutter.environment}}/tofu/inventory.tpl | 7 ++----- .../skeleton/{{cookiecutter.environment}}/tofu/login.tf | 2 +- .../{{cookiecutter.environment}}/tofu/node_group/nodes.tf | 1 + 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf index dac8a7d5c..a4fef5747 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf @@ -26,6 +26,6 @@ module "compute" { # computed k3s_token = local.k3s_token - control_address = [for n in openstack_compute_instance_v2.control["control"].network: n.fixed_ip_v4 if n.access_network][0] + control_address = openstack_compute_instance_v2.control.access_ip_v4 security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] } diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf index c497ed9fd..a262cba38 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf +++ 
b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf @@ -24,9 +24,7 @@ resource "openstack_networking_port_v2" "control" { resource "openstack_compute_instance_v2" "control" { - for_each = toset(["control"]) - - name = "${var.cluster_name}-${each.key}" + name = "${var.cluster_name}-control" image_id = var.cluster_image_id flavor_name = var.control_node_flavor key_pair = var.key_pair @@ -62,11 +60,12 @@ resource "openstack_compute_instance_v2" "control" { metadata = { environment_root = var.environment_root k3s_token = local.k3s_token + # TODO: set k3s_subnet from access_network } user_data = <<-EOF #cloud-config - fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} + fqdn: ${var.cluster_name}-control.${var.cluster_name}.${var.cluster_domain_suffix} bootcmd: %{for volume in local.control_volumes} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf index e2dd2d706..0af7eb30b 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf @@ -3,7 +3,7 @@ resource "local_file" "hosts" { { "cluster_name": var.cluster_name, "cluster_domain_suffix": var.cluster_domain_suffix, - "control_instances": openstack_compute_instance_v2.control + "control": openstack_compute_instance_v2.control "login_groups": module.login "compute_groups": module.compute "state_dir": var.state_dir diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl index 436a10da9..05ec27ef8 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl @@ -5,16 +5,13 @@ all: control: hosts: -%{ for control in control_instances ~} ${ control.name }: - ansible_host: ${[for n in 
control.network: n.fixed_ip_v4 if n.access_network][0]} - instance_id: ${ control.id } + ansible_host: ${control.access_ip_v4} + instance_id: ${control.id} networks: ${jsonencode({for n in control.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} -%{ endfor ~} vars: appliances_state_dir: ${state_dir} # NB needs to be set on group not host otherwise it is ignored in packer build! - %{ for group_name in keys(login_groups) ~} ${cluster_name}_${group_name}: hosts: diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf index 19b0a76ed..421a94a78 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf @@ -25,6 +25,6 @@ module "login" { key_pair = var.key_pair environment_root = var.environment_root k3s_token = local.k3s_token - control_address = [for n in openstack_compute_instance_v2.control["control"].network: n.fixed_ip_v4 if n.access_network][0] + control_address = openstack_compute_instance_v2.control.access_ip_v4 security_group_ids = [for o in data.openstack_networking_secgroup_v2.login: o.id] } diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf index c16900818..1cc12bd1f 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf @@ -138,6 +138,7 @@ resource "openstack_compute_instance_v2" "compute" { environment_root = var.environment_root k3s_token = var.k3s_token control_address = var.control_address + # TODO: set k3s_subnet from access_network }, {for e in var.compute_init_enable: e => true} ) From 619436d1d779dcc1f785de99baed6acbff0f2c23 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 23 Jan 2025 13:53:34 +0000 
Subject: [PATCH 4/7] add network docs --- docs/networks.md | 104 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 docs/networks.md diff --git a/docs/networks.md b/docs/networks.md new file mode 100644 index 000000000..fa05a33df --- /dev/null +++ b/docs/networks.md @@ -0,0 +1,104 @@ +# Networks + +The default OpenTofu configurations in the appliance do not provision networks, +subnets or associated infrastructure such as routers. The requirements are that: +1. At least one network exists. +2. At least one network spans all nodes, referred to as the "access network". +3. Only one subnet per network is attached to nodes. +4. One network on each node provides outbound internet access (either directly, + or via a proxy). + +Futhermore, it is recommended that the deploy host has an interface on the +access network. While it is possible to e.g. use a floating IP on a login node +as an SSH proxy to access the other nodes, this can create problems in recovering +the cluster if the login node is unavailable and can make Ansible problems harder +to debug. + +This page describes supported configurations and how to implement them using +the OpenTofu variables. These will normally be set in +`environments/site/tofu/terraform.tfvars` for the site base environment. If they +need to be overridden for specific environments, this can be done via an OpenTofu +module as discussed [here](./production.md). + +Note that if an OpenStack subnet has a gateway IP defined then nodes with ports +attached to that subnet will get a default route set via that gateway. + +## Single network +This is the simplest possible configuration. A single network and subnet is +used for all nodes. The subnet provides outbound internet access via the default +route defined by the subnet gateway (often an OpenStack router to an external +network). + +```terraform +cluster_networks = [ + { + network = "netA" + subnet = "subnetA" + } +] +... 
+``` + +## Multiple homogenous networks +This is similar to the above, except each node has multiple networks. Therefore +`access_network` must be explicitly set. Note that only one subnet must have +a gateway defined, else default routes via both subnets will be present causing +routing problems. It also shows the second network as using direct-type vNICs +for RDMA. + +```terraform +cluster_networks = [ + { + network = "netA" + subnet = "subnetA" + access_network = true + }, + { + network = "netB" + subnet = "subnetB" + }, +] + +vnic_types = { + netB = "direct" +} +... +``` + + +## Additional networks on some nodes + +This example shows how to override variables for specific node groups. In this +case a baremetal node group has a second network attached. As above, only a +single subnet can have a gateway IP. + +```terraform +cluster_networks = [ + { + network = "netA" + subnet = "subnetA" + } +] + +compute = { + baremetal = { + nodes = ["baremetal-0", "baremetal-1"] + networks = [ + { + network = "netA" + subnet = "subnetA" + access_network = true + }, + { + network = "netB" + subnet = "subnetB" + } + ] + vnic_types = { + netA = "baremetal" + netB = "baremetal" + ... + } +} +... +``` From 83a0a85491ea6e101256420514914735ab500497 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:21:03 +0000 Subject: [PATCH 5/7] Apply suggestions from code review Co-authored-by: Matt Anson --- docs/networks.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/networks.md b/docs/networks.md index fa05a33df..42202a7e6 100644 --- a/docs/networks.md +++ b/docs/networks.md @@ -5,7 +5,7 @@ subnets or associated infrastructure such as routers. The requirements are that: 1. At least one network exists. 2. At least one network spans all nodes, referred to as the "access network". 3. Only one subnet per network is attached to nodes. -4. 
One network on each node provides outbound internet access (either directly, +4. At least one network on each node provides outbound internet access (either directly, or via a proxy). Futhermore, it is recommended that the deploy host has an interface on the @@ -43,7 +43,7 @@ cluster_networks = [ This is similar to the above, except each node has multiple networks. Therefore `access_network` must be explicitly set. Note that only one subnet must have a gateway defined, else default routes via both subnets will be present causing -routing problems. It also shows the second network as using direct-type vNICs +routing problems. It also shows the second network (netB) using direct-type vNICs for RDMA. ```terraform @@ -81,6 +81,9 @@ cluster_networks = [ ] compute = { + general = { + nodes = ["general-0", "general-1"] + } baremetal = { nodes = ["baremetal-0", "baremetal-1"] networks = [ From 040c2bc8dd01eaf71b39f9e044f7764ef4dbadd8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Jan 2025 12:10:07 +0000 Subject: [PATCH 6/7] use first network as access network and support extra_networks only --- docs/networks.md | 29 ++++++++----------- .../tofu/compute.tf | 4 ++- .../tofu/login.tf | 5 +++- .../tofu/node_group/nodes.tf | 4 +-- .../tofu/variables.tf | 5 ++-- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/docs/networks.md b/docs/networks.md index 42202a7e6..09e3bc5a6 100644 --- a/docs/networks.md +++ b/docs/networks.md @@ -1,12 +1,12 @@ -# Networks +# Networking The default OpenTofu configurations in the appliance do not provision networks, subnets or associated infrastructure such as routers. The requirements are that: 1. At least one network exists. -2. At least one network spans all nodes, referred to as the "access network". +2. The first network defined spans all nodes, referred to as the "access network". 3. Only one subnet per network is attached to nodes. -4. 
At least one network on each node provides outbound internet access (either directly, - or via a proxy). +4. At least one network on each node provides outbound internet access (either +directly, or via a proxy). Futhermore, it is recommended that the deploy host has an interface on the access network. While it is possible to e.g. use a floating IP on a login node @@ -40,18 +40,17 @@ cluster_networks = [ ``` ## Multiple homogenous networks -This is similar to the above, except each node has multiple networks. Therefore -`access_network` must be explicitly set. Note that only one subnet must have -a gateway defined, else default routes via both subnets will be present causing -routing problems. It also shows the second network (netB) using direct-type vNICs -for RDMA. +This is similar to the above, except each node has multiple networks. The first +network, "netA" is the access network. Note that only one subnet must have a +gateway defined, else default routes via both subnets will be present causing +routing problems. It also shows the second network (netB) using direct-type +vNICs for RDMA. ```terraform cluster_networks = [ { network = "netA" subnet = "subnetA" - access_network = true }, { network = "netB" @@ -68,7 +67,7 @@ vnic_types = { ## Additional networks on some nodes -This example shows how to override variables for specific node groups. In this +This example shows how to modify variables for specific node groups. In this case a baremetal node group has a second network attached. As above, only a single subnet can have a gateway IP. @@ -86,12 +85,7 @@ compute = { } baremetal = { nodes = ["baremetal-0", "baremetal-1"] - networks = [ - { - network = "netA" - subnet = "subnetA" - access_network = true - }, + extra_networks = [ { network = "netB" subnet = "subnetB" @@ -101,6 +95,7 @@ compute = { netA = "baremetal" netB = "baremetal" ... + } } } ... 
diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf index a4fef5747..ca48d2aac 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf @@ -14,12 +14,14 @@ module "compute" { environment_root = var.environment_root # can be set for group, defaults to top-level value: - networks = lookup(each.value, "networks", var.cluster_networks) image_id = lookup(each.value, "image_id", var.cluster_image_id) vnic_types = lookup(each.value, "vnic_types", var.vnic_types) vnic_profiles = lookup(each.value, "vnic_profiles", var.vnic_profiles) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + + # optionally set for group + networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) extra_volumes = lookup(each.value, "extra_volumes", {}) compute_init_enable = lookup(each.value, "compute_init_enable", []) ignore_image_changes = lookup(each.value, "ignore_image_changes", false) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf index 421a94a78..8f51b6748 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf @@ -11,14 +11,17 @@ module "login" { cluster_domain_suffix = var.cluster_domain_suffix # can be set for group, defaults to top-level value: - networks = lookup(each.value, "networks", var.cluster_networks) image_id = lookup(each.value, "image_id", var.cluster_image_id) vnic_types = lookup(each.value, "vnic_types", var.vnic_types) vnic_profiles = lookup(each.value, "vnic_profiles", var.vnic_profiles) volume_backed_instances = lookup(each.value, 
"volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + + # optionally set for group + networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) extra_volumes = lookup(each.value, "extra_volumes", {}) + # can't be set for login compute_init_enable = [] ignore_image_changes = false diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf index 1cc12bd1f..09d39354c 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf @@ -78,7 +78,7 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { for_each = {for net in var.networks: net.network => net} content { port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id - access_network = length(var.networks) == 1 ? true : lookup(network.value, "access_network", false) + access_network = network.key == var.networks[0].network } } @@ -129,7 +129,7 @@ resource "openstack_compute_instance_v2" "compute" { for_each = {for net in var.networks: net.network => net} content { port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id - access_network = length(var.networks) == 1 ? true : lookup(network.value, "access_network", false) + access_network = network.key == var.networks[0].network } } diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf index 5bb3ae529..0fbf95541 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf @@ -15,9 +15,6 @@ variable "cluster_networks" { List of mappings defining networks. 
Mapping key/values: network: Name of existing network subnet: Name of existing subnet - access_network: Bool defining whether to use network for Ansible and - K3s. This network must be present on all nodes. - Defaults to true if only one network is specified. EOT } @@ -46,6 +43,7 @@ variable "login" { flavor: String flavor name Optional: image_id: Overrides variable cluster_image_id + extra_networks: List of mappings in same format as cluster_networks vnic_type: Overrides variable vnic_type vnic_profile: Overrides variable vnic_profile volume_backed_instances: Overrides variable volume_backed_instances @@ -77,6 +75,7 @@ variable "compute" { flavor: String flavor name Optional: image_id: Overrides variable cluster_image_id + extra_networks: List of mappings in same format as cluster_networks vnic_type: Overrides variable vnic_type vnic_profile: Overrides variable vnic_profile compute_init_enable: Toggles compute-init rebuild (see compute-init role docs) From cd423b50a9d2fd986ab7c91ac06f24202c2cebb4 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Jan 2025 14:09:07 +0000 Subject: [PATCH 7/7] fixup control node for access network changes --- .../skeleton/{{cookiecutter.environment}}/tofu/control.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf index a262cba38..7d2f86bcb 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf @@ -53,7 +53,7 @@ resource "openstack_compute_instance_v2" "control" { for_each = {for net in var.cluster_networks: net.network => net} content { port = openstack_networking_port_v2.control[network.key].id - access_network = length(var.cluster_networks) == 1 ? true : lookup(network.value, "access_network", false) + access_network = network.key == var.cluster_networks[0].network } }