Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions docs/experimental/compute-init.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@

See the role README.md

# Changes to image / tofu state

When `ignore_image_changes` is set to `true` for a compute group partition
within tofu/main.tf, and the image is updated:

Subsequent changes to the tofu `cluster_image` variable for that compute group
do not result in any change being made by `tofu plan`/`tofu apply`. This is
implemented using the lifecycle meta-argument `ignore_changes` on the compute
instance resource.

As part of compute-init, the image_id is templated out to hostvars so that
ansible will have image_id for each compute node.

WIP: Attempts to change the cluster image from tofu will then act as a target
image for compute-init to read, with the node rebuilt to that image via Slurm
control.

# CI workflow

The compute node rebuild is tested in CI after the tests for rebuilding the
Expand Down
1 change: 1 addition & 0 deletions environments/.stackhpc/tofu/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ module "cluster" {
nodes: ["compute-0", "compute-1"]
flavor: var.other_node_flavor
compute_init_enable: ["compute", "etc_hosts", "nfs", "basic_users", "eessi"]
# ignore_image_changes: true
}
# Example of how to add another partition:
# extra: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ module "compute" {
extra_volumes = lookup(each.value, "extra_volumes", {})

compute_init_enable = lookup(each.value, "compute_init_enable", [])
ignore_image_changes = lookup(each.value, "ignore_image_changes", false)

key_pair = var.key_pair
environment_root = var.environment_root
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ locals {
# this is a mapping with
# keys "compute-0-vol-a", "compute-0-vol-b" ...
# values which are a mapping e.g. {"node"="compute-0", "volume"="vol-a"}

# Workaround for lifecycle meta-argument only taking static values
compute_instances = var.ignore_image_changes ? openstack_compute_instance_v2.compute_fixed_image : openstack_compute_instance_v2.compute
}

resource "openstack_blockstorage_volume_v3" "compute" {
Expand All @@ -24,7 +27,7 @@ resource "openstack_compute_volume_attach_v2" "compute" {

for_each = local.all_compute_volumes

instance_id = openstack_compute_instance_v2.compute["${each.value.node}"].id
instance_id = local.compute_instances["${each.value.node}"].id
volume_id = openstack_blockstorage_volume_v3.compute["${each.key}"].id
}

Expand All @@ -48,9 +51,57 @@ resource "openstack_networking_port_v2" "compute" {
}
}

# Compute instances whose image is pinned after creation.
#
# This resource is only instantiated when var.ignore_image_changes is true;
# when the flag is false its for_each is empty and no instances are created
# here. The `lifecycle` block below makes tofu ignore later changes to
# `image_id` for these instances.
resource "openstack_compute_instance_v2" "compute_fixed_image" {

  # One instance per node name when the flag is set, otherwise none.
  for_each = var.ignore_image_changes ? toset(var.nodes) : []

  name        = "${var.cluster_name}-${each.key}"
  flavor_name = var.flavor
  key_pair    = var.key_pair
  image_id    = var.image_id

  # Attach the pre-created port for this node (keyed by node name).
  network {
    access_network = true
    port           = openstack_networking_port_v2.compute[each.key].id
  }

  # Root volume built from the image; only emitted when
  # var.volume_backed_instances is true.
  # NOTE(review): `uuid` below also references var.image_id but is not listed
  # in `ignore_changes` - confirm whether image changes are still planned for
  # volume-backed instances.
  dynamic "block_device" {
    for_each = var.volume_backed_instances ? [1] : []
    content {
      boot_index            = 0
      source_type           = "image"
      uuid                  = var.image_id
      destination_type      = "volume"
      volume_size           = var.root_volume_size
      delete_on_termination = true
    }
  }

  # Fixed metadata keys first, then one `<name> = true` entry per element of
  # var.compute_init_enable. Argument order matters to merge(): later maps
  # take precedence on duplicate keys.
  metadata = merge(
    {
      environment_root = var.environment_root
      k3s_token        = var.k3s_token
      control_address  = var.control_address
    },
    { for e in var.compute_init_enable : e => true }
  )

  # cloud-init config setting the node's FQDN.
  user_data = <<-EOF
    #cloud-config
    fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}
  EOF

  # Do not plan/apply changes to image_id once the instance exists.
  lifecycle {
    ignore_changes = [image_id]
  }

}

resource "openstack_compute_instance_v2" "compute" {

for_each = toset(var.nodes)
for_each = var.ignore_image_changes ? [] : toset(var.nodes)

name = "${var.cluster_name}-${each.key}"
image_id = var.image_id
Expand Down Expand Up @@ -91,5 +142,5 @@ resource "openstack_compute_instance_v2" "compute" {
}

# Instances created by this module. Uses local.compute_instances, which
# selects between the `compute_fixed_image` and `compute` instance resources
# based on var.ignore_image_changes, so exactly one `value` is defined.
output "compute_instances" {
  value = local.compute_instances
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,10 @@ variable "compute_init_enable" {
type = list(string)
description = "Groups to activate for ansible-init compute rebuilds"
default = []
}

# Opt-in flag: when true, compute instances are created with a lifecycle rule
# that ignores changes to their image_id. Defaults to false.
variable "ignore_image_changes" {
  description = "Whether to prevent tofu from applying changes to the cluster image"
  type        = bool
  default     = false
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ ${cluster_name}_${group_name}:
${ node.name }:
ansible_host: ${node.access_ip_v4}
instance_id: ${ node.id }
image_id: ${ node.image_id }
%{ endfor ~}
%{ endfor ~}

Expand Down
Loading