Skip to content

Commit 40665c0

Browse files
bertiethorpe and sjpb authored
Optionally ignore image changes in TF (#545)
* add lifecycle ignore changes to skeleton cookiecutter config
* add docs to experimental/compute-init
* refine docs
* account for transitory down state in ci/check_slurm.yml
* Update environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf

Co-authored-by: Steve Brasier <[email protected]>

---------

Co-authored-by: Steve Brasier <[email protected]>
1 parent 14cfa95 commit 40665c0

File tree

8 files changed

+77
-4
lines changed

8 files changed

+77
-4
lines changed

ansible/ci/check_slurm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
shell: 'sinfo --noheader --format="%N %P %a %l %D %t" | sort' # using --format ensures we control whitespace: node_name,partition,partition_state,max_jobtime,num_nodes,node_state
77
register: sinfo
88
changed_when: false
9-
until: not ("boot" in sinfo.stdout or "idle*" in sinfo.stdout)
9+
until: not ("boot" in sinfo.stdout or "idle*" in sinfo.stdout or "down" in sinfo.stdout)
1010
retries: 10
1111
delay: 5
1212
- name: Check nodes have expected slurm state

docs/experimental/compute-init.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
See the role README.md
44

5+
# Changes to image / tofu state
6+
7+
When a compute group has the `ignore_image_changes` parameter set to true,
8+
changes to the `image_id` parameter (which defaults to `cluster_image_id`) are
9+
ignored by OpenTofu.
10+
11+
Regardless of whether `ignore_image_changes` is set, OpenTofu templates out the
12+
`image_id` into the Ansible inventory for each compute node. The `compute_init`
13+
role templates out hostvars to the control node, which means the "target" image
14+
ID is then available on the control node. Subsequent work will use this to
15+
rebuild the node via Slurm.
16+
517
# CI workflow
618

719
The compute node rebuild is tested in CI after the tests for rebuilding the

environments/.stackhpc/tofu/main.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ module "cluster" {
8181
nodes: ["compute-0", "compute-1"]
8282
flavor: var.other_node_flavor
8383
compute_init_enable: ["compute", "etc_hosts", "nfs", "basic_users", "eessi"]
84+
# ignore_image_changes: true
8485
}
8586
# Example of how to add another partition:
8687
# extra: {

environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ module "compute" {
2121
extra_volumes = lookup(each.value, "extra_volumes", {})
2222

2323
compute_init_enable = lookup(each.value, "compute_init_enable", [])
24+
ignore_image_changes = lookup(each.value, "ignore_image_changes", false)
2425

2526
key_pair = var.key_pair
2627
environment_root = var.environment_root

environments/skeleton/{{cookiecutter.environment}}/tofu/compute/nodes.tf

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ locals {
99
# this is a mapping with
1010
# keys "compute-0-vol-a", "compute-0-vol-b" ...
1111
# values which are a mapping e.g. {"node"="compute-0", "volume"="vol-a"}
12+
13+
# Workaround for lifecycle meta-argument only taking static values
14+
compute_instances = var.ignore_image_changes ? openstack_compute_instance_v2.compute_fixed_image : openstack_compute_instance_v2.compute
1215
}
1316

1417
resource "openstack_blockstorage_volume_v3" "compute" {
@@ -24,7 +27,7 @@ resource "openstack_compute_volume_attach_v2" "compute" {
2427

2528
for_each = local.all_compute_volumes
2629

27-
instance_id = openstack_compute_instance_v2.compute["${each.value.node}"].id
30+
instance_id = local.compute_instances["${each.value.node}"].id
2831
volume_id = openstack_blockstorage_volume_v3.compute["${each.key}"].id
2932
}
3033

@@ -48,9 +51,57 @@ resource "openstack_networking_port_v2" "compute" {
4851
}
4952
}
5053

54+
resource "openstack_compute_instance_v2" "compute_fixed_image" {
55+
56+
for_each = var.ignore_image_changes ? toset(var.nodes) : []
57+
58+
name = "${var.cluster_name}-${each.key}"
59+
image_id = var.image_id
60+
flavor_name = var.flavor
61+
key_pair = var.key_pair
62+
63+
dynamic "block_device" {
64+
for_each = var.volume_backed_instances ? [1]: []
65+
content {
66+
uuid = var.image_id
67+
source_type = "image"
68+
destination_type = "volume"
69+
volume_size = var.root_volume_size
70+
boot_index = 0
71+
delete_on_termination = true
72+
}
73+
}
74+
75+
network {
76+
port = openstack_networking_port_v2.compute[each.key].id
77+
access_network = true
78+
}
79+
80+
metadata = merge(
81+
{
82+
environment_root = var.environment_root
83+
k3s_token = var.k3s_token
84+
control_address = var.control_address
85+
},
86+
{for e in var.compute_init_enable: e => true}
87+
)
88+
89+
user_data = <<-EOF
90+
#cloud-config
91+
fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}
92+
EOF
93+
94+
lifecycle {
95+
ignore_changes = [
96+
image_id,
97+
]
98+
}
99+
100+
}
101+
51102
resource "openstack_compute_instance_v2" "compute" {
52103

53-
for_each = toset(var.nodes)
104+
for_each = var.ignore_image_changes ? [] : toset(var.nodes)
54105

55106
name = "${var.cluster_name}-${each.key}"
56107
image_id = var.image_id
@@ -91,5 +142,5 @@ resource "openstack_compute_instance_v2" "compute" {
91142
}
92143

93144
output "compute_instances" {
94-
value = openstack_compute_instance_v2.compute
145+
value = local.compute_instances
95146
}

environments/skeleton/{{cookiecutter.environment}}/tofu/compute/variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,10 @@ variable "compute_init_enable" {
9393
type = list(string)
9494
description = "Groups to activate for ansible-init compute rebuilds"
9595
default = []
96+
}
97+
98+
variable "ignore_image_changes" {
99+
type = bool
100+
description = "Whether to ignore changes to the image_id parameter"
101+
default = false
96102
}

environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ ${cluster_name}_${group_name}:
2828
${ node.name }:
2929
ansible_host: ${node.access_ip_v4}
3030
instance_id: ${ node.id }
31+
image_id: ${ node.image_id }
3132
%{ endfor ~}
3233
%{ endfor ~}
3334

environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ variable "compute" {
5353
vnic_type: Overrides variable vnic_type
5454
vnic_profile: Overrides variable vnic_profile
5555
compute_init_enable: Toggles compute-init rebuild (see compute-init role docs)
56+
ignore_image_changes: Ignore changes to the image_id parameter (see docs/experimental/compute-init.md)
5657
volume_backed_instances: Overrides variable volume_backed_instances
5758
root_volume_size: Overrides variable root_volume_size
5859
extra_volumes: Mapping defining additional volumes to create and attach

0 commit comments

Comments
 (0)