Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ansible/bootstrap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@
become: yes
tags: k3s
tasks:
- ansible.builtin.include_role:
# Include the k3s role's install.yml tasks, but only for hosts that are
# members of the `builder` inventory group.
- name: Install k3s
when: "'builder' in group_names"
ansible.builtin.include_role:
name: k3s
tasks_from: install.yml
22 changes: 22 additions & 0 deletions ansible/extras.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# Run the k3s role's server-runtime.yml tasks on every k3s_server host that
# is not a member of the `builder` group.
- hosts: k3s_server
  become: true
  tags: k3s
  tasks:
    - name: Start k3s server
      when: "'builder' not in group_names"
      ansible.builtin.include_role:
        name: k3s
        tasks_from: server-runtime.yml

# Run the k3s role's agent-runtime.yml tasks on every k3s_agent host that is
# not a member of the `builder` group.
- hosts: k3s_agent
  become: true
  tags: k3s
  tasks:
    - name: Start k3s agents
      when: "'builder' not in group_names"
      # The token is read from the `_k3s_token_output` result registered on
      # the first k3s_server host, falling back to an empty string when that
      # result has no stdout.
      vars: # set outside of role to allow compute init to define own value
        k3s_bootstrap_token: "{{ hostvars[groups['k3s_server'] | first]._k3s_token_output.stdout | default('') }}"
      ansible.builtin.include_role:
        name: k3s
        tasks_from: agent-runtime.yml

- hosts: basic_users:!builder
become: yes
tags:
Expand Down
3 changes: 3 additions & 0 deletions ansible/roles/k3s/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ k3s_version: "v1.31.0+k3s1"
k3s_selinux_release: v1.6.latest.1
k3s_selinux_rpm_version: 1.6-1
k3s_helm_version: v3.11.0
# Token written to the agent env file as K3S_TOKEN. The placeholder here is
# overridden by the play that includes agent-runtime.yml.
k3s_bootstrap_token: "{{ None }}" # ansible managed
# Value passed to `k3s token create --ttl` when a new bootstrap token is made.
k3s_bootstrap_token_expiry: 10m
# Host used to build the agent's K3S_URL (https://<name>:6443). The
# placeholder here is overridden by an inventory variable.
k3s_server_name: "{{ None }}" # ansible managed
44 changes: 0 additions & 44 deletions ansible/roles/k3s/files/start_k3s.yml

This file was deleted.

32 changes: 32 additions & 0 deletions ansible/roles/k3s/tasks/agent-runtime.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---

# Render the systemd environment file for the k3s agent. The task is skipped
# when no bootstrap token is set, leaving any existing env file untouched.
# The result is registered so the restart task can tell whether it changed.
- name: Template k3s agent env file
  when: k3s_bootstrap_token != ""
  ansible.builtin.template:
    src: k3s-agent.service.env.j2
    dest: /etc/systemd/system/k3s-agent.service.env
    owner: root
    group: root
    # The rendered file contains K3S_TOKEN, so keep it readable by root only
    # rather than inheriting umask-derived permissions.
    mode: "0600"
  register: _k3s_agent_token_result

# Create the directory that holds the node password file.
- name: Ensure password directory exists
  ansible.builtin.file:
    path: /etc/rancher/node
    state: directory
    owner: root
    group: root
    # Directories need the execute bit to be entered; the previous 0640 had
    # none. Quoted so the value is always read as an octal string.
    mode: "0750"

# Write the node password from the vault variable to the node password file.
- name: Write node password
  no_log: true # the task arguments contain a secret
  ansible.builtin.copy:
    dest: /etc/rancher/node/password
    content: "{{ vault_k3s_node_password }}"
    owner: root
    group: root
    mode: "0640" # normal k3s install is 644 but that doesn't feel right

# Restart (and enable) the k3s-agent unit, reloading systemd first, but only
# when the env file template task reported a change.
- name: Start/restart k3s agent
  ansible.builtin.systemd:
    name: k3s-agent
    enabled: true
    state: restarted
    daemon_reload: true
  when: _k3s_agent_token_result.changed
5 changes: 0 additions & 5 deletions ansible/roles/k3s/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,3 @@
ansible.builtin.lineinfile:
path: /etc/environment
line: "KUBECONFIG=/etc/rancher/k3s/k3s.yaml"

- name: Install ansible-init playbook for k3s agent or server activation
copy:
src: start_k3s.yml
dest: /etc/ansible-init/playbooks/0-start-k3s.yml
31 changes: 31 additions & 0 deletions ansible/roles/k3s/tasks/server-runtime.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---

# Render the systemd environment file for the k3s server unit and record
# whether it changed so the next task can decide on a daemon reload.
- name: Template k3s env file
  register: _k3s_env_file_status
  ansible.builtin.template:
    src: k3s.service.env.j2
    dest: /etc/systemd/system/k3s.service.env

# Make sure the k3s unit is enabled and running. systemd is reloaded only
# when the env file was rewritten by the preceding task.
- name: Start k3s server
  ansible.builtin.systemd:
    name: k3s
    enabled: true
    state: started
    daemon_reload: "{{ _k3s_env_file_status.changed }}"

# List the nodes whose status line contains the whole word "Ready"; the
# number of output lines is compared against the k3s group size afterwards.
# NOTE: there is a possible race here, because there is a delay between an
# agent disconnecting and it being registered as down. It probably won't be
# hit in general use.
- name: Check if k3s agents are connected
  ansible.builtin.shell:
    cmd: kubectl get nodes --no-headers | grep -w Ready
  register: _k3s_connected_nodes
  changed_when: false # read-only query, so never report a change
  ignore_errors: true # carry on even if every retry fails
  retries: 5 # there may be a delay before the server reconnects to itself
  delay: 10
  until: not _k3s_connected_nodes.failed

# Create a short-lived bootstrap token whenever the number of Ready nodes
# differs from the size of the k3s inventory group. The command output is
# registered so other plays can read the token from this host's results.
- name: Generate new bootstrap token
  no_log: true # the command output is a secret
  when: (_k3s_connected_nodes.stdout_lines | length) != (groups['k3s'] | length)
  # No shell features (pipes, redirects) are needed, so run the binary
  # directly rather than through a shell.
  ansible.builtin.command:
    cmd: "k3s token create --ttl {{ k3s_bootstrap_token_expiry }}"
  register: _k3s_token_output
3 changes: 3 additions & 0 deletions ansible/roles/k3s/templates/k3s-agent.service.env.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
K3S_NODE_IP={{ ansible_host }}
K3S_TOKEN={{ k3s_bootstrap_token }}
K3S_URL=https://{{ k3s_server_name }}:6443
1 change: 1 addition & 0 deletions ansible/roles/k3s/templates/k3s.service.env.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
K3S_NODE_IP={{ ansible_host }}
2 changes: 1 addition & 1 deletion ansible/roles/passwords/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ slurm_appliance_secrets:
vault_openhpc_mungekey: "{{ secrets_openhpc_mungekey | default(vault_openhpc_mungekey | default(secrets_openhpc_mungekey_default)) }}"
vault_freeipa_ds_password: "{{ vault_freeipa_ds_password | default(lookup('password', '/dev/null')) }}"
vault_freeipa_admin_password: "{{ vault_freeipa_admin_password | default(lookup('password', '/dev/null')) }}"
vault_k3s_token: "{{ vault_k3s_token | default(lookup('ansible.builtin.password', '/dev/null', length=64)) }}"
vault_k3s_node_password: "{{ vault_k3s_node_password | default(lookup('ansible.builtin.password', '/dev/null', length=64)) }}"
vault_pulp_admin_password: "{{ vault_pulp_admin_password | default(lookup('password', '/dev/null', chars=['ascii_letters', 'digits'])) }}"
vault_demo_user_password: "{{ vault_demo_user_password | default(lookup('password', '/dev/null')) }}"

Expand Down

This file was deleted.

4 changes: 2 additions & 2 deletions environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"cluster_image": {
"RL8": "openhpc-RL8-250211-1540-a0b4a57e",
"RL9": "openhpc-RL9-250211-1540-a0b4a57e"
"RL8": "openhpc-RL8-250221-0904-e4ff694e",
"RL9": "openhpc-RL9-250221-0904-e4ff694e"
}
}
3 changes: 0 additions & 3 deletions environments/.stackhpc/tofu/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@ module "cluster" {
key_pair = "slurm-app-ci"
cluster_image_id = data.openstack_images_image_v2.cluster.id
control_node_flavor = var.control_node_flavor
# have to override default, as unusually the actual module path and secrets
# are not in the same environment for stackhpc
inventory_secrets_path = "${path.module}/../inventory/group_vars/all/secrets.yml"

login = {
login: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ opensearch_address: "127.0.0.1"
prometheus_address: "{{ hostvars[groups['prometheus'].0].api_address }}"
openondemand_address: "{{ hostvars[groups['openondemand'].0].api_address if groups['openondemand'] | count > 0 else '' }}"
grafana_address: "{{ hostvars[groups['grafana'].0].api_address }}"
k3s_server_name: "{{ hostvars[groups['k3s_server'] | first].ansible_host }}"

############################# bootstrap: local user configuration #########################

Expand Down
10 changes: 9 additions & 1 deletion environments/common/inventory/groups
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,16 @@ freeipa_client
[compute_init]
# EXPERIMENTAL: Compute hosts on which to enable joining the cluster on boot

[k3s]
[k3s:children]
# Hosts to run k3s server/agent
k3s_server
k3s_agent

[k3s_server]
# Hosts to run the k3s server (should be a single node only, i.e. the control node)

[k3s_agent]
# Hosts to run k3s agent

[k9s]
# Hosts to install k9s on
Expand Down
11 changes: 8 additions & 3 deletions environments/common/layouts/everything
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,14 @@ cluster
[compute_init]
# EXPERIMENTAL: Compute hosts on which to enable joining the cluster on boot

[k3s:children]
# Hosts to run k3s server/agent
openhpc
[k3s_server:children]
# Hosts to run the k3s server (should be a single node only, i.e. the control node)
control

[k3s_agent:children]
# Hosts to run k3s agent
compute
login

[k9s:children]
# Hosts to install k9s on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ module "compute" {
availability_zone = lookup(each.value, "availability_zone", "nova")

# computed
k3s_token = local.k3s_token
# not using openstack_compute_instance_v2.control.access_ip_v4 to avoid
# updates to node metadata on deletion/recreation of the control node:
control_address = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ resource "openstack_compute_instance_v2" "control" {

metadata = {
environment_root = var.environment_root
k3s_token = local.k3s_token
access_ip = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0]
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
data "external" "inventory_secrets" {
program = ["${path.module}/read-inventory-secrets.py"]

query = {
path = var.inventory_secrets_path == "" ? "${path.module}/../inventory/group_vars/all/secrets.yml" : var.inventory_secrets_path
}
}

data "external" "baremetal_nodes" {
# returns an empty map if cannot list baremetal nodes
program = ["${path.module}/baremetal-node-list.py"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ module "login" {
ignore_image_changes = false

# computed
k3s_token = local.k3s_token
# not using openstack_compute_instance_v2.control.access_ip_v4 to avoid
# updates to node metadata on deletion/recreation of the control node:
control_address = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" {
metadata = merge(
{
environment_root = var.environment_root
k3s_token = var.k3s_token
control_address = var.control_address
access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0]
},
Expand Down Expand Up @@ -139,7 +138,6 @@ resource "openstack_compute_instance_v2" "compute" {
metadata = merge(
{
environment_root = var.environment_root
k3s_token = var.k3s_token
control_address = var.control_address
access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,6 @@ variable "security_group_ids" {
type = list
}

variable "k3s_token" {
type = string
}

variable "control_address" {
description = "Name/address of control node"
type = string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,3 @@ variable "root_volume_size" {
type = number
default = 40
}

variable "inventory_secrets_path" {
description = "Path to inventory secrets.yml file. Default is standard cookiecutter location."
type = string
default = ""
}

locals {
k3s_token = data.external.inventory_secrets.result["vault_k3s_token"]
}
Loading