diff --git a/ansible/.gitignore b/ansible/.gitignore index 20ff5d7b5..adece9a3f 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -84,3 +84,5 @@ roles/* !roles/pytools/** !roles/rebuild/ !roles/rebuild/** +!roles/gateway/ +!roles/gateway/** diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index e5de38edf..5e515614a 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -79,7 +79,7 @@ - import_playbook: extras.yml # TODO: is this the right place? -- name: Install compute_init script +- name: Install compute_init playbook hosts: compute_init tags: compute_init # tagged to allow running on cluster instances for dev become: yes @@ -88,6 +88,15 @@ name: compute_init tasks_from: install.yml +- name: Install gateway playbook + hosts: gateway + tags: gateway + become: yes + gather_facts: no + tasks: + - include_role: + name: gateway + - hosts: builder become: yes gather_facts: yes diff --git a/ansible/roles/gateway/README.md b/ansible/roles/gateway/README.md new file mode 100644 index 000000000..3b8064175 --- /dev/null +++ b/ansible/roles/gateway/README.md @@ -0,0 +1,30 @@ +# gateway + +Ensure a single default route via a specified address exists on boot. + +**NB:** This role uses `linux-ansible-init` and is not run by the +`ansible/site.yml` playbook. + +## Role variables + +**NB:** This role has no Ansible variables. Setting the OpenTofu variable +`gateway_ip` to an IPv4 address will modify default routes as necessary to give +the instance a single default route via that address. The default route will +use the interface which has a CIDR including the gateway address. + +Note that: +- If the correct default route already exists, no changes are made. +- If a default route exists on a different interface, that route will be deleted. +- If a default route exists on the same interface but using a different address, + an assert will be raised to fail the `ansible-init` service - see logs using + `journalctl -xue ansible-init`. 
+ +See [docs/networks.md](../../../docs/networks.md) for further discussion. + +## Requirements + +The image must include both this role and the `linux-ansible-init` role. This +is the case for StackHPC-built images. For custom images use one of the following +configurations during Packer build: +- Add `builder` into the `gateway` group in `environments/$ENV/inventory/groups` +- Add `gateway` to the `inventory_groups` Packer variable diff --git a/ansible/roles/gateway/files/gateway-init.yml b/ansible/roles/gateway/files/gateway-init.yml new file mode 100644 index 000000000..72edcb3a0 --- /dev/null +++ b/ansible/roles/gateway/files/gateway-init.yml @@ -0,0 +1,76 @@ +- hosts: localhost + #become: true + gather_facts: false + vars: + os_metadata: "{{ lookup('url', 'http://169.254.169.254/openstack/latest/meta_data.json') | from_json }}" + gateway_ip: "{{ os_metadata.meta.gateway_ip | default('') }}" + access_ip: "{{ os_metadata.meta.access_ip | default('') }}" + tasks: + - name: Read nmcli device info + command: nmcli --get GENERAL.DEVICE,GENERAL.CONNECTION,IP4.ADDRESS,IP4.GATEWAY device show + register: _nmcli_device_raw + changed_when: false + + - name: Set fact for nmcli devices + set_fact: + # creates a dict with keys as per zip arg below, values might be '' + nmcli_devices: >- + {{ + _nmcli_device_raw.stdout_lines | + batch(5, '') | + map('zip', ['device', 'connection', 'ip4_address', 'ip4_gateway']) | + map('map', 'reverse') | map('community.general.dict') + }} + # batch=5 because per device have 4x lines + blank line between devices + # batch takes default '' because last devices doesn't have trailing blank line + + - name: Examine whether device address contains gateway_ip + set_fact: + device_is_gateway_device: "{{ nmcli_devices | map(attribute='ip4_address') | map('ansible.utils.network_in_network', gateway_ip) }}" + # list of bools - false if gateway_ip == '' + + - name: Get name of connection containing gateway_ip + # might be empty string + set_fact: + 
+ gateway_ip_connection: >- + {{ nmcli_devices | map(attribute='connection') | + zip(device_is_gateway_device) | selectattr('1') | + map(attribute=0) | list | first | default ('') }} + + - name: Show debug info + debug: + msg: "gateway_ip={{ gateway_ip }} access_ip={{ access_ip }} gateway_ip_connection={{ gateway_ip_connection }}" + + - name: Error if device has a gateway which is not the desired one + assert: + that: item.ip4_gateway == gateway_ip + fail_msg: "Device {{ item | to_nice_json }} has gateway: cannot apply gateway {{ gateway_ip }}" + when: + - item.connection == gateway_ip_connection + - item.ip4_gateway != '' + - item.ip4_gateway != gateway_ip + loop: "{{ nmcli_devices }}" + + - name: Remove undesired gateways + shell: | + nmcli connection modify '{{ item.connection }}' \ + ipv4.never-default yes \ + ipv6.never-default yes + nmcli connection up '{{ item.connection }}' + when: + - gateway_ip != '' + - item.ip4_gateway != '' + - item.connection != gateway_ip_connection + loop: "{{ nmcli_devices }}" + + - name: Add desired gateways + shell: | + nmcli connection modify '{{ item.connection }}' \ + ipv4.address {{ item.ip4_address }} \ + ipv4.gateway {{ gateway_ip }} + nmcli connection up '{{ item.connection }}' + when: + - gateway_ip != '' + - item.ip4_gateway != gateway_ip + - item.connection == gateway_ip_connection + loop: "{{ nmcli_devices }}" diff --git a/ansible/roles/gateway/tasks/main.yml b/ansible/roles/gateway/tasks/main.yml new file mode 100644 index 000000000..c13ba5ce9 --- /dev/null +++ b/ansible/roles/gateway/tasks/main.yml @@ -0,0 +1,7 @@ +- name: Add gateway playbook + copy: + src: gateway-init.yml + dest: /etc/ansible-init/playbooks/05-gateway-init.yml + owner: root + group: root + mode: "0644" diff --git a/docs/networks.md b/docs/networks.md index 4556ac623..790c4613b 100644 --- a/docs/networks.md +++ b/docs/networks.md @@ -8,11 +8,13 @@ subnets or associated infrastructure such as routers. The requirements are that: 4. 
At least one network on each node provides outbound internet access (either directly, or via a proxy). -Futhermore, it is recommended that the deploy host has an interface on the -access network. While it is possible to e.g. use a floating IP on a login node -as an SSH proxy to access the other nodes, this can create problems in recovering -the cluster if the login node is unavailable and can make Ansible problems harder -to debug. +Addresses on the "access network" are used as the `ansible_host` IPs. + +It is recommended that the deploy host either has a direct connection to the +"access network" or jumps through a host on it which is not part of the appliance. +Using e.g. a floating IP on a login node as a jumphost creates problems in +recovering the cluster if the login node is unavailable and can make Ansible +problems harder to debug. > [!WARNING] > If home directories are on a shared filesystem with no authentication (such @@ -29,8 +31,8 @@ the OpenTofu variables. These will normally be set in need to be overriden for specific environments, this can be done via an OpenTofu module as discussed [here](./production.md). -Note that if an OpenStack subnet has a gateway IP defined then nodes with ports -attached to that subnet will get a default route set via that gateway. +Note that if an OpenStack subnet has a gateway IP defined then by default nodes +with ports attached to that subnet get a default route set via that gateway. ## Single network This is the simplest possible configuration. A single network and subnet is @@ -77,8 +79,9 @@ vnic_types = { ## Additional networks on some nodes This example shows how to modify variables for specific node groups. In this -case a baremetal node group has a second network attached. As above, only a -single subnet can have a gateway IP. +case a baremetal node group has a second network attached. Here "subnetA" must +have a gateway IP defined and "subnetB" must not, to avoid routing problems on +the multi-homed compute nodes. 
```terraform cluster_networks = [ @@ -109,3 +112,85 @@ compute = { } ... ``` + +## Multiple networks with non-default gateways + +In some multiple network configurations it may be necessary to manage default +routes rather than them being automatically created from a subnet gateway. +This can be done using the tofu variable `gateway_ip` which can be set for the +cluster and/or overridden on the compute and login groups. If this is set: +- a default route via that address will be created on the appropriate interface + during boot if it does not exist +- any other default routes will be removed + +For example the cluster configuration below has a "campus" network with a +default gateway which provides inbound SSH / ondemand access and outbound +internet attached only to the login nodes, and a "data" network attached to +all nodes. The "data" network has no gateway IP set on its subnet to avoid dual +default routes and routing conflicts on the multi-homed login nodes, but does +have outbound connectivity via a router: + +```terraform +cluster_networks = [ + { + network = "data" # access network, CIDR 172.16.0.0/23 + subnet = "data_subnet" + } +] + +login = { + interactive = { + nodes = ["login-0"] + extra_networks = [ + { + network = "campus" + subnet = "campus_subnet" + } + ] + } +} +compute = { + general = { + nodes = ["compute-0", "compute-1"] + gateway_ip = "172.16.0.1" # Router interface + } +} +``` + +If there is no default route at all (either from a subnet gateway or from +`gateway_ip`) then a dummy route is created via the access network interface to +ensure [correct](https://docs.k3s.io/installation/airgap#default-network-route) +`k3s` operation. + +When using a subnet with no default gateway, OpenStack's nameserver for the +subnet may refuse lookups. External nameservers can be defined using the +[resolv_conf](../ansible/roles/resolv_conf/README.md) role. 
+ +## Proxies + +If some nodes have no outbound connectivity via any networks, the cluster can +be configured to deploy a [squid proxy](https://www.squid-cache.org/) on a node +with outbound connectivity. Assuming the `compute` and `control` nodes have no +outbound connectivity and the `login` node does, the minimal configuration for +this is: + +```yaml +# environments/$SITE/inventory/groups: +[squid:children] +login +[proxy:children] +control +compute +``` + +```yaml +# environments/$SITE/inventory/group_vars/all/squid.yml: +# these are just examples +squid_cache_disk: 1024 # MB +squid_cache_mem: '12 GB' +``` + +Note that name resolution must still be possible and may require defining a +nameserver which is directly reachable from the node using the +[resolv_conf](../ansible/roles/resolv_conf/README.md) +role. diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 0af12befc..6e87c5d58 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250311-1020-d05208bc", - "RL9": "openhpc-RL9-250311-1020-d05208bc" + "RL8": "openhpc-RL8-250312-1522-7e5c051d", + "RL9": "openhpc-RL9-250312-1435-7e5c051d" } } diff --git a/environments/common/inventory/group_vars/all/ansible_init.yml b/environments/common/inventory/group_vars/all/ansible_init.yml index be68dbe8c..af30f37d6 100644 --- a/environments/common/inventory/group_vars/all/ansible_init.yml +++ b/environments/common/inventory/group_vars/all/ansible_init.yml @@ -1 +1,9 @@ -ansible_init_wait: 1200 # seconds \ No newline at end of file +ansible_init_wait: 1200 # seconds + +ansible_init_pip_packages: + # role defaults: + - ansible + - jmespath + - requests + # custom: + - netaddr # required for gateway role diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 
632e1f25b..2b1d0ce81 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -179,3 +179,6 @@ extra_packages [chrony] # Hosts where crony configuration is applied. See docs/chrony.md for more details. + +[gateway] +# Add builder to this group to install gateway ansible-init playbook into image diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index ab5e1be5c..e3c3f763d 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -98,16 +98,16 @@ cluster [k3s_server:children] # Hosts to run k3s server (should only be single node i.e control node) -control +#control [k3s_agent:children] # Hosts to run k3s agent -compute -login +#compute +#login [k9s:children] # Hosts to install k9s on -control +#control [lustre] # Hosts to run lustre client @@ -121,3 +121,7 @@ builder [chrony] # Hosts where crony configuration is applied. See docs/chrony.md for more details. + +[gateway:children] +# Add builder to this group to install gateway ansible-init playbook into image +builder diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf index 87ff662a5..7ab27d84f 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf @@ -18,6 +18,7 @@ module "compute" { vnic_types = lookup(each.value, "vnic_types", var.vnic_types) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) # optionally set for group: networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf 
b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf index dc1c05b3b..b4308f93d 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf @@ -60,6 +60,7 @@ resource "openstack_compute_instance_v2" "control" { metadata = { environment_root = var.environment_root access_ip = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] + gateway_ip = var.gateway_ip } user_data = <<-EOF diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf index bbfad9cb4..c4a2c74b3 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf @@ -18,6 +18,7 @@ module "login" { vnic_types = lookup(each.value, "vnic_types", var.vnic_types) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) # optionally set for group networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf index f5d3424e6..f08ec1ca3 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf @@ -87,6 +87,7 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { environment_root = var.environment_root control_address = var.control_address access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] + gateway_ip = var.gateway_ip }, {for e in var.compute_init_enable: e 
=> true} ) @@ -140,6 +141,7 @@ resource "openstack_compute_instance_v2" "compute" { environment_root = var.environment_root control_address = var.control_address access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] + gateway_ip = var.gateway_ip }, {for e in var.compute_init_enable: e => true} ) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf index 224d25b47..896a28a48 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf @@ -122,3 +122,8 @@ variable "baremetal_nodes" { type = map(string) default = {} } + +variable "gateway_ip" { + type = string + default = "" +} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf index a6868ca0f..155d8c582 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf @@ -58,9 +58,9 @@ variable "login" { must already be allocated to the project. fip_network: Name of network containing ports to attach FIPs to. Only required if multiple networks are defined. 
- match_ironic_node: Set true to launch instances on the Ironic node of the same name as each cluster node availability_zone: Name of availability zone - ignored unless match_ironic_node is true (default: "nova") + gateway_ip: Address to add default route via EOF } @@ -96,6 +96,7 @@ variable "compute" { **NB**: The order in /dev is not guaranteed to match the mapping match_ironic_node: Set true to launch instances on the Ironic node of the same name as each cluster node availability_zone: Name of availability zone - ignored unless match_ironic_node is true (default: "nova") + gateway_ip: Address to add default route via EOF } @@ -172,3 +173,9 @@ variable "root_volume_size" { type = number default = 40 } + +variable "gateway_ip" { + description = "Address to add default route via" + type = string + default = "" +} diff --git a/requirements.yml b/requirements.yml index f46fe4cf1..006a068f9 100644 --- a/requirements.yml +++ b/requirements.yml @@ -48,7 +48,7 @@ collections: version: 0.4.0 - name: https://github.com/azimuth-cloud/ansible-collection-image-utils type: git - version: 0.4.0 + version: 0.5.0 # stackhpc.pulp has pulp.squeezer as dependency, any version, but latest # requires newer ansible than can install - name: pulp.squeezer