Skip to content

Commit d3908ab

Browse files
committed
Add node recovery playbook and improve deployment automation
New playbook:
- recover-node.yml: Automates recovery of reimaged/failed nodes
  - Cleans stale K3s node secrets and registrations
  - Handles Longhorn disk UUID mismatches
  - Re-registers nodes with cluster

Role improvements:
- base: Add defaults, enhance iSCSI config, fix hostname persistence
- k3s-prereq: Auto-detect NVMe boot vs eMMC, handle post-migration layout
- k3s-agent: Add service checks, wait for Ready state

Playbook improvements:
- bootstrap.yml: Add pre-flight checks and post-bootstrap verification
1 parent e0434ce commit d3908ab

File tree

7 files changed

+575
-10
lines changed

7 files changed

+575
-10
lines changed

CHANGELOG.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [1.1.8] - 2025-12-26
9+
10+
### Added
11+
12+
- Node recovery playbook (`playbooks/recover-node.yml`)
13+
- Cleans up stale K3s node password secrets
14+
- Removes stale Longhorn disk entries
15+
- Re-registers nodes with cluster
16+
- Configures Longhorn storage automatically
17+
- Usage: `ansible-playbook playbooks/recover-node.yml --limit node3`
18+
19+
### Changed
20+
21+
- Improved `base` role
22+
- Added role defaults for packages and kernel modules
23+
- Enhanced iSCSI configuration with socket and initiator setup
24+
- Fixed hostname persistence in /etc/hostname
25+
26+
- Improved `k3s-prereq` role
27+
- Auto-detects NVMe boot vs eMMC boot scenarios
28+
- Handles post-migration partition layout (nvme0n1p1=root, nvme0n1p2=longhorn)
29+
- Better idempotency for storage configuration
30+
31+
- Improved `k3s-agent` role
32+
- Added service existence check before restart
33+
- Waits for node to reach Ready state
34+
- Creates node password directory
35+
36+
- Improved `bootstrap.yml` playbook
37+
- Added pre-flight checks (memory, architecture, NVMe)
38+
- Post-bootstrap verification of iSCSI and storage
39+
- Better logging of configuration state
40+
841
## [1.1.7] - 2025-12-26
942

1043
### Changed
@@ -136,6 +169,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
136169
- Comprehensive installation guide (INSTALL.md)
137170
- Implementation documentation (docs/IMPLEMENTATION.md)
138171

172+
[1.1.8]: https://github.com/jfreed-dev/turing-ansible-cluster/compare/v1.1.7...v1.1.8
139173
[1.1.7]: https://github.com/jfreed-dev/turing-ansible-cluster/compare/v1.1.6...v1.1.7
140174
[1.1.6]: https://github.com/jfreed-dev/turing-ansible-cluster/compare/v1.1.5...v1.1.6
141175
[1.1.5]: https://github.com/jfreed-dev/turing-ansible-cluster/compare/v1.1.4...v1.1.5

ansible/playbooks/bootstrap.yml

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
---
22
# Bootstrap nodes with base packages and prerequisites
33
# Usage: ansible-playbook -i inventories/server/hosts.yml playbooks/bootstrap.yml
4+
#
5+
# This playbook prepares nodes for K3s installation by:
6+
# - Installing required packages (open-iscsi, nfs-common, etc.)
7+
# - Configuring kernel modules and sysctl settings
8+
# - Setting up NVMe storage for Longhorn (if applicable)
9+
# - Configuring hostname and /etc/hosts
410

511
- name: Bootstrap cluster nodes
612
hosts: k3s_cluster
@@ -13,7 +19,7 @@
1319
pre_tasks:
1420
- name: Display target information
1521
ansible.builtin.debug:
16-
msg: "Bootstrapping {{ inventory_hostname }} ({{ ansible_host }})"
22+
msg: "Bootstrapping {{ inventory_hostname }} ({{ ansible_host }}) - NVMe: {{ has_nvme | default(false) }}"
1723

1824
- name: Wait for nodes to be reachable
1925
ansible.builtin.wait_for_connection:
@@ -22,16 +28,65 @@
2228
- name: Gather facts
2329
ansible.builtin.setup:
2430

31+
# Pre-flight checks
32+
- name: Check minimum memory (2GB required)
33+
ansible.builtin.assert:
34+
that: ansible_memtotal_mb >= 2048
35+
fail_msg: "Node {{ inventory_hostname }} has only {{ ansible_memtotal_mb }}MB RAM. Minimum 2048MB required."
36+
success_msg: "Memory check passed: {{ ansible_memtotal_mb }}MB"
37+
38+
- name: Check architecture
39+
ansible.builtin.assert:
40+
that: ansible_architecture == 'aarch64'
41+
fail_msg: "Expected aarch64 architecture, got {{ ansible_architecture }}"
42+
success_msg: "Architecture check passed: {{ ansible_architecture }}"
43+
44+
- name: Check NVMe device exists (if configured)
45+
ansible.builtin.stat:
46+
path: "{{ nvme_device | default('/dev/nvme0n1') }}"
47+
register: bootstrap_nvme_check
48+
when: has_nvme | default(false)
49+
50+
- name: Warn if NVMe configured but not present
51+
ansible.builtin.debug:
52+
msg: "WARNING: has_nvme=true but {{ nvme_device | default('/dev/nvme0n1') }} not found!"
53+
when:
54+
- has_nvme | default(false)
55+
- not bootstrap_nvme_check.stat.exists | default(false)
56+
2557
roles:
2658
- base
2759
- k3s-prereq
2860

2961
post_tasks:
62+
- name: Verify iSCSI is running
63+
ansible.builtin.command: systemctl is-active iscsid
64+
register: bootstrap_iscsid_status
65+
changed_when: false
66+
failed_when: false
67+
68+
- name: Display iSCSI status
69+
ansible.builtin.debug:
70+
msg: "iSCSI status: {{ bootstrap_iscsid_status.stdout }}"
71+
72+
- name: Verify Longhorn directory exists
73+
ansible.builtin.stat:
74+
path: "{{ longhorn_path }}"
75+
register: bootstrap_longhorn_dir
76+
77+
- name: Display storage configuration
78+
ansible.builtin.debug:
79+
msg: |
80+
Storage configuration for {{ inventory_hostname }}:
81+
- Longhorn path: {{ longhorn_path }} (exists: {{ bootstrap_longhorn_dir.stat.exists }})
82+
- NVMe configured: {{ has_nvme | default(false) }}
83+
- Boot device: {{ ansible_mounts | selectattr('mount', 'equalto', '/') | map(attribute='device') | first }}
84+
3085
- name: Reboot if required
3186
ansible.builtin.reboot:
3287
reboot_timeout: 300
3388
when: reboot_required | default(false)
3489

3590
- name: Bootstrap complete
3691
ansible.builtin.debug:
37-
msg: "{{ inventory_hostname }} bootstrap complete"
92+
msg: "{{ inventory_hostname }} bootstrap complete - ready for K3s installation"

0 commit comments

Comments
 (0)