Skip to content

Commit 154463b

Browse files
author
Harish Kumar
committed
moving recovery_test.sh to image
1 parent 3e72c7f commit 154463b

File tree

6 files changed

+195
-117
lines changed

6 files changed

+195
-117
lines changed

mkosi.images/base/mkosi.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ Packages=
3737
# CA certificates
3838
ca-certificates
3939

40+
# LUKS support
41+
cryptsetup
42+
4043
# systemd's DNS resolver
4144
systemd-resolved
4245

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/bin/bash
2+
set -e
3+
set -x
4+
5+
machine_id="$(cat /etc/machine-id)"
6+
7+
#######################################
# Exercise the LUKS recovery-key path for one encrypted partition:
#   1. fetch the partition's recovery key from Vault,
#   2. wipe the TPM2 keyslot (simulating a TPM failure),
#   3. close the mapping and re-open it using only the recovery key,
#   4. re-enroll the TPM2 keyslot so later tests start from a clean state.
# Globals:
#   machine_id (read) - used to build the Vault secret path
# Arguments:
#   $1 - LUKS block device to test (e.g. /dev/sda3)
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   0 on success; exits the script with status 1 on a detected failure
#   (any other failing command aborts via the script-level `set -e`).
#######################################
test_recovery_key() {
  local test_device="$1"
  local test_partition mapper_name recovery_key tpm_slot mount_point
  local restore_xtrace=0

  test_partition="$(lsblk -nlo PARTLABEL "${test_device}" | tr -d '\n')"
  # Second line of the NAME column is the first child of the partition,
  # i.e. the opened device-mapper volume.
  mapper_name="$(lsblk -nlo NAME "${test_device}" | awk 'NR==2')"

  # Bail out BEFORE touching any keyslot: without an open mapping the
  # close/attach steps below cannot work, and wiping the TPM slot first
  # would leave the device without TPM enrolment.
  if [ -z "${mapper_name}" ]; then
    echo "ERROR: ${test_device} has no open device-mapper volume; cannot test recovery" >&2
    exit 1
  fi

  echo "Testing recovery unlock for ${test_partition} (device: ${test_device}, mapper: ${mapper_name})..."
  echo ""

  # From here on the recovery key lives in a shell variable. Turn xtrace off
  # so neither the assignment nor the printf pipelines write the key into the
  # trace output; the previous state is restored at the end of the function.
  case "$-" in
    *x*)
      restore_xtrace=1
      set +x
      ;;
  esac

  # Get recovery key from Vault
  recovery_key="$(mangosctl sudo -- vault kv get -field=key "secrets/mangos/recovery-keys/${machine_id}/${test_partition}")"
  if [ -z "${recovery_key}" ]; then
    echo "ERROR: empty recovery key returned for ${test_partition}" >&2
    exit 1
  fi

  # Find TPM keyslot number (informational only: used in the message below).
  # NOTE(review): the awk pattern is kept verbatim from the original; confirm
  # it matches the indentation of `cryptsetup luksDump` output on the image.
  tpm_slot="$(cryptsetup luksDump "${test_device}" \
    | awk '/Tokens:/,/Keyslots:/ {if (/systemd-tpm2/) found=1; if (found && /^ [0-9]+:/) {print $1; exit}}' \
    | tr -d ':')"

  echo "Removing TPM keyslot ${tpm_slot} (simulating TPM failure)..."
  # Provide the recovery key on stdin so systemd-cryptenroll does not prompt
  # interactively (--unlock-key-file=/dev/stdin).
  printf '%s' "${recovery_key}" \
    | systemd-cryptenroll --wipe-slot=tpm2 --unlock-key-file=/dev/stdin "${test_device}"

  # Get mount point for this partition. findmnt exits non-zero when the
  # mapping is not mounted; that is a valid state handled below, so it must
  # not trip `set -e`.
  mount_point="$(findmnt -n -o TARGET "/dev/mapper/${mapper_name}")" || mount_point=""

  # Unmount and close
  if [ -n "${mount_point}" ]; then
    systemctl stop "$(systemd-escape -p --suffix=mount "${mount_point}")"
  fi
  cryptsetup close "${mapper_name}"

  # THE CRITICAL TEST: Unlock with recovery key
  echo "Unlocking with recovery key..."
  printf '%s' "${recovery_key}" \
    | systemd-cryptsetup attach "${mapper_name}" "${test_device}" -

  # Remount
  if [ -n "${mount_point}" ]; then
    mount "/dev/mapper/${mapper_name}" "${mount_point}"
  fi

  # Verify device is accessible
  if [ ! -b "/dev/mapper/${mapper_name}" ]; then
    echo "ERROR: Device not accessible after recovery"
    exit 1
  fi

  echo "Data accessible after recovery: OK"

  # Re-enroll TPM (cleanup for future tests), again supplying the recovery
  # key on stdin so the command stays non-interactive.
  echo "Re-enrolling TPM keyslot..."
  printf '%s' "${recovery_key}" \
    | systemd-cryptenroll "${test_device}" \
      --tpm2-device=auto \
      --tpm2-pcrs=7 \
      --tpm2-public-key-pcrs=11 \
      --unlock-key-file=/dev/stdin

  echo "Recovery test: PASSED"

  # Written as a full `if` so the function still returns 0 when xtrace was off.
  if [ "${restore_xtrace}" -eq 1 ]; then
    set -x
  fi
}
67+
68+
# Detect every LUKS-encrypted partition and run the recovery test on each.
# The list is kept as a single space-separated string so it can be printed
# as-is in the log line below.
devices="$(lsblk -ln -o NAME,TYPE,FSTYPE | awk '$2=="part" && $3=="crypto_LUKS" {print "/dev/"$1}' | tr '\n' ' ')"

echo "> LUKS-encrypted devices found: $devices"

# ${devices} is intentionally unquoted: word-splitting turns the
# space-separated list into one iteration per device path.
for test_device in ${devices}; do
  echo "> Testing device: ${test_device}"
  test_recovery_key "${test_device}"
done

# The script must finish with status 0 here: self_test.sh treats any non-zero
# exit as "LUKS recovery test: FAILED". A leftover debug `exit 1` used to make
# every run fail regardless of the outcome.
echo "> All recovery tests completed successfully."

mkosi.images/base/mkosi.extra/usr/share/mangos/self_test.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,14 @@ else
5858

5959
echo "Recovery key validation: PASSED"
6060
fi
61+
62+
echo 'Testing LUKS recovery functionality'
63+
64+
if /usr/share/mangos/recovery_test.sh; then
65+
echo "LUKS recovery test: PASSED"
66+
else
67+
echo "LUKS recovery test: FAILED"
68+
exit 1
69+
fi
70+
71+
echo "All self-tests completed successfully."

recovery_test.sh

Lines changed: 0 additions & 64 deletions
This file was deleted.

resources/mangosctl/mangosctl.sh

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -326,19 +326,28 @@ enroll_recovery_keys() {
326326
local machine_id="$(cat /etc/machine-id)"
327327
local found_any=0
328328

329+
local marker_dir="/var/lib/mangos/luks-recovery-keys-enrolled"
330+
mkdir -p "${marker_dir}"
331+
329332
# Find all LUKS-encrypted partitions
330-
local devices=($(lsblk -ln -o NAME,TYPE,FSTYPE | awk '$2=="part" && $3=="crypto_LUKS" {print "/dev/"$1}'))
333+
local devices="$(lsblk -ln -o NAME,TYPE,FSTYPE | awk '$2=="part" && $3=="crypto_LUKS" {print "/dev/"$1}' | tr '\n' ' ')"
334+
335+
echo "> LUKS-encrypted devices found: $devices"
331336

332-
for device in "${devices[@]}"; do
333-
local partlabel=$(lsblk -n -o PARTLABEL "$device" 2>/dev/null | tr -d ' \n\r\t')
337+
for device in ${devices}; do
338+
echo "> Processing device: ${device}"
339+
local partlabel="$(lsblk -n -o PARTLABEL "${device}" 2>/dev/null | tr -d ' \n\r\t')"
334340

335341
# Skip if no valid partition label
336-
if [ -z "$partlabel" ]; then
342+
if [ -z "${partlabel}" ]; then
343+
echo "> Device ${device} has no PARTLABEL, skipping"
337344
continue
338345
fi
339346

340-
# Skip if recovery key already exists in Vault
341-
if VAULT_TOKEN="${vault_token}" vault kv get "secrets/mangos/recovery-keys/${machine_id}/${partlabel}" >/dev/null 2>&1; then
347+
# Skip if already enrolled
348+
local marker_file="${marker_dir}/${partlabel}"
349+
if [ -f "${marker_file}" ]; then
350+
echo "> Recovery key for ${partlabel} already enrolled, skipping"
342351
continue
343352
fi
344353

@@ -352,30 +361,54 @@ enroll_recovery_keys() {
352361
# Extract recovery key - format: 6 lowercase alphanumeric groups of 8, separated by dashes
353362
# Example: etklvner-lblhnbgl-kdtnujtk-ikjlgbur-lnlrjrrc-iuikkidg-feientnn-dkjeeuft
354363
LUKS_RECOVERY_KEY_REGEX='[a-z0-9]{8}(-[a-z0-9]{8}){7}'
355-
local recovery_key=$(echo "$output" | grep -oE "${LUKS_RECOVERY_KEY_REGEX}" | head -n 1)
364+
local recovery_key="$(echo "$output" | grep -oE "${LUKS_RECOVERY_KEY_REGEX}" | head -n 1)"
356365

357-
if [ -n "$recovery_key" ] && [[ "$recovery_key" =~ ^${LUKS_RECOVERY_KEY_REGEX}$ ]]; then
358-
VAULT_TOKEN="${vault_token}" vault kv put "secrets/mangos/recovery-keys/${machine_id}/${partlabel}" \
366+
if [ -n "${recovery_key}" ]; then
367+
if VAULT_TOKEN="${vault_token}" vault kv put "secrets/mangos/recovery-keys/${machine_id}/${partlabel}" \
359368
key="${recovery_key}" hostname="${HOSTNAME}" device="${device}" created="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
360-
if [ $? -eq 0 ]; then
369+
then
361370
greenln Success
371+
touch "${marker_file}"
362372
else
363373
red "Failed to store in Vault"
364-
echo
365374
fi
366375
else
367-
red "Failed to enroll or extract recovery key"
368-
echo
376+
red "Failed to extract recovery key. cryptenroll output:"
377+
echo "${output}"
369378
fi
370379
done
371380

372-
if [ $found_any -eq 0 ]; then
373-
echo " > All recovery keys already enrolled"
381+
if [ ${found_any} -eq 0 ]; then
382+
echo "> All recovery keys already enrolled"
374383
else
375-
echo " > Recovery keys enrolled and stored in Vault"
384+
echo "> Recovery keys enrolled and stored in Vault"
376385
fi
377386
}
378387

388+
#######################################
# Record this node's machine-id as metadata on its Vault identity entity,
# preserving any metadata keys the entity already has.
# Globals:
#   HOSTNAME            (read)    - used to build the entity alias name
#   node_auth_accessor  (written) - accessor of the node-cert auth mount
#   entity_name         (written) - Vault entity name for this node
#   machine_id          (written) - contents of /etc/machine-id
#   NOTE(review): these three are deliberately left global, as in the
#   original; code outside this view may read them - confirm before
#   making them local.
# Outputs:
#   Progress via step/greenln; error messages on stderr.
# Returns:
#   0 when the metadata was written, 1 if any lookup or the write failed
#   ("Success" is only printed on the success path).
# Limitation:
#   Metadata values containing newlines are not supported (entries are
#   passed line by line).
#######################################
write_machine_id_metadata() {
  local entity_json metadata_lines entry
  local -a metadata_args=()

  step "Getting mount accessor for node-cert"
  node_auth_accessor="$(vault read -field=accessor sys/auth/node-cert)" || node_auth_accessor=""
  if [ -z "${node_auth_accessor}" ]; then
    echo "Failed to read mount accessor for node-cert" >&2
    return 1
  fi

  step "Looking up entity name for this node"
  entity_name="$(vault write -field=name identity/lookup/entity \
    alias_name="${HOSTNAME}.mangos" \
    alias_mount_accessor="${node_auth_accessor}")" || entity_name=""
  if [ -z "${entity_name}" ]; then
    # Without a name the write below would target "identity/entity/name/".
    echo "Failed to look up Vault entity for ${HOSTNAME}.mangos" >&2
    return 1
  fi

  step "Setting machine-id as entity metadata"
  machine_id="$(cat /etc/machine-id)" || machine_id=""
  if [ -z "${machine_id}" ]; then
    echo "Failed to read /etc/machine-id" >&2
    return 1
  fi

  # Read current metadata, merge in machine_id, and emit one "key=value"
  # line per entry. Writing `metadata=` replaces the whole map, hence the
  # read-merge-write cycle.
  if ! entity_json="$(vault read -format=json "identity/entity/name/${entity_name}")"; then
    echo "Failed to read Vault entity ${entity_name}" >&2
    return 1
  fi
  if ! metadata_lines="$(jq -r --arg mid "${machine_id}" \
    '((.data.metadata // {}) + {machine_id: $mid}) | to_entries[] | "\(.key)=\(.value|tostring)"' \
    <<<"${entity_json}")"; then
    echo "Failed to merge metadata for Vault entity ${entity_name}" >&2
    return 1
  fi

  # Convert the lines to metadata=key=value arguments for the Vault CLI.
  # Each line is taken whole (no IFS splitting) so values that contain or
  # end with '=' are passed through unchanged.
  while IFS= read -r entry; do
    if [ -n "${entry}" ]; then
      metadata_args+=("metadata=${entry}")
    fi
  done <<<"${metadata_lines}"

  if [ "${#metadata_args[@]}" -eq 0 ]; then
    echo "No metadata to write for Vault entity ${entity_name}" >&2
    return 1
  fi

  if ! vault write "identity/entity/name/${entity_name}" "${metadata_args[@]}"; then
    echo "Failed to write metadata for Vault entity ${entity_name}" >&2
    return 1
  fi
  greenln Success
}
411+
379412
do_enroll() {
380413
declare -A groups
381414

@@ -481,18 +514,7 @@ do_enroll() {
481514
NODE_VAULT_TOKEN=$(vault login -method=cert -path=node-cert -client-cert=/var/lib/mangos/mangos.crt -client-key=<(systemd-creds decrypt ${confext_dir}/etc/credstore.encrypted/mangos.key) -token-only)
482515
greenln Success
483516

484-
step "Getting mount accessor for node-cert"
485-
node_auth_accessor=$(vault read -field=accessor sys/auth/node-cert)
486-
echo $node_auth_accessor
487-
488-
step "Looking up entity name for this node"
489-
entity_name=$(vault write -field=name identity/lookup/entity alias_name=${HOSTNAME}.mangos alias_mount_accessor=${node_auth_accessor})
490-
echo $entity_name
491-
492-
step "Setting machine-id as entity metadata"
493-
machine_id=$(cat /etc/machine-id)
494-
vault write identity/entity/name/${entity_name} metadata=machine_id="${machine_id}"
495-
greenln Success
517+
do_step "Writing machine ID metadata to Vault" write_machine_id_metadata
496518

497519
for group in ${!groups[@]}
498520
do

run_tests.sh

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -264,33 +264,59 @@ $systemd_run -u "mangos-test-${testid}-socat" -d -p SuccessExitStatus=130 -q --w
264264

265265
step ssh into VM
266266

267-
if $systemd_run -d --wait -q -p StandardOutput=journal -- ssh -i ./mkosi.key \
268-
-o UserKnownHostsFile=/dev/null \
269-
-o StrictHostKeyChecking=no \
270-
-o LogLevel=ERROR \
271-
-o ProxyCommand="mkosi sandbox -- socat - VSOCK-CONNECT:42:%p" \
272-
root@mkosi 'mangosctl --base-url=http://10.0.2.2:8081 updatectl add-overrides ; /usr/share/mangos/self_test.sh'
273-
then
274-
success
275-
$systemd_run -u "mangos-test-${testid}-result" -q -- echo "Mangos test ${testid} succeeded"
276-
else
277-
failure
278-
$systemd_run -u "mangos-test-${testid}-result" -q -- echo "Mangos test ${testid} failed"
279-
exit 1
280-
fi
267+
# Stream the remote self-test live to the workflow console and also save to a logfile
268+
diag_ssh_out="${tmpdir}/self_test_ssh.out"
269+
echo "Streaming remote self-test output to ${diag_ssh_out}"
270+
271+
# Use direct ssh (with forced tty) so output is streamed live. Save output with tee.
272+
# Run ssh+tee in background and tail the logfile in foreground so CI logs show live output
273+
ssh_cmd=(ssh -tt -i ./mkosi.key
274+
-o UserKnownHostsFile=/dev/null
275+
-o StrictHostKeyChecking=no
276+
-o LogLevel=ERROR
277+
-o ProxyCommand="mkosi sandbox -- socat - VSOCK-CONNECT:42:%p"
278+
root@mkosi "bash -lc 'mangosctl --base-url=http://10.0.2.2:8081 updatectl add-overrides ; /usr/share/mangos/self_test.sh'")
279+
280+
# Ensure diag file exists
281+
touch "${diag_ssh_out}"
282+
283+
# Trap to clean child processes on exit
284+
# EXIT-trap helper: terminate the background ssh and tail processes, if any.
# Reads the globals ssh_pid and tail_pid; either may be unset or empty, in
# which case it is skipped. Failures from kill (e.g. the process has already
# exited) are deliberately ignored.
cleanup_ssh_tail() {
  local child_pid
  for child_pid in "${ssh_pid:-}" "${tail_pid:-}"; do
    if [ -z "${child_pid}" ]; then
      continue
    fi
    kill "${child_pid}" 2>/dev/null || true
  done
}
292+
trap cleanup_ssh_tail EXIT
281293

282-
step 'Testing LUKS recovery functionality'
283-
if $systemd_run -d --wait -q -p StandardOutput=journal -- ssh -i ./mkosi.key \
284-
-o UserKnownHostsFile=/dev/null \
285-
-o StrictHostKeyChecking=no \
286-
-o LogLevel=ERROR \
287-
-o ProxyCommand="mkosi sandbox -- socat - VSOCK-CONNECT:42:%p" \
288-
root@mkosi bash -s < ./recovery_test.sh
289-
then
294+
# Run the remote self-test in the foreground. tee streams the output live to
# the console and saves a copy in ${diag_ssh_out}, so no separate
# `tail -f` (which printed every line a second time) or start-up sleep is
# needed.
#
# The pipeline runs in a subshell with pipefail so that ssh's exit status is
# what gets reported; without it the status would be tee's and a failing
# remote test would be recorded as a success. `|| ssh_rc=$?` captures the
# status without tripping a script-level `set -e`.
#
# ssh's stdin is /dev/null, matching the previous background invocation, so
# it cannot consume the caller's stdin.
ssh_rc=0
(
  set -o pipefail
  stdbuf -oL "${ssh_cmd[@]}" </dev/null 2>&1 | stdbuf -oL tee "${diag_ssh_out}"
) || ssh_rc=$?

trap - EXIT

if [ "${ssh_rc}" -eq 0 ]; then
  success
  echo "Mangos test ${testid} succeeded" | $systemd_run -q -u "mangos-test-${testid}-result" -- cat
else
  failure
  echo "Mangos test ${testid} failed" | $systemd_run -q -u "mangos-test-${testid}-result" -- cat
  echo "--- Tail of remote self-test output (last 200 lines) ---"
  tail -n 200 "${diag_ssh_out}" || true
  exit "${ssh_rc}"
fi

0 commit comments

Comments
 (0)