Skip to content

Commit adfde15

Browse files
author
Harish Kumar
committed
fix nomad issue
1 parent bb5e26c commit adfde15

File tree

6 files changed

+79
-25
lines changed

6 files changed

+79
-25
lines changed

.github/workflows/pr.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ jobs:
7575
- name: Export image version for later steps
7676
run: echo IMAGE_VERSION="$(./mkosi.version)" >> $GITHUB_ENV
7777
- name: Test it
78+
env:
79+
VERBOSE: "1"
7880
run: |
7981
#!/bin/bash
8082
set -x

hashiext-download.sh

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/bin/bash
22

3+
set -x
34
VAULT_VERSION=${VAULT_VERSION:-latest}
45
CONSUL_VERSION=${CONSUL_VERSION:-latest}
56
NOMAD_VERSION=${NOMAD_VERSION:-latest}
@@ -18,21 +19,22 @@ download() {
1819
version=$(get_latest_version "$name")
1920
fi
2021

21-
version="${version#v}"
22+
version="${version#v}" # Remove leading 'v' if present
23+
version="${version% *}" # Drop trailing text after a space (version string sometimes carries extra characters). NOTE(review): '%' cuts only from the LAST space; '%%' would cut at the first - confirm which is intended
2224

2325
origdir="$(pwd)"
2426
tmpdir=$(mktemp -d)
2527
cd "$tmpdir" || exit 1
2628

2729
local url="https://releases.hashicorp.com/${name}/${version}/${name}_${version}_linux_amd64.zip"
2830
local fname="${url##*/}"
29-
wget -O "${fname}" "${url}"
31+
wget --no-verbose -O "${fname}" "${url}"
3032

3133
sha256sums=https://releases.hashicorp.com/${name}/${version}/${name}_${version}_SHA256SUMS
3234
sha256sums_sig=https://releases.hashicorp.com/${name}/${version}/${name}_${version}_SHA256SUMS.sig
3335

34-
wget -O SHA256SUMS "${sha256sums}"
35-
wget -O SHA256SUMS.sig "${sha256sums_sig}"
36+
wget --no-verbose -O SHA256SUMS "${sha256sums}"
37+
wget --no-verbose -O SHA256SUMS.sig "${sha256sums_sig}"
3638

3739
if ! gpg --verify --no-default-keyring --keyring ${origdir}/resources/hashicorp-signing-key.72D7468F.gpg SHA256SUMS.sig SHA256SUMS
3840
then
@@ -68,7 +70,7 @@ cni_plugins=https://github.com/containernetworking/plugins/releases/download/v1.
6870

6971
if ! [ -e "$(basename $cni_plugins)" ]
7072
then
71-
wget $cni_plugins
73+
wget --no-verbose $cni_plugins
7274
fi
7375

7476
if ! [ -d resources/cni ]

mkosi.images/base/mkosi.extra/usr/share/mangos/self_test.sh

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,66 @@ systemctl is-active systemd-cryptsetup@var.service
1212
systemctl is-active systemd-cryptsetup@var\\x2dtmp.service
1313
mangosctl bootstrap
1414
mangosctl sudo enroll -g{vault-server,{nomad,consul}-{server,client}}s 127.0.0.1
15-
mangosctl sudo -- nomad job run /usr/share/mangos/test.nomad
16-
tries=10
17-
while ! mangosctl sudo -- nomad alloc logs -namespace=admin -task server -job test | grep SUCCESS
15+
mangosctl sudo -- nomad job run -detach /usr/share/mangos/test.nomad
16+
17+
echo "Waiting for job allocation to start..."
18+
echo "Current time: $(date)"
19+
tries=60
20+
success=0
21+
22+
# Temporarily disable exit-on-error for polling loop
23+
set +e
24+
25+
while [ $tries -gt 0 ]
1826
do
19-
if [ $tries -le 0 ]
27+
# Get allocation status first
28+
alloc_status=$(mangosctl sudo -- nomad job allocs -namespace=admin -json test 2>/dev/null | jq -r '.[0].ClientStatus // empty')
29+
30+
if [ -n "$alloc_status" ]; then
31+
echo "[$(date +%H:%M:%S)] Allocation status: $alloc_status"
32+
else
33+
echo "[$(date +%H:%M:%S)] No allocation yet..."
34+
fi
35+
36+
# Check if logs are available and contain SUCCESS
37+
if mangosctl sudo -- nomad alloc logs -namespace=admin -task server -job test 2>/dev/null | grep -q SUCCESS
2038
then
21-
echo "Test job did not complete successfully"
22-
exit 1
39+
echo "Test job completed successfully!"
40+
success=1
41+
break
42+
fi
43+
44+
# If allocation failed, break early
45+
if [ "$alloc_status" = "failed" ]; then
46+
echo "Allocation failed, breaking loop"
47+
break
2348
fi
49+
2450
tries=$((tries - 1))
25-
echo "Sleeping 10 seconds."
51+
echo "[$(date +%H:%M:%S)] Waiting... ($tries attempts remaining)"
2652
sleep 10
27-
echo "Trying again. $tries tries left"
2853
done
2954

55+
# Re-enable exit-on-error
56+
set -e
57+
58+
if [ $success -eq 0 ]
59+
then
60+
echo "Test job did not complete successfully after 10 minutes"
61+
echo "=== Job Status ==="
62+
mangosctl sudo -- nomad job status -namespace=admin test || true
63+
echo "=== Allocation Logs ==="
64+
mangosctl sudo -- nomad alloc logs -namespace=admin -task server -job test 2>&1 || true
65+
echo "=== Allocation Status ==="
66+
alloc_id=$(mangosctl sudo -- nomad job allocs -namespace=admin -json test 2>/dev/null | jq -r '.[0].ID // empty')
67+
if [ -n "$alloc_id" ]; then
68+
mangosctl sudo -- nomad alloc status -namespace=admin "$alloc_id" || true
69+
else
70+
echo "No allocations found for job"
71+
fi
72+
exit 1
73+
fi
74+
3075
echo "===> Validating Recovery Keys"
3176
machine_id=$(cat /etc/machine-id)
3277

@@ -38,7 +83,7 @@ if [ -z "$luks_partitions" ]; then
3883
else
3984
# Test 1: Verify recovery keys exist in Vault
4085
for device in $luks_partitions; do
41-
partition=$(lsblk -nlo PARTLABEL /dev/$device)
86+
partition=$(lsblk -n -o PARTLABEL "/dev/$device" 2>/dev/null | tr -d ' \n\r\t')
4287
if ! mangosctl sudo -- vault kv get "secrets/mangos/recovery-keys/${machine_id}/${partition}" >/dev/null 2>&1; then
4388
echo "ERROR: Recovery key not found in Vault for ${partition}"
4489
exit 1

mkosi.images/consul/mkosi.version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.22.1
1+
1.22.0

resources/mangosctl/mangosctl.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ DEFAULT_REGION=global
44
DEFAULT_DATACENTER=dc1
55

66
set -e
7-
set -x
87

98
usage() {
109
echo 'Usage: $0 [GLOBAL OPTIONS] {install|update|enroll}'
@@ -302,10 +301,10 @@ enroll_recovery_keys() {
302301
local found_any=0
303302

304303
# Find all LUKS-encrypted partitions
305-
local devices=($(lsblk -ln -o NAME,TYPE,FSTYPE | awk '$2=="part" && $3=="crypto_LUKS" {print "/dev/"$1}'))
304+
local devices=($(lsblk -ln -o NAME,TYPE,FSTYPE | awk '$2=="part" && $3=="crypto_LUKS" {print $1}'))
306305

307306
for device in "${devices[@]}"; do
308-
local partlabel=$(lsblk -n -o PARTLABEL "$device" 2>/dev/null | tr -d ' \n\r\t')
307+
local partlabel=$(lsblk -n -o PARTLABEL "/dev/$device" 2>/dev/null | tr -d ' \n\r\t')
309308

310309
# Skip if no valid partition label
311310
if [ -z "$partlabel" ]; then

run_tests.sh

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,28 @@ report_outcome() {
3737
fi
3838
}
3939

40-
# Run a command with journalctl streaming
40+
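# Run a command and return its exit status; when VERBOSE is non-empty, also print the header and follow the matching user journal (journalctl --user -f) until the command finishes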
4141
run_with_logs() {
4242
local unit_filter="$1"
4343
local header="$2"
4444
shift 2
4545

46-
echo
47-
echo "$(bold "$header")"
48-
journalctl --user -u "$unit_filter" -f --no-pager &
49-
local journal_pid=$!
46+
local journal_pid=""
47+
48+
if [ -n "${VERBOSE}" ]; then
49+
echo
50+
echo "$(bold "$header")"
51+
journalctl --user -u "$unit_filter" -f --no-pager &
52+
journal_pid=$!
53+
fi
5054

5155
"$@"
5256
local result=$?
5357

54-
kill $journal_pid 2>/dev/null || true
55-
wait $journal_pid 2>/dev/null || true
58+
if [ -n "$journal_pid" ]; then
59+
kill $journal_pid 2>/dev/null || true
60+
wait $journal_pid 2>/dev/null || true
61+
fi
5662

5763
return $result
5864
}

0 commit comments

Comments
 (0)