Skip to content

Commit 4252d21

Browse files
(MAINT) debug ssh more
1 parent a29807e commit 4252d21

File tree

1 file changed

+48
-4
lines changed

1 file changed

+48
-4
lines changed

.github/workflows/test-install-matrix.yaml

Lines changed: 48 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,10 @@ jobs:
6060
echo 'Host *' > $HOME/.ssh/config
6161
echo ' ServerAliveInterval 150' >> $HOME/.ssh/config
6262
echo ' ServerAliveCountMax 2' >> $HOME/.ssh/config
63+
echo ' StrictHostKeyChecking no' >> $HOME/.ssh/config
64+
echo ' UserKnownHostsFile /dev/null' >> $HOME/.ssh/config
65+
echo ' ConnectTimeout 30' >> $HOME/.ssh/config
66+
echo ' ConnectionAttempts 10' >> $HOME/.ssh/config
6367
bundle exec rake spec_prep
6468
echo ::endgroup::
6569
echo ::group::provision
@@ -75,6 +79,9 @@ jobs:
7579
echo ::endgroup::
7680
echo ::group::update every 'ssh:' tag in ./inventory.yaml file to add 'native-ssh: true' under install_test_cluster and indent correctly
7781
sed -i -e '/ssh:/a\ native-ssh: true' ./inventory.yaml || true; echo
82+
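# Add further SSH options for container environments. NOTE(review): the /ssh:/ address also matches the 'native-ssh: true' line appended above, so each option below appears to be inserted twice per ssh: block; confirm and anchor the pattern if so.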
83+
sed -i -e '/ssh:/a\ connect-timeout: 30' ./inventory.yaml || true; echo
84+
sed -i -e '/ssh:/a\ host-key-check: false' ./inventory.yaml || true; echo
7885
echo ::endgroup::
7986
echo ::group::list contents of ./inventory.yaml
8087
ls -l ./inventory.yaml || true; echo
@@ -96,13 +103,50 @@ jobs:
96103
# Check if containers are running
97104
docker ps
98105
99-
# Try to connect manually to each container
106+
# Check SSH processes in containers
100107
for container in $(docker ps --format "table {{.Names}}" | tail -n +2); do
101108
echo "Testing SSH to container: $container"
102-
timeout 30 docker exec $container systemctl status sshd || echo "SSH service check failed for $container"
103-
timeout 30 docker exec $container ps aux | grep sshd || echo "No sshd processes found in $container"
109+
echo "SSH processes in $container:"
110+
docker exec $container ps aux | grep sshd || echo "No sshd processes found in $container"
111+
112+
# Test SSH connectivity directly
113+
echo "Testing direct SSH connection to $container:"
114+
container_ip=$(docker inspect $container | jq -r '.[0].NetworkSettings.IPAddress')
115+
echo "Container IP: $container_ip"
116+
117+
# Get the mapped SSH port
118+
ssh_port=$(docker port $container 22 | cut -d: -f2)
119+
echo "SSH port mapping: localhost:$ssh_port -> $container:22"
120+
121+
# Test SSH connection with timeout
122+
timeout 10 ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -p $ssh_port root@localhost 'echo "SSH connection successful"' || echo "SSH connection failed to $container"
104123
done
105-
echo ::endgroup::
124+
echo ::endgroup::
125+
- name: Wait for SSH to be ready
126+
run: |
127+
echo ::group::wait_for_ssh
128+
# Wait for SSH to be available on all containers via Bolt
129+
for i in {1..12}; do
130+
echo "Attempt $i: Testing Bolt SSH connectivity..."
131+
if bundle exec bolt command run 'echo "Bolt SSH test successful"' \
132+
--inventoryfile ./inventory.yaml \
133+
--targets all \
134+
--connect-timeout 10 \
135+
--no-host-key-check \
136+
--transport ssh; then
137+
echo "All containers are accessible via Bolt SSH!"
138+
break
139+
fi
140+
if [ $i -eq 12 ]; then
141+
echo "Containers failed to become accessible after 12 attempts"
142+
echo "Final inventory check:"
143+
cat ./inventory.yaml
144+
exit 1
145+
fi
146+
echo "Waiting 10 seconds before retry..."
147+
sleep 10
148+
done
149+
echo ::endgroup::
106150
- name: Install PE on test cluster
107151
timeout-minutes: 120
108152
run: |

0 commit comments

Comments
 (0)