# Write the SSH client configuration used by the rest of the job.
# A single 'Host *' stanza sets the keep-alive, host-key-checking,
# known-hosts and connection timeout/retry options for every host.
# The file is truncated and written through one quoted redirection, so a
# $HOME containing whitespace cannot split the target path and the file is
# opened once rather than once per line.
printf '%s\n' \
  'Host *' \
  ' ServerAliveInterval 150' \
  ' ServerAliveCountMax 2' \
  ' StrictHostKeyChecking no' \
  ' UserKnownHostsFile /dev/null' \
  ' ConnectTimeout 30' \
  ' ConnectionAttempts 10' \
  > "$HOME/.ssh/config"
6367 bundle exec rake spec_prep
6468 echo ::endgroup::
6569 echo ::group::provision
7579 echo ::endgroup::
7680 echo ::group::update every 'ssh:' tag in ./inventory.yaml file to add 'native-ssh: true' under install_test_cluster and indent correctly
# Insert the SSH transport options after every line matching 'ssh:' in
# ./inventory.yaml.
# All three options are appended in ONE sed pass. Running a separate pass per
# option makes the later passes also match the freshly inserted
# 'native-ssh: true' line (it contains "ssh:"), which writes
# 'connect-timeout' and 'host-key-check' twice under each transport block.
# NOTE(review): the leading whitespace of the inserted text is reproduced as it
# appears here; confirm it matches the nesting used in the inventory file.
# Best-effort by design: a failed edit must not abort the step (|| true).
sed -i \
  -e '/ssh:/a\ native-ssh: true' \
  -e '/ssh:/a\ connect-timeout: 30' \
  -e '/ssh:/a\ host-key-check: false' \
  ./inventory.yaml || true; echo
7885 echo ::endgroup::
7986 echo ::group::list contents of ./inventory.yaml
8087 ls -l ./inventory.yaml || true; echo
@@ -96,13 +103,50 @@ jobs:
96103 # Check if containers are running
97104 docker ps
98105
99- # Try to connect manually to each container
106+ # Check SSH processes in containers
# Diagnose SSH reachability of every running container.
# For each container: list its sshd processes, print its IP address and the
# host port published for container port 22, then attempt an SSH login as
# root through that port. Every probe is best-effort: a failure is reported
# with a message and the loop moves on to the next container.
#
# Names are requested without the "table" header so nothing has to be trimmed
# off, and are collected into an array up front so the loop body (which runs
# ssh) never shares stdin with the name listing.
mapfile -t running_containers < <(docker ps --format '{{.Names}}')
for container in "${running_containers[@]}"; do
  echo "Testing SSH to container: $container"
  echo "SSH processes in $container:"
  docker exec "$container" ps aux | grep sshd || echo "No sshd processes found in $container"

  # Test SSH connectivity directly
  echo "Testing direct SSH connection to $container:"
  container_ip=$(docker inspect "$container" | jq -r '.[0].NetworkSettings.IPAddress')
  echo "Container IP: $container_ip"

  # Get the mapped SSH port: use only the first mapping line and take the text
  # after its last ':' so an additional or IPv6-style mapping line cannot
  # leak extra text into the port value.
  ssh_port=$(docker port "$container" 22 | awk -F: 'NR == 1 { print $NF }')
  echo "SSH port mapping: localhost:$ssh_port -> $container:22"

  # Test SSH connection with timeout
  timeout 10 ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -p "$ssh_port" root@localhost 'echo "SSH connection successful"' || echo "SSH connection failed to $container"
done
105- echo ::endgroup::
124+ echo ::endgroup::
- name: Wait for SSH to be ready
  run: |
    echo ::group::wait_for_ssh
    # Poll until Bolt can run a trivial command on every inventory target over
    # SSH. Up to max_attempts tries are made, retry_delay seconds apart; the
    # step fails if the last try is still unsuccessful.
    max_attempts=12
    retry_delay=10
    ssh_ready=false
    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
      echo "Attempt $attempt: Testing Bolt SSH connectivity..."
      if bundle exec bolt command run 'echo "Bolt SSH test successful"' \
        --inventoryfile ./inventory.yaml \
        --targets all \
        --connect-timeout 10 \
        --no-host-key-check \
        --transport ssh; then
        echo "All containers are accessible via Bolt SSH!"
        ssh_ready=true
        break
      fi
      # No point in sleeping after the final failed attempt.
      if ((attempt < max_attempts)); then
        echo "Waiting ${retry_delay} seconds before retry..."
        sleep "$retry_delay"
      fi
    done
    if [[ "$ssh_ready" != true ]]; then
      echo "Containers failed to become accessible after ${max_attempts} attempts"
      echo "Final inventory check:"
      cat ./inventory.yaml
      # Terminate the log group opened above before failing the step.
      echo ::endgroup::
      exit 1
    fi
    echo ::endgroup::
106150 - name : Install PE on test cluster
107151 timeout-minutes : 120
108152 run : |
0 commit comments