Skip to content

Commit 44b1b0b

Browse files
Merge pull request openshift#6919 from bfournie/agent-tui-ssh-check
OCPBUGS-8094: In agent 'wait-for bootstrap' command, test ssh to Node0
2 parents 3485fdd + 65033f3 commit 44b1b0b

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

pkg/agent/cluster.go

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package agent
22

33
import (
44
"context"
5+
"net"
56
"os"
67
"path/filepath"
8+
"strconv"
79
"time"
810

911
"github.com/go-openapi/strfmt"
@@ -12,6 +14,7 @@ import (
1214

1315
"github.com/openshift/assisted-service/client/installer"
1416
"github.com/openshift/assisted-service/models"
17+
"github.com/openshift/installer/pkg/gather/ssh"
1518
)
1619

1720
// Cluster is a struct designed to help interact with the cluster that is
@@ -56,6 +59,7 @@ type clusterInstallStatusHistory struct {
5659
ClusterInstallComplete bool
5760
NotReadyTime time.Time
5861
ValidationResults *validationResults
62+
ClusterInitTime time.Time
5963
}
6064

6165
// NewCluster initializes a Cluster object
@@ -94,6 +98,7 @@ func NewCluster(ctx context.Context, assetDir string) (*Cluster, error) {
9498
ClusterConsoleRouteCreated: false,
9599
ClusterConsoleRouteURLCreated: false,
96100
ClusterInstallComplete: false,
101+
ClusterInitTime: time.Now(),
97102
}
98103

99104
cvalidationresults := &validationResults{
@@ -136,7 +141,14 @@ func (czero *Cluster) IsBootstrapComplete() (bool, bool, error) {
136141
logrus.Trace("Current API Status: Agent Rest API: down, Bootstrap Kube API: down")
137142
if !czero.installHistory.RestAPISeen && !czero.installHistory.ClusterKubeAPISeen {
138143
logrus.Debug("Agent Rest API never initialized. Bootstrap Kube API never initialized")
139-
logrus.Info("Waiting for cluster install to initialize. Sleeping for 30 seconds")
144+
elapsedSinceInit := time.Since(czero.installHistory.ClusterInitTime)
145+
// After allowing time for the interface to come up, check if Node0 can be accessed via ssh
146+
if elapsedSinceInit > 2*time.Minute && !czero.CanSSHToNodeZero() {
147+
logrus.Info("Cannot access Rendezvous Host. There may be a network configuration problem, check console for additional info")
148+
} else {
149+
logrus.Info("Waiting for cluster install to initialize. Sleeping for 30 seconds")
150+
}
151+
140152
time.Sleep(30 * time.Second)
141153
return false, false, nil
142154
}
@@ -425,6 +437,18 @@ func (czero *Cluster) PrintInstallStatus(cluster *models.Cluster) error {
425437
return nil
426438
}
427439

440+
// CanSSHToNodeZero Checks if ssh to NodeZero succeeds.
441+
func (czero *Cluster) CanSSHToNodeZero() bool {
442+
ip := czero.API.Rest.NodeZeroIP
443+
port := 22
444+
445+
_, err := ssh.NewClient("core", net.JoinHostPort(ip, strconv.Itoa(port)), czero.API.Rest.NodeSSHKey)
446+
if err != nil {
447+
logrus.Debugf("Failed to connect to the Rendezvous Host: %s", err)
448+
}
449+
return err == nil
450+
}
451+
428452
// Human friendly install status strings mapped to the Agent Rest API cluster statuses
429453
func humanFriendlyClusterInstallStatus(status string) string {
430454
clusterStoppedInstallingStates := map[string]string{

pkg/agent/rest.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/openshift/installer/pkg/asset/agent/agentconfig"
1818
"github.com/openshift/installer/pkg/asset/agent/image"
1919
"github.com/openshift/installer/pkg/asset/agent/manifests"
20+
"github.com/openshift/installer/pkg/asset/installconfig"
2021
assetstore "github.com/openshift/installer/pkg/asset/store"
2122
"github.com/openshift/installer/pkg/types/agent"
2223
)
@@ -27,13 +28,15 @@ type NodeZeroRestClient struct {
2728
ctx context.Context
2829
config client.Config
2930
NodeZeroIP string
31+
NodeSSHKey []string
3032
}
3133

3234
// NewNodeZeroRestClient Initialize a new rest client to interact with the Agent Rest API on node zero.
3335
func NewNodeZeroRestClient(ctx context.Context, assetDir string) (*NodeZeroRestClient, error) {
3436
restClient := &NodeZeroRestClient{}
3537
agentConfigAsset := &agentconfig.AgentConfig{}
3638
agentManifestsAsset := &manifests.AgentManifests{}
39+
installConfigAsset := &installconfig.InstallConfig{}
3740

3841
assetStore, err := assetstore.NewStore(assetDir)
3942
if err != nil {
@@ -42,15 +45,19 @@ func NewNodeZeroRestClient(ctx context.Context, assetDir string) (*NodeZeroRestC
4245

4346
agentConfig, agentConfigError := assetStore.Load(agentConfigAsset)
4447
agentManifests, manifestError := assetStore.Load(agentManifestsAsset)
48+
installConfig, installConfigError := assetStore.Load(installConfigAsset)
4549

4650
if agentConfigError != nil {
4751
logrus.Debug(errors.Wrapf(agentConfigError, "failed to load %s", agentConfigAsset.Name()))
4852
}
4953
if manifestError != nil {
5054
logrus.Debug(errors.Wrapf(manifestError, "failed to load %s", agentManifestsAsset.Name()))
5155
}
52-
if agentConfigError != nil || manifestError != nil {
53-
return nil, errors.New("failed to load AgentConfig or NMStateConfig")
56+
if installConfigError != nil {
57+
logrus.Debug(errors.Wrapf(installConfigError, "failed to load %s", installConfigAsset.Name()))
58+
}
59+
if agentConfigError != nil || manifestError != nil || installConfigError != nil {
60+
return nil, errors.New("failed to load AgentConfig, NMStateConfig, or InstallConfig")
5461
}
5562

5663
var RendezvousIP string
@@ -70,6 +77,11 @@ func NewNodeZeroRestClient(ctx context.Context, assetDir string) (*NodeZeroRestC
7077
return nil, rendezvousIPError
7178
}
7279

80+
// Get SSH Keys which can be used to determine if Rest API failures are due to network connectivity issues
81+
if installConfig != nil {
82+
restClient.NodeSSHKey = append(restClient.NodeSSHKey, installConfig.(*installconfig.InstallConfig).Config.SSHKey)
83+
}
84+
7385
config := client.Config{}
7486
config.URL = &url.URL{
7587
Scheme: "http",

0 commit comments

Comments
 (0)