@@ -2,6 +2,7 @@ package agent
22
33import (
44 "context"
5+ "fmt"
56 "net"
67 "os"
78 "path/filepath"
@@ -14,6 +15,7 @@ import (
1415
1516 "github.com/openshift/assisted-service/client/installer"
1617 "github.com/openshift/assisted-service/models"
18+ "github.com/openshift/installer/pkg/asset/agent/workflow"
1719 "github.com/openshift/installer/pkg/gather/ssh"
1820)
1921
@@ -27,6 +29,7 @@ type Cluster struct {
2729 clusterID * strfmt.UUID
2830 clusterInfraEnvID * strfmt.UUID
2931 installHistory * clusterInstallStatusHistory
32+ workflow workflow.AgentWorkflowType
3033}
3134
3235type clientSet struct {
@@ -63,21 +66,20 @@ type clusterInstallStatusHistory struct {
6366}
6467
6568// NewCluster initializes a Cluster object
66- func NewCluster (ctx context.Context , assetDir string ) (* Cluster , error ) {
67-
69+ func NewCluster (ctx context.Context , assetDir , rendezvousIP , kubeconfigPath , sshKey string , workflowType workflow.AgentWorkflowType ) (* Cluster , error ) {
6870 czero := & Cluster {}
6971 capi := & clientSet {}
7072
71- restclient , err := NewNodeZeroRestClient (ctx , assetDir )
73+ restclient , err := NewNodeZeroRestClient (ctx , rendezvousIP , sshKey )
7274 if err != nil {
7375 logrus .Fatal (err )
7476 }
75- kubeclient , err := NewClusterKubeAPIClient (ctx , assetDir )
77+ kubeclient , err := NewClusterKubeAPIClient (ctx , kubeconfigPath )
7678 if err != nil {
7779 logrus .Fatal (err )
7880 }
7981
80- ocpclient , err := NewClusterOpenShiftAPIClient (ctx , assetDir )
82+ ocpclient , err := NewClusterOpenShiftAPIClient (ctx , kubeconfigPath )
8183 if err != nil {
8284 logrus .Fatal (err )
8385 }
@@ -108,6 +110,7 @@ func NewCluster(ctx context.Context, assetDir string) (*Cluster, error) {
108110
109111 czero .Ctx = ctx
110112 czero .API = capi
113+ czero .workflow = workflowType
111114 czero .clusterID = nil
112115 czero .clusterInfraEnvID = nil
113116 czero .assetDir = assetDir
@@ -167,7 +170,6 @@ func (czero *Cluster) IsBootstrapComplete() (bool, bool, error) {
167170 if configmap {
168171 logrus .Info ("Bootstrap configMap status is complete" )
169172 czero .installHistory .ClusterBootstrapComplete = true
170- return true , false , nil
171173 }
172174 if err != nil {
173175 logrus .Debug (err )
@@ -176,105 +178,133 @@ func (czero *Cluster) IsBootstrapComplete() (bool, bool, error) {
176178
177179 // Agent Rest API is available
178180 if agentRestAPILive {
179-
180- // First time we see the agent Rest API
181- if ! czero .installHistory .RestAPISeen {
182- logrus .Debug ("Agent Rest API Initialized" )
183- czero .installHistory .RestAPISeen = true
184- czero .installHistory .NotReadyTime = time .Now ()
181+ exitOnErr , err := czero .MonitorStatusFromAssistedService ()
182+ if err != nil {
183+ return false , exitOnErr , err
185184 }
185+ }
186186
187- // Lazy loading of the clusterID and clusterInfraEnvID
188- if czero .clusterID == nil {
189- clusterID , err := czero .API .Rest .getClusterID ()
190- if err != nil {
191- return false , false , errors .Wrap (err , "Unable to retrieve clusterID from Agent Rest API" )
192- }
193- czero .clusterID = clusterID
194- }
187+ // cluster bootstrap is not complete
188+ return false , false , nil
189+ }
195190
196- if czero .clusterInfraEnvID == nil {
197- clusterInfraEnvID , err := czero .API .Rest .getClusterInfraEnvID ()
198- if err != nil {
199- return false , false , errors .Wrap (err , "Unable to retrieve clusterInfraEnvID from Agent Rest API" )
200- }
201- czero .clusterInfraEnvID = clusterInfraEnvID
202- }
191+ // MonitorStatusFromAssistedService (exit-on-error, returned-error)
192+ // checks if the Assisted Service API is up, and both cluster and
193+ // infraenv have been registered.
194+ //
195+ // After those preconditions are met,
196+ // it then reports on the host validation status and overall cluster
197+ // status and updates the cluster's install history.
198+ //
199+ // After cluster or host installation has started, new events from
200+ // the Assisted Service API are also logged and updated to the cluster's
201+ // install history.
202+ func (czero * Cluster ) MonitorStatusFromAssistedService () (bool , error ) {
203+ resource := "cluster"
204+ logPrefix := ""
205+ if czero .workflow == workflow .AgentWorkflowTypeAddNodes {
206+ resource = "host"
207+ logPrefix = fmt .Sprintf ("Node %s: " , czero .API .Rest .NodeZeroIP )
208+ }
203209
204- // Getting cluster metadata from Agent Rest API
205- clusterMetadata , err := czero .GetClusterRestAPIMetadata ()
210+ // First time we see the agent Rest API
211+ if ! czero .installHistory .RestAPISeen {
212+ logrus .Debugf ("%sAgent Rest API Initialized" , logPrefix )
213+ czero .installHistory .RestAPISeen = true
214+ czero .installHistory .NotReadyTime = time .Now ()
215+ }
216+
217+ // Lazy loading of the clusterID and clusterInfraEnvID
218+ if czero .clusterID == nil {
219+ clusterID , err := czero .API .Rest .getClusterID ()
206220 if err != nil {
207- return false , false , errors .Wrap (err , "Unable to retrieve cluster metadata from Agent Rest API" )
221+ return false , errors .Wrap (err , "Unable to retrieve clusterID from Agent Rest API" )
208222 }
223+ czero .clusterID = clusterID
224+ }
209225
210- if clusterMetadata == nil {
211- return false , false , errors .New ("cluster metadata returned nil from Agent Rest API" )
226+ if czero .clusterInfraEnvID == nil {
227+ clusterInfraEnvID , err := czero .API .Rest .getClusterInfraEnvID ()
228+ if err != nil {
229+ return false , errors .Wrap (err , "Unable to retrieve clusterInfraEnvID from Agent Rest API" )
212230 }
231+ czero .clusterInfraEnvID = clusterInfraEnvID
232+ }
233+
234+ // Getting cluster metadata from Agent Rest API
235+ clusterMetadata , err := czero .GetClusterRestAPIMetadata ()
236+ if err != nil {
237+ return false , errors .Wrap (err , "Unable to retrieve cluster metadata from Agent Rest API" )
238+ }
239+
240+ if clusterMetadata == nil {
241+ return false , errors .New ("cluster metadata returned nil from Agent Rest API" )
242+ }
213243
214- czero .PrintInstallStatus (clusterMetadata )
244+ czero .PrintInstallStatus (clusterMetadata )
215245
216- // If status indicates pending action, log host info to help pinpoint what is missing
217- if (* clusterMetadata .Status != czero .installHistory .RestAPIPreviousClusterStatus ) &&
218- (* clusterMetadata .Status == models .ClusterStatusInstallingPendingUserAction ) {
219- for _ , host := range clusterMetadata .Hosts {
220- if * host .Status == models .ClusterStatusInstallingPendingUserAction {
246+ // If status indicates pending action, log host info to help pinpoint what is missing
247+ if (* clusterMetadata .Status != czero .installHistory .RestAPIPreviousClusterStatus ) &&
248+ (* clusterMetadata .Status == models .ClusterStatusInstallingPendingUserAction ) {
249+ for _ , host := range clusterMetadata .Hosts {
250+ if * host .Status == models .ClusterStatusInstallingPendingUserAction {
251+ if logPrefix != "" {
252+ logrus .Warningf ("%s%s %s" , logPrefix , host .RequestedHostname , * host .StatusInfo )
253+ } else {
221254 logrus .Warningf ("Host %s %s" , host .RequestedHostname , * host .StatusInfo )
222255 }
223256 }
224257 }
258+ }
225259
226- if * clusterMetadata .Status == models .ClusterStatusReady {
227- stuck , err := czero .IsClusterStuckInReady ()
228- if err != nil {
229- return false , stuck , err
230- }
231- } else {
232- czero .installHistory .NotReadyTime = time .Now ()
260+ if * clusterMetadata .Status == models .ClusterStatusReady {
261+ stuck , err := czero .IsClusterStuckInReady ()
262+ if err != nil {
263+ return stuck , err
233264 }
265+ } else {
266+ czero .installHistory .NotReadyTime = time .Now ()
267+ }
234268
235- czero .installHistory .RestAPIPreviousClusterStatus = * clusterMetadata .Status
269+ czero .installHistory .RestAPIPreviousClusterStatus = * clusterMetadata .Status
236270
237- installing , _ := czero .IsInstalling (* clusterMetadata .Status )
238- if ! installing {
239- errored , _ := czero .HasErrored (* clusterMetadata .Status )
240- if errored {
241- return false , false , errors .New ("cluster has stopped installing... working to recover installation" )
242- } else if * clusterMetadata .Status == models .ClusterStatusCancelled {
243- return false , true , errors .New ("cluster installation was cancelled" )
244- }
271+ installing , _ := czero .IsInstalling (* clusterMetadata .Status )
272+ if ! installing {
273+ errored , _ := czero .HasErrored (* clusterMetadata .Status )
274+ if errored {
275+ return false , fmt .Errorf ("%s has stopped installing... working to recover installation" , resource )
276+ } else if * clusterMetadata .Status == models .ClusterStatusCancelled {
277+ return true , fmt .Errorf ("%s installation was cancelled" , resource )
245278 }
279+ }
246280
247- validationsErr := checkValidations (clusterMetadata , czero .installHistory .ValidationResults , logrus .StandardLogger ())
248- if validationsErr != nil {
249- return false , false , errors .Wrap (validationsErr , "cluster host validations failed" )
281+ validationsErr := checkValidations (clusterMetadata , czero .installHistory .ValidationResults , logrus .StandardLogger (), logPrefix )
282+ if validationsErr != nil {
283+ return false , errors .Wrap (validationsErr , "host validations failed" )
250284
251- }
285+ }
252286
253- // Print most recent event associated with the clusterInfraEnvID
254- eventList , err := czero .API .Rest .GetInfraEnvEvents (czero .clusterInfraEnvID )
255- if err != nil {
256- return false , false , errors .Wrap (err , "Unable to retrieve events about the cluster from the Agent Rest API" )
257- }
258- if len (eventList ) == 0 {
259- // No cluster events detected from the Agent Rest API
260- } else {
261- mostRecentEvent := eventList [len (eventList )- 1 ]
262- // Don't print the same status message back to back
263- if * mostRecentEvent .Message != czero .installHistory .RestAPIPreviousEventMessage {
264- if * mostRecentEvent .Severity == models .EventSeverityInfo {
265- logrus .Info (* mostRecentEvent .Message )
266- } else {
267- logrus .Warn (* mostRecentEvent .Message )
268- }
287+ // Print most recent event associated with the clusterInfraEnvID
288+ eventList , err := czero .API .Rest .GetInfraEnvEvents (czero .clusterInfraEnvID )
289+ if err != nil {
290+ return false , errors .Wrap (err , fmt .Sprintf ("Unable to retrieve events about the %s from the Agent Rest API" , resource ))
291+ }
292+ if len (eventList ) == 0 {
293+ // No cluster events detected from the Agent Rest API
294+ } else {
295+ mostRecentEvent := eventList [len (eventList )- 1 ]
296+ // Don't print the same status message back to back
297+ if * mostRecentEvent .Message != czero .installHistory .RestAPIPreviousEventMessage {
298+ if * mostRecentEvent .Severity == models .EventSeverityInfo {
299+ logrus .Infof ("%s%s" , logPrefix , * mostRecentEvent .Message )
300+ } else {
301+ logrus .Warnf ("%s%s" , logPrefix , * mostRecentEvent .Message )
269302 }
270- czero .installHistory .RestAPIPreviousEventMessage = * mostRecentEvent .Message
271- czero .installHistory .RestAPIInfraEnvEventList = eventList
272303 }
273-
304+ czero .installHistory .RestAPIPreviousEventMessage = * mostRecentEvent .Message
305+ czero .installHistory .RestAPIInfraEnvEventList = eventList
274306 }
275-
276- // cluster bootstrap is not complete
277- return false , false , nil
307+ return false , nil
278308}
279309
280310// IsInstallComplete Determine if the cluster has completed installation.
@@ -429,15 +459,12 @@ func (czero *Cluster) PrintInstallationComplete() error {
429459}
430460
431461// PrintInstallStatus Print a human friendly message using the models from the Agent Rest API.
432- func (czero * Cluster ) PrintInstallStatus (cluster * models.Cluster ) error {
433-
434- friendlyStatus := humanFriendlyClusterInstallStatus (* cluster .Status )
462+ func (czero * Cluster ) PrintInstallStatus (cluster * models.Cluster ) {
463+ friendlyStatus := czero .humanFriendlyClusterInstallStatus (* cluster .Status )
435464 // Don't print the same status message back to back
436465 if * cluster .Status != czero .installHistory .RestAPIPreviousClusterStatus {
437466 logrus .Info (friendlyStatus )
438467 }
439-
440- return nil
441468}
442469
443470// CanSSHToNodeZero Checks if ssh to NodeZero succeeds.
@@ -453,7 +480,7 @@ func (czero *Cluster) CanSSHToNodeZero() bool {
453480}
454481
455482// Human friendly install status strings mapped to the Agent Rest API cluster statuses
456- func humanFriendlyClusterInstallStatus (status string ) string {
483+ func ( czero * Cluster ) humanFriendlyClusterInstallStatus (status string ) string {
457484 clusterStoppedInstallingStates := map [string ]string {
458485 models .ClusterStatusAddingHosts : "Cluster is adding hosts" ,
459486 models .ClusterStatusCancelled : "Cluster installation cancelled" ,
@@ -466,6 +493,10 @@ func humanFriendlyClusterInstallStatus(status string) string {
466493 models .ClusterStatusPreparingForInstallation : "Preparing cluster for installation" ,
467494 models .ClusterStatusReady : "Cluster is ready for install" ,
468495 }
469- return clusterStoppedInstallingStates [status ]
470-
496+ switch czero .workflow {
497+ case workflow .AgentWorkflowTypeAddNodes :
498+ return fmt .Sprintf ("Node %s: %s" , czero .API .Rest .NodeZeroIP , clusterStoppedInstallingStates [status ])
499+ default :
500+ return clusterStoppedInstallingStates [status ]
501+ }
471502}
0 commit comments