Skip to content

Commit fb2ef62

Browse files
Merge pull request openshift#8181 from honza/monitor-bootstrap
METAL-939: baremetal: monitor bootstrap process
2 parents 04ec332 + c4f09ff commit fb2ef62

File tree

4 files changed

+184
-21
lines changed

4 files changed

+184
-21
lines changed

cmd/openshift-install/create.go

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ import (
5252
"github.com/openshift/installer/pkg/types/baremetal"
5353
"github.com/openshift/installer/pkg/types/gcp"
5454
"github.com/openshift/installer/pkg/types/vsphere"
55+
baremetalutils "github.com/openshift/installer/pkg/utils/baremetal"
5556
cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
5657
"github.com/openshift/library-go/pkg/route/routeapihelpers"
5758
)
@@ -437,42 +438,53 @@ func waitForBootstrapComplete(ctx context.Context, config *rest.Config) *cluster
437438
return newAPIError(err)
438439
}
439440

440-
if err := waitForBootstrapConfigMap(ctx, client); err != nil {
441-
return err
442-
}
441+
var platformName string
443442

444-
if err := waitForStableSNOBootstrap(ctx, config); err != nil {
445-
return newBootstrapError(err)
443+
if assetStore, err := assetstore.NewStore(command.RootOpts.Dir); err == nil {
444+
if installConfig, err := assetStore.Load(&installconfig.InstallConfig{}); err == nil && installConfig != nil {
445+
platformName = installConfig.(*installconfig.InstallConfig).Config.Platform.Name()
446+
}
446447
}
447448

448-
return nil
449-
}
450-
451-
// waitForBootstrapConfigMap watches the configmaps in the kube-system namespace
452-
// and waits for the bootstrap configmap to report that bootstrapping has
453-
// completed.
454-
func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset) *clusterCreateError {
455449
timeout := 30 * time.Minute
456450

457451
// Wait longer for baremetal and vSphere because of the time those hosts take to boot
458-
if assetStore, err := assetstore.NewStore(command.RootOpts.Dir); err == nil {
459-
if installConfig, err := assetStore.Load(&installconfig.InstallConfig{}); err == nil && installConfig != nil {
460-
if installConfig.(*installconfig.InstallConfig).Config.Platform.Name() == baremetal.Name || installConfig.(*installconfig.InstallConfig).Config.Platform.Name() == vsphere.Name {
461-
timeout = 60 * time.Minute
462-
}
463-
}
452+
if platformName == baremetal.Name || platformName == vsphere.Name {
453+
timeout = 60 * time.Minute
464454
}
465455

466-
untilTime := time.Now().Add(timeout)
467-
timezone, _ := untilTime.Zone()
456+
untilTime = time.Now().Add(timeout)
457+
timezone, _ = untilTime.Zone()
468458
logrus.Infof("Waiting up to %v (until %v %s) for bootstrapping to complete...",
469459
timeout, untilTime.Format(time.Kitchen), timezone)
470460

471461
waitCtx, cancel := context.WithTimeout(ctx, timeout)
472462
defer cancel()
473463

464+
if platformName == baremetal.Name {
465+
if err := baremetalutils.WaitForBaremetalBootstrapControlPlane(waitCtx, config); err != nil {
466+
return newBootstrapError(err)
467+
}
468+
logrus.Infof(" Baremetal control plane finished provisioning.")
469+
}
470+
471+
if err := waitForBootstrapConfigMap(waitCtx, client); err != nil {
472+
return err
473+
}
474+
475+
if err := waitForStableSNOBootstrap(ctx, config); err != nil {
476+
return newBootstrapError(err)
477+
}
478+
479+
return nil
480+
}
481+
482+
// waitForBootstrapConfigMap watches the configmaps in the kube-system namespace
483+
// and waits for the bootstrap configmap to report that bootstrapping has
484+
// completed.
485+
func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset) *clusterCreateError {
474486
_, err := clientwatch.UntilWithSync(
475-
waitCtx,
487+
ctx,
476488
cache.NewListWatchFromClient(client.CoreV1().RESTClient(), "configmaps", "kube-system", fields.OneTermEqualSelector("metadata.name", "bootstrap")),
477489
&corev1.ConfigMap{},
478490
nil,

pkg/utils/baremetal/OWNERS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
2+
# This file just uses aliases defined in OWNERS_ALIASES.
3+
4+
approvers:
5+
- baremetal-approvers
6+
reviewers:
7+
- baremetal-reviewers

pkg/utils/baremetal/bootstrap.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package baremetal
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
baremetalhost "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
8+
"github.com/sirupsen/logrus"
9+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
10+
"k8s.io/apimachinery/pkg/runtime"
11+
"k8s.io/apimachinery/pkg/watch"
12+
"k8s.io/client-go/dynamic"
13+
"k8s.io/client-go/rest"
14+
clientwatch "k8s.io/client-go/tools/watch"
15+
)
16+
17+
// WaitForBaremetalBootstrapControlPlane will watch baremetalhost resources on the bootstrap
18+
// and wait for the control plane to finish provisioning.
19+
func WaitForBaremetalBootstrapControlPlane(ctx context.Context, config *rest.Config) error {
20+
client, err := dynamic.NewForConfig(config)
21+
if err != nil {
22+
return fmt.Errorf("creating a baremetal client: %w", err)
23+
}
24+
25+
r := client.Resource(baremetalhost.GroupVersion.WithResource("baremetalhosts")).Namespace("openshift-machine-api")
26+
blw := BmhCacheListerWatcher{
27+
Resource: r,
28+
RetryWatch: true,
29+
}
30+
31+
logrus.Infof(" Waiting for baremetal control plane to provision...")
32+
33+
masters := map[string]baremetalhost.BareMetalHost{}
34+
35+
_, err = clientwatch.UntilWithSync(
36+
ctx,
37+
blw,
38+
&unstructured.Unstructured{},
39+
nil,
40+
func(event watch.Event) (bool, error) {
41+
switch event.Type {
42+
case watch.Added, watch.Modified:
43+
default:
44+
return false, nil
45+
}
46+
47+
bmh := &baremetalhost.BareMetalHost{}
48+
49+
unstr, err := runtime.DefaultUnstructuredConverter.ToUnstructured(event.Object)
50+
if err != nil {
51+
return false, err
52+
}
53+
54+
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstr, bmh); err != nil {
55+
logrus.Error("failed to convert to bmh", err)
56+
return false, err
57+
}
58+
59+
role, found := bmh.Labels["installer.openshift.io/role"]
60+
61+
if found && role == "control-plane" {
62+
prev, found := masters[bmh.Name]
63+
64+
if !found || bmh.Status.Provisioning.State != prev.Status.Provisioning.State {
65+
if bmh.Status.Provisioning.State == baremetalhost.StateNone {
66+
// StateNone is an empty string
67+
logrus.Infof(" baremetalhost: %s: uninitialized", bmh.Name)
68+
} else {
69+
logrus.Infof(" baremetalhost: %s: %s", bmh.Name, bmh.Status.Provisioning.State)
70+
}
71+
72+
if bmh.Status.OperationalStatus == baremetalhost.OperationalStatusError {
73+
logrus.Warnf(" baremetalhost: %s: %s: %s", bmh.Name, bmh.Status.ErrorType, bmh.Status.ErrorMessage)
74+
}
75+
}
76+
77+
masters[bmh.Name] = *bmh
78+
}
79+
80+
if len(masters) == 0 {
81+
return false, nil
82+
}
83+
84+
for _, master := range masters {
85+
if master.Status.Provisioning.State != baremetalhost.StateProvisioned {
86+
return false, nil
87+
}
88+
}
89+
90+
return true, nil
91+
},
92+
)
93+
94+
return err
95+
}

pkg/utils/baremetal/cache.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package baremetal
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/sirupsen/logrus"
8+
apierrors "k8s.io/apimachinery/pkg/api/errors"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
11+
"k8s.io/apimachinery/pkg/runtime"
12+
"k8s.io/apimachinery/pkg/watch"
13+
"k8s.io/client-go/dynamic"
14+
)
15+
16+
// BmhCacheListerWatcher is an object that wraps the listing and watching
17+
// functionality for baremetal host resources.
18+
type BmhCacheListerWatcher struct {
19+
// Resource is the dynamic resource client that List and Watch call.
Resource dynamic.ResourceInterface
20+
// RetryWatch, when true, makes Watch answer a NotFound error with an empty
// watcher and a nil error instead of returning the error.
RetryWatch bool
21+
}
22+
23+
// List returns a list of baremetal hosts as dynamic objects.
24+
func (bc BmhCacheListerWatcher) List(options metav1.ListOptions) (runtime.Object, error) {
25+
list, err := bc.Resource.List(context.TODO(), options)
26+
if apierrors.IsNotFound(err) {
27+
logrus.Debug(" baremetalhost resource not yet available, will retry")
28+
return &unstructured.UnstructuredList{}, nil
29+
}
30+
31+
return list, err
32+
}
33+
34+
// Watch starts a watch over baremetal hosts.
35+
func (bc BmhCacheListerWatcher) Watch(options metav1.ListOptions) (watch.Interface, error) {
36+
w, err := bc.Resource.Watch(context.TODO(), options)
37+
if apierrors.IsNotFound(err) && bc.RetryWatch {
38+
logrus.Debug(" baremetalhost resource not yet available, will retry")
39+
// When the Resource isn't installed yet, we can encourage the caller to keep
40+
// retrying by supplying an empty watcher. In the case of
41+
// UntilWithSync, the caller also checks how long it takes to create the
42+
// watch. To avoid errors, we introduce an artificial delay of one
43+
// second.
44+
w := watch.NewEmptyWatch()
45+
time.Sleep(time.Second)
46+
return w, nil
47+
}
48+
return w, err
49+
}

0 commit comments

Comments
 (0)