Skip to content

Commit 0f77fe3

Browse files
authored
🌱 Provision baremetal via --baremetal-image-url-command (#1679)
1 parent e18620f commit 0f77fe3

17 files changed

+452
-26
lines changed

api/v1beta1/hetznerbaremetalmachine_types.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,11 @@ type Image struct {
187187
// URL defines the remote URL for downloading a tar, tar.gz, tar.bz, tar.bz2, tar.xz, tgz, tbz, txz image.
188188
URL string `json:"url,omitempty"`
189189

190+
// UseCustomImageURLCommand makes the controller use the command provided by `--baremetal-image-url-command` instead of installimage.
191+
// Docs: https://syself.com/docs/caph/developers/image-url-command
192+
// +optional
193+
UseCustomImageURLCommand bool `json:"useCustomImageURLCommand"`
194+
190195
// Name defines the archive name after download. This has to be a valid name for Installimage.
191196
Name string `json:"name,omitempty"`
192197

@@ -197,6 +202,9 @@ type Image struct {
197202
// GetDetails returns the path of the image and whether the image has to be downloaded.
198203
func (image Image) GetDetails() (imagePath string, needsDownload bool, errorMessage string) {
199204
// If image is set, then the URL is also set and we have to download a remote file
205+
if image.UseCustomImageURLCommand {
206+
return "", false, "internal error: image.UseCustomImageURLCommand is active. Method GetDetails() should be used for the traditional way (without image-url-command)."
207+
}
200208
switch {
201209
case image.Name != "" && image.URL != "":
202210
suffix, err := GetImageSuffix(image.URL)

config/crd/bases/infrastructure.cluster.x-k8s.io_hetznerbaremetalhosts.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,11 @@ spec:
396396
a tar, tar.gz, tar.bz, tar.bz2, tar.xz, tgz, tbz, txz
397397
image.
398398
type: string
399+
useCustomImageURLCommand:
400+
description: |-
401+
UseCustomImageURLCommand makes the controller use the command provided by `--baremetal-image-url-command` instead of installimage.
402+
Docs: https://syself.com/docs/caph/developers/image-url-command
403+
type: boolean
399404
type: object
400405
logicalVolumeDefinitions:
401406
description: LVMDefinitions defines the logical volume definitions

config/crd/bases/infrastructure.cluster.x-k8s.io_hetznerbaremetalmachines.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ spec:
155155
description: URL defines the remote URL for downloading a
156156
tar, tar.gz, tar.bz, tar.bz2, tar.xz, tgz, tbz, txz image.
157157
type: string
158+
useCustomImageURLCommand:
159+
description: |-
160+
UseCustomImageURLCommand makes the controller use the command provided by `--baremetal-image-url-command` instead of installimage.
161+
Docs: https://syself.com/docs/caph/developers/image-url-command
162+
type: boolean
158163
type: object
159164
logicalVolumeDefinitions:
160165
description: LVMDefinitions defines the logical volume definitions

config/crd/bases/infrastructure.cluster.x-k8s.io_hetznerbaremetalmachinetemplates.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ spec:
142142
a tar, tar.gz, tar.bz, tar.bz2, tar.xz, tgz, tbz,
143143
txz image.
144144
type: string
145+
useCustomImageURLCommand:
146+
description: |-
147+
UseCustomImageURLCommand makes the controller use the command provided by `--baremetal-image-url-command` instead of installimage.
148+
Docs: https://syself.com/docs/caph/developers/image-url-command
149+
type: boolean
145150
type: object
146151
logicalVolumeDefinitions:
147152
description: LVMDefinitions defines the logical volume

controllers/hetznerbaremetalhost_controller.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ import (
2323
"reflect"
2424
"time"
2525

26+
"github.com/google/go-cmp/cmp"
2627
corev1 "k8s.io/api/core/v1"
2728
apierrors "k8s.io/apimachinery/pkg/api/errors"
2829
"k8s.io/apimachinery/pkg/types"
30+
"k8s.io/apimachinery/pkg/util/wait"
2931
"k8s.io/klog/v2"
3032
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3133
"sigs.k8s.io/cluster-api/util"
@@ -47,6 +49,7 @@ import (
4749
robotclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/robot"
4850
sshclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/ssh"
4951
"github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/host"
52+
"github.com/syself/cluster-api-provider-hetzner/pkg/utils"
5053
)
5154

5255
// HetznerBareMetalHostReconciler reconciles a HetznerBareMetalHost object.
@@ -59,6 +62,7 @@ type HetznerBareMetalHostReconciler struct {
5962
WatchFilterValue string
6063
PreProvisionCommand string
6164
SSHAfterInstallImage bool
65+
ImageURLCommand string
6266
}
6367

6468
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=hetznerbaremetalhosts,verbs=get;list;watch;create;update;patch;delete
@@ -88,6 +92,58 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
8892
return reconcile.Result{}, err
8993
}
9094

95+
// ----------------------------------------------------------------
96+
// Start: avoid conflict errors. Wait until local cache is up-to-date
97+
// Won't be needed once this was implemented:
98+
// https://github.com/kubernetes-sigs/controller-runtime/issues/3320
99+
initialHost := bmHost.DeepCopy()
100+
defer func() {
101+
// We can potentially optimize this further by ensuring that the cache is up to date only in
102+
// the cases where an outdated cache would lead to problems. Currently, we ensure that the
103+
// cache is up to date in all cases, i.e. for all possible changes to the
104+
// HetznerBareMetalHost object.
105+
if cmp.Equal(initialHost, bmHost) {
106+
// Nothing has changed. No need to wait.
107+
return
108+
}
109+
startReadOwnWrite := time.Now()
110+
111+
// The object changed. Wait until the new version is in the local cache
112+
113+
// Get the latest version from the apiserver.
114+
apiserverHost := &infrav1.HetznerBareMetalHost{}
115+
116+
// Use uncached APIReader
117+
err := r.APIReader.Get(ctx, client.ObjectKeyFromObject(bmHost), apiserverHost)
118+
if err != nil {
119+
reterr = errors.Join(reterr,
120+
fmt.Errorf("failed get HetznerBareMetalHost via uncached APIReader: %w", err))
121+
return
122+
}
123+
124+
apiserverRV := apiserverHost.ResourceVersion
125+
126+
err = wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 3*time.Second, true, func(ctx context.Context) (done bool, err error) {
127+
// new resource, read from local cache
128+
latestFromLocalCache := &infrav1.HetznerBareMetalHost{}
129+
getErr := r.Get(ctx, client.ObjectKeyFromObject(apiserverHost), latestFromLocalCache)
130+
if apierrors.IsNotFound(getErr) {
131+
// the object was deleted. All is fine.
132+
return true, nil
133+
}
134+
if getErr != nil {
135+
return false, getErr
136+
}
137+
return utils.IsLocalCacheUpToDate(latestFromLocalCache.ResourceVersion, apiserverRV), nil
138+
})
139+
if err != nil {
140+
log.Error(err, "cache sync failed after BootState change")
141+
}
142+
log.Info("Wait for update being in local cache", "durationWaitForLocalCacheSync", time.Since(startReadOwnWrite).Round(time.Millisecond))
143+
}()
144+
// End: avoid conflict errors. Wait until local cache is up-to-date
145+
// ----------------------------------------------------------------
146+
91147
initialProvisioningState := bmHost.Spec.Status.ProvisioningState
92148
defer func() {
93149
if initialProvisioningState != bmHost.Spec.Status.ProvisioningState {
@@ -203,6 +259,7 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
203259
RescueSSHSecret: rescueSSHSecret,
204260
SecretManager: secretManager,
205261
PreProvisionCommand: r.PreProvisionCommand,
262+
ImageURLCommand: r.ImageURLCommand,
206263
SSHAfterInstallImage: r.SSHAfterInstallImage,
207264
})
208265
if err != nil {

docs/caph/04-developers/06-image-url-command.md

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,29 @@ sidebar: image-url-command
55
description: Documentation on the CAPH image-url-command
66
---
77

8-
The `--hcloud-image-url-command` for the caph controller can be used to execute a custom command to
9-
install the node image.
8+
The `--hcloud-image-url-command` and `--baremetal-image-url-command` for the caph controller can be
9+
used to execute a custom command to install the node image.
1010

1111
This provides you a flexible way to create nodes.
1212

13-
The script/binary will be copied into the Hetzner Rescue System and executed.
13+
The script/binary will be copied into the rescue system and executed.
1414

1515
You need to enable two things:
1616

1717
* The caph binary must be started with the corresponding argument. Example:
18-
`--hcloud-image-url-command=/shared/image-url-command.sh`
19-
* The hcloudmachine resource must have spec.imageURL set (usually via a hcloudmachinetemplate)
18+
`--[hcloud|baremetal]-image-url-command=/shared/image-url-command.sh`
19+
* for hcloud: The hcloudmachine resource must have spec.imageURL set (usually via a
20+
hcloudmachinetemplate)
21+
* for baremetal: The hetznerbaremetalmachine resource must set `useCustomImageURLCommand: true` in its image definition (usually via a hetznerbaremetalmachinetemplate).
2022

21-
The command will get the imageURL, bootstrap-data and machine-name of the corresponding
22-
hcloudmachine as argument.
23+
The command will get the imageURL, bootstrap-data, machine-name of the corresponding
24+
machine and the root devices (separated by spaces) as arguments.
25+
26+
Example:
27+
28+
```bash
29+
/root/image-url-command oci://example.com/yourimage:v1 /root/bootstrap.data my-md-bm-kh57r-5z2v8-zdfc9 'sda sdb'
30+
```
2331

2432
It is up to the command to download from that URL and provision the disk accordingly. This command
2533
must be accessible by the controller pod. You can use an initContainer to copy the command to a
@@ -36,7 +44,7 @@ A Kubernetes event will be created in both (success, failure) cases containing t
3644
and stderr) of the script. If the script takes longer than 7 minutes, the controller cancels the
3745
provisioning.
3846

39-
We measured these durations:
47+
We measured these durations for hcloud:
4048

4149
| oldState | newState | avg(s) | min(s) | max(s) |
4250
|----------|----------|-------:|-------:|-------:|

main.go

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ var (
8585
syncPeriod time.Duration
8686
rateLimitWaitTime time.Duration
8787
preProvisionCommand string
88-
imageURLCommand string
88+
hcloudImageURLCommand string
89+
baremetalImageURLCommand string
8990
skipWebhooks bool
9091
sshAfterInstallImage bool
9192
)
@@ -108,7 +109,8 @@ func main() {
108109
fs.DurationVar(&rateLimitWaitTime, "rate-limit", 5*time.Minute, "The rate limiting for HCloud controller (e.g. 5m)")
109110
fs.BoolVar(&hcloudclient.DebugAPICalls, "debug-hcloud-api-calls", false, "Debug all calls to the hcloud API.")
110111
fs.StringVar(&preProvisionCommand, "pre-provision-command", "", "Command to run (in rescue-system) before installing the image on bare metal servers. You can use that to check if the machine is healthy before installing the image. If the exit value is non-zero, the machine is considered unhealthy. This command must be accessible by the controller pod. You can use an initContainer to copy the command to a shared emptyDir.")
111-
fs.StringVar(&imageURLCommand, "hcloud-image-url-command", "", "Command to run (in rescue-system) to provision an hcloud machine. The command will get the imageURL, bootstrap-data and machine-name of the corresponding hcloudmachine as argument. It is up to the command to download from that URL and provision the disk accordingly. This command must be accessible by the controller pod. You can use an initContainer to copy the command to a shared emptyDir. The env var OCI_REGISTRY_AUTH_TOKEN from the caph process will be set for the command, too. The command must end with the last line containing IMAGE_URL_DONE. Otherwise the execution is considered to have failed. Docs: https://syself.com/docs/caph/developers/image-url-command")
112+
fs.StringVar(&hcloudImageURLCommand, "hcloud-image-url-command", "", "Command to run (in rescue-system) to provision an hcloud machine. Docs: https://syself.com/docs/caph/developers/image-url-command")
113+
fs.StringVar(&baremetalImageURLCommand, "baremetal-image-url-command", "", "Command to run (in rescue-system) to provision an baremetal machine. Docs: https://syself.com/docs/caph/developers/image-url-command")
112114
fs.BoolVar(&skipWebhooks, "skip-webhooks", false, "Skip setting up of webhooks. Together with --leader-elect=false, you can use `go run main.go` to run CAPH in a cluster connected via KUBECONFIG. You should scale down the caph deployment to 0 before doing that. This is only for testing!")
113115
fs.BoolVar(&sshAfterInstallImage, "baremetal-ssh-after-install-image", true, "Connect to the baremetal machine after install-image and ensure it is provisioned. Current default is true, but we might change that to false. Background: Users might not want the controller to be able to ssh onto the servers")
114116

@@ -133,22 +135,38 @@ func main() {
133135
}
134136
}
135137

136-
// If ImageURLCommand is set, check if the file exists and validate the basename.
137-
if imageURLCommand != "" {
138-
baseName := filepath.Base(imageURLCommand)
138+
// If hcloudImageURLCommand is set, check if the file exists and validate the basename.
139+
if hcloudImageURLCommand != "" {
140+
baseName := filepath.Base(hcloudImageURLCommand)
139141
if !commandRegex.MatchString(baseName) {
140142
msg := fmt.Sprintf("basename (%s) must match the regex %s", baseName, commandRegex.String())
141143
setupLog.Error(errors.New(msg), "")
142144
os.Exit(1)
143145
}
144146

145-
_, err := os.Stat(imageURLCommand)
147+
_, err := os.Stat(hcloudImageURLCommand)
146148
if err != nil {
147149
setupLog.Error(err, "hcloud-image-url-command not found")
148150
os.Exit(1)
149151
}
150152
}
151153

154+
// If baremetalImageURLCommand is set, check if the file exists and validate the basename.
155+
if baremetalImageURLCommand != "" {
156+
baseName := filepath.Base(baremetalImageURLCommand)
157+
if !commandRegex.MatchString(baseName) {
158+
msg := fmt.Sprintf("basename (%s) must match the regex %s", baseName, commandRegex.String())
159+
setupLog.Error(errors.New(msg), "")
160+
os.Exit(1)
161+
}
162+
163+
_, err := os.Stat(baremetalImageURLCommand)
164+
if err != nil {
165+
setupLog.Error(err, "baremetal-image-url-command not found")
166+
os.Exit(1)
167+
}
168+
}
169+
152170
var watchNamespaces map[string]cache.Config
153171
if watchNamespace != "" {
154172
watchNamespaces = map[string]cache.Config{
@@ -215,7 +233,7 @@ func main() {
215233
HCloudClientFactory: hcloudClientFactory,
216234
SSHClientFactory: sshclient.NewFactory(),
217235
WatchFilterValue: watchFilterValue,
218-
ImageURLCommand: imageURLCommand,
236+
ImageURLCommand: hcloudImageURLCommand,
219237
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: hcloudMachineConcurrency}); err != nil {
220238
setupLog.Error(err, "unable to create controller", "controller", "HCloudMachine")
221239
os.Exit(1)
@@ -240,6 +258,7 @@ func main() {
240258
RateLimitWaitTime: rateLimitWaitTime,
241259
WatchFilterValue: watchFilterValue,
242260
PreProvisionCommand: preProvisionCommand,
261+
ImageURLCommand: baremetalImageURLCommand,
243262
SSHAfterInstallImage: sshAfterInstallImage,
244263
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: hetznerBareMetalHostConcurrency}); err != nil {
245264
setupLog.Error(err, "unable to create controller", "controller", "HetznerBareMetalHost")

pkg/scope/baremetalhost.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type BareMetalHostScopeParams struct {
4848
RescueSSHSecret *corev1.Secret
4949
SecretManager *secretutil.SecretManager
5050
PreProvisionCommand string
51+
ImageURLCommand string
5152
SSHAfterInstallImage bool
5253
}
5354

@@ -101,6 +102,7 @@ func NewBareMetalHostScope(params BareMetalHostScopeParams) (*BareMetalHostScope
101102
cluster: params.Cluster,
102103
hetznerCluster: params.HetznerCluster,
103104
},
105+
ImageURLCommand: params.ImageURLCommand,
104106
}, nil
105107
}
106108

@@ -120,6 +122,7 @@ type BareMetalHostScope struct {
120122
PreProvisionCommand string
121123
SSHAfterInstallImage bool
122124
WorkloadClusterClientFactory WorkloadClusterClientFactory
125+
ImageURLCommand string
123126
}
124127

125128
// Name returns the HetznerCluster name.

pkg/scope/cluster.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,14 @@ import (
3737

3838
// ClusterScopeParams defines the input parameters used to create a new scope.
3939
type ClusterScopeParams struct {
40-
Client client.Client
41-
APIReader client.Reader
42-
Logger logr.Logger
43-
HetznerSecret *corev1.Secret
44-
HCloudClient hcloudclient.Client
45-
Cluster *clusterv1.Cluster
46-
HetznerCluster *infrav1.HetznerCluster
40+
Client client.Client
41+
APIReader client.Reader
42+
Logger logr.Logger
43+
HetznerSecret *corev1.Secret
44+
HCloudClient hcloudclient.Client
45+
Cluster *clusterv1.Cluster
46+
HetznerCluster *infrav1.HetznerCluster
47+
ImageURLCommand string
4748
}
4849

4950
// NewClusterScope creates a new Scope from the supplied parameters.

0 commit comments

Comments
 (0)