Skip to content

Commit e18620f

Browse files
authored
🌱 Add --baremetal-ssh-after-install-image=false to controller (#1686)
1 parent 8ab0b5e commit e18620f

14 files changed

+641
-133
lines changed

api/v1beta1/conditions_const.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,8 @@ const (
233233
// DeprecatedRateLimitExceededCondition reports whether the rate limit has been reached.
234234
DeprecatedRateLimitExceededCondition clusterv1.ConditionType = "RateLimitExceeded"
235235
)
236+
237+
const (
238+
// RebootSucceededCondition indicates that the machine got rebooted successfully.
239+
RebootSucceededCondition clusterv1.ConditionType = "RebootSucceeded"
240+
)

api/v1beta1/hetznerbaremetalhost_types.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ const (
189189
// RebootTypeSoftware defines the software reboot. "Send CTRL+ALT+DEL to the server".
190190
RebootTypeSoftware RebootType = "sw"
191191
// RebootTypeHardware defines the hardware reboot. "Execute an automatic hardware reset".
192+
// RebootTypeHardware is supported by all servers.
192193
RebootTypeHardware RebootType = "hw"
193194
// RebootTypeManual defines the manual reboot. "Order a manual power cycle".
194195
RebootTypeManual RebootType = "man"
@@ -310,6 +311,22 @@ type ControllerGeneratedStatus struct {
310311
// Conditions define the current service state of the HetznerBareMetalHost.
311312
// +optional
312313
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
314+
315+
// ExternalIDs contains values from external systems.
316+
// +optional
317+
ExternalIDs ExternalIDs `json:"externalIDs,omitzero"`
318+
}
319+
320+
// ExternalIDs contains values from external systems.
321+
type ExternalIDs struct {
322+
// RebootAnnotationNodeBootID reflects the BootID of the Node resource in the workload-cluster.
323+
// Only set when the machine gets rebooted.
324+
// +optional
325+
RebootAnnotationNodeBootID string `json:"rebootAnnotationNodeBootID,omitempty"`
326+
327+
// RebootAnnotationSince indicates when the reboot via Annotation started.
328+
// +optional
329+
RebootAnnotationSince metav1.Time `json:"rebootAnnotationSince,omitzero"`
313330
}
314331

315332
// GetIPAddress returns the IPv6 if set, otherwise the IPv4.

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/infrastructure.cluster.x-k8s.io_hetznerbaremetalhosts.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,20 @@ spec:
238238
ErrorType indicates the type of failure encountered when the
239239
OperationalStatus is OperationalStatusError.
240240
type: string
241+
externalIDs:
242+
description: ExternalIDs contains values from external systems.
243+
properties:
244+
rebootAnnotationNodeBootID:
245+
description: |-
246+
RebootAnnotationNodeBootID reflects the BootID of the Node resource in the workload-cluster.
247+
Only set when the machine gets rebooted.
248+
type: string
249+
rebootAnnotationSince:
250+
description: RebootAnnotationSince indicates when the reboot
251+
via Annotation started.
252+
format: date-time
253+
type: string
254+
type: object
241255
hardwareDetails:
242256
description: StatusHardwareDetails are automatically gathered
243257
and should not be modified by the user.

controllers/hetznerbaremetalhost_controller.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,13 @@ import (
5252
// HetznerBareMetalHostReconciler reconciles a HetznerBareMetalHost object.
5353
type HetznerBareMetalHostReconciler struct {
5454
client.Client
55-
RateLimitWaitTime time.Duration
56-
APIReader client.Reader
57-
RobotClientFactory robotclient.Factory
58-
SSHClientFactory sshclient.Factory
59-
WatchFilterValue string
60-
PreProvisionCommand string
55+
RateLimitWaitTime time.Duration
56+
APIReader client.Reader
57+
RobotClientFactory robotclient.Factory
58+
SSHClientFactory sshclient.Factory
59+
WatchFilterValue string
60+
PreProvisionCommand string
61+
SSHAfterInstallImage bool
6162
}
6263

6364
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=hetznerbaremetalhosts,verbs=get;list;watch;create;update;patch;delete
@@ -202,6 +203,7 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
202203
RescueSSHSecret: rescueSSHSecret,
203204
SecretManager: secretManager,
204205
PreProvisionCommand: r.PreProvisionCommand,
206+
SSHAfterInstallImage: r.SSHAfterInstallImage,
205207
})
206208
if err != nil {
207209
return reconcile.Result{}, fmt.Errorf("failed to create scope: %w", err)

docs/caph/03-reference/06-hetzner-bare-metal-machine-template.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ Via MatchLabels you can specify a certain label (key and value) that identifies
7878
| `template.spec.sshSpec.secretRef.key.name` | `string` | | yes | Name is the key in the secret's data where the SSH key's name is stored |
7979
| `template.spec.sshSpec.secretRef.key.publicKey` | `string` | | yes | PublicKey is the key in the secret's data where the SSH key's public key is stored |
8080
| `template.spec.sshSpec.secretRef.key.privateKey` | `string` | | yes | PrivateKey is the key in the secret's data where the SSH key's private key is stored |
81-
| `template.spec.sshSpec.portAfterInstallImage` | `int` | `22` | no | PortAfterInstallImage specifies the port that can be used to reach the server via SSH after install image completed successfully |
81+
| `template.spec.sshSpec.portAfterInstallImage` | `int` | `22` | no | PortAfterInstallImage specifies the port that can be used to reach the server via SSH after install image completed successfully. If `--baremetal-ssh-after-install-image=false` is set, then this value will never be used. |
8282
| `template.spec.sshSpec.portAfterCloudInit` | `int` | `22` (install image port) | no | PortAfterCloudInit specifies the port that can be used to reach the server via SSH after cloud init completed successfully. Deprecated. Since [PR Install Cloud-Init-Data via post-install.sh #1407](https://github.com/syself/cluster-api-provider-hetzner/pull/1407) this field is not functional. |
8383

8484
## installImage.image

hack/update-operator-dev-deployment.sh

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,19 +90,25 @@ image="$image_path/caph-staging:$tag"
9090

9191
wait
9292

93-
kubectl scale --replicas=1 -n mgt-system deployment/caph-controller-manager
93+
ns=$(kubectl get deployments.apps -A | { grep caph-controller || true; } | cut -d' ' -f1)
94+
if [[ -z $ns ]]; then
95+
echo "failed to get namespace for caph-controller"
96+
exit 1
97+
fi
98+
99+
kubectl scale --replicas=1 -n "$ns" deployment/caph-controller-manager
94100

95-
kubectl set image -n mgt-system deployment/caph-controller-manager manager="$image"
101+
kubectl set image -n "$ns" deployment/caph-controller-manager manager="$image"
96102

97-
kubectl patch deployment -n mgt-system -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]' --type='json' caph-controller-manager
103+
kubectl patch deployment -n "$ns" -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]' --type='json' caph-controller-manager
98104

99-
kubectl rollout restart -n mgt-system deployment caph-controller-manager
105+
kubectl rollout restart -n "$ns" deployment caph-controller-manager
100106

101107
trap "echo 'Interrupted! Exiting...'; exit 1" SIGINT
102108

103-
while ! kubectl rollout status deployment --timeout=3s -n mgt-system caph-controller-manager; do
109+
while ! kubectl rollout status deployment --timeout=3s -n "$ns" caph-controller-manager; do
104110
echo "Rollout failed"
105-
kubectl events -n mgt-system | grep caph-controller-manager | tail -n 5
111+
kubectl events -n "$ns" | grep caph-controller-manager | tail -n 5
106112
echo
107113
echo
108114
done

main.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ var (
8787
preProvisionCommand string
8888
imageURLCommand string
8989
skipWebhooks bool
90+
sshAfterInstallImage bool
9091
)
9192

9293
func main() {
@@ -109,6 +110,8 @@ func main() {
109110
fs.StringVar(&preProvisionCommand, "pre-provision-command", "", "Command to run (in rescue-system) before installing the image on bare metal servers. You can use that to check if the machine is healthy before installing the image. If the exit value is non-zero, the machine is considered unhealthy. This command must be accessible by the controller pod. You can use an initContainer to copy the command to a shared emptyDir.")
110111
fs.StringVar(&imageURLCommand, "hcloud-image-url-command", "", "Command to run (in rescue-system) to provision an hcloud machine. The command will get the imageURL, bootstrap-data and machine-name of the corresponding hcloudmachine as argument. It is up to the command to download from that URL and provision the disk accordingly. This command must be accessible by the controller pod. You can use an initContainer to copy the command to a shared emptyDir. The env var OCI_REGISTRY_AUTH_TOKEN from the caph process will be set for the command, too. The command must end with the last line containing IMAGE_URL_DONE. Otherwise the execution is considered to have failed. Docs: https://syself.com/docs/caph/developers/image-url-command")
111112
fs.BoolVar(&skipWebhooks, "skip-webhooks", false, "Skip setting up of webhooks. Together with --leader-elect=false, you can use `go run main.go` to run CAPH in a cluster connected via KUBECONFIG. You should scale down the caph deployment to 0 before doing that. This is only for testing!")
113+
fs.BoolVar(&sshAfterInstallImage, "baremetal-ssh-after-install-image", true, "Connect to the baremetal machine after install-image and ensure it is provisioned. Current default is true, but we might change that to false. Background: Users might not want the controller to be able to ssh onto the servers")
114+
112115
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
113116
pflag.Parse()
114117

@@ -230,13 +233,14 @@ func main() {
230233
}
231234

232235
if err = (&controllers.HetznerBareMetalHostReconciler{
233-
Client: mgr.GetClient(),
234-
RobotClientFactory: robotclient.NewFactory(),
235-
SSHClientFactory: sshclient.NewFactory(),
236-
APIReader: mgr.GetAPIReader(),
237-
RateLimitWaitTime: rateLimitWaitTime,
238-
WatchFilterValue: watchFilterValue,
239-
PreProvisionCommand: preProvisionCommand,
236+
Client: mgr.GetClient(),
237+
RobotClientFactory: robotclient.NewFactory(),
238+
SSHClientFactory: sshclient.NewFactory(),
239+
APIReader: mgr.GetAPIReader(),
240+
RateLimitWaitTime: rateLimitWaitTime,
241+
WatchFilterValue: watchFilterValue,
242+
PreProvisionCommand: preProvisionCommand,
243+
SSHAfterInstallImage: sshAfterInstallImage,
240244
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: hetznerBareMetalHostConcurrency}); err != nil {
241245
setupLog.Error(err, "unable to create controller", "controller", "HetznerBareMetalHost")
242246
os.Exit(1)

pkg/scope/baremetalhost.go

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type BareMetalHostScopeParams struct {
4848
RescueSSHSecret *corev1.Secret
4949
SecretManager *secretutil.SecretManager
5050
PreProvisionCommand string
51+
SSHAfterInstallImage bool
5152
}
5253

5354
// NewBareMetalHostScope creates a new Scope from the supplied parameters.
@@ -93,23 +94,32 @@ func NewBareMetalHostScope(params BareMetalHostScopeParams) (*BareMetalHostScope
9394
RescueSSHSecret: params.RescueSSHSecret,
9495
SecretManager: params.SecretManager,
9596
PreProvisionCommand: params.PreProvisionCommand,
97+
SSHAfterInstallImage: params.SSHAfterInstallImage,
98+
WorkloadClusterClientFactory: &realWorkloadClusterClientFactory{
99+
logger: params.Logger,
100+
client: params.Client,
101+
cluster: params.Cluster,
102+
hetznerCluster: params.HetznerCluster,
103+
},
96104
}, nil
97105
}
98106

99107
// BareMetalHostScope defines the basic context for an actuator to operate upon.
100108
type BareMetalHostScope struct {
101109
logr.Logger
102-
Client client.Client
103-
SecretManager *secretutil.SecretManager
104-
RobotClient robotclient.Client
105-
SSHClientFactory sshclient.Factory
106-
HetznerBareMetalHost *infrav1.HetznerBareMetalHost
107-
HetznerBareMetalMachine *infrav1.HetznerBareMetalMachine
108-
HetznerCluster *infrav1.HetznerCluster
109-
Cluster *clusterv1.Cluster
110-
OSSSHSecret *corev1.Secret
111-
RescueSSHSecret *corev1.Secret
112-
PreProvisionCommand string
110+
Client client.Client
111+
SecretManager *secretutil.SecretManager
112+
RobotClient robotclient.Client
113+
SSHClientFactory sshclient.Factory
114+
HetznerBareMetalHost *infrav1.HetznerBareMetalHost
115+
HetznerBareMetalMachine *infrav1.HetznerBareMetalMachine
116+
HetznerCluster *infrav1.HetznerCluster
117+
Cluster *clusterv1.Cluster
118+
OSSSHSecret *corev1.Secret
119+
RescueSSHSecret *corev1.Secret
120+
PreProvisionCommand string
121+
SSHAfterInstallImage bool
122+
WorkloadClusterClientFactory WorkloadClusterClientFactory
113123
}
114124

115125
// Name returns the HetznerCluster name.

pkg/scope/cluster.go

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,9 @@ import (
2929
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3030
"sigs.k8s.io/cluster-api/util/conditions"
3131
"sigs.k8s.io/cluster-api/util/patch"
32-
"sigs.k8s.io/cluster-api/util/secret"
3332
"sigs.k8s.io/controller-runtime/pkg/client"
3433

3534
infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1"
36-
secretutil "github.com/syself/cluster-api-provider-hetzner/pkg/secrets"
3735
hcloudclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/hcloud/client"
3836
)
3937

@@ -148,21 +146,7 @@ func (s *ClusterScope) ControlPlaneAPIEndpointPort() int32 {
148146

149147
// ClientConfig return a kubernetes client config for the cluster context.
150148
func (s *ClusterScope) ClientConfig(ctx context.Context) (clientcmd.ClientConfig, error) {
151-
cluster := client.ObjectKey{
152-
Name: fmt.Sprintf("%s-%s", s.Cluster.Name, secret.Kubeconfig),
153-
Namespace: s.Cluster.Namespace,
154-
}
155-
156-
secretManager := secretutil.NewSecretManager(s.Logger, s.Client, s.APIReader)
157-
kubeconfigSecret, err := secretManager.AcquireSecret(ctx, cluster, s.HetznerCluster, false, false)
158-
if err != nil {
159-
return nil, fmt.Errorf("failed to acquire secret: %w", err)
160-
}
161-
kubeconfigBytes, ok := kubeconfigSecret.Data[secret.KubeconfigDataName]
162-
if !ok {
163-
return nil, fmt.Errorf("missing key %q in secret data", secret.KubeconfigDataName)
164-
}
165-
return clientcmd.NewClientConfigFromBytes(kubeconfigBytes)
149+
return workloadClientConfigFromKubeconfigSecret(ctx, s.Logger, s.Client, s.APIReader, s.Cluster, s.HetznerCluster)
166150
}
167151

168152
// ListMachines returns HCloudMachines.

0 commit comments

Comments
 (0)