Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3d01d2d
:seedling: Remove usage of FailureReason and FailureMessage (baremetal)
guettli Nov 17, 2025
1fa1406
make linter happy.
guettli Nov 17, 2025
8c7c7a9
set condition.
guettli Nov 17, 2025
8c81c6c
feedback of AI.
guettli Nov 17, 2025
5b3a35d
set condition on host, too. Do not lose the message...
guettli Nov 17, 2025
98bcfff
cleanup, do not set condition from hbmm reconcile on hbmh.
guettli Nov 17, 2025
fb67a97
copy condition from hbmm to host. No need to read capi machine.
guettli Nov 17, 2025
a5521cd
delete via RemediateAnnotation does not work well. Code in Remediatio…
guettli Nov 17, 2025
7d834ca
revert last commit. Delete via remediation is ok.
guettli Nov 18, 2025
cda32ca
WIP: give msg to method, so that a more precise error gets created.
guettli Nov 18, 2025
700d1ec
show exit message of remediation on capi machine condition.
guettli Nov 20, 2025
f71d8a2
no need to try a reboot.
guettli Nov 20, 2025
7c57394
Merge branch 'main' into tg/remove-failure-reason--baremetal
guettli Nov 20, 2025
401a474
more logging, WIP
guettli Nov 20, 2025
25890c4
fixed test cases.
guettli Nov 20, 2025
7133120
deduplicated code, cleaned up PR.
guettli Nov 20, 2025
527de14
...
guettli Nov 20, 2025
826c0d4
adapt comments to no longer use failure reason.
guettli Nov 20, 2025
523dd7b
remove todo.
guettli Nov 20, 2025
49a275c
linting.
guettli Nov 20, 2025
0726e9a
make diff to main smaller.
guettli Nov 20, 2025
ef94736
tiny changes.
guettli Nov 20, 2025
0385349
Merge branch 'main' into tg/remove-failure-reason--baremetal
guettli Nov 20, 2025
12cf08b
:seedling: set GITHUB_TOKEN, so that Lychee does not get rate-limited.
guettli Nov 20, 2025
69642d9
Merge branch 'tg/set-github-token-to-avoid-rate-limiting' into tg/rem…
guettli Nov 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pr-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ jobs:
# if there's a diff then the workflow will exit here.
- name: Run make verify
run: make BUILD_IN_CONTAINER=false verify
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Lint Golang Code
run: make BUILD_IN_CONTAINER=false lint-golang-ci
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ ifeq ($(BUILD_IN_CONTAINER),true)
$(BUILDER_IMAGE):$(BUILDER_IMAGE_VERSION) $@;
else
@lychee --version
@if [ -z "$${GITHUB_TOKEN}" ]; then echo "GITHUB_TOKEN is not set"; exit 1; fi
lychee --verbose --config .lychee.toml ./*.md ./docs/**/*.md 2>&1 | grep -vP '\[(200|EXCLUDED)\]'
endif

Expand Down
12 changes: 12 additions & 0 deletions api/v1beta1/conditions_const.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,15 @@ const (
// RebootSucceededCondition indicates that the machine got rebooted successfully.
RebootSucceededCondition clusterv1.ConditionType = "RebootSucceeded"
)

// Condition type and reason describing whether the corresponding CAPI Machine carries the
// "cluster.x-k8s.io/remediate-machine" annotation.
const (
// NoRemediateMachineAnnotationCondition is:
// - False when the corresponding CAPI Machine has the "cluster.x-k8s.io/remediate-machine" annotation set and will be remediated by CAPI soon.
// - True otherwise.
NoRemediateMachineAnnotationCondition clusterv1.ConditionType = "NoRemediateMachineAnnotation"

// RemediateMachineAnnotationIsSetReason indicates that the CAPI machine has the
// "cluster.x-k8s.io/remediate-machine" annotation set. The CAPI machine and the corresponding
// infra-machine will be deleted by CAPI soon.
//
// NOTE(review): presumably used as the reason of NoRemediateMachineAnnotationCondition when that
// condition is False — confirm at the call sites.
RemediateMachineAnnotationIsSetReason = "RemediateMachineAnnotationIsSet"
)
7 changes: 5 additions & 2 deletions api/v1beta1/hetznerbaremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,10 @@ const (
// RegistrationError is an error condition occurring when the
// controller is unable to retrieve information on a specific server via robot.
RegistrationError ErrorType = "registration error"

// PreparationError is an error condition occurring when something fails while preparing host reconciliation.
PreparationError ErrorType = "preparation error"

// ProvisioningError is an error condition occurring when the controller
// fails to provision or deprovision the Host.
ProvisioningError ErrorType = "provisioning error"
Expand Down Expand Up @@ -230,8 +232,9 @@ type HetznerBareMetalHostSpec struct {
// +optional
ConsumerRef *corev1.ObjectReference `json:"consumerRef,omitempty"`

// MaintenanceMode indicates that a machine is supposed to be deprovisioned
// and won't be selected by any Hetzner bare metal machine.
// MaintenanceMode indicates that a machine is supposed to be deprovisioned. The CAPI Machine
// will get the cluster.x-k8s.io/remediate-machine annotation, and CAPI will deprovision the
// machine. Accordingly, the host won't be selected by any Hetzner bare metal machine.
MaintenanceMode *bool `json:"maintenanceMode,omitempty"`

// Description is a human-entered text used to help identify the host.
Expand Down
12 changes: 4 additions & 8 deletions api/v1beta1/hetznerbaremetalmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/selection"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
capierrors "sigs.k8s.io/cluster-api/errors" //nolint:staticcheck // we will handle that, when we update to capi v1.11
)

const (
Expand Down Expand Up @@ -298,8 +297,11 @@ type HetznerBareMetalMachineStatus struct {
LastUpdated *metav1.Time `json:"lastUpdated,omitempty"`

// FailureReason will be set in the event that there is a terminal problem.
//
// Deprecated: This field is deprecated and is going to be removed when support for v1beta1 will be dropped. Please see https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20240916-improve-status-in-CAPI-resources.md for more details.
//
// +optional
FailureReason *capierrors.MachineStatusError `json:"failureReason,omitempty"`
FailureReason *string `json:"failureReason,omitempty"`

// FailureMessage will be set in the event that there is a terminal problem.
// +optional
Expand Down Expand Up @@ -357,12 +359,6 @@ func (hbmm *HetznerBareMetalMachine) SetConditions(conditions clusterv1.Conditio
hbmm.Status.Conditions = conditions
}

// SetFailure sets a failure reason and message.
func (hbmm *HetznerBareMetalMachine) SetFailure(reason capierrors.MachineStatusError, message string) {
hbmm.Status.FailureReason = &reason
hbmm.Status.FailureMessage = &message
}

// GetImageSuffix tests whether the suffix is known and outputs it if yes. Otherwise it returns an error.
func GetImageSuffix(url string) (string, error) {
if strings.HasPrefix(url, "oci://") {
Expand Down
30 changes: 0 additions & 30 deletions api/v1beta1/hetznerbaremetalmachine_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/require"
capierrors "sigs.k8s.io/cluster-api/errors" //nolint:staticcheck // we will handle that, when we update to capi v1.11
)

var _ = Describe("Test Image.GetDetails", func() {
Expand Down Expand Up @@ -177,35 +176,6 @@ var _ = Describe("Test GetImageSuffix", func() {
)
})

var _ = Describe("Test SetFailure", func() {
bmMachine := HetznerBareMetalMachine{}
newFailureMessage := "bad error"
newFailureReason := capierrors.CreateMachineError

It("sets new failure on the machine with existing failure", func() {
failureMessage := "first message"
failureReason := capierrors.MachineStatusError("first error")
bmMachine.Status.FailureMessage = &failureMessage
bmMachine.Status.FailureReason = &failureReason

bmMachine.SetFailure(newFailureReason, newFailureMessage)

Expect(bmMachine.Status.FailureMessage).ToNot(BeNil())
Expect(bmMachine.Status.FailureReason).ToNot(BeNil())
Expect(*bmMachine.Status.FailureMessage).To(Equal(newFailureMessage))
Expect(*bmMachine.Status.FailureReason).To(Equal(newFailureReason))
})

It("sets new failure on the machine without existing failure", func() {
bmMachine.SetFailure(newFailureReason, newFailureMessage)

Expect(bmMachine.Status.FailureMessage).ToNot(BeNil())
Expect(bmMachine.Status.FailureReason).ToNot(BeNil())
Expect(*bmMachine.Status.FailureMessage).To(Equal(newFailureMessage))
Expect(*bmMachine.Status.FailureReason).To(Equal(newFailureReason))
})
})

var _ = Describe("Test HasHostAnnotation", func() {
type testCaseHasHostAnnotation struct {
annotations map[string]string
Expand Down
2 changes: 1 addition & 1 deletion api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ spec:
type: string
maintenanceMode:
description: |-
MaintenanceMode indicates that a machine is supposed to be deprovisioned
and won't be selected by any Hetzner bare metal machine.
MaintenanceMode indicates that a machine is supposed to be deprovisioned. The CAPI Machine
will get the cluster.x-k8s.io/remediate-machine annotation, and CAPI will deprovision the
machine. Accordingly, the host won't be selected by any Hetzner bare metal machine.
type: boolean
rootDeviceHints:
description: |-
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,10 @@ spec:
a terminal problem.
type: string
failureReason:
description: FailureReason will be set in the event that there is
a terminal problem.
description: |-
FailureReason will be set in the event that there is a terminal problem.

Deprecated: This field is deprecated and is going to be removed when support for v1beta1 will be dropped. Please see https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20240916-improve-status-in-CAPI-resources.md for more details.
type: string
lastUpdated:
description: LastUpdated identifies when this status was last observed.
Expand Down
19 changes: 17 additions & 2 deletions controllers/hetznerbaremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
return res, nil
}

// Case "Delete" was handled in reconcileSelectedStates. From here on we know that the host has no
// DeletionTimestamp set.

hetznerCluster := &infrav1.HetznerCluster{}

hetznerClusterName := client.ObjectKey{
Expand Down Expand Up @@ -226,9 +229,21 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
}

log = log.WithValues("HetznerBareMetalMachine", klog.KObj(hetznerBareMetalMachine))

ctx = ctrl.LoggerInto(ctx, log)

remediateConditionOfHbmm := conditions.Get(hetznerBareMetalMachine, infrav1.NoRemediateMachineAnnotationCondition)
if remediateConditionOfHbmm != nil && remediateConditionOfHbmm.Status == corev1.ConditionFalse {
// The hbmm of this host is in remediation. Do not reconcile it.
// Take the Condition of the hbmm and make it available on the hbmh.
msg := "hbmm has NoRemediateMachineAnnotationCondition=False. Not reconciling this host."
log.Info(msg)
conditions.MarkFalse(bmHost, infrav1.NoRemediateMachineAnnotationCondition,
remediateConditionOfHbmm.Reason, remediateConditionOfHbmm.Severity,
"%s", remediateConditionOfHbmm.Message)
return reconcile.Result{}, nil
}
conditions.MarkTrue(bmHost, infrav1.NoRemediateMachineAnnotationCondition)

// Get Hetzner robot api credentials
secretManager := secretutil.NewSecretManager(log, r.Client, r.APIReader)
robotCreds, err := getAndValidateRobotCredentials(ctx, req.Namespace, hetznerCluster, secretManager)
Expand Down Expand Up @@ -307,7 +322,7 @@ func (r *HetznerBareMetalHostReconciler) reconcileSelectedStates(ctx context.Con

return ctrl.Result{RequeueAfter: 10 * time.Second}, nil

// Handle StateDeleting
// Handle StateDeleting
case infrav1.StateDeleting:
if controllerutil.RemoveFinalizer(bmHost, infrav1.HetznerBareMetalHostFinalizer) ||
controllerutil.RemoveFinalizer(bmHost, infrav1.DeprecatedBareMetalHostFinalizer) {
Expand Down
10 changes: 5 additions & 5 deletions controllers/hetznerbaremetalmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,20 @@ func (r *HetznerBareMetalMachineReconciler) Reconcile(ctx context.Context, req r
log = log.WithValues("HetznerBareMetalMachine", klog.KObj(hbmMachine))

// Fetch the Machine.
machine, err := util.GetOwnerMachine(ctx, r.Client, hbmMachine.ObjectMeta)
capiMachine, err := util.GetOwnerMachine(ctx, r.Client, hbmMachine.ObjectMeta)
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get owner machine. BareMetalMachine.ObjectMeta.OwnerReferences %v: %w",
hbmMachine.ObjectMeta.OwnerReferences, err)
}
if machine == nil {
if capiMachine == nil {
log.Info("Machine Controller has not yet set OwnerRef")
return reconcile.Result{}, nil
}

log = log.WithValues("Machine", klog.KObj(machine))
log = log.WithValues("Machine", klog.KObj(capiMachine))

// Fetch the Cluster.
cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machine.ObjectMeta)
cluster, err := util.GetClusterFromMetadata(ctx, r.Client, capiMachine.ObjectMeta)
if err != nil {
log.Info("Machine is missing cluster label or cluster does not exist")
return reconcile.Result{}, nil
Expand Down Expand Up @@ -125,7 +125,7 @@ func (r *HetznerBareMetalMachineReconciler) Reconcile(ctx context.Context, req r
machineScope, err := scope.NewBareMetalMachineScope(scope.BareMetalMachineScopeParams{
Client: r.Client,
Logger: log,
Machine: machine,
Machine: capiMachine,
BareMetalMachine: hbmMachine,
HetznerCluster: hetznerCluster,
HCloudClient: hcc,
Expand Down
36 changes: 29 additions & 7 deletions controllers/hetznerbaremetalmachine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package controllers

import (
"context"
"fmt"
"testing"
"time"

Expand All @@ -36,13 +37,14 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/utils/ptr"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/controller-runtime/pkg/client"
fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1"
"github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/baremetal"
robotmock "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/mocks/robot"
sshmock "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/mocks/ssh"
sshclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/ssh"
Expand Down Expand Up @@ -411,7 +413,7 @@ var _ = Describe("HetznerBareMetalMachineReconciler", func() {
}, timeout, time.Second).Should(BeTrue())
})

It("sets a failure reason when maintenance mode is set on the host", func() {
It("sets RemediateMachineAnnotation when maintenance mode is set on the host", func() {
By("making sure that machine is ready")

Eventually(func() bool {
Expand All @@ -431,14 +433,34 @@ var _ = Describe("HetznerBareMetalMachineReconciler", func() {

Expect(ph.Patch(ctx, host, patch.WithStatusObservedGeneration{})).To(Succeed())

By("checking that failure message is set on machine")
By("checking that RemediateMachineAnnotation is set on machine")

Eventually(func() bool {
Eventually(func() error {
if err := testEnv.Get(ctx, key, bmMachine); err != nil {
return false
return err
}
return bmMachine.Status.FailureMessage != nil && *bmMachine.Status.FailureMessage == baremetal.FailureMessageMaintenanceMode
}, timeout).Should(BeTrue())

capiMachine, err := util.GetOwnerMachine(ctx, testEnv, bmMachine.ObjectMeta)
if err != nil {
return err
}

_, exists := capiMachine.Annotations[clusterv1.RemediateMachineAnnotation]
if !exists {
return fmt.Errorf("RemediateMachineAnnotation not set on capi machine")
}

c := conditions.Get(bmMachine, infrav1.NoRemediateMachineAnnotationCondition)
if c == nil {
return fmt.Errorf("condition NoRemediateMachineAnnotationCondition does not exist")
}

if c.Status != corev1.ConditionFalse {
return fmt.Errorf("condition NoRemediateMachineAnnotationCondition should be False")
}

return nil
}, timeout).Should(Succeed())
})

It("checks the hetznerBareMetalMachine status running phase", func() {
Expand Down
67 changes: 67 additions & 0 deletions pkg/baremetalutils/baremetalutils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package baremetalutils implements helper functions for working with baremetal.
package baremetalutils

import (
"context"
"fmt"
"strings"

"sigs.k8s.io/controller-runtime/pkg/client"

infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1"
)

// splitHostKey breaks a host key of the form "<namespace>/<name>" into its two
// components. It panics with "unexpected host key" if the key does not contain
// exactly one "/" separator.
func splitHostKey(key string) (namespace, name string) {
	// Exactly one separator means exactly two components.
	if strings.Count(key, "/") != 1 {
		panic("unexpected host key")
	}
	namespace, name, _ = strings.Cut(key, "/")
	return namespace, name
}

// GetAssociatedHost returns the HetznerBareMetalHost that is referenced by the
// host annotation (infrav1.HostAnnotation) of the given HetznerBareMetalMachine.
//
// The annotation value must have the form "<namespace>/<name>"; the host is
// looked up with exactly that namespace and name.
//
// It returns:
//   - (nil, nil) if the machine has no host annotation, i.e. no host is associated,
//   - (nil, err) if the annotation value is malformed or the host cannot be fetched,
//   - (host, nil) otherwise.
func GetAssociatedHost(ctx context.Context, crClient client.Client, hbmm *infrav1.HetznerBareMetalMachine) (*infrav1.HetznerBareMetalHost, error) {
	// Reading from a nil map is safe, so a machine without any annotations is
	// handled by the same lookup as a machine without the host annotation.
	hostKey, ok := hbmm.GetAnnotations()[infrav1.HostAnnotation]
	if !ok {
		return nil, nil
	}

	// Annotations can be edited by users. Validate the format here and report
	// an error, because splitHostKey panics on a malformed key.
	if strings.Count(hostKey, "/") != 1 {
		return nil, fmt.Errorf("invalid value %q of annotation %q: expected <namespace>/<name>",
			hostKey, infrav1.HostAnnotation)
	}
	hostNamespace, hostName := splitHostKey(hostKey)

	// Fetch the referenced host object.
	host := &infrav1.HetznerBareMetalHost{}
	key := client.ObjectKey{
		Namespace: hostNamespace,
		Name:      hostName,
	}
	if err := crClient.Get(ctx, key, host); err != nil {
		return nil, fmt.Errorf("failed to get host object: %w", err)
	}
	return host, nil
}
Loading
Loading