Skip to content

Commit 773258f

Browse files
authored
Merge pull request #123 from mrog/main
Discard and replace new VMs if they never join the cluster
2 parents 9416906 + 187d468 commit 773258f

File tree

7 files changed

+92
-4
lines changed

7 files changed

+92
-4
lines changed

api/v1beta1/cloudstackmachine_types.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package v1beta1
1919
import (
2020
corev1 "k8s.io/api/core/v1"
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+
"time"
2223
)
2324

2425
const (
@@ -130,10 +131,23 @@ type CloudStackMachineStatus struct {
130131
// +optional
131132
InstanceState InstanceState `json:"instanceState,omitempty"`
132133

134+
// InstanceStateLastUpdated is the time the instance state was last updated.
135+
// +optional
136+
InstanceStateLastUpdated metav1.Time `json:"instanceStateLastUpdated,omitempty"`
137+
133138
// Ready indicates the readiness of the provider resource.
134139
Ready bool `json:"ready"`
135140
}
136141

142+
// TimeSinceLastStateChange returns the amount of time that's elapsed since the state was last updated. If the state
143+
// hasn't ever been updated, it returns a negative value.
144+
func (s *CloudStackMachineStatus) TimeSinceLastStateChange() time.Duration {
145+
if s.InstanceStateLastUpdated.IsZero() {
146+
return time.Duration(-1)
147+
}
148+
return time.Since(s.InstanceStateLastUpdated.Time)
149+
}
150+
137151
// +kubebuilder:object:root=true
138152
// +kubebuilder:resource:path=cloudstackmachines,scope=Namespaced,categories=cluster-api,shortName=csm
139153
// +kubebuilder:storageversion
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
Copyright 2022 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1beta1_test
18+
19+
import (
20+
. "github.com/onsi/ginkgo/v2"
21+
. "github.com/onsi/gomega"
22+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
infrav1 "sigs.k8s.io/cluster-api-provider-cloudstack/api/v1beta1"
24+
"time"
25+
)
26+
27+
var _ = Describe("CloudStackMachine types", func() {
28+
var cloudStackMachine infrav1.CloudStackMachine
29+
30+
BeforeEach(func() { // Reset test vars to initial state.
31+
cloudStackMachine = infrav1.CloudStackMachine{}
32+
})
33+
34+
Context("When calculating time since state change", func() {
35+
It("Return the correct value when the last state update time is known", func() {
36+
delta := time.Duration(10 * time.Minute)
37+
lastUpdated := time.Now().Add(-delta)
38+
cloudStackMachine.Status.InstanceStateLastUpdated = metav1.NewTime(lastUpdated)
39+
Ω(cloudStackMachine.Status.TimeSinceLastStateChange()).Should(BeNumerically("~", delta, time.Second))
40+
})
41+
42+
It("Return a negative value when the last state update time is unknown", func() {
43+
Ω(cloudStackMachine.Status.TimeSinceLastStateChange()).Should(BeNumerically("<", 0))
44+
})
45+
})
46+
})

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/infrastructure.cluster.x-k8s.io_cloudstackmachines.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,11 @@ spec:
235235
description: InstanceState is the state of the CloudStack instance
236236
for this machine.
237237
type: string
238+
instanceStateLastUpdated:
239+
description: InstanceStateLastUpdated is the time the instance state
240+
was last updated.
241+
format: date-time
242+
type: string
238243
ready:
239244
description: Ready indicates the readiness of the provider resource.
240245
type: boolean

controllers/cloudstackmachinestatechecker_controller.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,33 @@ func (r *CloudStackMachineStateCheckerReconciliationRunner) Reconcile() (ctrl.Re
7070
if res, err := r.GetParent(r.ReconciliationSubject, r.CSMachine)(); r.ShouldReturn(res, err) {
7171
return res, err
7272
}
73+
7374
if res, err := r.GetParent(r.CSMachine, r.CAPIMachine)(); r.ShouldReturn(res, err) {
7475
return res, err
7576
}
7677

7778
if err := r.CSClient.ResolveVMInstanceDetails(r.CSMachine); err != nil && !strings.Contains(strings.ToLower(err.Error()), "no match found") {
7879
return r.ReturnWrappedError(err, "failed to resolve VM instance details")
7980
}
80-
if r.CSMachine.Status.InstanceState == "Running" {
81+
82+
csRunning := r.CSMachine.Status.InstanceState == "Running"
83+
csTimeInState := r.CSMachine.Status.TimeSinceLastStateChange()
84+
capiRunning := r.CAPIMachine.Status.Phase == "Running"
85+
86+
// capiTimeout indicates that a new VM is running, but it isn't reachable due to a network issue or a misconfiguration.
87+
// When this happens, the machine should be deleted or else the cluster won't ever recover.
88+
capiTimeout := csRunning && !capiRunning && csTimeInState > 5*time.Minute
89+
90+
if csRunning && capiRunning {
8191
r.ReconciliationSubject.Status.Ready = true
82-
} else {
92+
} else if !csRunning || capiTimeout {
93+
r.Log.Info("CloudStack instance in bad state",
94+
"name", r.CSMachine.Name,
95+
"instance-id", r.CSMachine.Spec.InstanceID,
96+
"cs-state", r.CSMachine.Status.InstanceState,
97+
"cs-time-in-state", csTimeInState.String(),
98+
"capi-phase", r.CAPIMachine.Status.Phase)
99+
83100
if err := r.K8sClient.Delete(r.RequestCtx, r.CAPIMachine); err != nil {
84101
return r.ReturnWrappedError(err, "failed to delete CAPI machine")
85102
}

pkg/cloud/instance.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package cloud
1818

1919
import (
2020
"fmt"
21+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
"strings"
2223

2324
capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
@@ -41,7 +42,11 @@ func setMachineDataFromVMMetrics(vmResponse *cloudstack.VirtualMachinesMetric, c
4142
csMachine.Spec.ProviderID = pointer.StringPtr(fmt.Sprintf("cloudstack:///%s", vmResponse.Id))
4243
csMachine.Spec.InstanceID = pointer.StringPtr(vmResponse.Id)
4344
csMachine.Status.Addresses = []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: vmResponse.Ipaddress}}
44-
csMachine.Status.InstanceState = infrav1.InstanceState(vmResponse.State)
45+
newInstanceState := infrav1.InstanceState(vmResponse.State)
46+
if newInstanceState != csMachine.Status.InstanceState || (newInstanceState != "" && csMachine.Status.InstanceStateLastUpdated.IsZero()) {
47+
csMachine.Status.InstanceState = newInstanceState
48+
csMachine.Status.InstanceStateLastUpdated = metav1.Now()
49+
}
4550
}
4651

4752
// ResolveVMInstanceDetails Retrieves VM instance details by csMachine.Spec.InstanceID or csMachine.Name, and

test/e2e/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,6 @@ You can specify JOB environment variable which value is a regular expression to
5858
For example,
5959

6060
```shell
61-
JOB=PR-Blocking make run
61+
JOB=PR-Blocking make run-e2e
6262
```
6363
This command runs the e2e tests that contains `PR-Blocking` in their spec names.

0 commit comments

Comments
 (0)