Skip to content

Commit ba44229

Browse files
committed
Validate that the pod is running, and retry the df command on the pod if it fails
1 parent f0d7803 commit ba44229

File tree

3 files changed

+51
-7
lines changed

3 files changed

+51
-7
lines changed

pkg/controller/perconaservermongodb/metrics_client.go

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@ import (
55
"context"
66
"strconv"
77
"strings"
8+
"time"
89

910
"github.com/pkg/errors"
1011
corev1 "k8s.io/api/core/v1"
12+
"k8s.io/apimachinery/pkg/util/wait"
13+
"k8s.io/client-go/util/retry"
14+
logf "sigs.k8s.io/controller-runtime/pkg/log"
1115

16+
"github.com/percona/percona-server-mongodb-operator/pkg/naming"
1217
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/config"
1318
)
1419

@@ -25,6 +30,23 @@ func (r *ReconcilePerconaServerMongoDB) getPVCUsageFromMetrics(
2530
pod *corev1.Pod,
2631
pvcName string,
2732
) (*PVCUsage, error) {
33+
log := logf.FromContext(ctx).WithName("StorageAutoscaling").WithValues("pvc", pvcName)
34+
35+
if pod == nil {
36+
return nil, errors.New("pod is nil")
37+
}
38+
39+
if !isContainerAndPodRunning(*pod, naming.ComponentMongod) {
40+
log.V(1).Info("skipping PVC metrics check: container and pod not running", "phase", pod.Status.Phase)
41+
return nil, nil
42+
}
43+
44+
backoff := wait.Backoff{
45+
Steps: 5,
46+
Duration: 5 * time.Second,
47+
Factor: 2.0,
48+
}
49+
2850
// Execute df command in the mongod container to get disk usage
2951
// df -B1 /data/db outputs in bytes
3052
// Example output:
@@ -33,9 +55,18 @@ func (r *ReconcilePerconaServerMongoDB) getPVCUsageFromMetrics(
3355
var stdout, stderr bytes.Buffer
3456
command := []string{"df", "-B1", config.MongodContainerDataDir}
3557

36-
err := r.clientcmd.Exec(ctx, pod, "mongod", command, nil, &stdout, &stderr, false)
58+
err := retry.OnError(backoff, func(err error) bool { return true }, func() error {
59+
stdout.Reset()
60+
stderr.Reset()
61+
62+
err := r.clientcmd.Exec(ctx, pod, naming.ComponentMongod, command, nil, &stdout, &stderr, false)
63+
if err != nil {
64+
return errors.Wrapf(err, "failed to execute df in pod %s: %s", pod.Name, stderr.String())
65+
}
66+
return nil
67+
})
3768
if err != nil {
38-
return nil, errors.Wrapf(err, "failed to execute df in pod %s: %s", pod.Name, stderr.String())
69+
return nil, errors.Wrap(err, "wait for df execution")
3970
}
4071

4172
lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")

pkg/controller/perconaservermongodb/metrics_client_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,24 @@ func TestGetPVCUsageFromMetrics(t *testing.T) {
145145
Name: "test-pod-0",
146146
Namespace: "test-namespace",
147147
},
148+
Spec: corev1.PodSpec{
149+
Containers: []corev1.Container{
150+
{
151+
Name: "mongod",
152+
},
153+
},
154+
},
155+
Status: corev1.PodStatus{
156+
Phase: corev1.PodRunning,
157+
ContainerStatuses: []corev1.ContainerStatus{
158+
{
159+
Name: "mongod",
160+
State: corev1.ContainerState{
161+
Running: &corev1.ContainerStateRunning{},
162+
},
163+
},
164+
},
165+
},
148166
}
149167

150168
result, err := r.getPVCUsageFromMetrics(ctx, pod, tt.pvcName)

pkg/controller/perconaservermongodb/volume_autoscaling.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,6 @@ func (r *ReconcilePerconaServerMongoDB) checkAndResizePVC(
102102
) error {
103103
log := logf.FromContext(ctx).WithName("StorageAutoscaling").WithValues("pvc", pvc.Name)
104104

105-
if pod.Status.Phase != corev1.PodRunning {
106-
log.V(1).Info("skipping PVC check: pod not running", "phase", pod.Status.Phase)
107-
return nil
108-
}
109-
110105
usage, err := r.getPVCUsageFromMetrics(ctx, pod, pvc.Name)
111106
if err != nil {
112107
return errors.Wrap(err, "get PVC usage from metrics")

0 commit comments

Comments
 (0)