Skip to content

Commit ffd9a60

Browse files
⚡ prometheus and improvements (#24)
modified: controllers/pod_controller.go; modified: controllers/pod_controller_functions.go; modified: pkg/cache/rediscache/rediscache.go
1 parent 59a8e5b commit ffd9a60

File tree

3 files changed

+118
-54
lines changed

3 files changed

+118
-54
lines changed

controllers/pod_controller.go

Lines changed: 60 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/jatalocks/kube-reqsizer/pkg/cache/localcache"
2727
"github.com/jatalocks/kube-reqsizer/pkg/cache/rediscache"
2828
"github.com/jatalocks/kube-reqsizer/types"
29+
"github.com/prometheus/client_golang/prometheus"
2930
corev1 "k8s.io/api/core/v1"
3031
v1 "k8s.io/api/core/v1"
3132
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -36,6 +37,7 @@ import (
3637

3738
ctrl "sigs.k8s.io/controller-runtime"
3839
"sigs.k8s.io/controller-runtime/pkg/client"
40+
"sigs.k8s.io/controller-runtime/pkg/metrics"
3941
)
4042

4143
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch
@@ -66,64 +68,90 @@ const (
6668
operatorModeAnnotation = "reqsizer.jatalocks.github.io/mode"
6769
)
6870

71+
var (
72+
cpuOffset = prometheus.NewGauge(
73+
prometheus.GaugeOpts{
74+
Name: "cpu_offset",
75+
Help: "Number of milli-cores that have been increased/removed since startup",
76+
},
77+
)
78+
memoryOffset = prometheus.NewGauge(
79+
prometheus.GaugeOpts{
80+
Name: "memory_offset",
81+
Help: "Number of megabits that have been increased/removed since startup",
82+
},
83+
)
84+
cacheSize = prometheus.NewGauge(
85+
prometheus.GaugeOpts{
86+
Name: "cache_size",
87+
Help: "Number of pod controllers currently in cache",
88+
},
89+
)
90+
)
91+
92+
func init() {
93+
// Register custom metrics with the global prometheus registry
94+
metrics.Registry.MustRegister(cpuOffset, memoryOffset, cacheSize)
95+
}
96+
6997
func cacheKeyFunc(obj interface{}) (string, error) {
70-
return obj.(types.PodRequests).Name + "-" + obj.(types.PodRequests).Namespace, nil
98+
return obj.(types.PodRequests).Name, nil
7199
}
72100

73101
// cacheStore is the process-local sample store, keyed by cacheKeyFunc.
// Reconcile reads from it (and reports its size) only when
// r.EnablePersistence is false; otherwise the Redis client is used instead.
var cacheStore = cache.NewStore(cacheKeyFunc)
74102

75103
// Reconcile handles a reconciliation request for a Pod.
76104
func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
77105
log := r.Log.WithValues("pod", req.NamespacedName)
78-
106+
if r.EnablePersistence {
107+
cacheSize.Set(float64(r.RedisClient.CacheSize()))
108+
} else {
109+
cacheSize.Set(float64(len(cacheStore.List())))
110+
}
79111
/*
80112
Step 0: Fetch the Pod from the Kubernetes API.
81113
*/
82-
83114
var pod corev1.Pod
115+
84116
if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
85117
if apierrors.IsNotFound(err) {
86-
// we'll ignore not-found errors, since they can't be fixed by an immediate
87-
// requeue (we'll need to wait for a new notification), and we can get them
88-
// on deleted requests.
89118
return ctrl.Result{}, nil
90119
}
91120
log.Error(nil, "unable to fetch Pod")
92121
return ctrl.Result{}, err
93122
}
94-
95-
annotation, err := r.NamespaceOrPodHaveAnnotation(pod, ctx)
123+
podReferenceName := r.GetPodCacheName(&pod) + "-" + pod.Namespace
124+
annotation, err := r.NamespaceOrPodHaveAnnotation(&pod, ctx)
96125
if err != nil {
97126
log.Error(nil, "failed to get annotations")
98127
return ctrl.Result{}, err
99128
}
100-
ignoreAnnotation, err := r.NamespaceOrPodHaveIgnoreAnnotation(pod, ctx)
129+
ignoreAnnotation, err := r.NamespaceOrPodHaveIgnoreAnnotation(&pod, ctx)
101130
if err != nil {
102131
log.Error(nil, "failed to get annotations")
103132
return ctrl.Result{}, err
104133
}
105134

106135
if ((!r.EnableAnnotation) || (r.EnableAnnotation && annotation)) && !ignoreAnnotation {
136+
log.Info("Cache Reference Name: " + podReferenceName)
137+
107138
data, err := r.ClientSet.RESTClient().Get().AbsPath(fmt.Sprintf("apis/metrics.k8s.io/v1beta1/namespaces/%v/pods/%v", pod.Namespace, pod.Name)).DoRaw(ctx)
108139

109140
if err != nil {
110141
log.Error(nil, "failed to get stats from pod")
111142
return ctrl.Result{}, err
112143
}
113144
PodUsageData := GeneratePodRequestsObjectFromRestData(data)
114-
err, _, _, deploymentName := r.GetPodParentKind(pod, ctx)
115-
if err != nil {
116-
deploymentName = pod.Name
117-
}
118-
SumPodRequest := types.PodRequests{Name: deploymentName, Namespace: pod.Namespace, ContainerRequests: []types.ContainerRequests{}}
145+
SumPodRequest := types.PodRequests{Name: podReferenceName, Namespace: pod.Namespace, ContainerRequests: []types.ContainerRequests{}}
119146

120147
SumPodRequest.ContainerRequests = PodUsageData.ContainerRequests
121148
var LatestPodRequest types.PodRequests
122149
if r.EnablePersistence {
123-
LatestPodRequest, err = r.RedisClient.FetchFromCache(deploymentName + "-" + pod.Namespace)
150+
LatestPodRequest, err = r.RedisClient.FetchFromCache(podReferenceName)
124151
} else {
125-
LatestPodRequest, err = localcache.FetchFromCache(cacheStore, deploymentName+"-"+pod.Namespace)
152+
LatestPodRequest, err = localcache.FetchFromCache(cacheStore, podReferenceName)
126153
}
154+
127155
if err != nil {
128156
SumPodRequest.Sample = 0
129157
log.Info(fmt.Sprint("Adding cache sample ", SumPodRequest.Sample))
@@ -184,7 +212,6 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
184212
}
185213
}
186214
}
187-
log.Info(fmt.Sprint(SumPodRequest))
188215
if (SumPodRequest.Sample >= r.SampleSize) && r.MinimumUptimeOfPodInParent(pod, ctx) {
189216
log.Info("Sample Size and Minimum Time have been reached")
190217
PodChange := false
@@ -216,28 +243,34 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
216243
case "average":
217244
if r.ValidateCPU(currentC.CPU, AverageUsageCPU) {
218245
pod.Spec.Containers[i].Resources.Requests[v1.ResourceCPU] = resource.MustParse(fmt.Sprintf("%dm", int(float64(AverageUsageCPU)*r.CPUFactor)))
246+
cpuOffset.Add(float64(int(float64(AverageUsageCPU)*r.CPUFactor) - int(currentC.CPU)))
219247
PodChange = true
220248
}
221249
if r.ValidateMemory(currentC.Memory, AverageUsageMemory) {
222250
pod.Spec.Containers[i].Resources.Requests[v1.ResourceMemory] = resource.MustParse(fmt.Sprintf("%dMi", int(float64(AverageUsageMemory)*r.MemoryFactor)))
251+
memoryOffset.Add(float64(int(float64(AverageUsageMemory)*r.MemoryFactor) - int(currentC.Memory)))
223252
PodChange = true
224253
}
225254
case "min":
226255
if r.ValidateCPU(currentC.CPU, c.MinCPU) {
227256
pod.Spec.Containers[i].Resources.Requests[v1.ResourceCPU] = resource.MustParse(fmt.Sprintf("%dm", int(float64(c.MinCPU)*r.CPUFactor)))
257+
cpuOffset.Add(float64(int(float64(c.MinCPU)*r.CPUFactor) - int(currentC.CPU)))
228258
PodChange = true
229259
}
230260
if r.ValidateMemory(currentC.Memory, c.MinMemory) {
231261
pod.Spec.Containers[i].Resources.Requests[v1.ResourceMemory] = resource.MustParse(fmt.Sprintf("%dMi", int(float64(c.MinMemory)*r.MemoryFactor)))
262+
memoryOffset.Add(float64(int(float64(c.MinMemory)*r.MemoryFactor) - int(currentC.Memory)))
232263
PodChange = true
233264
}
234265
case "max":
235266
if r.ValidateCPU(currentC.CPU, c.MaxCPU) {
236267
pod.Spec.Containers[i].Resources.Requests[v1.ResourceCPU] = resource.MustParse(fmt.Sprintf("%dm", int(float64(c.MaxCPU)*r.CPUFactor)))
268+
cpuOffset.Add(float64(int(float64(c.MaxCPU)*r.CPUFactor) - int(currentC.CPU)))
237269
PodChange = true
238270
}
239271
if r.ValidateMemory(currentC.Memory, c.MaxMemory) {
240272
pod.Spec.Containers[i].Resources.Requests[v1.ResourceMemory] = resource.MustParse(fmt.Sprintf("%dMi", int(float64(c.MaxMemory)*r.MemoryFactor)))
273+
memoryOffset.Add(float64(int(float64(c.MaxMemory)*r.MemoryFactor) - int(currentC.Memory)))
241274
PodChange = true
242275
}
243276
}
@@ -249,6 +282,15 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
249282
}
250283
}
251284
}
285+
if r.EnablePersistence {
286+
if err := r.RedisClient.DeleteFromCache(SumPodRequest); err != nil {
287+
log.Error(err, err.Error())
288+
}
289+
} else {
290+
if err := localcache.DeleteFromCache(cacheStore, LatestPodRequest); err != nil {
291+
log.Error(err, err.Error())
292+
}
293+
}
252294
if PodChange {
253295
pod.Annotations["reqsizer.jatalocks.github.io/changed"] = "true"
254296

@@ -267,20 +309,9 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
267309
UpdatePodController(podSpec, Requests, ctx)
268310

269311
return r.UpdateKubeObject(deployment.(client.Object), ctx)
270-
271-
}
272-
273-
if r.EnablePersistence {
274-
if err := r.RedisClient.DeleteFromCache(SumPodRequest); err != nil {
275-
log.Error(err, err.Error())
276-
}
277-
} else {
278-
if err := localcache.DeleteFromCache(cacheStore, LatestPodRequest); err != nil {
279-
log.Error(err, err.Error())
280-
}
281312
}
282313
}
283314
}
284315

285-
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
316+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
286317
}

controllers/pod_controller_functions.go

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@ import (
1515
v1 "k8s.io/api/core/v1"
1616
apierrors "k8s.io/apimachinery/pkg/api/errors"
1717
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/apimachinery/pkg/labels"
1819
ctrl "sigs.k8s.io/controller-runtime"
1920
"sigs.k8s.io/controller-runtime/pkg/client"
2021
"sigs.k8s.io/controller-runtime/pkg/controller"
2122
)
2223

23-
func (r *PodReconciler) NamespaceOrPodHaveAnnotation(pod corev1.Pod, ctx context.Context) (bool, error) {
24+
func (r *PodReconciler) NamespaceOrPodHaveAnnotation(pod *corev1.Pod, ctx context.Context) (bool, error) {
2425
podHasAnnotation := pod.Annotations[operatorAnnotation] == "true"
2526
namespace, err := r.ClientSet.CoreV1().Namespaces().Get(ctx, pod.Namespace, metav1.GetOptions{})
2627
if err != nil {
@@ -30,7 +31,7 @@ func (r *PodReconciler) NamespaceOrPodHaveAnnotation(pod corev1.Pod, ctx context
3031
return (podHasAnnotation || namespaceHasAnnotation), nil
3132
}
3233

33-
func (r *PodReconciler) NamespaceOrPodHaveIgnoreAnnotation(pod corev1.Pod, ctx context.Context) (bool, error) {
34+
func (r *PodReconciler) NamespaceOrPodHaveIgnoreAnnotation(pod *corev1.Pod, ctx context.Context) (bool, error) {
3435
podHasIgnoreAnnotation := pod.Annotations[operatorAnnotation] == "false"
3536
namespace, err := r.ClientSet.CoreV1().Namespaces().Get(ctx, pod.Namespace, metav1.GetOptions{})
3637
if err != nil {
@@ -68,7 +69,7 @@ func (r *PodReconciler) UpdateKubeObject(pod client.Object, ctx context.Context)
6869
if apierrors.IsConflict(err) {
6970
// The Pod has been updated since we read it.
7071
// Requeue the Pod so that reconciliation is attempted again.
71-
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
72+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
7273
}
7374
if apierrors.IsNotFound(err) {
7475
// The Pod has been deleted since we read it.
@@ -78,7 +79,7 @@ func (r *PodReconciler) UpdateKubeObject(pod client.Object, ctx context.Context)
7879
log.Error(err, "unable to update pod")
7980
return ctrl.Result{}, err
8081
}
81-
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
82+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
8283
}
8384

8485
func UpdatePodController(podspec *corev1.PodSpec, Requests []types.NewContainerRequests, ctx context.Context) {
@@ -190,11 +191,22 @@ func (r *PodReconciler) MinimumUptimeOfPodInParent(pod corev1.Pod, ctx context.C
190191
}
191192
err, _, _, deploymentName := r.GetPodParentKind(pod, ctx)
192193
if err != nil {
194+
log.Error(err)
193195
return false
194196
}
197+
198+
// Create the label selector
199+
labelSelector := labels.Set{
200+
"app": deploymentName,
201+
"app.kubernetes.io/name": deploymentName,
202+
"app.kubernetes.io/instance": deploymentName,
203+
"app.kubernetes.io/component": deploymentName,
204+
}
205+
195206
options := metav1.ListOptions{
196-
LabelSelector: "app=" + deploymentName,
207+
LabelSelector: labelSelector.AsSelector().String(),
197208
}
209+
198210
podList, _ := r.ClientSet.CoreV1().Pods(pod.Namespace).List(ctx, options)
199211
// List() returns a pointer to a PodList; dereference it before iterating over its Items
200212
for _, podInfo := range (*podList).Items {
@@ -207,26 +219,47 @@ func (r *PodReconciler) MinimumUptimeOfPodInParent(pod corev1.Pod, ctx context.C
207219
}
208220

209221
func (r *PodReconciler) GetPodParentKind(pod corev1.Pod, ctx context.Context) (error, *v1.PodSpec, interface{}, string) {
210-
switch pod.OwnerReferences[0].Kind {
211-
case "ReplicaSet":
212-
replica, err := r.ClientSet.AppsV1().ReplicaSets(pod.Namespace).Get(ctx, pod.OwnerReferences[0].Name, metav1.GetOptions{})
213-
if err != nil {
214-
log.Error(err, err.Error())
215-
return err, nil, nil, ""
216-
}
217-
deployment, err := r.ClientSet.AppsV1().Deployments(pod.Namespace).Get(ctx, replica.OwnerReferences[0].Name, metav1.GetOptions{})
218-
if replica.OwnerReferences[0].Kind == "Deployment" {
222+
if len(pod.OwnerReferences) > 0 {
223+
switch pod.OwnerReferences[0].Kind {
224+
case "ReplicaSet":
225+
replica, err := r.ClientSet.AppsV1().ReplicaSets(pod.Namespace).Get(ctx, pod.OwnerReferences[0].Name, metav1.GetOptions{})
226+
if err != nil {
227+
log.Error(err, err.Error())
228+
return err, nil, nil, ""
229+
}
230+
deployment, err := r.ClientSet.AppsV1().Deployments(pod.Namespace).Get(ctx, replica.OwnerReferences[0].Name, metav1.GetOptions{})
231+
if replica.OwnerReferences[0].Kind == "Deployment" {
232+
return err, &deployment.Spec.Template.Spec, deployment, deployment.Name
233+
} else {
234+
return errors.New("Is Owned by Unknown CRD"), nil, nil, ""
235+
}
236+
case "DaemonSet":
237+
deployment, err := r.ClientSet.AppsV1().DaemonSets(pod.Namespace).Get(ctx, pod.OwnerReferences[0].Name, metav1.GetOptions{})
219238
return err, &deployment.Spec.Template.Spec, deployment, deployment.Name
220-
} else {
239+
case "StatefulSet":
240+
deployment, err := r.ClientSet.AppsV1().StatefulSets(pod.Namespace).Get(ctx, pod.OwnerReferences[0].Name, metav1.GetOptions{})
241+
return err, &deployment.Spec.Template.Spec, deployment, deployment.Name
242+
default:
221243
return errors.New("Is Owned by Unknown CRD"), nil, nil, ""
222244
}
223-
case "DaemonSet":
224-
deployment, err := r.ClientSet.AppsV1().DaemonSets(pod.Namespace).Get(ctx, pod.OwnerReferences[0].Kind, metav1.GetOptions{})
225-
return err, &deployment.Spec.Template.Spec, deployment, deployment.Name
226-
case "StatefulSet":
227-
deployment, err := r.ClientSet.AppsV1().StatefulSets(pod.Namespace).Get(ctx, pod.OwnerReferences[0].Kind, metav1.GetOptions{})
228-
return err, &deployment.Spec.Template.Spec, deployment, deployment.Name
229-
default:
230-
return errors.New("Is Owned by Unknown CRD"), nil, nil, ""
245+
} else {
246+
return errors.New("Pod Has No Owner"), nil, nil, ""
247+
}
248+
}
249+
250+
func (r *PodReconciler) GetPodCacheName(pod *corev1.Pod) string {
251+
val, ok := pod.Labels["app"]
252+
if !ok {
253+
val, ok = pod.Labels["app.kubernetes.io/name"]
254+
if !ok {
255+
val, ok = pod.Labels["app.kubernetes.io/instance"]
256+
if !ok {
257+
val, ok = pod.Labels["app.kubernetes.io/component"]
258+
if !ok {
259+
val = strings.Split(pod.Name, "-")[0]
260+
}
261+
}
262+
}
231263
}
264+
return val
232265
}

pkg/cache/rediscache/rediscache.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ func (client RedisClient) AddToCache(object types.PodRequests) error {
1818
klog.Errorf("failed to add key value to cache error", err)
1919
return err
2020
}
21-
err = client.Client.Set(object.Name+"-"+object.Namespace, val, 0).Err()
21+
err = client.Client.Set(object.Name, val, 0).Err()
2222
if err != nil {
2323
klog.Errorf("failed to add key value to cache error", err)
2424
return err
@@ -41,7 +41,7 @@ func (client RedisClient) FetchFromCache(key string) (types.PodRequests, error)
4141
}
4242

4343
func (client RedisClient) DeleteFromCache(object types.PodRequests) error {
44-
return client.Client.Del(object.Name + "-" + object.Namespace).Err()
44+
return client.Client.Del(object.Name).Err()
4545
}
4646

4747
func (client RedisClient) CacheSize() int64 {

0 commit comments

Comments
 (0)