diff --git a/docs/metrics/workload/pod-metrics.md b/docs/metrics/workload/pod-metrics.md index a83f81987..e76ccf8bf 100644 --- a/docs/metrics/workload/pod-metrics.md +++ b/docs/metrics/workload/pod-metrics.md @@ -30,7 +30,9 @@ | kube_pod_status_container_ready_time | Gauge | Time when the container of the pod entered Ready state. | seconds | `pod`=<pod-name>
`namespace`=<pod-namespace>
`uid`=<pod-uid> | EXPERIMENTAL | - | | kube_pod_container_status_restarts_total | Counter | The number of container restarts per container | | `container`=<container-name>
`namespace`=<pod-namespace>
`pod`=<pod-name>
`uid`=<pod-uid> | STABLE | - | | kube_pod_container_resource_requests | Gauge | The number of requested request resource by a container. It is recommended to use the `kube_pod_resource_requests` metric exposed by kube-scheduler instead, as it is more precise. | `cpu`=<core>
`memory`=<bytes> | `resource`=<resource-name>
`unit`=<resource-unit>
`container`=<container-name>
`pod`=<pod-name>
`namespace`=<pod-namespace>
`node`=< node-name>
`uid`=<pod-uid> | EXPERIMENTAL | - | +| kube_pod_container_actual_resource_requests | Gauge | The number of actually requested request resource by a container calculated based on status.containerStatuses of a Pod. | `cpu`=<core>
`memory`=<bytes> | `resource`=<resource-name>
`unit`=<resource-unit>
`container`=<container-name>
`pod`=<pod-name>
`namespace`=<pod-namespace>
`node`=<node-name>
`uid`=<pod-uid> | EXPERIMENTAL | - | | kube_pod_container_resource_limits | Gauge | The number of requested limit resource by a container. It is recommended to use the `kube_pod_resource_limits` metric exposed by kube-scheduler instead, as it is more precise. | `cpu`=<core>
`memory`=<bytes> | `resource`=<resource-name>
`unit`=<resource-unit>
`container`=<container-name>
`pod`=<pod-name>
`namespace`=<pod-namespace>
`node`=< node-name>
`uid`=<pod-uid> | EXPERIMENTAL | - | +| kube_pod_container_actual_resource_limits | Gauge | The number of actually requested limit resource by a container calculated based on status.containerStatuses of a Pod. | `cpu`=<core>
`memory`=<bytes> | `resource`=<resource-name>
`unit`=<resource-unit>
`container`=<container-name>
`pod`=<pod-name>
`namespace`=<pod-namespace>
`node`=<node-name>
`uid`=<pod-uid> | EXPERIMENTAL | - | | kube_pod_overhead_cpu_cores | Gauge | The pod overhead in regards to cpu cores associated with running a pod | core | `pod`=<pod-name>
`namespace`=<pod-namespace>
`uid`=<pod-uid> | EXPERIMENTAL | - | | kube_pod_overhead_memory_bytes | Gauge | The pod overhead in regards to memory associated with running a pod | bytes | `pod`=<pod-name>
`namespace`=<pod-namespace>
`uid`=<pod-uid> | EXPERIMENTAL | - | | kube_pod_runtimeclass_name_info | Gauge | The runtimeclass associated with the pod | | `pod`=<pod-name>
`namespace`=<pod-namespace>
`uid`=<pod-uid> | EXPERIMENTAL | - | diff --git a/internal/store/pod.go b/internal/store/pod.go index d4744dbb2..ac5377c58 100644 --- a/internal/store/pod.go +++ b/internal/store/pod.go @@ -45,7 +45,9 @@ func podMetricFamilies(allowAnnotationsList, allowLabelsList []string) []generat createPodCompletionTimeFamilyGenerator(), createPodContainerInfoFamilyGenerator(), createPodContainerResourceLimitsFamilyGenerator(), + createPodContainerActualResourceLimitsFamilyGenerator(), createPodContainerResourceRequestsFamilyGenerator(), + createPodContainerActualResourceRequestsFamilyGenerator(), createPodContainerStateStartedFamilyGenerator(), createPodContainerStatusLastTerminatedReasonFamilyGenerator(), createPodContainerStatusLastTerminatedExitCodeFamilyGenerator(), @@ -165,6 +167,74 @@ func createPodContainerInfoFamilyGenerator() generator.FamilyGenerator { ) } +func createPodContainerActualResourceLimitsFamilyGenerator() generator.FamilyGenerator { + return *generator.NewFamilyGeneratorWithStability( + "kube_pod_container_actual_resource_limits", + "The number of actually requested limit resource by a container calculated based on status.containerStatuses of a Pod.", + metric.Gauge, + basemetrics.ALPHA, + "", + wrapPodFunc(func(p *v1.Pod) *metric.Family { + ms := []*metric.Metric{} + + for _, c := range p.Status.ContainerStatuses { + if c.Resources == nil { + continue + } + + lim := c.Resources.Limits + + for resourceName, val := range lim { + switch resourceName { + case v1.ResourceCPU: + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitCore)}, + Value: convertValueToFloat64(&val), + }) + case v1.ResourceStorage: + fallthrough + case v1.ResourceEphemeralStorage: + fallthrough + case v1.ResourceMemory: + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitByte)}, + Value: float64(val.Value()), + }) + default: + if isHugePageResourceName(resourceName) { + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitByte)}, + Value: float64(val.Value()), + }) + } + if isAttachableVolumeResourceName(resourceName) { + ms = append(ms, &metric.Metric{ + Value: float64(val.Value()), + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitByte)}, + }) + } + if isExtendedResourceName(resourceName) { + ms = append(ms, &metric.Metric{ + Value: float64(val.Value()), + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitInteger)}, + }) + + } + } + } + } + + for _, metric := range ms { + metric.LabelKeys = []string{"container", "node", "resource", "unit"} + } + + return &metric.Family{ + Metrics: ms, + } + }), + ) +} + func createPodContainerResourceLimitsFamilyGenerator() generator.FamilyGenerator { return *generator.NewFamilyGeneratorWithStability( "kube_pod_container_resource_limits", @@ -229,6 +299,73 @@ func createPodContainerResourceLimitsFamilyGenerator() generator.FamilyGenerator ) } +func createPodContainerActualResourceRequestsFamilyGenerator() generator.FamilyGenerator { + return *generator.NewFamilyGeneratorWithStability( + "kube_pod_container_actual_resource_requests", + "The number of actually requested request resource by a container calculated based on status.containerStatuses of a Pod.", + metric.Gauge, + basemetrics.ALPHA, + 
"", + wrapPodFunc(func(p *v1.Pod) *metric.Family { + ms := []*metric.Metric{} + + for _, c := range p.Status.ContainerStatuses { + if c.Resources == nil { + continue + } + + req := c.Resources.Requests + + for resourceName, val := range req { + switch resourceName { + case v1.ResourceCPU: + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitCore)}, + Value: convertValueToFloat64(&val), + }) + case v1.ResourceStorage: + fallthrough + case v1.ResourceEphemeralStorage: + fallthrough + case v1.ResourceMemory: + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitByte)}, + Value: float64(val.Value()), + }) + default: + if isHugePageResourceName(resourceName) { + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitByte)}, + Value: float64(val.Value()), + }) + } + if isAttachableVolumeResourceName(resourceName) { + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitByte)}, + Value: float64(val.Value()), + }) + } + if isExtendedResourceName(resourceName) { + ms = append(ms, &metric.Metric{ + LabelValues: []string{c.Name, p.Spec.NodeName, SanitizeLabelName(string(resourceName)), string(constant.UnitInteger)}, + Value: float64(val.Value()), + }) + } + } + } + } + + for _, metric := range ms { + metric.LabelKeys = []string{"container", "node", "resource", "unit"} + } + + return &metric.Family{ + Metrics: ms, + } + }), + ) +} + func createPodContainerResourceRequestsFamilyGenerator() generator.FamilyGenerator { return *generator.NewFamilyGeneratorWithStability( "kube_pod_container_resource_requests", diff --git a/internal/store/pod_test.go b/internal/store/pod_test.go index 10e76f6ba..2ea489e30 100644 --- a/internal/store/pod_test.go +++ b/internal/store/pod_test.go @@ -2282,7 +2282,7 @@ func BenchmarkPodStore(b *testing.B) { }, } - expectedFamilies := 55 + expectedFamilies := 57 for n := 0; n < b.N; n++ { families := f(pod) if len(families) != expectedFamilies { diff --git a/pkg/app/server_test.go b/pkg/app/server_test.go index e65143d11..2dd1eade6 100644 --- a/pkg/app/server_test.go +++ b/pkg/app/server_test.go @@ -209,6 +209,8 @@ func TestFullScrapeCycle(t *testing.T) { expected := `# HELP kube_pod_annotations Kubernetes annotations converted to Prometheus labels. # HELP kube_pod_completion_time [STABLE] Completion time in unix timestamp for a pod. +# HELP kube_pod_container_actual_resource_limits The number of actually requested limit resource by a container calculated based on status.containerStatuses of a Pod. +# HELP kube_pod_container_actual_resource_requests The number of actually requested request resource by a container calculated based on status.containerStatuses of a Pod. # HELP kube_pod_container_info [STABLE] Information about a container in a pod. # HELP kube_pod_container_resource_limits The number of requested limit resource by a container. It is recommended to use the kube_pod_resource_limits metric exposed by kube-scheduler instead, as it is more precise. # HELP kube_pod_container_resource_requests The number of requested request resource by a container. It is recommended to use the kube_pod_resource_requests metric exposed by kube-scheduler instead, as it is more precise. 
@@ -263,6 +265,8 @@ func TestFullScrapeCycle(t *testing.T) { # HELP kube_pod_tolerations Information about the pod tolerations # TYPE kube_pod_annotations gauge # TYPE kube_pod_completion_time gauge +# TYPE kube_pod_container_actual_resource_limits gauge +# TYPE kube_pod_container_actual_resource_requests gauge # TYPE kube_pod_container_info gauge # TYPE kube_pod_container_resource_limits gauge # TYPE kube_pod_container_resource_requests gauge @@ -315,6 +319,16 @@ func TestFullScrapeCycle(t *testing.T) { # TYPE kube_pod_status_unschedulable gauge # TYPE kube_pod_status_unscheduled_time gauge # TYPE kube_pod_tolerations gauge +kube_pod_container_actual_resource_limits{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="cpu",unit="core"} 0.3 +kube_pod_container_actual_resource_limits{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="ephemeral_storage",unit="byte"} 4e+08 +kube_pod_container_actual_resource_limits{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="memory",unit="byte"} 2e+08 +kube_pod_container_actual_resource_limits{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="nvidia_com_gpu",unit="integer"} 2 +kube_pod_container_actual_resource_limits{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="storage",unit="byte"} 5e+08 +kube_pod_container_actual_resource_requests{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="cpu",unit="core"} 0.3 +kube_pod_container_actual_resource_requests{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="ephemeral_storage",unit="byte"} 4e+08 +kube_pod_container_actual_resource_requests{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="memory",unit="byte"} 2e+08 +kube_pod_container_actual_resource_requests{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="nvidia_com_gpu",unit="integer"} 2 +kube_pod_container_actual_resource_requests{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="storage",unit="byte"} 5e+08 kube_pod_container_info{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",image_spec="k8s.gcr.io/hyperkube2_spec",image="k8s.gcr.io/hyperkube2",image_id="docker://sha256:bbb",container_id="docker://cd456"} 1 kube_pod_container_info{namespace="default",pod="pod0",uid="abc-0",container="pod1_con2",image_spec="k8s.gcr.io/hyperkube3_spec",image="k8s.gcr.io/hyperkube3",image_id="docker://sha256:ccc",container_id="docker://ef789"} 1 kube_pod_container_resource_limits{namespace="default",pod="pod0",uid="abc-0",container="pod1_con1",node="node1",resource="cpu",unit="core"} 0.2 @@ -852,6 +866,22 @@ func pod(client *fake.Clientset, index int) error { ExitCode: 137, }, }, + Resources: &v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("300m"), + v1.ResourceMemory: resource.MustParse("200M"), + v1.ResourceEphemeralStorage: resource.MustParse("400M"), + v1.ResourceStorage: resource.MustParse("500M"), + v1.ResourceName("nvidia.com/gpu"): resource.MustParse("2"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("300m"), + v1.ResourceMemory: resource.MustParse("200M"), + v1.ResourceEphemeralStorage: resource.MustParse("400M"), + v1.ResourceStorage: resource.MustParse("500M"), + 
v1.ResourceName("nvidia.com/gpu"): resource.MustParse("2"), + }, + }, }, { Name: "pod1_con2",
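Aside on the sample values asserted in the expected scrape output above: a minimal standalone sketch, in Go, of how the `resource.Quantity` literals in the test fixture map to the emitted float samples. It uses only `k8s.io/apimachinery/pkg/api/resource`; `AsApproximateFloat64` stands in here for the internal `convertValueToFloat64` helper the generators call for CPU, so treat it as an approximation of that code path rather than the exact implementation.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// CPU quantities are exposed in cores, so "300m" becomes 0.3.
	cpu := resource.MustParse("300m")
	fmt.Println(cpu.AsApproximateFloat64()) // 0.3

	// Memory and storage quantities are exposed in bytes via Value(),
	// so "200M" (decimal megabytes) becomes 2e+08.
	mem := resource.MustParse("200M")
	fmt.Println(float64(mem.Value())) // 2e+08

	// Extended resources such as nvidia.com/gpu are plain integers
	// and are exposed with unit="integer".
	gpu := resource.MustParse("2")
	fmt.Println(float64(gpu.Value())) // 2
}
```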