feat: Add kubeReserved calculation visibility for troubleshooting #8810

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

moko-poi wants to merge 3 commits into aws:main from moko-poi:feat/add-kubereserved-visibility

pkg/providers/instancetype/types.go

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -29,6 +29,7 @@ import (
  
    	corev1 "k8s.io/api/core/v1"

    	"k8s.io/apimachinery/pkg/api/resource"

    	"k8s.io/apimachinery/pkg/util/sets"

    	"sigs.k8s.io/controller-runtime/pkg/log"

    	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

    	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"

    @@ -45,9 +46,7 @@ const (
  
    	NodeFSAvailable = "nodefs.available"

    )

    var (

    	instanceTypeScheme = regexp.MustCompile(`(^[a-z]+)(\-[0-9]+tb)?([0-9]+).*\.`)

    )

    var instanceTypeScheme = regexp.MustCompile(`(^[a-z]+)(\-[0-9]+tb)?([0-9]+).*\.`)

    type ZoneData struct {

    	Name      string

    @@ -138,19 +137,47 @@ func NewInstanceType(
  
    	capacityReservations []v1.CapacityReservation,

    ) *cloudprovider.InstanceType {

    	amiFamily := amifamily.GetAMIFamily(amiFamilyType, &amifamily.Options{})

    	// Calculate effective pods for kubeReserved calculation

    	effectivePods := lo.Ternary(amiFamily.FeatureFlags().UsesENILimitedMemoryOverhead, ENILimitedPods(ctx, info, 0), pods(ctx, info, amiFamily, maxPods, podsPerCore))

    	kubeReservedResources := kubeReservedResources(cpu(info), effectivePods, kubeReserved)

    	it := &cloudprovider.InstanceType{

    		Name:         string(info.InstanceType),

    		Requirements: computeRequirements(info, region, offeringZones, subnetZoneInfo, amiFamily, capacityReservations),

    		Capacity:     computeCapacity(ctx, info, amiFamily, blockDeviceMappings, instanceStorePolicy, maxPods, podsPerCore),

    		Overhead: &cloudprovider.InstanceTypeOverhead{

    			KubeReserved:      kubeReservedResources(cpu(info), lo.Ternary(amiFamily.FeatureFlags().UsesENILimitedMemoryOverhead, ENILimitedPods(ctx, info, 0), pods(ctx, info, amiFamily, maxPods, podsPerCore)), kubeReserved),

    			KubeReserved:      kubeReservedResources,

    			SystemReserved:    systemReservedResources(systemReserved),

    			EvictionThreshold: evictionThreshold(memory(ctx, info), ephemeralStorage(info, amiFamily, blockDeviceMappings, instanceStorePolicy), amiFamily, evictionHard, evictionSoft),

    		},

    	}

    	if it.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)))) == nil {

    		it.Capacity[v1.ResourcePrivateIPv4Address] = *privateIPv4Address(string(info.InstanceType))

    	}

    	// Log kubeReserved calculation details for Custom AMI troubleshooting

    	// For EKS-optimized AMIs (AL2, AL2023, Bottlerocket), the calculation is deterministic

    	// and documented. Custom AMIs may have different kubelet configurations, so logging

    	// helps users understand how Karpenter calculates allocatable resources.

    	if amiFamilyType == v1.AMIFamilyCustom {

    		allocatable := it.Allocatable()

    		log.FromContext(ctx).V(2).Info("calculated instance type resources for Custom AMI",

    			"instance-type", info.InstanceType,

    			"max-pods-configured", maxPods,

    			"effective-pods", effectivePods.Value(),

    			"uses-eni-limited-overhead", amiFamily.FeatureFlags().UsesENILimitedMemoryOverhead,

    			"capacity-memory", it.Capacity.Memory().String(),

    			"capacity-cpu", it.Capacity.Cpu().String(),

    			"kube-reserved-memory", kubeReservedResources.Memory().String(),

    			"kube-reserved-cpu", kubeReservedResources.Cpu().String(),

    			"system-reserved-memory", it.Overhead.SystemReserved.Memory().String(),

    			"system-reserved-cpu", it.Overhead.SystemReserved.Cpu().String(),

    			"allocatable-memory", allocatable.Memory().String(),

    			"allocatable-cpu", allocatable.Cpu().String(),

    		)

    	}

    	return it

    }

    @@ -320,8 +347,8 @@ func getArchitecture(info ec2types.InstanceTypeInfo) string {
  
    func computeCapacity(ctx context.Context, info ec2types.InstanceTypeInfo, amiFamily amifamily.AMIFamily,

    	blockDeviceMapping []*v1.BlockDeviceMapping, instanceStorePolicy *v1.InstanceStorePolicy,

    	maxPods *int32, podsPerCore *int32) corev1.ResourceList {

    	maxPods *int32, podsPerCore *int32,

    ) corev1.ResourceList {

    	resourceList := corev1.ResourceList{

    		corev1.ResourceCPU:              *cpu(info),

    		corev1.ResourceMemory:           *memory(ctx, info),

    @@ -383,7 +410,7 @@ func ephemeralStorage(info ec2types.InstanceTypeInfo, amiFamily amifamily.AMIFam
  
    			}

    		}

    	}

    	//Return the ephemeralBlockDevice size if defined in ami

    	// Return the ephemeralBlockDevice size if defined in ami

    	if ephemeralBlockDevice, ok := lo.Find(amiFamily.DefaultBlockDeviceMappings(), func(item *v1.BlockDeviceMapping) bool {

    		return *amiFamily.EphemeralBlockDevice() == *item.DeviceName

    	}); ok {

    @@ -483,7 +510,7 @@ func ENILimitedPods(ctx context.Context, info ec2types.InstanceTypeInfo, reserve
  
    }

    func privateIPv4Address(instanceTypeName string) *resource.Quantity {

    	//https://github.com/aws/amazon-vpc-resource-controller-k8s/blob/ecbd6965a0100d9a070110233762593b16023287/pkg/provider/ip/provider.go#L297

    	// https://github.com/aws/amazon-vpc-resource-controller-k8s/blob/ecbd6965a0100d9a070110233762593b16023287/pkg/provider/ip/provider.go#L297

    	limits, ok := Limits[instanceTypeName]

    	if !ok {

    		return resources.Quantity("0")

website/content/en/docs/troubleshooting.md

-Original file line number
+Diff line change
@@ Expand Up @@
     This can be spot checked like shown above, or monitored via the following metric:
     ```
-    operator_status_condition_count{type="ConsistentStateFound",kind="NodeClaim",status="False"}
+    operator_nodeclaim_status_condition_count{type="ConsistentStateFound",kind="NodeClaim",status="False"}
+    ```
+    #### Debugging kubeReserved Calculations
+    To understand how Karpenter calculates `kubeReserved` for your instance types, you can enable verbose logging by setting the log verbosity to 2 or higher. For EC2NodeClasses that use the Custom AMI family, Karpenter will then log detailed information about resource calculations:
+    ```bash
+    # View Karpenter logs with verbose output
+    kubectl logs -n karpenter deploy/karpenter -f
+    ```
+    Look for log entries with the message `"calculated instance type resources for Custom AMI"` which includes:
+    - `instance-type`: The EC2 instance type being evaluated
+    - `capacity-memory` / `capacity-cpu`: The instance type's memory and CPU capacity before any reservations are subtracted
+    - `max-pods-configured`: User-configured maxPods value (if set)
+    - `effective-pods`: The actual pod count used for kubeReserved calculation
+    - `uses-eni-limited-overhead`: Whether ENI-limited calculation is used
+    - `kube-reserved-memory`: Memory reserved for Kubernetes components
+    - `kube-reserved-cpu`: CPU reserved for Kubernetes components
+    - `allocatable-memory`: Final allocatable memory after all reservations
+    - `allocatable-cpu`: Final allocatable CPU after all reservations
+    **Understanding kubeReserved defaults:**
+    For EKS-optimized AMI families (AL2, AL2023, Bottlerocket), Karpenter automatically calculates `kubeReserved` based on:
+    - **Memory**: `11 MiB * pods + 255 MiB` (where pods is either ENI-limited or maxPods)
+    - **CPU**: Based on instance CPU count using a graduated percentage scale
+    For Custom AMI families, the same calculation is applied by default. If you're using a Custom AMI with a different kubelet configuration, you should explicitly set `kubeReserved` in your EC2NodeClass:
+    ```yaml
+    apiVersion: karpenter.k8s.aws/v1
+    kind: EC2NodeClass
+    spec:
+      amiFamily: Custom
+      kubelet:
+        kubeReserved:
+          cpu: "100m"
+          memory: "500Mi"
+        maxPods: 110
     ```
     ### Karpenter Is Unable to Satisfy Topology Spread Constraint
@@ Expand Down @@

website/content/en/preview/troubleshooting.md

-Original file line number
+Diff line change
@@ Expand Up @@
     operator_status_condition_count{type="ConsistentStateFound",kind="NodeClaim",status="False"}
     ```
+    #### Debugging kubeReserved Calculations
+    Karpenter automatically calculates `kubeReserved` for each instance type to determine allocatable resources. Understanding this calculation is important for capacity planning and troubleshooting.
+    ##### EKS-Optimized AMI Families (AL2, AL2023, Bottlerocket)
+    For EKS-optimized AMI families, Karpenter uses the same calculation as the [EKS bootstrap script](https://github.com/awslabs/amazon-eks-ami/blob/main/templates/al2/runtime/bootstrap.sh):
+    **Memory reservation:**
+    ```
+    kubeReserved.memory = 11 MiB × max_pods + 255 MiB
+    ```
+    **CPU reservation (graduated scale):**
+    ```
+    kubeReserved.cpu = 6% of the first core + 1% of the next core (up to 2 cores) + 0.5% of the next 2 cores (up to 4 cores) + 0.25% of any cores above 4 cores
+    ```
+    **Example calculations for common instance types:**
+    | Instance Type | vCPUs | Default Max Pods | Memory Reserved | CPU Reserved | Allocatable Memory | Allocatable CPU |
+    |---------------|-------|------------------|-----------------|--------------|--------------------|-----------------|
+    | t3.medium     | 2     | 17               | 442 MiB         | 70m          | ~3.5 GiB           | 1930m           |
+    | m5.large      | 2     | 29               | 574 MiB         | 70m          | ~7.4 GiB           | 1930m           |
+    | m5.xlarge     | 4     | 58               | 893 MiB         | 80m          | ~15.1 GiB          | 3920m           |
+    | m5.2xlarge    | 8     | 58               | 893 MiB         | 90m          | ~31.1 GiB          | 7910m           |
+    | m5.4xlarge    | 16    | 234              | 2,829 MiB       | 110m         | ~61.2 GiB          | 15890m          |
+    > **Note:** The `max_pods` value used in the formula depends on the ENI capacity of the instance type. See [AWS documentation](https://github.com/awslabs/amazon-eks-ami/blob/main/templates/shared/runtime/eni-max-pods.txt) for ENI-based pod limits.
+    ##### Custom AMI Families
+    For Custom AMI families, Karpenter applies the same calculation **by default**. However, if your Custom AMI uses a different kubelet configuration, this may not match your actual node capacity.
+    **Recommended approach for Custom AMIs:**
+    Explicitly configure `kubeReserved` in your EC2NodeClass to match your kubelet configuration:
+    ```yaml
+    apiVersion: karpenter.k8s.aws/v1
+    kind: EC2NodeClass
+    spec:
+      amiFamily: Custom
+      kubelet:
+        # Set these to match your kubelet's actual configuration
+        kubeReserved:
+          cpu: "100m"
+          memory: "500Mi"
+        maxPods: 110
+    ```
+    **Troubleshooting Custom AMI calculations:**
+    To see how Karpenter calculates resources for your Custom AMI configuration, enable verbose logging:
+    ```bash
+    # View Karpenter logs (the calculation entries are emitted at verbosity 2, so the controller log level must be raised to at least that level)
+    kubectl logs -n karpenter deploy/karpenter -f
+    ```
+    Look for log entries with the message `"calculated instance type resources for Custom AMI"` which includes:
+    - `instance-type`: The EC2 instance type being evaluated
+    - `max-pods-configured`: Your configured maxPods value (if set)
+    - `effective-pods`: The actual pod count used for kubeReserved calculation
+    - `kube-reserved-memory` / `kube-reserved-cpu`: Calculated reserved resources
+    - `allocatable-memory` / `allocatable-cpu`: Final allocatable resources
+    This logging is **only enabled for Custom AMI families** as the calculation for EKS-optimized AMIs is deterministic and documented above.
+    **Common issues:**
+    - If `effective-pods` is much higher than your `max-pods-configured`, check whether `uses-eni-limited-overhead` is `true`, which means the ENI-limited pod count is used instead of your configured value
+    - If allocatable resources are lower than expected, verify your `kubeReserved` settings match your kubelet configuration
+    - For capacity planning, use the formulas above to predict resource availability
     ### Karpenter Is Unable to Satisfy Topology Spread Constraint
     When scheduling pods with TopologySpreadConstraints, Karpenter will attempt to spread the pods across all eligible domains.
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: Add kubeReserved calculation visibility for troubleshooting #8810

Uh oh!

Diff view

Diff view

There are no files selected for viewing

feat: Add kubeReserved calculation visibility for troubleshooting #8810

Are you sure you want to change the base?

Uh oh!

feat: Add kubeReserved calculation visibility for troubleshooting #8810

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing