diff --git a/config/helm/aws-node-termination-handler/README.md b/config/helm/aws-node-termination-handler/README.md
index 06ceac1a..8c59e88f 100644
--- a/config/helm/aws-node-termination-handler/README.md
+++ b/config/helm/aws-node-termination-handler/README.md
@@ -95,6 +95,7 @@ The configuration in this table applies to all AWS Node Termination Handler mode
 | `webhookTemplateConfigMapName` | Pass the webhook template file as a configmap. | "``" |
 | `webhookTemplateConfigMapKey` | Name of the Configmap key storing the template file. | `""` |
 | `enableSqsTerminationDraining` | If `true`, this turns on queue-processor mode which drains nodes when an SQS termination event is received. | `false` |
+| `enableOutOfServiceTaint` | If `true`, this adds the out-of-service taint to the node after the cordon/drain process, which forcefully evicts pods without matching tolerations and detaches persistent volumes. | `false` |
 
 ### Queue-Processor Mode Configuration
 
diff --git a/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml b/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml
index be6385de..ee5298be 100644
--- a/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml
+++ b/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml
@@ -99,6 +99,8 @@ spec:
            value: {{ .Values.cordonOnly | quote }}
          - name: TAINT_NODE
            value: {{ .Values.taintNode | quote }}
+         - name: ENABLE_OUT_OF_SERVICE_TAINT
+           value: {{ .Values.enableOutOfServiceTaint | quote }}
          - name: EXCLUDE_FROM_LOAD_BALANCERS
            value: {{ .Values.excludeFromLoadBalancers | quote }}
          - name: DELETE_LOCAL_DATA
diff --git a/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml b/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml
index 95af69d1..8c296373 100644
--- a/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml
+++ b/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml
@@ -99,6 +99,8 @@ spec:
            value: {{ .Values.cordonOnly | quote }}
          - name: TAINT_NODE
            value: {{ .Values.taintNode | quote }}
+         - name: ENABLE_OUT_OF_SERVICE_TAINT
+           value: {{ .Values.enableOutOfServiceTaint | quote }}
          - name: EXCLUDE_FROM_LOAD_BALANCERS
            value: {{ .Values.excludeFromLoadBalancers | quote }}
          - name: DELETE_LOCAL_DATA
diff --git a/config/helm/aws-node-termination-handler/templates/deployment.yaml b/config/helm/aws-node-termination-handler/templates/deployment.yaml
index 7c043fec..32a188d5 100644
--- a/config/helm/aws-node-termination-handler/templates/deployment.yaml
+++ b/config/helm/aws-node-termination-handler/templates/deployment.yaml
@@ -102,6 +102,8 @@ spec:
            value: {{ .Values.cordonOnly | quote }}
          - name: TAINT_NODE
            value: {{ .Values.taintNode | quote }}
+         - name: ENABLE_OUT_OF_SERVICE_TAINT
+           value: {{ .Values.enableOutOfServiceTaint | quote }}
          - name: EXCLUDE_FROM_LOAD_BALANCERS
            value: {{ .Values.excludeFromLoadBalancers | quote }}
          - name: DELETE_LOCAL_DATA
diff --git a/config/helm/aws-node-termination-handler/values.yaml b/config/helm/aws-node-termination-handler/values.yaml
index f6c7bf42..4b523bde 100644
--- a/config/helm/aws-node-termination-handler/values.yaml
+++ b/config/helm/aws-node-termination-handler/values.yaml
@@ -86,6 +86,10 @@ cordonOnly: false
 
 # Taint node upon spot interruption termination notice.
 taintNode: false
 
+# Add the out-of-service taint to the node after the cordon/drain process, which forcefully evicts pods without matching tolerations and detaches persistent volumes.
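+# Forceful deletion and volume detach are performed by the Kubernetes non-graceful node shutdown handling (NodeOutOfServiceVolumeDetach feature gate), not by NTH itself.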
+enableOutOfServiceTaint: false
+
 # Exclude node from load balancer before cordoning via the ServiceNodeExclusion feature gate.
 excludeFromLoadBalancers: false
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 6e926bf4..9368ecb9 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -77,6 +77,8 @@ const (
    taintNode                        = "TAINT_NODE"
    taintEffectDefault               = "NoSchedule"
    taintEffect                      = "TAINT_EFFECT"
+   enableOutOfServiceTaintConfigKey = "ENABLE_OUT_OF_SERVICE_TAINT"
+   enableOutOfServiceTaintDefault   = false
    excludeFromLoadBalancers         = "EXCLUDE_FROM_LOAD_BALANCERS"
    jsonLoggingConfigKey             = "JSON_LOGGING"
    jsonLoggingDefault               = false
@@ -149,6 +151,7 @@ type Config struct {
    CordonOnly               bool
    TaintNode                bool
    TaintEffect              string
+   EnableOutOfServiceTaint  bool
    ExcludeFromLoadBalancers bool
    JsonLogging              bool
    LogLevel                 string
@@ -215,6 +218,7 @@ func ParseCliArgs() (config Config, err error) {
    flag.BoolVar(&config.CordonOnly, "cordon-only", getBoolEnv(cordonOnly, false), "If true, nodes will be cordoned but not drained when an interruption event occurs.")
    flag.BoolVar(&config.TaintNode, "taint-node", getBoolEnv(taintNode, false), "If true, nodes will be tainted when an interruption event occurs.")
    flag.StringVar(&config.TaintEffect, "taint-effect", getEnv(taintEffect, taintEffectDefault), "Sets the effect when a node is tainted.")
+   flag.BoolVar(&config.EnableOutOfServiceTaint, "enable-out-of-service-taint", getBoolEnv(enableOutOfServiceTaintConfigKey, enableOutOfServiceTaintDefault), "If true, nodes will be tainted as out-of-service after we cordon/drain the nodes when an interruption event occurs.")
    flag.BoolVar(&config.ExcludeFromLoadBalancers, "exclude-from-load-balancers", getBoolEnv(excludeFromLoadBalancers, false), "If true, nodes will be marked for exclusion from load balancers when an interruption event occurs.")
    flag.BoolVar(&config.JsonLogging, "json-logging", getBoolEnv(jsonLoggingConfigKey, jsonLoggingDefault), "If true, use JSON-formatted logs instead of human readable logs.")
    flag.StringVar(&config.LogLevel, "log-level", getEnv(logLevelConfigKey, logLevelDefault), "Sets the log level (INFO, DEBUG, or ERROR)")
@@ -344,6 +348,7 @@ func (c Config) PrintJsonConfigArgs() {
        Bool("cordon_only", c.CordonOnly).
        Bool("taint_node", c.TaintNode).
        Str("taint_effect", c.TaintEffect).
+       Bool("enable_out_of_service_taint", c.EnableOutOfServiceTaint).
        Bool("exclude_from_load_balancers", c.ExcludeFromLoadBalancers).
        Bool("json_logging", c.JsonLogging).
        Str("log_level", c.LogLevel).
@@ -395,6 +400,7 @@ func (c Config) PrintHumanConfigArgs() {
        "\tcordon-only: %t,\n"+
        "\ttaint-node: %t,\n"+
        "\ttaint-effect: %s,\n"+
+       "\tenable-out-of-service-taint: %t,\n"+
        "\texclude-from-load-balancers: %t,\n"+
        "\tjson-logging: %t,\n"+
        "\tlog-level: %s,\n"+
@@ -437,6 +443,7 @@ func (c Config) PrintHumanConfigArgs() {
        c.CordonOnly,
        c.TaintNode,
        c.TaintEffect,
+       c.EnableOutOfServiceTaint,
        c.ExcludeFromLoadBalancers,
        c.JsonLogging,
        c.LogLevel,
diff --git a/pkg/interruptionevent/draincordon/handler.go b/pkg/interruptionevent/draincordon/handler.go
index 0360a31c..9fac6b07 100644
--- a/pkg/interruptionevent/draincordon/handler.go
+++ b/pkg/interruptionevent/draincordon/handler.go
@@ -119,6 +119,15 @@ func (h *Handler) HandleEvent(drainEvent *monitor.InterruptionEvent) error {
    if (err == nil || (!nodeFound && h.commonHandler.NthConfig.DeleteSqsMsgIfNodeNotFound)) && drainEvent.PostDrainTask != nil {
        h.commonHandler.RunPostDrainTask(nodeName, drainEvent)
    }
+
+   // Only add the out-of-service taint if the ENABLE_OUT_OF_SERVICE_TAINT flag is true and the CORDON_ONLY flag is false
+   if err == nil && h.commonHandler.NthConfig.EnableOutOfServiceTaint && !h.commonHandler.NthConfig.CordonOnly {
+       err = h.commonHandler.Node.TaintOutOfService(nodeName)
+       if err != nil {
+           return fmt.Errorf("cannot add out-of-service taint on node %s: %w", nodeName, err)
+       }
+   }
+
    return nil
 }
 
diff --git a/pkg/node/node.go b/pkg/node/node.go
index 204c5de6..5210049e 100644
--- a/pkg/node/node.go
+++ b/pkg/node/node.go
@@ -59,6 +59,10 @@ const (
    // ASGLifecycleTerminationTaint is a taint used to make spot instance unschedulable
    ASGLifecycleTerminationTaint = "aws-node-termination-handler/asg-lifecycle-termination"
    // RebalanceRecommendationTaint is a taint used to make spot instance unschedulable
    RebalanceRecommendationTaint = "aws-node-termination-handler/rebalance-recommendation"
+   // OutOfServiceTaintKey is the key of the out-of-service taint, which is used to forcefully evict pods without matching tolerations and detach persistent volumes
+   OutOfServiceTaintKey        = "node.kubernetes.io/out-of-service"
+   OutOfServiceTaintValue      = "nodeshutdown"
+   OutOfServiceTaintEffectType = "NoExecute"
 
    maxTaintValueLength = 63
 
    daemonSet = "DaemonSet"
@@ -447,7 +451,7 @@ func (n Node) TaintSpotItn(nodeName string, eventID string) error {
        eventID = eventID[:maxTaintValueLength]
    }
 
-   return addTaint(k8sNode, n, SpotInterruptionTaint, eventID)
+   return addTaint(k8sNode, n, SpotInterruptionTaint, eventID, n.nthConfig.TaintEffect)
 }
 
 // TaintASGLifecycleTermination adds the spot termination notice taint onto a node
@@ -465,7 +469,7 @@ func (n Node) TaintASGLifecycleTermination(nodeName string, eventID string) erro
        eventID = eventID[:maxTaintValueLength]
    }
 
-   return addTaint(k8sNode, n, ASGLifecycleTerminationTaint, eventID)
+   return addTaint(k8sNode, n, ASGLifecycleTerminationTaint, eventID, n.nthConfig.TaintEffect)
 }
 
 // TaintRebalanceRecommendation adds the rebalance recommendation notice taint onto a node
@@ -483,7 +487,7 @@ func (n Node) TaintRebalanceRecommendation(nodeName string, eventID string) erro
        eventID = eventID[:maxTaintValueLength]
    }
 
-   return addTaint(k8sNode, n, RebalanceRecommendationTaint, eventID)
+   return addTaint(k8sNode, n, RebalanceRecommendationTaint, eventID, n.nthConfig.TaintEffect)
 }
 
 // LogPods logs all the pod names on a node
@@ -525,7 +529,22 @@ func (n Node) TaintScheduledMaintenance(nodeName string, eventID string) error {
        eventID = eventID[:maxTaintValueLength]
    }
 
-   return addTaint(k8sNode, n, ScheduledMaintenanceTaint, eventID)
+   return addTaint(k8sNode, n, ScheduledMaintenanceTaint, eventID, n.nthConfig.TaintEffect)
+}
+
+// TaintOutOfService adds the out-of-service taint (NoExecute) onto a node
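+// The taint triggers the Kubernetes non-graceful node shutdown handling, which force-deletes the pods on the node and detaches their volumes so stateful workloads can recover on other nodes.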
+func (n Node) TaintOutOfService(nodeName string) error {
+   if !n.nthConfig.EnableOutOfServiceTaint || n.nthConfig.CordonOnly {
+       return nil
+   }
+
+   k8sNode, err := n.fetchKubernetesNode(nodeName)
+   if err != nil {
+       return fmt.Errorf("Unable to fetch kubernetes node from API: %w", err)
+   }
+
+   return addTaint(k8sNode, n, OutOfServiceTaintKey, OutOfServiceTaintValue, OutOfServiceTaintEffectType)
 }
 
 // RemoveNTHTaints removes NTH-specific taints from a node
@@ -711,8 +729,8 @@ func getTaintEffect(effect string) corev1.TaintEffect {
    }
 }
 
-func addTaint(node *corev1.Node, nth Node, taintKey string, taintValue string) error {
-   effect := getTaintEffect(nth.nthConfig.TaintEffect)
+func addTaint(node *corev1.Node, nth Node, taintKey string, taintValue string, effectType string) error {
+   effect := getTaintEffect(effectType)
    if nth.nthConfig.DryRun {
        log.Info().Msgf("Would have added taint (%s=%s:%s) to node %s, but dry-run flag was set", taintKey, taintValue, effect, nth.nthConfig.NodeName)
        return nil
diff --git a/pkg/node/node_test.go b/pkg/node/node_test.go
index 0e4c393b..89fe33cc 100644
--- a/pkg/node/node_test.go
+++ b/pkg/node/node_test.go
@@ -37,6 +37,9 @@ import (
 // Size of the fakeRecorder buffer
 const recorderBufferSize = 10
 
+const outOfServiceTaintKey = "node.kubernetes.io/out-of-service"
+const outOfServiceTaintValue = "nodeshutdown"
+
 var nodeName = "NAME"
 
 func getDrainHelper(client *fake.Clientset) *drain.Helper {
@@ -418,3 +421,40 @@ func TestFilterOutDaemonSetPods(t *testing.T) {
    filteredMockPodList := tNode.FilterOutDaemonSetPods(mockPodList)
    h.Equals(t, 2, len(filteredMockPodList.Items))
 }
+
+func TestTaintOutOfService(t *testing.T) {
+   client := fake.NewSimpleClientset()
+   _, err := client.CoreV1().Nodes().Create(
+       context.Background(),
+       &v1.Node{
+           ObjectMeta: metav1.ObjectMeta{Name: nodeName},
+       },
+       metav1.CreateOptions{})
+   h.Ok(t, err)
+
+   tNode, err := newNode(config.Config{EnableOutOfServiceTaint: true}, client)
+   h.Ok(t, err)
+   h.Equals(t, true, tNode.GetNthConfig().EnableOutOfServiceTaint)
+   h.Equals(t, false, tNode.GetNthConfig().CordonOnly)
+
+   err = tNode.TaintOutOfService(nodeName)
+   h.Ok(t, err)
+
+   updatedNode, err := client.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+   h.Ok(t, err)
+   taintFound := false
+   expectedTaint := v1.Taint{
+       Key:    outOfServiceTaintKey,
+       Value:  outOfServiceTaintValue,
+       Effect: v1.TaintEffectNoExecute,
+   }
+   for _, taint := range updatedNode.Spec.Taints {
+       if taint.Key == expectedTaint.Key &&
+           taint.Value == expectedTaint.Value &&
+           taint.Effect == expectedTaint.Effect {
+           taintFound = true
+           break
+       }
+   }
+   h.Equals(t, true, taintFound)
+}
diff --git a/test/e2e/spot-interruption-test b/test/e2e/spot-interruption-test
index 36781d21..79845754 100755
--- a/test/e2e/spot-interruption-test
+++ b/test/e2e/spot-interruption-test
@@ -17,6 +17,12 @@ function fail_and_exit {
     exit "${1:-1}"
 }
 
+function remove_out_of_service_taint {
+    local node=$1
+    echo "Removing out-of-service taint from node ${node}"
+    kubectl taint nodes "${node}" node.kubernetes.io/out-of-service:NoExecute- || true
+}
+
 echo "Starting Spot Interruption Test for Node Termination Handler"
 
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
@@ -37,6 +43,7 @@ anth_helm_args=(
   --set enableScheduledEventDraining="false"
   --set enableSpotInterruptionDraining="true"
   --set taintNode="true"
+  --set enableOutOfServiceTaint="true"
   --set daemonsetTolerations=""
   --wait
   --force
@@ -110,6 +117,7 @@ fi
 
 cordoned=0
 tainted=0
+outOfServiceTainted=0
 test_node=${TEST_NODE:-$CLUSTER_NAME-worker}
 for i in $(seq 1 $TAINT_CHECK_CYCLES); do
   if [[ $cordoned -eq 0 ]] && kubectl get nodes "${test_node}" | grep SchedulingDisabled >/dev/null; then
@@ -118,13 +126,19 @@ for i in $(seq 1 $TAINT_CHECK_CYCLES); do
   fi
 
   if [[ $cordoned -eq 1 && $tainted -eq 0 ]] && kubectl get nodes "${test_node}" -o json | grep -q "aws-node-termination-handler/spot-itn" >/dev/null; then
-      echo "✅ Verified the worked node was tainted!"
-      tainted=1
+    echo "✅ Verified the worker node was tainted!"
+    tainted=1
+  fi
+
+  if [[ $cordoned -eq 1 && $tainted -eq 1 ]] && kubectl get nodes "${test_node}" -o json | grep -q "node.kubernetes.io/out-of-service" >/dev/null; then
+    echo "✅ Verified the worker node was tainted as out-of-service!"
+    outOfServiceTainted=1
   fi
 
-  if [[ $tainted -eq 1 && $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then
+  if [[ $tainted -eq 1 && $outOfServiceTainted -eq 1 && $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then
     echo "✅ Verified the regular-pod-test pod was evicted!"
     echo "✅ Spot Interruption Test Passed $CLUSTER_NAME! ✅"
+    remove_out_of_service_taint "${test_node}"
     exit 0
   fi
   echo "Assertion Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
@@ -135,8 +149,11 @@
 if [[ $cordoned -eq 0 ]]; then
   echo "❌ Worker node was not cordoned"
 elif [[ $tainted -eq 0 ]]; then
   echo "❌ Worker node was not tainted"
+elif [[ $outOfServiceTainted -eq 0 ]]; then
+  echo "❌ Worker node was not tainted as out-of-service"
 else
   echo "❌ regular-pod-test pod was not evicted"
 fi
+remove_out_of_service_taint "${test_node}"
 fail_and_exit 1
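Note: for a quick manual check of the behavior this patch automates, the same taint can be applied, inspected, and removed with kubectl ("my-node" is a placeholder node name). The removal form mirrors remove_out_of_service_taint above, and the key/value/effect match the constants added in pkg/node/node.go:

    kubectl taint nodes my-node node.kubernetes.io/out-of-service=nodeshutdown:NoExecute
    kubectl get node my-node -o jsonpath='{.spec.taints}'
    kubectl taint nodes my-node node.kubernetes.io/out-of-service:NoExecute-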