Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/kubectl-datadog/autoscaling/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"k8s.io/cli-runtime/pkg/genericclioptions"

"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/install"
"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/uninstall"
)

// options provides information required by cluster command
Expand All @@ -31,6 +32,7 @@ func New(streams genericclioptions.IOStreams) *cobra.Command {
}

cmd.AddCommand(install.New(streams))
cmd.AddCommand(uninstall.New(streams))

o := newOptions(streams)
o.configFlags.AddFlags(cmd.Flags())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,50 @@ func EnsureAwsAuthRole(ctx context.Context, clientset kubernetes.Interface, role

return nil
}

func RemoveAwsAuthRole(ctx context.Context, clientset kubernetes.Interface, roleArn string) error {
cm, err := clientset.CoreV1().ConfigMaps("kube-system").Get(ctx, "aws-auth", metav1.GetOptions{})
if err != nil {
return fmt.Errorf("failed to get aws-auth ConfigMap: %w", err)
}

var roles []RoleMapping
if mapRoles, ok := cm.Data["mapRoles"]; ok {
if err = yaml.Unmarshal([]byte(mapRoles), &roles); err != nil {
return fmt.Errorf("failed to parse mapRoles: %w", err)
}
} else {
log.Printf("No mapRoles found in aws-auth ConfigMap, skipping role removal.")
return nil
}

found := false
updatedRoles := make([]RoleMapping, 0, len(roles))
for _, role := range roles {
if role.RoleArn == roleArn {
found = true
continue
}
updatedRoles = append(updatedRoles, role)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: could use slices.DeleteFunc:

oldLen := len(roles)
roles = slices.DeleteFunc(roles, func(role RoleMapping) bool { return role.RoleArn == roleArn })
found := oldLen != len(roles)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


if !found {
log.Printf("Role %s not found in aws-auth ConfigMap, skipping removal.", roleArn)
return nil
}

updated, err := yaml.Marshal(updatedRoles)
if err != nil {
return fmt.Errorf("failed to marshal updated mapRoles: %w", err)
}

cm.Data["mapRoles"] = string(updated)

if _, err := clientset.CoreV1().ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("failed to update aws-auth ConfigMap: %w", err)
}

log.Printf("Removed role %s from aws-auth ConfigMap.", roleArn)

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,48 @@ func updateStack(ctx context.Context, client *cloudformation.Client, stackName s
return nil
}

func DeleteStack(ctx context.Context, client *cloudformation.Client, stackName string) error {
exist, err := doesStackExist(ctx, client, stackName)
if err != nil {
return err
}

if !exist {
log.Printf("Stack %s does not exist, skipping deletion.", stackName)
return nil
}

log.Printf("Deleting stack %s…", stackName)

_, err = client.DeleteStack(
ctx,
&cloudformation.DeleteStackInput{
StackName: aws.String(stackName),
},
)
if err != nil {
return fmt.Errorf("failed to delete stack %s: %w", stackName, err)
}

waiter := cloudformation.NewStackDeleteCompleteWaiter(client)
if err := waiter.Wait(
ctx,
&cloudformation.DescribeStacksInput{
StackName: aws.String(stackName),
},
maxWaitDuration,
); err != nil {
log.Printf("Failed to delete stack %s.", stackName)
describeStack(ctx, client, stackName)

return fmt.Errorf("failed to wait for stack %s deletion: %w", stackName, err)
}

log.Printf("Deleted stack %s.", stackName)

return nil
}

func describeStack(ctx context.Context, client *cloudformation.Client, stackName string) error {
out, err := client.DescribeStacks(
ctx,
Expand Down
119 changes: 119 additions & 0 deletions cmd/kubectl-datadog/autoscaling/cluster/common/clients/clients.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Package clients provides shared AWS and Kubernetes client initialization
// for the Karpenter install and uninstall commands.
package clients

import (
"context"
"fmt"

awssdk "github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/cloudformation"
"github.com/aws/aws-sdk-go-v2/service/ec2"
"github.com/aws/aws-sdk-go-v2/service/eks"
"github.com/aws/aws-sdk-go-v2/service/sts"
karpawsv1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/install/guess"
)

// Clients holds all AWS and Kubernetes client instances needed for
// Karpenter installation and uninstallation operations.
type Clients struct {
// AWS clients
Config awssdk.Config
CloudFormation *cloudformation.Client
EC2 *ec2.Client
EKS *eks.Client
STS *sts.Client

// Kubernetes clients
K8sClient client.Client // controller-runtime client
K8sClientset *kubernetes.Clientset // typed Kubernetes client
}

// Build creates AWS and Kubernetes clients for Karpenter operations.
func Build(ctx context.Context, configFlags *genericclioptions.ConfigFlags, k8sClientset *kubernetes.Clientset) (*Clients, error) {
awsConfig, err := config.LoadDefaultConfig(ctx)
if err != nil {
return nil, fmt.Errorf("failed to load AWS config: %w", err)
}

sch := runtime.NewScheme()

if err = scheme.AddToScheme(sch); err != nil {
return nil, fmt.Errorf("failed to add base scheme: %w", err)
}

sch.AddKnownTypes(
schema.GroupVersion{Group: "karpenter.sh", Version: "v1"},
&karpv1.NodePool{},
&karpv1.NodePoolList{},
)
metav1.AddToGroupVersion(sch, schema.GroupVersion{Group: "karpenter.sh", Version: "v1"})

sch.AddKnownTypes(
schema.GroupVersion{Group: "karpenter.k8s.aws", Version: "v1"},
&karpawsv1.EC2NodeClass{},
&karpawsv1.EC2NodeClassList{},
)
metav1.AddToGroupVersion(sch, schema.GroupVersion{Group: "karpenter.k8s.aws", Version: "v1"})

restConfig, err := configFlags.ToRESTConfig()
if err != nil {
return nil, fmt.Errorf("failed to get REST config: %w", err)
}

httpClient, err := rest.HTTPClientFor(restConfig)
if err != nil {
return nil, fmt.Errorf("unable to create http client: %w", err)
}

mapper, err := apiutil.NewDynamicRESTMapper(restConfig, httpClient)
if err != nil {
return nil, fmt.Errorf("unable to instantiate mapper: %w", err)
}

k8sClient, err := client.New(restConfig, client.Options{
Scheme: sch,
Mapper: mapper,
})
if err != nil {
return nil, fmt.Errorf("failed to create Karpenter client: %w", err)
}

return &Clients{
Config: awsConfig,
CloudFormation: cloudformation.NewFromConfig(awsConfig),
EC2: ec2.NewFromConfig(awsConfig),
EKS: eks.NewFromConfig(awsConfig),
STS: sts.NewFromConfig(awsConfig),
K8sClient: k8sClient,
K8sClientset: k8sClientset,
}, nil
}

// GetClusterNameFromKubeconfig extracts the EKS cluster name from the current kubeconfig context.
func GetClusterNameFromKubeconfig(ctx context.Context, configFlags *genericclioptions.ConfigFlags) (string, error) {
kubeRawConfig, err := configFlags.ToRawKubeConfigLoader().RawConfig()
if err != nil {
return "", fmt.Errorf("failed to get raw kubeconfig: %w", err)
}

kubeContext := ""
if configFlags.Context != nil {
kubeContext = *configFlags.Context
}

return guess.GetClusterNameFromKubeconfig(ctx, kubeRawConfig, kubeContext), nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ import (
"errors"
"fmt"
"log"
"os"
"time"

"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/chart/loader"
"helm.sh/helm/v3/pkg/cli"
"helm.sh/helm/v3/pkg/kube"
"helm.sh/helm/v3/pkg/release"
"helm.sh/helm/v3/pkg/storage/driver"
"k8s.io/cli-runtime/pkg/genericclioptions"
)

func CreateOrUpgrade(ctx context.Context, ac *action.Configuration, releaseName, namespace, chartRef, version string, values map[string]any) error {
Expand Down Expand Up @@ -125,3 +128,51 @@ func upgrade(ctx context.Context, ac *action.Configuration, releaseName, namespa

return nil
}

func Uninstall(ctx context.Context, ac *action.Configuration, releaseName string) error {
exist, err := doesExist(ctx, ac, releaseName)
if err != nil {
return err
}

if !exist {
log.Printf("Helm release %s does not exist, skipping uninstallation.", releaseName)
return nil
}

log.Printf("Uninstalling Helm release %s…", releaseName)

uninstallAction := action.NewUninstall(ac)
uninstallAction.Wait = true
uninstallAction.Timeout = 30 * time.Minute

response, err := uninstallAction.Run(releaseName)
if err != nil {
return fmt.Errorf("failed to uninstall Helm release %s: %w", releaseName, err)
}

log.Printf("Uninstalled Helm release %s.", response.Release.Name)

return nil
}

// NewActionConfig creates a new Helm action configuration from kubeconfig flags.
func NewActionConfig(configFlags *genericclioptions.ConfigFlags, namespace string) (*action.Configuration, error) {
kubeConfig := ""
if configFlags.KubeConfig != nil {
kubeConfig = *configFlags.KubeConfig
}
kubeContext := ""
if configFlags.Context != nil {
kubeContext = *configFlags.Context
}

restClientGetter := kube.GetConfig(kubeConfig, kubeContext, namespace)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Respect full kubeconfig overrides for Helm config

This uses kube.GetConfig(kubeConfig, kubeContext, namespace), which only honors the kubeconfig path and context and ignores other ConfigFlags overrides like --server, --token, --certificate-authority, --insecure-skip-tls-verify, or impersonation flags. In environments where users rely on those flags (e.g., CI or ephemeral clusters without a kubeconfig file), the Helm install/uninstall will silently target the default kubeconfig context or fail to authenticate, causing operations to hit the wrong cluster or error. Consider passing the ConfigFlags RESTClientGetter directly so all overrides are respected.

Useful? React with 👍 / 👎.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actionConfig := new(action.Configuration)

if err := actionConfig.Init(restClientGetter, namespace, os.Getenv("HELM_DRIVER"), log.Printf); err != nil {
return nil, fmt.Errorf("failed to initialize Helm configuration: %w", err)
}

return actionConfig, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

func createOrUpdate(ctx context.Context, cli client.Client, object client.Object) error {
func CreateOrUpdate(ctx context.Context, cli client.Client, object client.Object) error {
resourceVersion, err := getResourceVersion(ctx, cli, object)
if err != nil {
return err
Expand Down Expand Up @@ -58,3 +58,59 @@ func update(ctx context.Context, cli client.Client, object client.Object) error

return nil
}

func Delete(ctx context.Context, cli client.Client, object client.Object) error {
log.Printf("Deleting %s %s…", object.GetObjectKind().GroupVersionKind().Kind, object.GetName())

if err := cli.Delete(ctx, object); err != nil {
if apierrors.IsNotFound(err) {
log.Printf("%s %s not found, skipping deletion.", object.GetObjectKind().GroupVersionKind().Kind, object.GetName())
return nil
}
Comment on lines +66 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a good idea to mask 404? E.g. update bubbles it up.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is inside the Delete function the goal of which is to ensure that the given object is eventually not existing anymore.
If the given object cannot be found, it is already not existing, then the system is already in the expected state.
The promise of the Delete function, which is: “The object will eventually be deleted” is already kept.
Such a situation shouldn’t be considered as an error.

The question might then be: why would we try to delete an object that doesn’t exist?
Well, this might happen if a user tries to manually delete some objects concurrently to the execution of this script.
One could argue that it isn’t a good idea to do manual actions that might conflict with a running script.
But if we can handle this case gracefully, that’s still better.

return fmt.Errorf("failed to delete %s %s: %w", object.GetObjectKind().GroupVersionKind().Kind, object.GetName(), err)
}

log.Printf("Deleted %s %s.", object.GetObjectKind().GroupVersionKind().Kind, object.GetName())

return nil
}

func DeleteAllWithLabel(ctx context.Context, cli client.Client, list client.ObjectList, labelSelector client.MatchingLabels) error {
gvk := list.GetObjectKind().GroupVersionKind()
kind := gvk.Kind

log.Printf("Listing %s resources with labels %v…", kind, labelSelector)

if err := cli.List(ctx, list, labelSelector); err != nil {
return fmt.Errorf("failed to list %s resources: %w", kind, err)
}

items, err := extractListItems(list)
if err != nil {
return err
}

if len(items) == 0 {
log.Printf("No %s resources found with labels %v, skipping deletion.", kind, labelSelector)
return nil
}

log.Printf("Found %d %s resource(s) to delete.", len(items), kind)

for _, item := range items {
if err := Delete(ctx, cli, item); err != nil {
return err
}
}

return nil
}

func extractListItems(list client.ObjectList) ([]client.Object, error) {
switch v := list.(type) {
case interface{ GetItems() []client.Object }:
return v.GetItems(), nil
default:
return nil, fmt.Errorf("unsupported list type: %T", list)
}
}
Loading
Loading