Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 30 additions & 39 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,53 +1,44 @@
# Binaries for programs and plugins
*.exe
*.exe~
# Build artifacts
# Config files with sensitive data (keep sample config)
# Go workspace file
# IDE and editor files
# Log files
# OS generated files
# Output of the go coverage tool, specifically when used with LiteIDE
# Test binary, built with `go test -c`
# Test coverage reports
*.dll
*.so
*.dylib
aks-flex-node

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.exe
*.exe~
*.log
*.out

# Test coverage reports
coverage.out
coverage.html
coverage.xml

# Go workspace file
go.work

# IDE and editor files
.vscode/
.idea/
*.swp
*.so
*.swo
*.swp
*.test
*~

# OS generated files
._*
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Log files
*.log
/var/log/

# Config files with sensitive data (keep sample config)
config.json

# Environment files with secrets
.idea/
.vscode/
.env
.env.local
.env.*.local

# Build artifacts
/build/
/dist/
/dist/AKSFlexNode
/var/log/
AKSFlexNode
Standard_D8pds_v6_sku.json
Thumbs.db
aks-flex-node
config.json
coverage.html
coverage.out
coverage.xml
ehthumbs.db
go.work
34 changes: 32 additions & 2 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ var (
BuildTime = "unknown"
)

// Unbootstrap command flags
var cleanupMode string

// NewAgentCommand creates a new agent command
func NewAgentCommand() *cobra.Command {
cmd := &cobra.Command{
Expand All @@ -44,12 +47,19 @@ func NewUnbootstrapCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "unbootstrap",
Short: "Remove AKS node configuration and Arc connection",
Long: "Clean up and remove all AKS node components and Arc registration from this machine",
Long: `Clean up and remove all AKS node components and Arc registration from this machine.

For private clusters (config has private: true), this also handles VPN cleanup:
--cleanup-mode=local Remove node and local VPN config, keep Gateway (default)
--cleanup-mode=full Remove everything including Gateway VM and Azure resources`,
RunE: func(cmd *cobra.Command, args []string) error {
return runUnbootstrap(cmd.Context())
},
}

cmd.Flags().StringVar(&cleanupMode, "cleanup-mode", "local",
"Private cluster cleanup mode: 'local' (keep Gateway) or 'full' (remove all Azure resources)")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this should be added as a new CLI argument. It applies only to private clusters, so it can be defined in the config instead.


return cmd
}

Expand Down Expand Up @@ -87,6 +97,13 @@ func runAgent(ctx context.Context) error {
return err
}

// Print visible success message
fmt.Println()
fmt.Println("========================================")
fmt.Println(" Join process finished successfully!")
fmt.Println("========================================")
fmt.Println()

// After successful bootstrap, transition to daemon mode
logger.Info("Bootstrap completed successfully, transitioning to daemon mode...")
return runDaemonLoop(ctx, cfg)
Expand All @@ -101,14 +118,27 @@ func runUnbootstrap(ctx context.Context) error {
return fmt.Errorf("failed to load config from %s: %w", configPath, err)
}

// Pass cleanup mode to config so the PrivateClusterUninstall step can read it
if cfg.Azure.TargetCluster != nil {
cfg.Azure.TargetCluster.CleanupMode = cleanupMode
}

bootstrapExecutor := bootstrapper.New(cfg, logger)
result, err := bootstrapExecutor.Unbootstrap(ctx)
if err != nil {
return err
}

// Handle and log the result (unbootstrap is more lenient with failures)
return handleExecutionResult(result, "unbootstrap", logger)
if err := handleExecutionResult(result, "unbootstrap", logger); err != nil {
return err
}

// Print final success message
fmt.Println()
fmt.Println("\033[0;32mSUCCESS:\033[0m Unbootstrap completed successfully!")

return nil
}

// runVersion displays version information
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.1
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v3 v3.0.0-beta.2
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v6 v6.4.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.0.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/hybridcompute/armhybridcompute v1.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions v1.3.0
github.com/Azure/go-autorest/autorest/to v0.4.1
github.com/google/uuid v1.6.0
github.com/sirupsen/logrus v1.9.3
Expand Down
12 changes: 10 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDo
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v3 v3.0.0-beta.2 h1:qiir/pptnHqp6hV8QwV+IExYIf6cPsXBfUDUXQ27t2Y=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v3 v3.0.0-beta.2/go.mod h1:jVRrRDLCOuif95HDYC23ADTMlvahB7tMdl519m9Iyjc=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v6 v6.4.0 h1:z7Mqz6l0EFH549GvHEqfjKvi+cRScxLWbaoeLm9wxVQ=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v6 v6.4.0/go.mod h1:v6gbfH+7DG7xH2kUNs+ZJ9tF6O3iNnR85wMtmr+F54o=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.0.0 h1:5n7dPVqsWfVKw+ZiEKSd3Kzu7gwBkbEBkeXb8rgaE9Q=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.0.0/go.mod h1:HcZY0PHPo/7d75p99lB6lK0qYOP4vLRJUBpiehYXtLQ=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/hybridcompute/armhybridcompute v1.2.0 h1:7UuAn4ljE+H3GQ7qts3c7oAaMRvge68EgyckoNP/1Ro=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/hybridcompute/armhybridcompute v1.2.0/go.mod h1:F2eDq/BGK2LOEoDtoHbBOphaPqcjT0K/Y5Am8vf7+0w=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0/go.mod h1:LRr2FzBTQlONPPa5HREE5+RjSCTXl7BwOvYOaWTqCaI=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1 h1:7CBQ+Ei8SP2c6ydQTGCCrS35bDxgTMfoP2miAwK++OU=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1/go.mod h1:c/wcGeGx5FUPbM/JltUYHZcKmigwyVLJlDq+4HdtXaw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0 h1:2qsIIvxVT+uE6yrNldntJKlLRgxGbZ85kgtz5SNBhMw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0/go.mod h1:AW8VEadnhw9xox+VaVd9sP7NjzOAnaZBLRH6Tq3cJ38=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0 h1:HYGD75g0bQ3VO/Omedm54v4LrD3B1cGImuRF3AJ5wLo=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.2.0/go.mod h1:ulHyBFJOI0ONiRL4vcJTmS7rx18jQQlEPmAgo80cRdM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions v1.3.0 h1:wxQx2Bt4xzPIKvW59WQf1tJNx/ZZKPfN+EhPX3Z6CYY=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions v1.3.0/go.mod h1:TpiwjwnW/khS0LKs4vW5UmmT9OWcxaveS8U7+tlknzo=
github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest/autorest/to v0.4.1 h1:CxNHBqdzTr7rLtdrtb5CMjJcDut+WNGCVv7OmS5+lTc=
Expand Down
8 changes: 5 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ func main() {
}

// Add global flags for configuration
rootCmd.PersistentFlags().StringVar(&configPath, "config", "", "Path to configuration JSON file (required)")
rootCmd.PersistentFlags().StringVar(&configPath, "config", "", "Path to configuration JSON file (required for agent/unbootstrap)")
_ = rootCmd.PersistentFlags().MarkHidden("config")
// Don't mark as required globally - we'll check in PersistentPreRunE for commands that need it

// Add commands
Expand All @@ -49,8 +50,9 @@ func main() {

// Set up persistent pre-run to initialize config and logger
rootCmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error {
// Skip config loading for version command
if cmd.Name() == "version" {
// Skip config loading for commands that don't need it
switch cmd.Name() {
case "version":
return nil
}

Expand Down
6 changes: 4 additions & 2 deletions pkg/bootstrapper/bootstrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"go.goms.io/aks/AKSFlexNode/pkg/components/services"
"go.goms.io/aks/AKSFlexNode/pkg/components/system_configuration"
"go.goms.io/aks/AKSFlexNode/pkg/config"
"go.goms.io/aks/AKSFlexNode/pkg/privatecluster"
)

// Bootstrapper executes bootstrap steps sequentially
Expand All @@ -31,8 +32,8 @@ func New(cfg *config.Config, logger *logrus.Logger) *Bootstrapper {

// Bootstrap executes all bootstrap steps sequentially
func (b *Bootstrapper) Bootstrap(ctx context.Context) (*ExecutionResult, error) {
// Define the bootstrap steps in order - using modules directly
steps := []Executor{
privatecluster.NewInstaller(b.logger), // VPN/Gateway setup (if private cluster)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure the private cluster setup should be the first step in both the installer and the uninstaller?

arc.NewInstaller(b.logger), // Setup Arc
services.NewUnInstaller(b.logger), // Stop kubelet before setup
system_configuration.NewInstaller(b.logger), // Configure system (early)
Expand All @@ -51,6 +52,7 @@ func (b *Bootstrapper) Bootstrap(ctx context.Context) (*ExecutionResult, error)
// Unbootstrap executes all cleanup steps sequentially (in reverse order of bootstrap)
func (b *Bootstrapper) Unbootstrap(ctx context.Context) (*ExecutionResult, error) {
steps := []Executor{
privatecluster.NewUninstaller(b.logger), // Node removal + VPN teardown (if private cluster)
services.NewUnInstaller(b.logger), // Stop services first
npd.NewUnInstaller(b.logger), // Uninstall Node Problem Detector
kubelet.NewUnInstaller(b.logger), // Clean kubelet configuration
Expand All @@ -59,7 +61,7 @@ func (b *Bootstrapper) Unbootstrap(ctx context.Context) (*ExecutionResult, error
containerd.NewUnInstaller(b.logger), // Uninstall containerd binary
runc.NewUnInstaller(b.logger), // Uninstall runc binary
system_configuration.NewUnInstaller(b.logger), // Clean system settings
arc.NewUnInstaller(b.logger), // Uninstall Arc (after cleanup)
arc.NewUnInstaller(b.logger), // Uninstall Arc (after cleanup, uses public internet)
}

return b.ExecuteSteps(ctx, steps, "unbootstrap")
Expand Down
61 changes: 47 additions & 14 deletions pkg/components/arc/arc_installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,17 +118,15 @@ func (i *Installer) setupArcPermissions() error {
func (i *Installer) downloadArcInstallScript(ctx context.Context, destPath string) error {
// Try curl first
if _, err := exec.LookPath("curl"); err == nil {
cmd := exec.CommandContext(ctx, "curl", "-L", "-o", destPath, arcInstallScriptURL)
if err := cmd.Run(); err != nil {
if _, err := utils.RunCommandWithOutput("curl", "-L", "-o", destPath, arcInstallScriptURL); err != nil {
return fmt.Errorf("curl download failed: %w", err)
}
return nil
}

// Try wget as fallback
if _, err := exec.LookPath("wget"); err == nil {
cmd := exec.CommandContext(ctx, "wget", "-O", destPath, arcInstallScriptURL)
if err := cmd.Run(); err != nil {
if _, err := utils.RunCommandWithOutput("wget", "-O", destPath, arcInstallScriptURL); err != nil {
return fmt.Errorf("wget download failed: %w", err)
}
return nil
Expand Down Expand Up @@ -198,19 +196,15 @@ func (i *Installer) IsCompleted(ctx context.Context) bool {
return false
}

// Use same approach as status collector - check azcmagent show with timeout
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()

cmd := exec.CommandContext(timeoutCtx, "azcmagent", "show")
output, err := cmd.Output()
// Use same approach as status collector - check azcmagent show
output, err := utils.RunCommandWithOutput("azcmagent", "show")
if err != nil {
i.logger.Debugf("azcmagent show failed: %v - Arc not ready", err)
return false
}

// Parse output to check if agent is connected (same logic as status collector)
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
lines := strings.Split(strings.TrimSpace(output), "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if strings.Contains(line, "Agent Status") && strings.Contains(line, ":") {
Expand All @@ -236,11 +230,31 @@ func (i *Installer) IsCompleted(ctx context.Context) bool {
func (i *Installer) registerArcMachine(ctx context.Context) (*armhybridcompute.Machine, error) {
i.logger.Info("Registering machine with Azure Arc using Arc agent")

// Check if already registered
// Check if already registered in Azure AND locally connected
machine, err := i.getArcMachine(ctx)
if err == nil && machine != nil {
i.logger.Infof("Machine already registered as Arc machine: %s", to.String(machine.Name))
return machine, nil
// Azure resource exists, but also verify local agent is connected
if i.isLocalAgentConnected(ctx) {
i.logger.Infof("Machine already registered and locally connected as Arc machine: %s", to.String(machine.Name))
return machine, nil
}
i.logger.Warnf("Arc machine '%s' exists in Azure but local agent is disconnected, re-connecting...", to.String(machine.Name))

// Step 1: Clean up local agent state
i.logger.Info("Cleaning up local agent state...")
if output, err := utils.RunCommandWithOutput("azcmagent", "disconnect", "--force-local-only"); err != nil {
i.logger.Warnf("Local disconnect had issues (continuing): %v, output: %s", err, output)
}

// Step 2: Delete the stale Azure Arc resource so connect can recreate it
arcResourceGroup := i.config.GetArcResourceGroup()
arcMachineName := i.config.GetArcMachineName()
i.logger.Infof("Deleting stale Arc machine resource '%s' from Azure...", arcMachineName)
if _, err := i.hybridComputeMachineClient.Delete(ctx, arcResourceGroup, arcMachineName, nil); err != nil {
i.logger.Warnf("Failed to delete Arc machine from Azure (continuing): %v", err)
} else {
i.logger.Info("Stale Arc machine resource deleted from Azure")
}
}

// Register using Arc agent command
Expand All @@ -253,6 +267,25 @@ func (i *Installer) registerArcMachine(ctx context.Context) (*armhybridcompute.M
return i.waitForArcRegistration(ctx)
}

// isLocalAgentConnected reports whether the output of `azcmagent show` lists
// the local Arc agent's "Agent Status" as connected (compared
// case-insensitively).
//
// The context argument is accepted but not used. A failing command, or output
// with no "Agent Status: ..." line, yields false. Only the first such line is
// considered.
func (i *Installer) isLocalAgentConnected(_ context.Context) bool {
	showOutput, showErr := utils.RunCommandWithOutput("azcmagent", "show")
	if showErr != nil {
		i.logger.Debugf("azcmagent show failed: %v", showErr)
		return false
	}

	for _, entry := range strings.Split(showOutput, "\n") {
		if !strings.Contains(entry, "Agent Status") {
			continue
		}
		// Everything after the first colon is the status value.
		_, status, hasColon := strings.Cut(entry, ":")
		if !hasColon {
			continue
		}
		return strings.TrimSpace(strings.ToLower(status)) == "connected"
	}
	return false
}

func (i *Installer) validateManagedCluster(ctx context.Context) error {
i.logger.Info("Validating target AKS Managed Cluster requirements for Azure RBAC authentication")

Expand Down
Loading
Loading