Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions aks-flex-node-sudoers
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ aks-flex-node ALL=(root) NOPASSWD:SETENV: /sbin/ip addr
aks-flex-node ALL=(root) NOPASSWD:SETENV: /bin/netstat -rn

# Read-only Kubernetes API check for node readiness (used by status collector)
# This is intentionally limited to 'get node' with the kubelet kubeconfig.
aks-flex-node ALL=(root) NOPASSWD:SETENV: /usr/bin/kubectl --kubeconfig /var/lib/kubelet/kubeconfig get node *
aks-flex-node ALL=(root) NOPASSWD:SETENV: /usr/local/bin/kubectl --kubeconfig /var/lib/kubelet/kubeconfig get node *
# This is intentionally limited to 'get' with the kubelet kubeconfig.
aks-flex-node ALL=(root) NOPASSWD:SETENV: /usr/bin/kubectl --kubeconfig /var/lib/kubelet/kubeconfig get *
aks-flex-node ALL=(root) NOPASSWD:SETENV: /usr/local/bin/kubectl --kubeconfig /var/lib/kubelet/kubeconfig get *

# Note: Arc agent (azcmagent) is managed by install.sh and should not be removed during unbootstrap
# Unbootstrap only cleans up what AKS Flex Node created, not the underlying Arc installation
Expand Down
23 changes: 23 additions & 0 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"

"github.com/sirupsen/logrus"
"github.com/spf13/cobra"

"go.goms.io/aks/AKSFlexNode/pkg/bootstrapper"
"go.goms.io/aks/AKSFlexNode/pkg/components/kubelet"
"go.goms.io/aks/AKSFlexNode/pkg/config"
"go.goms.io/aks/AKSFlexNode/pkg/logger"
"go.goms.io/aks/AKSFlexNode/pkg/status"
"go.goms.io/aks/AKSFlexNode/pkg/utils"
)

// Version information variables (set at build time)
Expand Down Expand Up @@ -173,6 +176,8 @@ func runDaemonLoop(ctx context.Context, cfg *config.Config) error {
} else {
logger.Infof("Bootstrap health check completed at %s", time.Now().Format("2006-01-02 15:04:05"))
}

checkAndReboot(ctx)
}
}
}
Expand Down Expand Up @@ -276,3 +281,21 @@ func handleExecutionResult(result *bootstrapper.ExecutionResult, operation strin
// For bootstrap, return error on failure
return fmt.Errorf("%s failed: %s", operation, result.Error)
}

// checkAndReboot is a proof-of-concept that demonstrates the node agent's
// remediation capability. It runs "kubectl get events" with the kubelet
// kubeconfig, sorted by last timestamp, and searches the whole command output
// for a "kernel NULL pointer" message.
//
// NOTE: no reboot is triggered here yet; a match only emits a warning log
// entry. A failure to run kubectl is logged and the check is skipped.
func checkAndReboot(ctx context.Context) {
	// Use a distinct local name so the imported logger package is not
	// shadowed for the remainder of the function.
	ctxLogger := logger.GetLoggerFromContext(ctx)

	// Substring that marks a kernel NULL pointer dereference in the event list.
	const kernelNullPointerMarker = "kernel NULL pointer"

	args := []string{
		"--kubeconfig", kubelet.KubeletKubeconfigPath,
		"get", "events", "--sort-by=.lastTimestamp",
	}

	output, err := utils.RunCommandWithOutput("kubectl", args...)
	if err != nil {
		ctxLogger.Errorf("Failed to get last event: %s", err)
		return
	}

	if strings.Contains(output, kernelNullPointerMarker) {
		ctxLogger.Warn("Node has kernel NULL pointer error, initiating reboot...")
	}
}
Loading