Skip to content

Commit 69604af

Browse files
author
weiliu2
committed
Private AKS cluster: Add flexible node support
- Add private-join command to join Private AKS cluster via Gateway
- Add private-leave command with --mode=local|full cleanup options
- Add private-install.sh and private-uninstall.sh scripts
- Add pkg/privatecluster package with embedded scripts
- Add documentation for creating and configuring Private AKS cluster
1 parent 9c281de commit 69604af

File tree

7 files changed

+1659
-3
lines changed

7 files changed

+1659
-3
lines changed

commands.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"go.goms.io/aks/AKSFlexNode/pkg/bootstrapper"
1515
"go.goms.io/aks/AKSFlexNode/pkg/config"
1616
"go.goms.io/aks/AKSFlexNode/pkg/logger"
17+
"go.goms.io/aks/AKSFlexNode/pkg/privatecluster"
1718
"go.goms.io/aks/AKSFlexNode/pkg/status"
1819
)
1920

@@ -118,6 +119,107 @@ func runVersion() {
118119
fmt.Printf("Build Time: %s\n", BuildTime)
119120
}
120121

122+
// Private cluster command variables.
//
// These package-level variables receive flag values parsed by cobra:
//   - aksResourceID is bound to --aks-resource-id by both the private-join
//     and the private-leave command.
//   - cleanupModeFlag is bound to --mode by the private-leave command and is
//     validated in runPrivateLeave.
123+
var (
124+
aksResourceID string
125+
cleanupModeFlag string
126+
)
127+
128+
// NewPrivateJoinCommand creates a new private-join command
129+
func NewPrivateJoinCommand() *cobra.Command {
130+
cmd := &cobra.Command{
131+
Use: "private-join",
132+
Short: "Join a Private AKS cluster (requires sudo)",
133+
Long: `Join a Private AKS cluster.
134+
135+
Prerequisites:
136+
1. A Private AKS cluster must exist with AAD and Azure RBAC enabled
137+
See: pkg/privatecluster/create_private_cluster.md
138+
139+
2. Current user must have the following roles on the cluster:
140+
- Azure Kubernetes Service Cluster Admin Role
141+
- Azure Kubernetes Service RBAC Cluster Admin
142+
143+
3. Current user must be logged in via 'sudo az login'
144+
145+
The full resource ID of the Private AKS cluster is required as the --aks-resource-id parameter.
146+
This same resource ID can be used later with the private-leave command.`,
147+
RunE: func(cmd *cobra.Command, args []string) error {
148+
return runPrivateJoin(cmd.Context())
149+
},
150+
}
151+
152+
cmd.Flags().StringVar(&aksResourceID, "aks-resource-id", "", "AKS cluster resource ID (required)")
153+
cmd.MarkFlagRequired("aks-resource-id")
154+
155+
return cmd
156+
}
157+
158+
// NewPrivateLeaveCommand creates a new private-leave command
159+
func NewPrivateLeaveCommand() *cobra.Command {
160+
cmd := &cobra.Command{
161+
Use: "private-leave",
162+
Short: "Leave a Private AKS cluster (--mode=local|full, requires sudo)",
163+
Long: `Remove this edge node from a Private AKS cluster.
164+
165+
Cleanup modes:
166+
--local Local cleanup only (default):
167+
- Remove node from AKS cluster
168+
- Run aks-flex-node unbootstrap
169+
- Remove Arc Agent
170+
- Stop VPN and remove client config
171+
- Keep Gateway for other nodes
172+
173+
--full Full cleanup (requires --aks-resource-id):
174+
- All local cleanup steps
175+
- Delete Gateway VM
176+
- Delete Gateway subnet, NSG, Public IP
177+
- Delete SSH keys
178+
179+
This command requires the current user to be logged in via 'sudo az login'.`,
180+
RunE: func(cmd *cobra.Command, args []string) error {
181+
return runPrivateLeave(cmd.Context())
182+
},
183+
}
184+
185+
cmd.Flags().StringVar(&cleanupModeFlag, "mode", "local", "Cleanup mode: 'local' (keep Gateway) or 'full' (remove all Azure resources)")
186+
cmd.Flags().StringVar(&aksResourceID, "aks-resource-id", "", "AKS cluster resource ID (required for --mode=full)")
187+
188+
return cmd
189+
}
190+
191+
// runPrivateJoin executes the private cluster join process
192+
func runPrivateJoin(ctx context.Context) error {
193+
if os.Getuid() != 0 {
194+
return fmt.Errorf("this command requires root privileges, please run with 'sudo'")
195+
}
196+
runner := privatecluster.NewScriptRunner("")
197+
return runner.RunPrivateInstall(ctx, aksResourceID)
198+
}
199+
200+
// runPrivateLeave executes the private cluster leave process
201+
func runPrivateLeave(ctx context.Context) error {
202+
if os.Getuid() != 0 {
203+
return fmt.Errorf("this command requires root privileges, please run with 'sudo'")
204+
}
205+
// Validate cleanup mode
206+
var mode privatecluster.CleanupMode
207+
switch cleanupModeFlag {
208+
case "local":
209+
mode = privatecluster.CleanupModeLocal
210+
case "full":
211+
mode = privatecluster.CleanupModeFull
212+
if aksResourceID == "" {
213+
return fmt.Errorf("--aks-resource-id is required for full cleanup mode")
214+
}
215+
default:
216+
return fmt.Errorf("invalid cleanup mode: %s (use 'local' or 'full')", cleanupModeFlag)
217+
}
218+
219+
runner := privatecluster.NewScriptRunner("")
220+
return runner.RunPrivateUninstall(ctx, mode, aksResourceID)
221+
}
222+
121223
// runDaemonLoop runs the periodic status collection and bootstrap monitoring daemon
122224
func runDaemonLoop(ctx context.Context, cfg *config.Config) error {
123225
logger := logger.GetLoggerFromContext(ctx)

main.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@ func main() {
2525
}
2626

2727
// Add global flags for configuration
28-
rootCmd.PersistentFlags().StringVar(&configPath, "config", "", "Path to configuration JSON file (required)")
28+
rootCmd.PersistentFlags().StringVar(&configPath, "config", "", "Path to configuration JSON file (required for agent/unbootstrap)")
29+
rootCmd.PersistentFlags().MarkHidden("config") // Hide from global help, shown in agent/unbootstrap help
2930
// Don't mark as required globally - we'll check in PersistentPreRunE for commands that need it
3031

3132
// Add commands
3233
rootCmd.AddCommand(NewAgentCommand())
3334
rootCmd.AddCommand(NewUnbootstrapCommand())
3435
rootCmd.AddCommand(NewVersionCommand())
36+
rootCmd.AddCommand(NewPrivateJoinCommand())
37+
rootCmd.AddCommand(NewPrivateLeaveCommand())
3538

3639
// Set up context with signal handling
3740
ctx, cancel := context.WithCancel(context.Background())
@@ -49,8 +52,9 @@ func main() {
4952

5053
// Set up persistent pre-run to initialize config and logger
5154
rootCmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error {
52-
// Skip config loading for version command
53-
if cmd.Name() == "version" {
55+
// Skip config loading for commands that don't need it
56+
switch cmd.Name() {
57+
case "version", "private-join", "private-leave":
5458
return nil
5559
}
5660

pkg/privatecluster/README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Private AKS Cluster - Edge Node Join/Leave
2+
3+
## Prerequisites
4+
5+
### 1. Login to Azure CLI as root
6+
7+
```bash
8+
sudo az login
9+
```
10+
11+
### 2. Create a Private AKS Cluster
12+
13+
Create a Private AKS cluster with AAD and Azure RBAC enabled, and assign the required roles to your user.
14+
15+
See: [create_private_cluster.md](create_private_cluster.md)
16+
17+
## Join Private AKS Cluster
18+
19+
### 1. Build the project
20+
21+
```bash
22+
go build -o aks-flex-node .
23+
```
24+
25+
### 2. Join the cluster
26+
27+
```bash
28+
sudo ./aks-flex-node private-join --aks-resource-id "<AKS_RESOURCE_ID>"
29+
```
30+
31+
Example:
32+
```bash
33+
sudo ./aks-flex-node private-join \
34+
--aks-resource-id "/subscriptions/xxx/resourcegroups/my-rg/providers/Microsoft.ContainerService/managedClusters/my-private-aks"
35+
```
36+
37+
### 3. Verify
38+
39+
```bash
40+
sudo kubectl get nodes
41+
```
42+
43+
## Leave Private AKS Cluster
44+
45+
```bash
46+
sudo ./aks-flex-node private-leave --mode=<local|full> [--aks-resource-id "<AKS_RESOURCE_ID>"]
47+
```
48+
49+
### Mode Comparison
50+
51+
| Mode | Command | Description |
52+
|------|---------|-------------|
53+
| `local` | `sudo ./aks-flex-node private-leave --mode=local` | Remove node and local components, **keep Gateway** for other nodes |
54+
| `full` | `sudo ./aks-flex-node private-leave --mode=full --aks-resource-id "..."` | Remove all components **including Gateway and Azure resources** |
55+
56+
### When to use each mode
57+
58+
- **`--mode=local`**: Other nodes are still using the Gateway, or you plan to rejoin later
59+
- **`--mode=full`**: Last node leaving, clean up all Azure resources (Gateway VM, subnet, NSG, public IP)
pkg/privatecluster/create_private_cluster.md

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# Create Private AKS Cluster
2+
3+
This guide shows how to create a Private AKS Cluster with AAD and Azure RBAC enabled for edge node testing.
4+
5+
## Prerequisites
6+
7+
### 1. Login to Azure CLI as root
8+
9+
```bash
10+
sudo az login
11+
```
12+
13+
### 2. Set variables
14+
15+
```bash
16+
# Required
17+
CLUSTER_NAME="my-private-aks"
18+
RESOURCE_GROUP="my-rg"
19+
LOCATION="eastus2"
20+
21+
# Optional (defaults)
22+
VNET_NAME="${CLUSTER_NAME}-vnet"
23+
VNET_CIDR="10.224.0.0/12"
24+
SUBNET_NAME="aks-subnet"
25+
SUBNET_CIDR="10.224.0.0/16"
26+
NODE_COUNT=1
27+
NODE_VM_SIZE="Standard_D2s_v3"
28+
```
29+
30+
## Step 1: Create Resource Group
31+
32+
```bash
33+
az group create \
34+
--name "$RESOURCE_GROUP" \
35+
--location "$LOCATION"
36+
```
37+
38+
## Step 2: Create VNet and Subnet
39+
40+
```bash
41+
# Create VNet
42+
az network vnet create \
43+
--resource-group "$RESOURCE_GROUP" \
44+
--name "$VNET_NAME" \
45+
--address-prefix "$VNET_CIDR"
46+
47+
# Create Subnet
48+
az network vnet subnet create \
49+
--resource-group "$RESOURCE_GROUP" \
50+
--vnet-name "$VNET_NAME" \
51+
--name "$SUBNET_NAME" \
52+
--address-prefix "$SUBNET_CIDR"
53+
```
54+
55+
## Step 3: Create Private AKS Cluster
56+
57+
```bash
58+
# Get Subnet ID
59+
SUBNET_ID=$(az network vnet subnet show \
60+
--resource-group "$RESOURCE_GROUP" \
61+
--vnet-name "$VNET_NAME" \
62+
--name "$SUBNET_NAME" \
63+
--query id -o tsv)
64+
65+
# Create Private AKS Cluster
66+
az aks create \
67+
--resource-group "$RESOURCE_GROUP" \
68+
--name "$CLUSTER_NAME" \
69+
--location "$LOCATION" \
70+
--node-count "$NODE_COUNT" \
71+
--node-vm-size "$NODE_VM_SIZE" \
72+
--network-plugin azure \
73+
--vnet-subnet-id "$SUBNET_ID" \
74+
--enable-private-cluster \
75+
--enable-aad \
76+
--enable-azure-rbac \
77+
--generate-ssh-keys
78+
```
79+
80+
> **Note:** This may take 5-10 minutes.
81+
82+
## Step 4: Assign RBAC Roles to Current User
83+
84+
The current user needs two roles to manage the cluster:
85+
86+
| Role | Purpose |
87+
|------|---------|
88+
| Azure Kubernetes Service Cluster Admin Role | Get kubectl credentials |
89+
| Azure Kubernetes Service RBAC Cluster Admin | Perform cluster operations |
90+
91+
```bash
92+
# Get current user's Object ID
93+
USER_OBJECT_ID=$(az ad signed-in-user show --query id -o tsv)
94+
95+
# Get AKS Resource ID
96+
AKS_RESOURCE_ID=$(az aks show \
97+
--resource-group "$RESOURCE_GROUP" \
98+
--name "$CLUSTER_NAME" \
99+
--query id -o tsv)
100+
101+
# Assign Role 1: Azure Kubernetes Service Cluster Admin Role
102+
az role assignment create \
103+
--assignee "$USER_OBJECT_ID" \
104+
--role "Azure Kubernetes Service Cluster Admin Role" \
105+
--scope "$AKS_RESOURCE_ID"
106+
107+
# Assign Role 2: Azure Kubernetes Service RBAC Cluster Admin
108+
az role assignment create \
109+
--assignee "$USER_OBJECT_ID" \
110+
--role "Azure Kubernetes Service RBAC Cluster Admin" \
111+
--scope "$AKS_RESOURCE_ID"
112+
```
113+
114+
## Step 5: Get Kubectl Credentials
115+
116+
```bash
117+
# Create kubeconfig directory
118+
sudo mkdir -p /root/.kube
119+
120+
# Get credentials
121+
sudo az aks get-credentials \
122+
--resource-group "$RESOURCE_GROUP" \
123+
--name "$CLUSTER_NAME" \
124+
--overwrite-existing \
125+
--file /root/.kube/config
126+
127+
# Convert kubeconfig for Azure CLI auth
128+
sudo kubelogin convert-kubeconfig -l azurecli --kubeconfig /root/.kube/config
129+
```
130+
131+
## Step 6: Get Cluster Resource ID
132+
133+
Save this resource ID for use with the `private-join` and `private-leave` commands:
134+
135+
```bash
136+
az aks show \
137+
--resource-group "$RESOURCE_GROUP" \
138+
--name "$CLUSTER_NAME" \
139+
--query id -o tsv
140+
```
141+
142+
Example output:
143+
```
144+
/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/my-rg/providers/Microsoft.ContainerService/managedClusters/my-private-aks
145+
```
146+
147+
## Next Steps
148+
149+
### Join an edge node to the private cluster
150+
151+
```bash
152+
sudo ./aks-flex-node private-join \
153+
--aks-resource-id "/subscriptions/.../resourcegroups/.../providers/Microsoft.ContainerService/managedClusters/my-private-aks"
154+
```
155+
156+
### Leave the private cluster
157+
158+
```bash
159+
# Local cleanup (keep Gateway for other nodes)
160+
sudo ./aks-flex-node private-leave --mode=local
161+
162+
# Full cleanup (remove Gateway and all Azure resources)
163+
sudo ./aks-flex-node private-leave --mode=full \
164+
--aks-resource-id "/subscriptions/.../resourcegroups/.../providers/Microsoft.ContainerService/managedClusters/my-private-aks"
165+
```

0 commit comments

Comments
 (0)