Skip to content
This repository was archived by the owner on Dec 9, 2025. It is now read-only.

Commit f257eb8

Browse files
committed
dranetctl: do not require flag for network interfaces
list acceleratorpod per cluster dranetctl for networks Change-Id: I11afcc7f063ebbe13548ffd13e2592d4bac6923d
1 parent 65feb22 commit f257eb8

File tree

2 files changed

+76
-10
lines changed

2 files changed

+76
-10
lines changed

pkg/dranetctl/gke/acceleratorpod.go

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func init() {
4141
acceleratorpodCmd.AddCommand(acceleratorpodCreateCmd)
4242
acceleratorpodCmd.AddCommand(acceleratorpodGetCmd)
4343
acceleratorpodCmd.AddCommand(acceleratorpodDeleteCmd)
44+
acceleratorpodCmd.AddCommand(acceleratorpodListCmd)
4445
}
4546

4647
var (
@@ -49,6 +50,57 @@ var (
4950
additionalNetworkInterfaces int
5051
)
5152

53+
// acceleratorpodListCmd represents the list command for accelerator pods (node pools)
54+
var acceleratorpodListCmd = &cobra.Command{
55+
Use: "list",
56+
Short: "List accelerator node pools in a GKE cluster",
57+
Long: `Lists all GKE node pools that were created and tagged by dranetctl
58+
as accelerator pods. It identifies these node pools by looking for the
59+
'dra.net/acceleratorpod: "true"' label.`,
60+
RunE: func(cmd *cobra.Command, args []string) error {
61+
if clusterName == "" {
62+
return fmt.Errorf("cluster name not explicitly provided")
63+
}
64+
// "-" is treated as "no location given"; a concrete location is needed to build the cluster resource name
65+
if location == "-" {
66+
return fmt.Errorf("location for cluster %s not specified", clusterName)
67+
}
68+
ctx := context.Background()
69+
70+
// Get the cluster to list the node pools
71+
req := &containerpb.GetClusterRequest{
72+
Name: fmt.Sprintf("projects/%s/locations/%s/clusters/%s", projectID, location, clusterName),
73+
}
74+
75+
cluster, err := ContainersClient.GetCluster(ctx, req)
76+
if err != nil {
77+
return fmt.Errorf("failed to get cluster: %w", err)
78+
}
79+
80+
var acceleratorNodePools []string
81+
for _, np := range cluster.NodePools {
82+
if np.Config != nil && np.Config.Labels != nil {
83+
if val, ok := np.Config.Labels["dra.net/acceleratorpod"]; ok && val == "true" {
84+
acceleratorNodePools = append(acceleratorNodePools, np.Name)
85+
}
86+
}
87+
}
88+
89+
if len(acceleratorNodePools) == 0 {
90+
fmt.Printf("No accelerator node pools found in cluster %s with label dra.net/acceleratorpod: \"true\".\n", clusterName)
91+
return nil
92+
}
93+
94+
fmt.Printf("There are %d dranet accelerator node pools in cluster %s:\n", len(acceleratorNodePools), clusterName)
95+
fmt.Println("---")
96+
for _, name := range acceleratorNodePools {
97+
fmt.Println(name)
98+
}
99+
100+
return nil
101+
},
102+
}
103+
52104
// acceleratorpodCreateCmd represents the create subcommand for acceleratorpod
53105
var acceleratorpodCreateCmd = &cobra.Command{
54106
Use: "create <acceleratorpod_name>",
@@ -109,7 +161,9 @@ network-aware placement. This group of machines is referred to as an accelerator
109161
Config: &containerpb.NodeConfig{
110162
MachineType: machineType,
111163
// TODO allow to set labels and taints
112-
Labels: map[string]string{"dra.net/acceleratorpod": "true"}},
164+
Labels: map[string]string{"dra.net/acceleratorpod": "true"},
165+
ResourceLabels: map[string]string{"dra.net/acceleratorpod": "true"},
166+
},
113167
NetworkConfig: &containerpb.NodeNetworkConfig{
114168
AdditionalNodeNetworkConfigs: additionalNetworkConfigs,
115169
},
@@ -160,7 +214,6 @@ func init() {
160214
// Mark required flags for the create command
161215
_ = acceleratorpodCreateCmd.MarkFlagRequired("machine-type")
162216
_ = acceleratorpodCreateCmd.MarkFlagRequired("node-count")
163-
_ = acceleratorpodCreateCmd.MarkFlagRequired("additional-network-interfaces")
164217
}
165218

166219
// acceleratorpodGetCmd represents the get subcommand for acceleratorpod

pkg/dranetctl/gke/networks.go

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,13 @@ import (
3636

3737
const (
3838
// assume total ownership of these networks by dranet
39-
wellKnownPrefix = "dranet"
39+
wellKnownPrefix = "dranetctl"
4040
)
4141

4242
var (
4343
// extract region and subnet name from URL
44-
reSubnets = regexp.MustCompile(`/regions/([^/]+)/subnetworks/([^/]+)$`)
44+
reSubnets = regexp.MustCompile(`/regions/([^/]+)/subnetworks/([^/]+)$`)
45+
acceleratorPodNameFlag string
4546
)
4647

4748
// getRegion gets the region part from a location
@@ -92,7 +93,7 @@ func createAcceleratorNetworks(ctx context.Context, acceleratorpodName string, n
9293
}
9394

9495
// Create Subnetwork
95-
// get a non overllaping range from the Class E
96+
// get a non-overlapping range from the Class E
9697
// TODO: this needs to be handled better
9798
networkURL := fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/%s/global/networks/%s", projectID, networkName)
9899
cidr := fmt.Sprintf("255.255.%d.0/24", 20+i)
@@ -311,7 +312,7 @@ func deleteNetwork(ctx context.Context, networkName string) error {
311312
}
312313

313314
// listNetworks lists the dranet networks (names starting with wellKnownPrefix), optionally filtered by accelerator pod name
314-
func listNetworks(ctx context.Context) []string {
315+
func listNetworks(ctx context.Context, acceleratorPodName string) []string {
315316
output := []string{}
316317
// Prepare the List request.
317318
req := &computepb.ListNetworksRequest{
@@ -330,9 +331,18 @@ func listNetworks(ctx context.Context) []string {
330331
return output
331332
}
332333

333-
if strings.HasPrefix(*network.Name, wellKnownPrefix) {
334-
output = append(output, *network.Name)
334+
// ownership is assumed from the well-known prefix; skip networks without it
335+
if !strings.HasPrefix(*network.Name, wellKnownPrefix) {
336+
continue
337+
}
338+
// filter by accelerator pod name, if one was provided
339+
if acceleratorPodName != "" &&
340+
!strings.Contains(*network.Name, obtainHexHash(acceleratorPodName)) {
341+
continue
335342
}
343+
344+
output = append(output, *network.Name)
345+
336346
klog.V(2).Infof("Name: %s\n", *network.Name)
337347
klog.V(2).Infof(" ID: %d\n", network.Id)
338348
klog.V(2).Infof(" SelfLink: %s\n", *network.SelfLink)
@@ -358,9 +368,10 @@ var cleanupNetworksCmd = &cobra.Command{
358368
Short: "Deletes all Google Cloud networks labeled as managed by DRA-Net",
359369
Long: `This command lists all Google Cloud networks in the specified project and deletes those created by dranetctl.
360370
Use with caution, as this action is irreversible.`,
371+
Args: cobra.MaximumNArgs(0),
361372
Run: func(cmd *cobra.Command, args []string) {
362373
ctx := cmd.Context()
363-
networks := listNetworks(ctx)
374+
networks := listNetworks(ctx, acceleratorPodNameFlag)
364375
for _, network := range networks {
365376
klog.Infof("deleting network %s\n", network)
366377
err := deleteNetwork(ctx, network)
@@ -374,9 +385,10 @@ Use with caution, as this action is irreversible.`,
374385
var listNetworksCmd = &cobra.Command{
375386
Use: "list",
376387
Short: "Lists all Google Cloud networks in a project",
388+
Args: cobra.MaximumNArgs(0), // no positional arguments; use the --acceleratorpod flag to filter by accelerator pod
377389
Run: func(cmd *cobra.Command, args []string) {
378390
ctx := cmd.Context()
379-
networks := listNetworks(ctx)
391+
networks := listNetworks(ctx, acceleratorPodNameFlag)
380392
fmt.Printf("There are %d dranet networks\n", len(networks))
381393
fmt.Println("---")
382394
for _, network := range networks {
@@ -388,4 +400,5 @@ var listNetworksCmd = &cobra.Command{
388400
func init() {
389401
networksCmd.AddCommand(cleanupNetworksCmd)
390402
networksCmd.AddCommand(listNetworksCmd)
403+
networksCmd.PersistentFlags().StringVar(&acceleratorPodNameFlag, "acceleratorpod", "", "Name of the accelerator pod to filter networks")
391404
}

0 commit comments

Comments
 (0)