Skip to content

Commit e850619

Browse files
authored
Merge pull request #433 from klueska/imex-with-hostnames
Add support for using DNSNames instead of raw IPs for IMEX daemons
2 parents fa6b5e3 + 9715887 commit e850619

File tree

12 files changed

+393
-19
lines changed

12 files changed

+393
-19
lines changed

api/nvidia.com/resource/v1beta1/computedomain.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,11 @@ type ComputeDomainNode struct {
9191
Name string `json:"name"`
9292
IPAddress string `json:"ipAddress"`
9393
CliqueID string `json:"cliqueID"`
94+
// The Index field is used to ensure a consistent IP-to-DNS name
95+
// mapping across all machines within an IMEX domain. Each node's index
96+
// directly determines its DNS name. It is marked as optional (but not
97+
// omitempty) in order to support downgrades and avoid an API bump.
98+
// +optional
99+
// +kubebuilder:validation:Optional
100+
Index int `json:"index"`
94101
}

cmd/compute-domain-controller/controller.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ type ManagerConfig struct {
3838
// imageName is the full image name to use when rendering templates
3939
imageName string
4040

41+
// maxNodesPerIMEXDomain is the maximum number of nodes per IMEX domain to allocate
42+
maxNodesPerIMEXDomain int
43+
4144
// clientsets provides access to various Kubernetes API client interfaces
4245
clientsets flags.ClientSets
4346

@@ -67,12 +70,13 @@ func (c *Controller) Run(ctx context.Context) error {
6770
workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter())
6871

6972
managerConfig := &ManagerConfig{
70-
driverName: c.config.driverName,
71-
driverNamespace: c.config.flags.namespace,
72-
additionalNamespaces: c.config.flags.additionalNamespaces.Value(),
73-
imageName: c.config.flags.imageName,
74-
clientsets: c.config.clientsets,
75-
workQueue: workQueue,
73+
driverName: c.config.driverName,
74+
driverNamespace: c.config.flags.namespace,
75+
additionalNamespaces: c.config.flags.additionalNamespaces.Value(),
76+
imageName: c.config.flags.imageName,
77+
maxNodesPerIMEXDomain: c.config.flags.maxNodesPerIMEXDomain,
78+
clientsets: c.config.clientsets,
79+
workQueue: workQueue,
7680
}
7781

7882
cdManager := NewComputeDomainManager(managerConfig)

cmd/compute-domain-controller/daemonset.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ type DaemonSetTemplateData struct {
5050
ComputeDomainLabelValue types.UID
5151
ResourceClaimTemplateName string
5252
ImageName string
53+
MaxNodesPerIMEXDomain int
5354
FeatureGates map[string]bool
5455
}
5556

@@ -200,6 +201,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, cd *nvapi.ComputeDomain)
200201
ComputeDomainLabelValue: cd.UID,
201202
ResourceClaimTemplateName: rct.Name,
202203
ImageName: m.config.imageName,
204+
MaxNodesPerIMEXDomain: m.config.maxNodesPerIMEXDomain,
203205
FeatureGates: featuregates.ToMap(),
204206
}
205207

cmd/compute-domain-controller/main.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,23 @@ import (
4444

4545
const (
4646
DriverName = "compute-domain.nvidia.com"
47+
48+
// This constant provides a reasonable default for the maximum size of
49+
// a given IMEX Domain. On GB200 and GB300 the limit is 18, so we pick
50+
// this for now. It can be overridden via an environment variable or
51+
// command line argument as required.
52+
defaultMaxNodesPerIMEXDomain = 18
4753
)
4854

4955
type Flags struct {
5056
kubeClientConfig flags.KubeClientConfig
5157
loggingConfig *flags.LoggingConfig
5258
featureGateConfig *flags.FeatureGateConfig
5359

54-
podName string
55-
namespace string
56-
imageName string
60+
podName string
61+
namespace string
62+
imageName string
63+
maxNodesPerIMEXDomain int
5764

5865
httpEndpoint string
5966
metricsPath string
@@ -103,6 +110,13 @@ func newApp() *cli.App {
103110
Destination: &flags.imageName,
104111
EnvVars: []string{"IMAGE_NAME"},
105112
},
113+
&cli.IntFlag{
114+
Name: "max-nodes-per-imex-domain",
115+
Usage: "The maximum number of possible nodes per IMEX domain",
116+
Value: defaultMaxNodesPerIMEXDomain,
117+
EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"},
118+
Destination: &flags.maxNodesPerIMEXDomain,
119+
},
106120
&cli.StringFlag{
107121
Category: "HTTP server:",
108122
Name: "http-endpoint",

cmd/compute-domain-daemon/computedomain.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,16 @@ func (m *ComputeDomainManager) UpdateComputeDomainNodeInfo(ctx context.Context,
218218

219219
// If there isn't one, create one and append it to the list
220220
if nodeInfo == nil {
221+
// Get the next available index for this new node
222+
nextIndex, err := getNextAvailableIndex(newCD.Status.Nodes, m.config.maxNodesPerIMEXDomain)
223+
if err != nil {
224+
return fmt.Errorf("error getting next available index: %w", err)
225+
}
226+
221227
nodeInfo = &nvapi.ComputeDomainNode{
222228
Name: m.config.nodeName,
223229
CliqueID: m.config.cliqueID,
230+
Index: nextIndex,
224231
}
225232
newCD.Status.Nodes = append(newCD.Status.Nodes, nodeInfo)
226233
}
@@ -243,6 +250,46 @@ func (m *ComputeDomainManager) UpdateComputeDomainNodeInfo(ctx context.Context,
243250
return nil
244251
}
245252

253+
// The Index field in the Nodes section of the ComputeDomain status ensures a
254+
// consistent IP-to-DNS name mapping across all machines within a given IMEX
255+
// domain. Each node's index directly determines its DNS name using the format
256+
// "compute-domain-daemon-{index}".
257+
//
258+
// getNextAvailableIndex finds the next available index for the current node by
259+
// seeing which ones are already taken by other nodes in the ComputeDomain
260+
// status. It fills in gaps where it can, and returns an error if no index is
261+
// available within maxNodesPerIMEXDomain.
262+
//
263+
// By filling gaps in the index sequence (rather than always appending), we
264+
// maintain stable DNS names for existing nodes even when intermediate nodes
265+
// are removed from the compute domain and new ones are added.
266+
func getNextAvailableIndex(nodes []*nvapi.ComputeDomainNode, maxNodesPerIMEXDomain int) (int, error) {
267+
if len(nodes) >= maxNodesPerIMEXDomain {
268+
return -1, fmt.Errorf("cannot add more nodes, already at maximum (%d)", maxNodesPerIMEXDomain)
269+
}
270+
271+
// Create a map to track used indices
272+
usedIndices := make(map[int]bool)
273+
274+
// Collect all currently used indices
275+
for _, node := range nodes {
276+
usedIndices[node.Index] = true
277+
}
278+
279+
// Find the next available index, starting from 0 and filling gaps
280+
nextIndex := 0
281+
for usedIndices[nextIndex] {
282+
nextIndex++
283+
}
284+
285+
// Ensure nextIndex is within the range 0..maxNodesPerIMEXDomain
286+
if nextIndex < 0 || nextIndex >= maxNodesPerIMEXDomain {
287+
return -1, fmt.Errorf("no available indices within maxNodesPerIMEXDomain (%d)", maxNodesPerIMEXDomain)
288+
}
289+
290+
return nextIndex, nil
291+
}
292+
246293
// If we've reached the expected number of nodes and if there was actually a
247294
// change compared to the previously known set of nodes: pass info to IMEX
248295
// daemon controller.

cmd/compute-domain-daemon/controller.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ type ManagerConfig struct {
3535
computeDomainNamespace string
3636
cliqueID string
3737
podIP string
38+
maxNodesPerIMEXDomain int
3839
}
3940

4041
// ControllerConfig holds the configuration for the controller.
@@ -45,6 +46,7 @@ type ControllerConfig struct {
4546
computeDomainNamespace string
4647
cliqueID string
4748
podIP string
49+
maxNodesPerIMEXDomain int
4850
}
4951

5052
// Controller manages the lifecycle of compute domain operations.
@@ -73,6 +75,7 @@ func NewController(config *ControllerConfig) (*Controller, error) {
7375
computeDomainNamespace: config.computeDomainNamespace,
7476
cliqueID: config.cliqueID,
7577
podIP: config.podIP,
78+
maxNodesPerIMEXDomain: config.maxNodesPerIMEXDomain,
7679
}
7780

7881
controller := &Controller{

0 commit comments

Comments
 (0)