Skip to content

Commit 30b19e2

Browse files
committed
Set dcgm-exporter pod spec DNS policy when setting hostNetwork
Mirroring upstream dcgm-exporter helm chart behavior, set dcgm-exporter Pod `spec.DNSPolicy` to default to `DNSClusterFirstWithHostNet` when setting `spec.HostNetwork` to true.
1 parent d858d24 commit 30b19e2

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

controllers/object_controls.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,12 +1696,14 @@ func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
16961696
if remoteEngine != "" && strings.HasPrefix(remoteEngine, "localhost") {
16971697
// enable hostNetwork for communication with external DCGM using localhost
16981698
obj.Spec.Template.Spec.HostNetwork = true
1699+
obj.Spec.Template.Spec.DNSPolicy = corev1.DNSClusterFirstWithHostNet
16991700
}
17001701
}
17011702
// set hostNetwork if specified for DCGM Exporter (if it is already enabled above,
17021703
// do not touch the value)
17031704
if config.DCGMExporter.IsHostNetworkEnabled() {
17041705
obj.Spec.Template.Spec.HostNetwork = true
1706+
obj.Spec.Template.Spec.DNSPolicy = corev1.DNSClusterFirstWithHostNet
17051707
}
17061708

17071709
setRuntimeClassName(&obj.Spec.Template.Spec, config, n.runtime)

controllers/transforms_test.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ func (d Daemonset) WithHostNetwork(enabled bool) Daemonset {
168168
return d
169169
}
170170

171+
// WithDNSPolicy sets the DNS policy on d's pod template spec to policy and
// returns d, so the call can be chained with the other With* builder helpers.
func (d Daemonset) WithDNSPolicy(policy corev1.DNSPolicy) Daemonset {
172+
d.Spec.Template.Spec.DNSPolicy = policy
173+
return d
174+
}
175+
171176
func (d Daemonset) WithHostPID(enabled bool) Daemonset {
172177
d.Spec.Template.Spec.HostPID = enabled
173178
return d
@@ -1301,7 +1306,7 @@ func TestTransformDCGMExporter(t *testing.T) {
13011306
{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "nvidia-dcgm:5555"},
13021307
{Name: "foo", Value: "bar"},
13031308
},
1304-
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").WithHostNetwork(true),
1309+
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").WithHostNetwork(true).WithDNSPolicy(corev1.DNSClusterFirstWithHostNet),
13051310
},
13061311
{
13071312
description: "transform dcgm exporter with hostNetwork disabled",
@@ -1363,6 +1368,42 @@ func TestTransformDCGMExporter(t *testing.T) {
13631368
},
13641369
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").WithHostNetwork(false),
13651370
},
1371+
{
1372+
description: "transform dcgm exporter with dcgm running on the host itself(DGX BaseOS)",
1373+
ds: NewDaemonset().
1374+
WithContainer(corev1.Container{
1375+
Name: "dcgm-exporter",
1376+
Env: []corev1.EnvVar{{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "localhost:5555"}},
1377+
}).
1378+
WithContainer(corev1.Container{Name: "dummy"}),
1379+
cpSpec: &gpuv1.ClusterPolicySpec{
1380+
DCGM: gpuv1.DCGMSpec{
1381+
Enabled: newBoolPtr(false),
1382+
},
1383+
DCGMExporter: gpuv1.DCGMExporterSpec{
1384+
Repository: "nvcr.io/nvidia/cloud-native",
1385+
Image: "dcgm-exporter",
1386+
Version: "v1.0.0",
1387+
ImagePullPolicy: "IfNotPresent",
1388+
ImagePullSecrets: []string{"pull-secret"},
1389+
Args: []string{"--fail-on-init-error=false"},
1390+
Env: []gpuv1.EnvVar{
1391+
{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "localhost:5555"},
1392+
{Name: "foo", Value: "bar"},
1393+
},
1394+
},
1395+
},
1396+
expectedDs: NewDaemonset().WithContainer(corev1.Container{
1397+
Name: "dcgm-exporter",
1398+
Image: "nvcr.io/nvidia/cloud-native/dcgm-exporter:v1.0.0",
1399+
ImagePullPolicy: corev1.PullIfNotPresent,
1400+
Args: []string{"--fail-on-init-error=false"},
1401+
Env: []corev1.EnvVar{
1402+
{Name: "DCGM_REMOTE_HOSTENGINE_INFO", Value: "localhost:5555"},
1403+
{Name: "foo", Value: "bar"},
1404+
},
1405+
}).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia").WithHostNetwork(true).WithDNSPolicy(corev1.DNSClusterFirstWithHostNet),
1406+
},
13661407
{
13671408
description: "transform dcgm exporter, openshift",
13681409
openshiftVersion: "1.0.0",

0 commit comments

Comments
 (0)