Skip to content

Commit d621a5e

Browse files
committed
kmod-6.12-nvidia-r570: Add grid-license-check
Add a unit that checks for the license to be valid for GRID. Kubelet requires this unit so if the license is not present, then the node never joins the cluster. This prevents a situation where a node could fail to get a license, join the cluster, and then later have workloads start to fail due to the unlicensed status. Signed-off-by: Matthew Yeazel <yeazelm@amazon.com>
1 parent d42096d commit d621a5e

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
NVidiaEULAforAWS.pdf
22
COPYING
33
*.rpm
4+
NvidiaGridAWSUserLicenseAgreement.DOCX
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[Unit]
2+
Description=GRID License Check
3+
RefuseManualStart=true
4+
RefuseManualStop=true
5+
DefaultDependencies=no
6+
Before=kubelet.service
7+
After=nvidia-gridd.service
8+
Requires=nvidia-gridd.service
9+
10+
[Service]
11+
Type=oneshot
12+
ExecCondition=/usr/bin/ghostdog match-nvidia-driver grid
13+
# Query the driver; stdout is appended to /tmp/.nvidia-gridd-license (see StandardOutput= below).
14+
ExecStart=/usr/bin/nvidia-smi -q
15+
# Ensure that the captured-output file exists. Otherwise, grep fails on a missing file.
16+
ExecStart=-/usr/bin/touch /tmp/.nvidia-gridd-license
17+
# Succeed only if the captured output does not contain the word "Unlicensed".
18+
ExecStart=/usr/bin/grep -Fqvzw Unlicensed /tmp/.nvidia-gridd-license
19+
RemainAfterExit=true
20+
StandardOutput=append:/tmp/.nvidia-gridd-license
21+
Restart=on-failure
22+
RestartSec=1
23+
StartLimitBurst=120
24+
25+
[Install]
26+
RequiredBy=nvidia-k8s-device-plugin.service

packages/kmod-6.12-nvidia-r570/kmod-6.12-nvidia-r570.spec

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Source206: nvidia-persistenced.service
5656
Source207: fabricmanager.env
5757
Source208: gridd.conf
5858
Source209: nvidia-gridd.service
59+
Source210: grid-license-check.service
5960

6061
# NVIDIA tesla conf files from 300 to 399
6162
Source300: nvidia-tesla-tmpfiles.conf
@@ -410,7 +411,7 @@ install kernel-open/nvidia-drm.ko %{buildroot}%{_cross_datadir}/nvidia/grid/driv
410411
# Install nvidia-gridd and related files
411412
install -m 755 nvidia-gridd %{buildroot}%{_cross_bindir}/nvidia-gridd
412413
install -m 644 %{S:208} %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/nvidia/gridd.conf
413-
install -p -m 0644 %{S:209} %{buildroot}%{_cross_unitdir}
414+
install -p -m 0644 %{S:209} %{S:210} %{buildroot}%{_cross_unitdir}
414415
popd
415416
# End GRID driver
416417
%endif
@@ -748,6 +749,7 @@ popd
748749
%{_cross_bindir}/nvidia-gridd
749750
%{_cross_factorydir}%{_cross_sysconfdir}/nvidia/gridd.conf
750751
%{_cross_unitdir}/nvidia-gridd.service
752+
%{_cross_unitdir}/grid-license-check.service
751753

752754
%{_cross_datadir}/nvidia/grid/drivers/nvidia.ko
753755
%{_cross_datadir}/nvidia/grid/drivers/nvidia-uvm.ko

0 commit comments

Comments
 (0)