Skip to content

Commit db29ced

Browse files
committed
Add gpu_devices policy for device-level GPU isolation
Signed-off-by: Cong Wang <cwang@multikernel.io>
1 parent f255e0a commit db29ced

File tree

4 files changed

+99
-0
lines changed

4 files changed

+99
-0
lines changed

src/sandlock/_context.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,25 @@ def __enter__(self) -> "SandboxContext":
633633
writable = list(self._policy.fs_writable)
634634
readable = list(self._policy.fs_readable)
635635
denied = list(self._policy.fs_denied)
636+
637+
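# GPU device access: device nodes are added as writable, driver/sysfs paths as readable
# NOTE(review): "/dev/nvidia*" looks like a glob; Landlock rules name concrete paths, so confirm it is expanded before use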
638+
if self._policy.gpu_devices is not None:
639+
_gpu_rw = [
640+
"/dev/nvidia*", "/dev/nvidiactl",
641+
"/dev/nvidia-uvm", "/dev/nvidia-uvm-tools",
642+
"/dev/dri",
643+
]
644+
_gpu_ro = [
645+
"/proc/driver/nvidia",
646+
"/sys/bus/pci/devices",
647+
"/sys/module/nvidia",
648+
]
649+
for p in _gpu_rw:
650+
if p not in writable:
651+
writable.append(p)
652+
for p in _gpu_ro:
653+
if p not in readable:
654+
readable.append(p)
636655
bind_ports = self._policy.bind_ports() or None
637656
connect_ports = self._policy.connect_ports() or None
638657
if (writable or readable or bind_ports or connect_ports
@@ -747,6 +766,14 @@ def __enter__(self) -> "SandboxContext":
747766
if self._policy.env:
748767
os.environ.update(self._policy.env)
749768

769+
# 8b. GPU device visibility: export the selected indices for CUDA and ROCm; an empty list leaves both variables untouched
770+
if self._policy.gpu_devices is not None:
771+
devs = self._policy.gpu_devices
772+
if len(devs) > 0:
773+
vis = ",".join(str(d) for d in devs)
774+
os.environ["CUDA_VISIBLE_DEVICES"] = vis
775+
os.environ["ROCR_VISIBLE_DEVICES"] = vis
776+
750777
# 9b. Disable vDSO for time virtualization
751778
if (self._notif_policy is not None
752779
and self._notif_policy.time_start is not None):

src/sandlock/policy.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,14 @@ class Policy:
195195
pages, which causes nondeterministic memory layout, RSS measurements,
196196
and page fault timing. Applied via prctl(PR_SET_THP_DISABLE)."""
197197

198+
# GPU access
199+
gpu_devices: Sequence[int] | None = None
200+
"""GPU device indices visible to the sandbox. When set, Landlock
201+
rules are added for GPU device files (/dev/nvidia*, /dev/dri/*) and
202+
driver paths (/proc/driver/nvidia, /sys/bus/pci/devices), and
203+
``CUDA_VISIBLE_DEVICES`` / ``ROCR_VISIBLE_DEVICES`` are set.
204+
``None`` = no GPU access. ``[]`` (empty list) = all GPUs visible; in that case the two environment variables are left untouched."""
205+
198206
# Optional chroot
199207
chroot: str | None = None
200208
"""Path to chroot into before applying other confinement."""

tests/test_policy.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,20 @@ def test_env_set(self):
153153
assert p.env == {"FOO": "bar", "BAZ": "qux"}
154154

155155

156+
class TestGpuDevices:
    """Policy-level checks for the ``gpu_devices`` field."""

    def test_default_none(self):
        """A default-constructed policy reports ``gpu_devices`` as ``None``."""
        policy = Policy()
        assert policy.gpu_devices is None

    def test_specific_devices(self):
        """An explicit index list compares equal to what was passed in."""
        requested = [0, 2]
        policy = Policy(gpu_devices=requested)
        assert policy.gpu_devices == [0, 2]

    def test_all_gpus(self):
        """An empty list is kept as an empty list, distinct from ``None``."""
        requested = []
        policy = Policy(gpu_devices=requested)
        assert policy.gpu_devices == []
168+
169+
156170
class TestIpcScoping:
157171
def test_defaults_to_off(self):
158172
p = Policy()

tests/test_sandbox.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,3 +482,53 @@ def test_throttle_result_correct(self):
482482
result = Sandbox(Policy(max_cpu=50)).run(["python3", "-c", self._BURN_CODE])
483483
assert result.success
484484
assert result.stdout.strip() == b"20000000"
485+
486+
487+
class TestGpuDevices:
    """Test GPU device isolation via CUDA_VISIBLE_DEVICES / ROCR_VISIBLE_DEVICES.

    Each test runs a one-line Python snippet inside the sandbox that prints a
    single environment variable (or ``UNSET`` when it is absent) and compares
    the captured stdout with the expectation.
    """

    _GPU_POLICY_READABLE = _PYTHON_READABLE

    @staticmethod
    def _print_env_code(name):
        """Return a snippet that prints environment variable *name* or ``UNSET``."""
        return (
            "import os; "
            f"print(os.environ.get('{name}', 'UNSET'))"
        )

    @staticmethod
    def _untouched_outputs(name):
        """Return the outputs that mean the sandbox did not set *name* itself.

        ``UNSET`` is always acceptable. If the test runner's own environment
        already exports *name* (common on GPU hosts), the inherited value is
        acceptable too, so the "not set" tests do not depend on the host.
        NOTE(review): presumes the sandboxed process may inherit the runner's
        environment; confirm against the Sandbox implementation.
        """
        import os  # local import: the file's top-level imports are not visible here

        outputs = {b"UNSET"}
        inherited = os.environ.get(name)
        if inherited is not None:
            outputs.add(inherited.strip().encode())
        return outputs

    def test_gpu_devices_sets_env(self):
        """gpu_devices=[0,2] sets CUDA_VISIBLE_DEVICES=0,2."""
        code = self._print_env_code("CUDA_VISIBLE_DEVICES")
        policy = Policy(gpu_devices=[0, 2], fs_readable=self._GPU_POLICY_READABLE)
        result = Sandbox(policy).run(["python3", "-c", code])
        assert result.success, f"failed: {result.stderr}"
        assert result.stdout.strip() == b"0,2"

    def test_gpu_devices_sets_rocr(self):
        """gpu_devices also sets ROCR_VISIBLE_DEVICES for AMD GPUs."""
        code = self._print_env_code("ROCR_VISIBLE_DEVICES")
        policy = Policy(gpu_devices=[1], fs_readable=self._GPU_POLICY_READABLE)
        result = Sandbox(policy).run(["python3", "-c", code])
        assert result.success, f"failed: {result.stderr}"
        assert result.stdout.strip() == b"1"

    def test_gpu_devices_empty_no_env(self):
        """gpu_devices=[] (all GPUs) does not set CUDA_VISIBLE_DEVICES."""
        code = self._print_env_code("CUDA_VISIBLE_DEVICES")
        policy = Policy(gpu_devices=[], fs_readable=self._GPU_POLICY_READABLE)
        result = Sandbox(policy).run(["python3", "-c", code])
        assert result.success, f"failed: {result.stderr}"
        assert result.stdout.strip() in self._untouched_outputs("CUDA_VISIBLE_DEVICES")

    def test_no_gpu_no_env(self):
        """gpu_devices=None (default) does not set CUDA_VISIBLE_DEVICES."""
        code = self._print_env_code("CUDA_VISIBLE_DEVICES")
        policy = Policy()
        result = Sandbox(policy).run(["python3", "-c", code])
        assert result.success, f"failed: {result.stderr}"
        assert result.stdout.strip() in self._untouched_outputs("CUDA_VISIBLE_DEVICES")

0 commit comments

Comments
 (0)