Skip to content

Commit eb85b6f

Browse files
committed
feat: enforce container provenance in docker and gpu executors
1 parent 615f280 commit eb85b6f

File tree

5 files changed

+129
-0
lines changed

5 files changed

+129
-0
lines changed

src/nimbus/runners/docker.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from ..common.schemas import JobAssignment
1919
from ..common.settings import HostAgentSettings
20+
from ..common.supply_chain import ImagePolicy, ensure_provenance
2021
from ..common.networking import (
2122
MetadataEndpointDenylist,
2223
EgressPolicyPack,
@@ -37,6 +38,9 @@ def __init__(self, settings: Optional[HostAgentSettings] = None) -> None:
3738
self._job_workspaces: dict[int, Path] = {}
3839
self._egress_enforcer: Optional[OfflineEgressEnforcer] = None
3940
self._container_user: Optional[str] = None
41+
self._image_policy: Optional[ImagePolicy] = None
42+
self._cosign_key: Optional[Path] = None
43+
self._require_provenance: bool = False
4044

4145
def initialize(self, settings: HostAgentSettings) -> None:
4246
"""Initialize the executor with settings."""
@@ -58,6 +62,11 @@ def initialize(self, settings: HostAgentSettings) -> None:
5862
# Create workspace directory
5963
settings.docker_workspace_path.mkdir(parents=True, exist_ok=True)
6064
self._container_user = settings.docker_container_user
65+
self._image_policy = ImagePolicy.from_paths(
66+
settings.image_allow_list_path, settings.image_deny_list_path
67+
)
68+
self._cosign_key = settings.cosign_certificate_authority
69+
self._require_provenance = settings.provenance_required
6170

6271
# Initialize egress enforcer (similar to HostAgent)
6372
metadata_denylist = MetadataEndpointDenylist(settings.metadata_endpoint_denylist)
@@ -137,6 +146,7 @@ async def run(
137146
try:
138147
# Determine container image
139148
image = self._get_container_image(job)
149+
self._verify_image(image)
140150

141151
# Pull image if not present locally (for better performance)
142152
await self._ensure_image(image)
@@ -275,6 +285,17 @@ async def _ensure_image(self, image: str) -> None:
275285
except Exception as exc:
276286
LOGGER.error("Failed to pull image", image=image, error=str(exc))
277287
raise RuntimeError(f"Failed to pull image {image}: {exc}") from exc
288+
def _verify_image(self, image: str) -> None:
    """Run the image policy / provenance check for ``image``.

    The policy is normally loaded in ``initialize``. If it has not been
    loaded yet, it is loaded lazily from the stored settings. When no
    settings are available either, an empty ``ImagePolicy`` is used, which
    mirrors the GPU executor's behaviour instead of failing with an
    ``AttributeError`` on ``None``.

    Args:
        image: Container image reference about to be pulled and run.

    Raises:
        Whatever ``ensure_provenance`` raises -- presumably when the image
        is rejected by the policy or fails verification (confirm against
        ``common.supply_chain``).
    """
    if not self._image_policy and self._settings is not None:
        self._image_policy = ImagePolicy.from_paths(
            self._settings.image_allow_list_path,
            self._settings.image_deny_list_path,
        )
    # NOTE(review): the lazy path only loads the policy; the cosign key and
    # the require-provenance flag are still whatever __init__/initialize
    # left on the instance -- confirm that is intended.
    ensure_provenance(
        image,
        self._image_policy or ImagePolicy(set(), set()),
        public_key_path=self._cosign_key,
        require_provenance=self._require_provenance,
    )
278299

279300
async def prepare_warm_instance(self, instance_id: str) -> dict:
280301
"""Prepare a warm Docker instance (primarily pre-pulled images)."""

src/nimbus/runners/gpu.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from ..common.schemas import JobAssignment
2121
from ..common.settings import HostAgentSettings
22+
from ..common.supply_chain import ImagePolicy, ensure_provenance
2223
from .base import Executor, RunResult
2324

2425
LOGGER = structlog.get_logger("nimbus.runners.gpu")
@@ -53,6 +54,9 @@ def __init__(self, settings: Optional[HostAgentSettings] = None) -> None:
5354
self._available_gpus: Dict[str, GPUInfo] = {}
5455
self._gpu_allocations: Dict[int, List[str]] = {} # job_id -> gpu_uuids
5556
self._container_user: Optional[str] = None
57+
self._image_policy: Optional[ImagePolicy] = None
58+
self._cosign_key: Optional[Path] = None
59+
self._require_provenance: bool = False
5660

5761
def initialize(self, settings: HostAgentSettings) -> None:
5862
"""Initialize the GPU executor."""
@@ -78,6 +82,11 @@ def initialize(self, settings: HostAgentSettings) -> None:
7882
# Create workspace directory
7983
settings.docker_workspace_path.mkdir(parents=True, exist_ok=True)
8084
self._container_user = settings.docker_container_user
85+
self._image_policy = ImagePolicy.from_paths(
86+
settings.image_allow_list_path, settings.image_deny_list_path
87+
)
88+
self._cosign_key = settings.cosign_certificate_authority
89+
self._require_provenance = settings.provenance_required
8190

8291
@property
8392
def name(self) -> str:
@@ -226,6 +235,7 @@ async def run(
226235
try:
227236
# Get container image
228237
image = self._get_gpu_container_image(job)
238+
self._verify_image(image)
229239

230240
# Ensure image is available
231241
await self._ensure_image(image)
@@ -449,3 +459,15 @@ async def _ensure_image(self, image: str) -> None:
449459
except Exception as exc:
450460
LOGGER.error("Failed to pull GPU image", image=image, error=str(exc))
451461
raise RuntimeError(f"Failed to pull GPU image {image}: {exc}") from exc
462+
463+
def _verify_image(self, image: str) -> None:
    """Run the image policy / provenance check for a GPU container image.

    Uses the policy cached on the executor. If none is cached and settings
    are available, the policy is loaded from the configured allow/deny list
    paths and cached. If there is still no policy, an empty one is passed
    to ``ensure_provenance``.

    Args:
        image: Container image reference about to be pulled and run.
    """
    policy = self._image_policy
    if not policy and self._settings:
        # Lazy load: initialize() normally populates this.
        policy = ImagePolicy.from_paths(
            self._settings.image_allow_list_path,
            self._settings.image_deny_list_path,
        )
        self._image_policy = policy

    if not policy:
        # No settings to load from: fall back to an empty policy.
        policy = ImagePolicy(set(), set())

    ensure_provenance(
        image,
        policy,
        public_key_path=self._cosign_key,
        require_provenance=self._require_provenance,
    )

tests/test_docker_executor.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ def mock_settings():
2525
settings.egress_policy_pack = None
2626
settings.offline_mode = False
2727
settings.artifact_registry_allow_list = []
28+
settings.image_allow_list_path = None
29+
settings.image_deny_list_path = None
30+
settings.cosign_certificate_authority = None
31+
settings.provenance_required = False
2832
return settings
2933

3034

@@ -183,6 +187,39 @@ def test_get_container_image_default(mock_settings, sample_job):
183187
assert image == mock_settings.docker_default_image
184188

185189

190+
@pytest.mark.asyncio
@patch('src.nimbus.runners.docker.docker.DockerClient')
async def test_docker_executor_enforces_provenance(mock_docker_client, tmp_path, mock_settings, sample_job, monkeypatch):
    """DockerExecutor.run must call ensure_provenance with the configured key and flag."""
    cosign_key = tmp_path / "cosign.pub"
    cosign_key.write_text("public key", encoding="utf-8")

    mock_settings.image_allow_list_path = None
    mock_settings.image_deny_list_path = None
    mock_settings.cosign_certificate_authority = cosign_key
    mock_settings.provenance_required = True

    executor = DockerExecutor()

    mock_client = Mock()
    mock_docker_client.return_value = mock_client
    mock_client.ping.return_value = True
    mock_client.networks.get.return_value = Mock()
    mock_client.containers.create.return_value = Mock(wait=lambda timeout: {'StatusCode': 0}, logs=lambda **kw: b'')

    with patch('src.nimbus.runners.docker.EgressPolicyPack'), patch('src.nimbus.runners.docker.MetadataEndpointDenylist'):
        executor.initialize(mock_settings)

    sample_job.labels = ["nimbus", "docker"]

    with patch('src.nimbus.runners.docker.ensure_provenance') as ensure_mock:
        ensure_mock.return_value = None
        # Skip the real image pull; only the provenance hook is under test.
        with patch.object(executor, '_ensure_image'):
            await executor.prepare(sample_job)
            await executor.run(sample_job, timeout_seconds=1)

    ensure_mock.assert_called()
    # A bare assert_called() would still pass if the executor dropped the
    # configured key or the enforcement flag, so check what was forwarded.
    _, forwarded = ensure_mock.call_args
    assert forwarded["public_key_path"] == cosign_key
    assert forwarded["require_provenance"] is True
222+
186223
def test_build_environment_variables(mock_settings, sample_job):
187224
"""Test environment variable building."""
188225
executor = DockerExecutor(mock_settings)

tests/test_executor_integration.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ def mock_settings():
3232
settings.egress_policy_pack = None
3333
settings.offline_mode = False
3434
settings.artifact_registry_allow_list = []
35+
settings.image_allow_list_path = None
36+
settings.image_deny_list_path = None
37+
settings.cosign_certificate_authority = None
38+
settings.provenance_required = False
3539
return settings
3640

3741

tests/test_gpu_executor.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ def mock_settings():
1818
settings.docker_socket_path = "/var/run/docker.sock"
1919
settings.docker_workspace_path = Path("/tmp/test-gpu-workspaces")
2020
settings.docker_container_user = None
21+
settings.image_allow_list_path = None
22+
settings.image_deny_list_path = None
23+
settings.cosign_certificate_authority = None
24+
settings.provenance_required = False
2125
return settings
2226

2327

@@ -88,6 +92,47 @@ def test_gpu_executor_properties():
8892
assert "container" in capabilities
8993

9094

95+
@pytest.mark.asyncio
@patch('src.nimbus.runners.gpu.docker.DockerClient')
@patch('src.nimbus.runners.gpu.subprocess.run')
async def test_gpu_executor_enforces_provenance(mock_run, mock_docker_client, tmp_path, mock_settings, sample_gpu_job):
    """GPUExecutor.run must call ensure_provenance with the configured key and flag."""

    def run_side_effect(cmd, **kwargs):
        # nvidia-smi calls get one fake GPU row; every other subprocess call
        # gets a runtime listing that includes "nvidia".
        if "nvidia-smi" in cmd:
            return Mock(returncode=0, stdout="0, Tesla V100, GPU-12345, 32768, 30720, 7.0, 470.82")
        return Mock(returncode=0, stdout='{"nvidia": {"path": "runtime"}}')

    # side_effect alone decides the result; a separate return_value would be
    # ignored because run_side_effect always returns a value.
    mock_run.side_effect = run_side_effect

    mock_client = Mock()
    mock_docker_client.return_value = mock_client
    mock_client.ping.return_value = True
    mock_client.containers.create.return_value = Mock(
        wait=lambda timeout: {'StatusCode': 0},
        logs=lambda **kw: b'',
    )

    cosign_key = tmp_path / "cosign.pub"
    cosign_key.write_text("public key", encoding="utf-8")

    mock_settings.cosign_certificate_authority = cosign_key
    mock_settings.provenance_required = True

    executor = GPUExecutor()
    executor.initialize(mock_settings)

    sample_gpu_job.labels = ["gpu", "pytorch"]

    with patch('src.nimbus.runners.gpu.ensure_provenance') as ensure_mock:
        ensure_mock.return_value = None
        # Skip the real image pull; only the provenance hook is under test.
        with patch.object(executor, '_ensure_image'):
            await executor.prepare(sample_gpu_job)
            await executor.run(sample_gpu_job, timeout_seconds=1)

    ensure_mock.assert_called()
    # A bare assert_called() would still pass if the executor dropped the
    # configured key or the enforcement flag, so check what was forwarded.
    _, forwarded = ensure_mock.call_args
    assert forwarded["public_key_path"] == cosign_key
    assert forwarded["require_provenance"] is True
134+
135+
91136
@patch('subprocess.run')
92137
def test_check_nvidia_docker_available(mock_run):
93138
"""Test nvidia-docker availability check when available."""

0 commit comments

Comments
 (0)