Skip to content

Commit e904178

Browse files
d4l3k authored and facebook-github-bot committed
docker_scheduler: add support for building a new image from a workspace (#347)
Summary: This adds a new `build_workspace_image` method on the DockerScheduler. This takes in a base image and the fsspec workspace path and creates a new docker image with the workspace overlaid on the base image and returns it. This is the first step towards implementing #333. When docker builds an image it uses a build context which is uploaded to it via a tarball. This builds the context by walking the fsspec path for the files and tarring them. ## Dockerfile: A generated Dockerfile is added which does a naive copy. If the workspace contains a `Dockerfile`, that Dockerfile is used instead of the generated one. Dockerfile ``` FROM <base> COPY . . ``` Pull Request resolved: #347 Test Plan: pytest torchx/schedulers/test/docker_scheduler_test.py pyre Reviewed By: kiukchung Differential Revision: D32472559 Pulled By: d4l3k fbshipit-source-id: 79cc5e01af777a586d3d723c690a859af648950a
1 parent 1e4db20 commit e904178

File tree

3 files changed

+119
-1
lines changed

3 files changed

+119
-1
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ docstring-parser==0.8.1
33
pyyaml
44
docker
55
filelock
6+
fsspec

torchx/schedulers/docker_scheduler.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,27 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import fnmatch
8+
import io
89
import logging
910
import os.path
11+
import posixpath
12+
import tarfile
1013
import tempfile
1114
from dataclasses import dataclass
1215
from datetime import datetime
13-
from typing import Mapping, TYPE_CHECKING, Any, Dict, Iterable, Optional, List, Union
16+
from typing import (
17+
Mapping,
18+
TYPE_CHECKING,
19+
Any,
20+
Dict,
21+
Iterable,
22+
Optional,
23+
List,
24+
Union,
25+
IO,
26+
)
1427

28+
import fsspec
1529
import torchx
1630
import yaml
1731
from torchx.schedulers.api import (
@@ -390,6 +404,21 @@ def log_iter(
390404
else:
391405
return logs
392406

407+
def build_workspace_image(self, img: str, workspace: str) -> str:
    """
    Build a new Docker image made of ``img`` with the contents of
    ``workspace`` overlaid on top of it.

    Args:
        img: name of the Docker image used as the base
        workspace: fsspec path of the directory whose contents are overlaid

    Returns:
        The ID of the newly built Docker image.
    """
    docker_client = self._client()
    return _build_container_from_workspace(docker_client, img, workspace)
421+
393422

394423
def _to_str(a: Union[str, bytes]) -> str:
395424
if isinstance(a, bytes):
@@ -403,3 +432,59 @@ def create_scheduler(session_name: str, **kwargs: Any) -> DockerScheduler:
403432
return DockerScheduler(
404433
session_name=session_name,
405434
)
435+
436+
437+
def _copy_to_tarfile(workspace: str, tf: tarfile.TarFile) -> None:
    """
    Copy every file found under the fsspec ``workspace`` directory into ``tf``.

    Each file is added under its path relative to ``workspace`` (for example
    ``./foo.sh`` or ``bar/foo.sh``). Only the name and size of each entry are
    set; other tar metadata keeps the ``tarfile.TarInfo`` defaults.

    Args:
        workspace: fsspec path of the directory to copy from
        tf: tar archive, opened for writing, that receives the files
    """
    # TODO(d4l3k) implement docker ignore files

    fs, path = fsspec.core.url_to_fs(workspace)
    assert isinstance(path, str), "path must be str"

    # detail=True makes ``files`` a mapping of file name -> info dict, which
    # carries the full path ("name") and byte length ("size") of each file.
    for dirpath, _dirs, files in fs.walk(path, detail=True):
        assert isinstance(dirpath, str), "path must be str"
        relpath = posixpath.relpath(dirpath, path)
        for filename, info in files.items():
            with fs.open(info["name"], "rb") as f:
                tinfo = tarfile.TarInfo(posixpath.join(relpath, filename))
                # addfile reads exactly tinfo.size bytes from f.
                tinfo.size = info["size"]
                tf.addfile(tinfo, f)
452+
453+
454+
def _build_context(img: str, workspace: str) -> IO[bytes]:
    """
    Create a Docker build context tarball for ``workspace`` on top of ``img``.

    The archive holds a generated ``Dockerfile`` (``FROM <img>`` followed by
    ``COPY . .``) and then every file from the workspace.

    Args:
        img: Docker image named in the generated Dockerfile's ``FROM`` line
        workspace: fsspec path of the directory to include in the context

    Returns:
        An open temporary file containing the tar archive, positioned at the
        start. The caller is responsible for closing it.
    """
    # On success f is closed by the caller; on failure it is closed below so
    # the temporary file does not linger until garbage collection.
    f = tempfile.NamedTemporaryFile(  # noqa P201
        prefix="torchx-context",
        suffix=".tar",
    )
    try:
        dockerfile = bytes(f"FROM {img}\nCOPY . .\n", encoding="utf-8")
        with tarfile.open(fileobj=f, mode="w") as tf:
            # The generated Dockerfile goes in first; workspace files follow.
            info = tarfile.TarInfo("Dockerfile")
            info.size = len(dockerfile)
            tf.addfile(info, io.BytesIO(dockerfile))

            _copy_to_tarfile(workspace, tf)

        # Rewind so the consumer reads the archive from the beginning.
        f.seek(0)
    except BaseException:
        # Cleanup only: release the temp file, then propagate the error.
        f.close()
        raise
    return f
470+
471+
472+
def _build_container_from_workspace(
    client: "DockerClient", img: str, workspace: str
) -> str:
    """
    Build a Docker image from ``workspace`` using ``img`` as the base image.

    Args:
        client: Docker client used to run the build
        img: Docker image to use as the base
        workspace: fsspec path of the directory to overlay on the base image

    Returns:
        The ID of the newly built image.
    """
    context = _build_context(img, workspace)

    try:
        # build() returns the image together with the build logs; the logs
        # are not used here.
        image, _ = client.images.build(
            fileobj=context,
            # fileobj is a complete tar build context, not a lone Dockerfile.
            custom_context=True,
            pull=True,
            rm=True,
            labels={
                LABEL_VERSION: torchx.__version__,
            },
        )
    finally:
        # Always release the temporary context file, even if the build fails.
        context.close()
    return image.id

torchx/schedulers/test/docker_scheduler_test.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from datetime import datetime, timedelta
1010
from unittest.mock import patch
1111

12+
import fsspec
1213
from docker.types import DeviceRequest
1314
from torchx import specs
1415
from torchx.schedulers.api import Stream
@@ -347,3 +348,34 @@ def test_docker_submit_dist(self) -> None:
347348
self.assertEqual(
348349
desc.roles_statuses[0].replicas[1].state, AppState.SUCCEEDED
349350
)
351+
352+
def test_docker_workspace(self) -> None:
    """
    Build an image from an in-memory fsspec workspace on top of busybox,
    run a script from that workspace and expect the app to succeed.
    """
    memfs = fsspec.filesystem("memory")
    memfs.mkdirs("test_workspace/bar", exist_ok=True)
    with memfs.open("test_workspace/bar/foo.sh", "w") as script:
        script.write("exit 0")

    workspace_img = self.scheduler.build_workspace_image(
        "busybox",
        "memory://test_workspace",
    )

    # The script is addressed relative to the workspace root.
    ping_role = Role(
        name="ping",
        image=workspace_img,
        entrypoint="sh",
        args=["bar/foo.sh"],
    )
    app = AppDef(name="test-app", roles=[ping_role])
    app_id = self.scheduler.submit(app, {})
    print(app_id)

    desc = self.wait(app_id)
    self.assertIsNotNone(desc)
    self.assertEqual(AppState.SUCCEEDED, desc.state)

0 commit comments

Comments
 (0)