-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdocker_utils.py
More file actions
309 lines (261 loc) · 9.54 KB
/
docker_utils.py
File metadata and controls
309 lines (261 loc) · 9.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
from __future__ import annotations
import docker
import os
import signal
import tarfile
import threading
import traceback
from pathlib import Path
from docker.models.containers import Container
# Terminator line for the shell heredoc that write_to_container builds.
# It must never appear as a line of the data being written, hence the
# unusual numeric suffix.
HEREDOC_DELIMITER = "EOF_1399519320" # different from dataset HEREDOC_DELIMITERs!
def copy_to_container(container: Container, src: Path, dst: Path):
    """
    Copy a file from local to a docker container

    The file is packed into an in-memory tar archive whose single member is
    named after ``dst`` and handed to ``put_archive``, which unpacks it
    inside ``dst.parent``. Nothing is written to the local filesystem, so no
    temporary archive can be left behind (or overwrite an existing sibling
    ``.tar`` file) next to ``src``.

    Args:
        container (Container): Docker container to copy to
        src (Path): Source file path
        dst (Path): Destination file path in the container

    Raises:
        ValueError: If ``dst`` has no parent directory component.
    """
    import io  # only needed here; keeps this function self-contained

    # Check if destination path is valid
    if os.path.dirname(dst) == "":
        raise ValueError(
            f"Destination path parent directory cannot be empty!, dst: {dst}"
        )

    # Container paths are POSIX paths regardless of the host platform
    dst_dir = dst.parent.as_posix()

    # Build the archive in memory. The member is stored under dst.name so the
    # file ends up exactly at dst even when src and dst have different names.
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w") as tar:
        tar.add(src, arcname=dst.name)

    # Make directory if necessary. The list form avoids re-tokenizing the
    # path, so directories containing spaces survive intact.
    container.exec_run(["mkdir", "-p", dst_dir])

    # put_archive extracts the tar data into dst_dir; no separate untar or
    # cleanup step is needed inside the container.
    container.put_archive(dst_dir, buffer.getvalue())
def write_to_container(container: Container, data: str, dst: Path):
    """
    Write a string to a file in a docker container

    The text is fed to ``cat`` through a shell heredoc and redirected to
    ``dst``. The heredoc delimiter is quoted, so the shell performs no
    variable or command expansion on ``data``. A trailing newline follows
    ``data`` in the written file.

    Args:
        container (Container): Docker container to write into
        data (str): Text to write. Must not contain a line consisting solely
            of ``HEREDOC_DELIMITER``, which would end the heredoc early.
        dst (Path): Destination file path in the container
    """
    import shlex  # only needed here; keeps this function self-contained

    # cat with heredoc to file; dst is quoted so spaces or shell
    # metacharacters in the path are taken literally
    script = (
        f"cat <<'{HEREDOC_DELIMITER}' > {shlex.quote(str(dst))}\n"
        f"{data}\n"
        f"{HEREDOC_DELIMITER}"
    )
    # exec_run does not run commands through a shell, so the heredoc and the
    # redirection only work when a shell is invoked explicitly.
    # NOTE(review): assumes the container image provides /bin/sh - confirm.
    container.exec_run(["/bin/sh", "-c", script])
def remove_image(client, image_id, logger=None):
    """
    Force-remove a Docker image by ID.

    Args:
        client (docker.DockerClient): Docker client.
        image_id (str): Image ID.
        logger (logging.Logger | str | None): Output destination. A falsy
            value prints to stdout, the string "quiet" suppresses all
            output, and any other object is used through its ``info``
            method.

    Raises:
        Exception: Whatever the removal raised, when ``logger`` is falsy or
            "quiet". With a logger object the failure is logged instead.
    """
    if logger and logger != "quiet":
        # A real logger: report everything through it and never raise
        emit_error = logger.info
        emit_info = logger.info
        propagate = False
    else:
        # stdout mode and quiet mode both let failures propagate
        propagate = True
        if logger:
            emit_info = emit_error = lambda _message: None
        else:
            emit_info = emit_error = print

    try:
        emit_info(f"Attempting to remove image {image_id}...")
        client.images.remove(image_id, force=True)
        emit_info(f"Image {image_id} removed.")
    except Exception as err:
        if propagate:
            raise
        emit_error(
            f"Failed to remove image {image_id}: {err}\n{traceback.format_exc()}"
        )
def cleanup_container(client, container, logger):
    """
    Stop and remove a Docker container.

    If stopping through the python API fails, the container's PID is read
    from ``inspect_container`` and sent SIGKILL with ``os.kill`` before the
    removal is attempted.
    NOTE(review): killing by PID presumably only works when the Docker
    daemon runs on the same host as this process - confirm.

    Args:
        client (docker.DockerClient): Docker client.
        container (docker.models.containers.Container): Container to remove.
            A falsy value makes this function a no-op.
        logger (logging.Logger | str | None): Output destination. A falsy
            value prints to stdout, the string "quiet" suppresses all
            output, and any other object is used through its ``info``
            method.

    Raises:
        Exception: Errors from the forced kill or from the removal, when
            ``logger`` is falsy or "quiet". With a logger object these are
            logged instead.
    """
    if not container:
        return

    container_id = container.id

    if logger and logger != "quiet":
        # A real logger: report everything through it and never raise
        log_error = logger.info
        log_info = logger.info
        raise_error = False
    else:
        # stdout mode and quiet mode both let failures propagate
        raise_error = True
        if logger:
            log_info = log_error = lambda _message: None
        else:
            log_error = log_info = print

    # Step 1: graceful stop, falling back to SIGKILL on the container's PID
    try:
        log_info(f"Attempting to stop container {container.name}...")
        container.stop(timeout=15)
    except Exception as stop_err:
        log_error(
            f"Failed to stop container {container.name}: {stop_err}. Trying to forcefully kill..."
        )
        try:
            state = client.api.inspect_container(container_id)["State"]
            pid = state.get("Pid", 0)
            if pid > 0:
                log_info(
                    f"Forcefully killing container {container.name} with PID {pid}..."
                )
                os.kill(pid, signal.SIGKILL)
            else:
                # No usable PID was reported, so there is nothing to kill
                log_error(f"PID for container {container.name}: {pid} - not killing.")
        except Exception as kill_err:
            if raise_error:
                raise
            log_error(
                f"Failed to forcefully kill container {container.name}: {kill_err}\n"
                f"{traceback.format_exc()}"
            )

    # Step 2: remove the container
    try:
        log_info(f"Attempting to remove container {container.name}...")
        container.remove(force=True)
        log_info(f"Container {container.name} removed.")
    except Exception as remove_err:
        if raise_error:
            raise
        log_error(
            f"Failed to remove container {container.name}: {remove_err}\n"
            f"{traceback.format_exc()}"
        )
def exec_run_with_timeout(container, cmd, timeout: int|None=60):
    """
    Run a command in a container with a timeout.

    The exec is created and started on a background thread while the caller
    waits at most ``timeout`` seconds for it. The thread is a daemon thread,
    so an exec that never returns cannot keep the interpreter from exiting
    after the timeout has been reported. The command itself is not
    cancelled on timeout.

    Args:
        container (docker.Container): Container to run the command in.
        cmd (str): Command to run.
        timeout (int): Timeout in seconds. ``None`` waits indefinitely.

    Returns:
        Whatever ``exec_start`` returned for the command.

    Raises:
        TimeoutError: If the command has not finished within ``timeout``.
        Exception: Any error raised while creating or starting the exec.
    """
    # Filled in by the worker thread: "result" on success, "exception" on failure
    outcome = {}

    # Wrapper function to run the command
    def run_command():
        try:
            exec_id = container.client.api.exec_create(container.id, cmd)["Id"]
            outcome["result"] = container.client.api.exec_start(exec_id)
        except Exception as e:
            outcome["exception"] = e

    # Start the command in a separate daemon thread and wait for it
    thread = threading.Thread(target=run_command, daemon=True)
    thread.start()
    thread.join(timeout)

    # An error in the worker takes precedence over the timeout check
    if "exception" in outcome:
        raise outcome["exception"]

    # If the thread is still alive, the command timed out
    if thread.is_alive():
        raise TimeoutError(f"Command '{cmd}' timed out after {timeout} seconds")

    return outcome.get("result")
def find_dependent_images(client: docker.DockerClient, image_name: str):
    """
    Find all images that are built upon `image_name` image

    An image counts as dependent when the base image's ID appears among the
    ``Id`` fields of its history. The base image itself is excluded.

    Args:
        client (docker.DockerClient): Docker client.
        image_name (str): Name of the base image.

    Returns:
        list: For each dependent image, its first tag, or its ID when it has
        no tags. Empty when the base image is not found (a message is
        printed in that case).
    """
    # Snapshot of every local image, taken before the base image lookup
    candidates = client.images.list()

    # Resolve the base image to its ID
    try:
        base_id = client.images.get(image_name).id
    except docker.errors.ImageNotFound:
        print(f"Base image {image_name} not found.")
        return []

    dependents = []
    for candidate in candidates:
        # The base image is not a dependent of itself
        if candidate.id == base_id:
            continue

        # Stops scanning the history at the first matching layer
        if any(layer['Id'] == base_id for layer in candidate.history()):
            labels = candidate.tags
            dependents.append(labels[0] if labels else candidate.id)

    return dependents
def list_images(client: docker.DockerClient):
    """
    Collect the tags of all images known to the Docker client.

    Args:
        client (docker.DockerClient): Docker client.

    Returns:
        set: Every tag of every image returned by ``client.images.list(all=True)``.
    """
    # don't use this in multi-threaded context
    known_tags = set()
    for image in client.images.list(all=True):
        known_tags.update(image.tags)
    return known_tags
def clean_images(
    client: docker.DockerClient,
    prior_images: set,
    cache_level: str,
    clean: bool,
):
    """
    Remove Docker images according to the cache level and the clean flag.

    Every currently tagged image is passed to ``should_remove``; those
    selected are removed quietly. A failed removal is printed and skipped so
    the remaining images are still processed.

    Args:
        client (docker.DockerClient): Docker client.
        prior_images (set): Set of images that existed before the current run.
        cache_level (str): Cache level to use.
        clean (bool): Whether to clean; remove images that are higher in the cache hierarchy than the current
            cache level. E.g. if cache_level is set to env, remove all previously built instances images. if
            clean is false, previously built instances images will not be removed, but instance images built
            in the current run will be removed.
    """
    candidates = list_images(client)
    removed_count = 0
    print("Cleaning cached images...")

    for image_name in candidates:
        if not should_remove(image_name, cache_level, clean, prior_images):
            continue
        try:
            remove_image(client, image_name, "quiet")
        except Exception as err:
            print(f"Error removing image {image_name}: {err}")
        else:
            # Only count removals that completed without an exception
            removed_count += 1

    print(f"Removed {removed_count} images.")
def should_remove(
    image_name: str,
    cache_level: str,
    clean: bool,
    prior_images: set,
):
    """
    Decide whether an image should be removed for the given cache settings.

    Images are classified by name prefix. Each class is removable only at
    the cache levels listed for it, and then only if ``clean`` is set or the
    image did not exist before the current run. Names matching no known
    prefix are never removed.

    Args:
        image_name (str): Image name (tag) to classify.
        cache_level (str): Cache level in effect.
        clean (bool): Whether images that existed before the run may be removed.
        prior_images (set): Set of images that existed before the current run.

    Returns:
        bool: True if the image should be removed.
    """
    existed_before = image_name in prior_images

    # Image-name prefix -> cache levels at which that class is not kept
    removable_at = (
        ("sweb.base", {"none"}),
        ("sweb.env", {"none", "base"}),
        ("sweb.eval", {"none", "base", "env"}),
    )

    for prefix, levels in removable_at:
        if image_name.startswith(prefix):
            disposable = clean or not existed_before
            return bool(cache_level in levels and disposable)

    return False