Skip to content

Commit 05d380b

Browse files
authored
Dlc build engine revamp (#5101)
1 parent 48a09aa commit 05d380b

File tree

1 file changed

+186
-50
lines changed

1 file changed

+186
-50
lines changed

src/image.py

Lines changed: 186 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
import constants
2222
import logging
23-
import json
23+
import subprocess
2424

2525
LOGGER = logging.getLogger(__name__)
2626
LOGGER.setLevel(logging.INFO)
@@ -173,10 +173,14 @@ def build(self):
173173
# Conduct some preprocessing before building the image
174174
self.update_pre_build_configuration()
175175

176-
# Start building the image
177-
with open(self.context.context_path, "rb") as context_file:
178-
self.docker_build(fileobj=context_file, custom_context=True)
179-
self.context.remove()
176+
# Start building the image with Buildx
177+
build_start_time = datetime.now()
178+
self.docker_build(context_path=self.context.context_path, custom_context=True)
179+
build_end_time = datetime.now()
180+
duration_seconds = (build_end_time - build_start_time).total_seconds()
181+
LOGGER.info(f"Build duration: {duration_seconds:.2f} seconds")
182+
183+
self.context.remove()
180184

181185
if self.build_status != constants.SUCCESS:
182186
LOGGER.info(f"Exiting with image build status {self.build_status} without image check.")
@@ -193,64 +197,196 @@ def build(self):
193197
# This return is necessary. Otherwise FORMATTER fails while displaying the status.
194198
return self.build_status
195199

196-
def docker_build(self, fileobj=None, custom_context=False):
200+
def docker_build(self, context_path, custom_context=False):
    """
    Dispatch the image build to the matching build backend.

    Images identified as vLLM by ``_is_vllm_image`` are built through the
    Buildx path; every other image goes through the legacy Docker API path.

    :param context_path: str, Path to build context
    :param custom_context: bool, forwarded unchanged to the selected backend (default: False)
    :return: int, Build status returned by the selected backend
    """
    # Guard clause: handle the non-vLLM case first, then fall through to Buildx.
    if not self._is_vllm_image():
        LOGGER.info(f"Using legacy Docker API for non-vLLM image: {self.repository}:{self.tag}")
        return self._legacy_docker_build(context_path, custom_context)

    LOGGER.info(f"Using Buildx for vLLM image: {self.repository}:{self.tag}")
    return self._buildx_build(context_path, custom_context)
206214

207-
line_counter = 0
208-
line_interval = 50
209-
for line in self.client.build(
210-
fileobj=fileobj,
211-
path=self.dockerfile,
212-
custom_context=custom_context,
213-
rm=True,
214-
decode=True,
215-
tag=self.ecr_url,
216-
buildargs=self.build_args,
217-
labels=self.labels,
218-
target=self.target,
219-
):
220-
# print the log line during build for every line_interval lines for debugging
221-
if line_counter % line_interval == 0:
222-
LOGGER.debug(line)
223-
line_counter += 1
215+
def _is_vllm_image(self):
    """
    Tell whether the current image should be treated as a vLLM image.

    The image counts as vLLM when any of the following holds (checked in
    this order, stopping at the first match):
    - ``self.info["framework"]`` equals "vllm"
    - "vllm" occurs in the lower-cased repository name
    - "vllm" occurs in the lower-cased ``self.info["name"]`` (missing name is treated as "")

    :return: bool, True if this is a vLLM image
    """
    if self.info.get("framework") == "vllm":
        return True

    if "vllm" in self.repository.lower():
        return True

    image_name = str(self.info.get("name", ""))
    return "vllm" in image_name.lower()
231226

232-
LOGGER.info(f"Docker Build Logs: \n {self.get_tail_logs_in_pretty_format(100)}")
233-
LOGGER.error("ERROR during Docker BUILD")
234-
LOGGER.error(
235-
f"Error message received for {self.dockerfile} while docker build: {line}"
236-
)
227+
def _buildx_build(self, context_path, custom_context=False):
    """
    Build the image with the Docker Buildx CLI, streaming its output line by line.

    An inline cache is always exported (``--cache-to type=inline``). When
    ``self.additional_tags`` is non-empty, its shortest entry is used as a
    registry cache source (``--cache-from type=registry,ref=<repository>:<tag>``);
    otherwise the build proceeds without a registry cache.

    Every non-empty output line is logged and collected, and the collected
    lines are appended to ``self.log`` whether the build succeeds or fails.
    On failure the build summary is updated the same way the legacy build
    path does it.

    :param context_path: str, Path to build context
    :param custom_context: bool, when True the file at context_path is piped to
        buildx on stdin and "-" is passed as the context; otherwise context_path
        itself is passed as the context argument (default: False)
    :return: int, Build status
    """
    response = [f"Starting Buildx Process for {self.repository}:{self.tag}"]
    LOGGER.info(f"Starting Buildx Process for {self.repository}:{self.tag}")

    cmd = [
        "docker",
        "buildx",
        "build",
        "-t",
        self.ecr_url,
        "--progress=plain",  # Real-time log streaming
    ]

    # Tolerate unset (None) build args / labels instead of failing on .items().
    for k, v in (self.build_args or {}).items():
        cmd.extend(["--build-arg", f"{k}={v}"])

    for k, v in (self.labels or {}).items():
        cmd.extend(["--label", f"{k}={v}"])

    if self.target:
        cmd.extend(["--target", self.target])

    # Always use inline cache-to for maximum caching
    cmd.extend(["--cache-to", "type=inline"])

    # Use shortest tag from additional_tags as a suitable cache source.
    # default=None keeps an empty tag list from raising ValueError, so the
    # "no cache source" branch below is actually reachable.
    latest_tag = min(self.additional_tags or [], key=len, default=None)

    if latest_tag:
        latest_image_uri = f"{self.repository}:{latest_tag}"
        LOGGER.info(f"Using cache from registry: {latest_image_uri}")
        cmd.extend(["--cache-from", f"type=registry,ref={latest_image_uri}"])
    else:
        LOGGER.info("No suitable cache source found. Proceeding without registry cache")

    # "-" tells buildx to read the build context from stdin.
    cmd.append("-" if custom_context else context_path)

    context_tarball = open(context_path, "rb") if custom_context else None
    succeeded = False

    try:
        # The context manager closes the stdout pipe and waits for the child
        # even if streaming raises, so no pipe or process is left behind.
        with subprocess.Popen(
            cmd,
            stdin=context_tarball,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            # Undecodable bytes in build output must not abort a running build.
            errors="replace",
            bufsize=1,
        ) as process:
            # Stream output in real-time
            for raw_line in process.stdout:
                line = raw_line.rstrip()
                if line:
                    response.append(line)
                    LOGGER.info(line)

        # Popen.__exit__ has waited for the process, so returncode is set here.
        succeeded = process.returncode == 0
        if not succeeded:
            LOGGER.error(f"Buildx failed for {self.repository}:{self.tag}")

    except Exception as e:
        response.append(f"Buildx error: {str(e)}")
        LOGGER.error(f"Buildx exception: {str(e)}")
    finally:
        if context_tarball:
            context_tarball.close()

    self.log.append(response)

    if succeeded:
        self.build_status = constants.SUCCESS
        LOGGER.info(f"Completed Buildx for {self.repository}:{self.tag}")
    else:
        self.build_status = constants.FAIL
        # Record the failure in the summary, matching the legacy build path.
        self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
        self.summary["end_time"] = datetime.now()

    return self.build_status
248317

249-
LOGGER.info(f"DOCKER BUILD LOGS: \n{self.get_tail_logs_in_pretty_format()}")
250-
LOGGER.info(f"Completed Build for {self.repository}:{self.tag}")
318+
def _legacy_docker_build(self, context_path, custom_context=False):
    """
    Uses legacy Docker API Client to build the image (for non-vLLM images).

    Build output lines are collected and appended to ``self.log`` on every
    exit path (success, build error reported by Docker, or an exception).
    Both failure paths also record the failure in ``self.summary``.

    :param context_path: str, Path to build context
    :param custom_context: bool, when True the file at context_path is opened and
        passed to the client as the build context file object, and no path is
        passed; otherwise self.dockerfile is passed as the path (default: False)
    :return: int, Build Status
    """
    response = [f"Starting Legacy Docker Build Process for {self.repository}:{self.tag}"]
    LOGGER.info(f"Starting Legacy Docker Build Process for {self.repository}:{self.tag}")

    # Open context tarball for legacy API
    fileobj = open(context_path, "rb") if custom_context else None

    line_counter = 0
    line_interval = 50
    # Tracks whether `response` is already in self.log, so the exception
    # handler never appends it a second time.
    log_recorded = False

    try:
        for line in self.client.build(
            fileobj=fileobj,
            path=self.dockerfile if not custom_context else None,
            custom_context=custom_context,
            rm=True,
            decode=True,
            tag=self.ecr_url,
            buildargs=self.build_args,
            labels=self.labels,
            target=self.target,
        ):
            # print the log line during build for every line_interval lines
            if line_counter % line_interval == 0:
                LOGGER.info(line)
            line_counter += 1

            if line.get("error") is not None:
                response.append(line["error"])
                self.log.append(response)
                log_recorded = True
                self.build_status = constants.FAIL
                self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
                self.summary["end_time"] = datetime.now()

                LOGGER.info(f"Docker Build Logs: \n {self.get_tail_logs_in_pretty_format(100)}")
                LOGGER.error("ERROR during Docker BUILD")
                LOGGER.error(
                    f"Error message received for {self.dockerfile} while docker build: {line}"
                )

                return self.build_status

            if line.get("stream") is not None:
                response.append(line["stream"])
            elif line.get("status") is not None:
                response.append(line["status"])
            else:
                response.append(str(line))

        self.log.append(response)
        log_recorded = True

        LOGGER.info(f"DOCKER BUILD LOGS: \n{self.get_tail_logs_in_pretty_format()}")
        LOGGER.info(f"Completed Legacy Build for {self.repository}:{self.tag}")

        self.build_status = constants.SUCCESS
        return self.build_status

    except Exception as e:
        response.append(f"Legacy Docker build error: {str(e)}")
        # Keep the collected output: without this the log of a build that
        # died with an exception would be lost.
        if not log_recorded:
            self.log.append(response)
        self.build_status = constants.FAIL
        # Record the failure the same way the error-line path above does.
        self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
        self.summary["end_time"] = datetime.now()
        LOGGER.error(f"Legacy Docker build exception: {str(e)}")
        return self.build_status
    finally:
        if fileobj:
            fileobj.close()
254390

255391
def image_size_check(self):
256392
"""

0 commit comments

Comments
 (0)