refactor: adjust pull progress

thxCode · thxCode · commit a9796bc5f64c · 2025-12-18T21:19:41.000+08:00
Signed-off-by: thxCode <thxcode0824@gmail.com>
diff --git a/gpustack_runtime/deployer/docker.py b/gpustack_runtime/deployer/docker.py
@@ -6,6 +6,7 @@
 import operator
 import os
 import socket
+import sys
 from dataclasses import dataclass, field
 from functools import lru_cache, reduce
 from math import ceil
@@ -43,7 +44,7 @@
     WorkloadStatusOperation,
     WorkloadStatusStateEnum,
 )
-from .__utils__ import safe_json
+from .__utils__ import _MiB, bytes_to_human_readable, safe_json
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Generator
@@ -444,9 +445,9 @@ def _create_ephemeral_volumes(self, workload: DockerWorkloadPlan) -> dict[str, s
         return ephemeral_volume_name_mapping
 
     def _pull_image(self, image: str) -> docker.models.images.Image:
-        logger.info(f"Pulling image {image}")
-
         try:
+            logger.info("Pulling image %s", image)
+
             repo, tag = parse_repository_tag(image)
             tag = tag or "latest"
             auth_config = None
@@ -466,54 +467,9 @@ def _pull_image(self, image: str) -> docker.models.images.Image:
                 decode=True,
                 auth_config=auth_config,
             )
+            _print_pull_logs(logs, image, tag)
 
-            layers: dict[str, tqdm] = {}
-
-            def clean_layers():
-                if not layers:
-                    return
-                for layer in layers.values():
-                    layer.close()
-                layers.clear()
-
-            for log in logs:
-                if "id" not in log:
-                    clean_layers()
-                    logger.info(log["status"])
-                    continue
-
-                layer_id = log.get("id")
-                layer_status = log.get("status", "")
-                layer_progress = log.get("progressDetail", {})
-                layer_progress_total = layer_progress.get("total", None)
-                layer_progress_current = layer_progress.get("current", None)
-                if layer_id not in layers:
-                    layers[layer_id] = tqdm(
-                        unit="B",
-                        unit_scale=True,
-                        position=len(layers),
-                        ncols=70,
-                        desc=f"{layer_id}: {layer_status}",
-                        bar_format="{desc}",
-                    )
-                else:
-                    layers[layer_id].desc = f"{layer_id}: {layer_status}"
-
-                if layer_progress_total is not None:
-                    layers[layer_id].total = layer_progress_total
-                    bf = "{desc} |{bar}| {n_fmt}/{total_fmt} [{rate_fmt}{postfix}]"
-                    layers[layer_id].bar_format = bf
-                elif layer_progress_current is not None:
-                    layers[layer_id].bar_format = "{desc} {n_fmt} [{rate_fmt}{postfix}]"
-                else:
-                    layers[layer_id].bar_format = "{desc}"
-
-                if layer_progress_current:
-                    layers[layer_id].n = layer_progress_current
-
-                layers[layer_id].refresh()
-
-            clean_layers()
+            logger.info("Pulled image %s", image)
 
             sep = "@" if tag.startswith("sha256:") else ":"
             return self._client.images.get(f"{repo}{sep}{tag}")
@@ -1959,3 +1915,150 @@ def _detail_api_call_error(err: docker.errors.APIError) -> str:
         msg += f": status code {err.response.status_code}"
 
     return msg
+
+
+def _print_pull_logs(logs, image, tag):
+    """
+    Display Docker image pull logs.
+
+    Renders progress bars via `_visualize_pull_logs` when
+    `envs.GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION` is falsy and
+    `sys.stderr` is attached to a TTY; otherwise prints plain-text progress
+    via `_textualize_pull_logs`.
+
+    Args:
+        logs:
+            The logs from Docker image pull,
+            an iterable of dict entries that is consumed by the chosen renderer.
+        image:
+            The image being pulled.
+        tag:
+            The image tag being pulled.
+
+    """
+    # NOTE(review): the TTY check is made on stderr, presumably because tqdm
+    # draws its bars on stderr by default — confirm.
+    if (
+        not envs.GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION
+        and sys.stderr.isatty()
+    ):
+        _visualize_pull_logs(logs, tag)
+    else:
+        _textualize_pull_logs(logs, image, tag)
+
+
+def _visualize_pull_logs(logs, tag):
+    """
+    Display Docker image pull logs as progress bars.
+
+    One tqdm bar is kept per distinct entry `id`. Entries without an `id`
+    are not rendered as bars; their `status` text is collected and printed
+    once the log stream has been fully consumed and all bars are closed.
+    Entries whose `id` equals `tag` are ignored.
+
+    All bars are closed in a `finally` clause, so they are released even if
+    iterating `logs` raises; in that case the exception propagates and the
+    collected status messages are not printed.
+
+    Args:
+        logs:
+            The logs from Docker image pull,
+            an iterable of dict entries read via `id`, `status` and `progressDetail`.
+        tag:
+            The image tag being pulled.
+
+    """
+    # Progress bars keyed by entry id.
+    pbars: dict[str, tqdm] = {}
+    # Status messages of entries that carry no id, printed after the loop.
+    dmsgs: list[str] = []
+
+    try:
+        for log in logs:
+            id_ = log.get("id", None)
+            status = log.get("status", "")
+            if not id_:
+                # Deferred so the message does not interleave with live bars.
+                dmsgs.append(status)
+                continue
+            # NOTE(review): presumably the daemon's "Pulling from ..." entry
+            # uses the tag as its id and is not a layer — confirm.
+            if id_ == tag:
+                continue
+
+            progress = log.get("progressDetail", {})
+            progress_total = progress.get("total", None)
+            progress_current = progress.get("current", None)
+
+            if id_ not in pbars:
+                # First entry for this id: only create a description-only bar.
+                # Any progress detail in this first entry is not applied.
+                pbars[id_] = tqdm(
+                    unit="B",
+                    unit_scale=True,
+                    desc=f"{id_}: {status}",
+                    bar_format="{desc}",
+                )
+                continue
+
+            pbars[id_].desc = f"{id_}: {status}"
+            # Pick the bar layout from what the entry reports:
+            # total known -> full bar; only current known -> counter; else text.
+            if progress_total is not None:
+                pbars[id_].total = progress_total
+                bf = "{desc} |{bar}| {n_fmt}/{total_fmt} [{rate_fmt}{postfix}]"
+                pbars[id_].bar_format = bf
+            elif progress_current is not None:
+                pbars[id_].bar_format = "{desc} {n_fmt} [{rate_fmt}{postfix}]"
+            else:
+                pbars[id_].bar_format = "{desc}"
+
+            # A missing or zero `current` leaves the bar position unchanged.
+            if progress_current:
+                pbars[id_].n = progress_current
+
+            pbars[id_].refresh()
+    finally:
+        for pbar in pbars.values():
+            pbar.close()
+        pbars.clear()
+
+    for msg in dmsgs:
+        print(msg, flush=True)
+
+
+def _textualize_pull_logs(logs, image, tag):
+    """
+    Display Docker image pull logs as plain text.
+
+    Keeps the latest `(total, current)` pair per entry `id`, sums them over
+    all known ids, and prints a `Pulling image {image}: ...` line whenever
+    the aggregated progress has advanced by at least the current step:
+
+    - When the summed total is non-zero, progress is reported as a floored
+      percentage. The step starts at 1 and grows by 1 after each printed
+      line, capped at 5. Nothing more is printed once the cursor reaches 100.
+    - When the summed total is zero but the summed current is non-zero,
+      progress is reported as a human-readable byte count. The step grows by
+      `2 * _MiB` after each printed line, capped at `200 * _MiB`.
+
+    Entries without an `id` have their `status` collected and printed after
+    the log stream has been fully consumed. Entries whose `id` equals `tag`
+    are ignored.
+
+    Args:
+        logs:
+            The logs from Docker image pull,
+            an iterable of dict entries read via `id`, `status` and `progressDetail`.
+        image:
+            The image being pulled.
+        tag:
+            The image tag being pulled.
+
+    """
+    # Latest (total, current) per entry id.
+    pstats: dict[str, tuple[int, int]] = {}
+    # Last reported progress value.
+    # NOTE(review): this holds a percentage in the first branch below and a
+    # byte count in the second; if a stream moves between the two branches,
+    # the comparison mixes units — verify this cannot happen in practice.
+    pstats_cursor: int = 0
+    # Minimum advance required before the next line is printed.
+    pstats_cursor_move: int = 1
+    # Status messages of entries that carry no id, printed after the loop.
+    dmsgs: list[str] = []
+
+    for log in logs:
+        id_ = log.get("id", None)
+        status = log.get("status", "")
+        if not id_:
+            dmsgs.append(status)
+            continue
+        # NOTE(review): presumably the daemon's "Pulling from ..." entry
+        # uses the tag as its id and is not a layer — confirm.
+        if id_ == tag:
+            continue
+
+        if id_ not in pstats:
+            # First entry for this id: only register it with zero progress.
+            # Any progress detail in this first entry is not applied.
+            pstats[id_] = (0, 0)
+            continue
+
+        progress = log.get("progressDetail", {})
+        progress_total = progress.get("total", None)
+        progress_current = progress.get("current", None)
+
+        # Entries without progress detail keep the previously stored pair.
+        if progress_total is not None or progress_current is not None:
+            pstats[id_] = (progress_total or 0, progress_current or 0)
+
+        # Aggregate over every id seen so far.
+        pstats_total, pstats_current = 0, 0
+        for t, c in pstats.values():
+            pstats_total += t
+            pstats_current += c
+
+        if pstats_total:
+            # Percentage mode: advance of the floored percentage since the last report.
+            pstats_cursor_diff = int(
+                pstats_current * 100 // pstats_total - pstats_cursor,
+            )
+            if pstats_cursor_diff >= pstats_cursor_move and pstats_cursor < 100:
+                pstats_cursor += pstats_cursor_diff
+                # Widen the step so later reports are less frequent.
+                pstats_cursor_move = min(5, pstats_cursor_move + 1)
+                print(f"Pulling image {image}: {pstats_cursor}%", flush=True)
+        elif pstats_current:
+            # Byte mode: no total is known, report the transferred amount.
+            pstats_cursor_diff = int(
+                pstats_current - pstats_cursor,
+            )
+            if pstats_cursor_diff >= pstats_cursor_move:
+                pstats_cursor += pstats_cursor_diff
+                # Widen the step so later reports are less frequent.
+                pstats_cursor_move = min(200 * _MiB, pstats_cursor_move + 2 * _MiB)
+                pstats_cursor_human = bytes_to_human_readable(pstats_cursor)
+                print(f"Pulling image {image}: {pstats_cursor_human}", flush=True)
+
+    for msg in dmsgs:
+        print(msg, flush=True)
diff --git a/gpustack_runtime/envs.py b/gpustack_runtime/envs.py
@@ -53,6 +53,10 @@
     The detected backend mapping to resource keys,
     e.g `{"cuda": "nvidia.com/devices", "rocm": "amd.com/devices"}`.
     """
+    GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY: bool = True
+    """
+    Use physical index priority at detecting devices.
+    """
     ## Deployer
     GPUSTACK_RUNTIME_DEPLOY: str | None = None
     """
@@ -174,13 +178,6 @@
     alignment is performed to ensure they are correctly identified.
     """
 
-    # Detector
-
-    GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY: bool = True
-    """
-    Use physical index priority at detecting devices.
-    """
-
     # Deployer
 
     ## Docker
@@ -190,6 +187,10 @@
     Only works when `GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME` is not set.
     Normally, it should be injected automatically via CI without any manual configuration.
     """
+    GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION: bool = False
+    """
+    Disable image pull visualization in Docker deployer.
+    """
     GPUSTACK_RUNTIME_DOCKER_PAUSE_IMAGE: str | None = None
     """
     Docker image used for the pause container.
@@ -246,6 +247,7 @@
     "GPUSTACK_RUNTIME_LOG_EXCEPTION": lambda: to_bool(
         getenv("GPUSTACK_RUNTIME_LOG_EXCEPTION", "1"),
     ),
+    ## Detector
     "GPUSTACK_RUNTIME_DETECT": lambda: getenv(
         "GPUSTACK_RUNTIME_DETECT",
         "Auto",
@@ -271,6 +273,10 @@
             "cuda=nvidia.com/devices;",
         ),
     ),
+    "GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY": lambda: to_bool(
+        getenv("GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY", "1"),
+    ),
+    ## Deployer
     "GPUSTACK_RUNTIME_DEPLOY": lambda: getenv(
         "GPUSTACK_RUNTIME_DEPLOY",
         "Auto",
@@ -373,11 +379,8 @@
         ),
         sep=",",
     ),
-    # Detector
-    "GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY": lambda: to_bool(
-        getenv("GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY", "1"),
-    ),
     # Deployer
+    ## Docker
     "GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS": lambda: to_dict(
         getenv(
             "GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS",
@@ -401,6 +404,10 @@
     "GPUSTACK_RUNTIME_DOCKER_MUTE_ORIGINAL_HEALTHCHECK": lambda: to_bool(
         getenv("GPUSTACK_RUNTIME_DOCKER_MUTE_ORIGINAL_HEALTHCHECK", "1"),
     ),
+    "GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION": lambda: to_bool(
+        getenv("GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION", "0"),
+    ),
+    ## Kubernetes
     "GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME": lambda: getenv(
         "GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME",
         None,