Removing RPD in favor of torch profiler for V1 (ROCm#558)

gshtras · web-flow · commit f4a992cc2696 · 2025-05-29T11:09:20.000-04:00
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
@@ -1,6 +1,5 @@
 # default base image
 ARG REMOTE_VLLM="0"
-ARG BUILD_RPD="1"
 ARG COMMON_WORKDIR=/app
 ARG BASE_IMAGE=rocm/vllm-dev:base
 
@@ -87,13 +86,6 @@ RUN case "$(which python3)" in \
         *) ;; esac
 
 RUN python3 -m pip install --upgrade huggingface-hub[cli]
-ARG BUILD_RPD
-RUN if [ ${BUILD_RPD} -eq "1" ]; then \
-    git clone -b nvtx_enabled https://github.com/ROCm/rocmProfileData.git \
-    && cd rocmProfileData/rpd_tracer \
-    && pip install -r requirements.txt && cd ../ \
-    && make && make install \
-    && cd hipMarker && python3 setup.py install ; fi
 
 # Install vLLM
 RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
diff --git a/docs/getting_started/installation/gpu/rocm.inc.md b/docs/getting_started/installation/gpu/rocm.inc.md
@@ -179,8 +179,6 @@ It is important that the user kicks off the docker build using buildkit. Either
 It provides flexibility to customize the build of docker image using the following arguments:
 
 - `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:docker/Dockerfile.rocm_base>
-- `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build
-- `BUILD_RPD`: Include RocmProfileData profiling tool in the image
 - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image
 
 Their values can be passed in when running `docker build` with `--build-arg` options.
diff --git a/vllm/envs.py b/vllm/envs.py
@@ -75,7 +75,6 @@
     VLLM_PLUGINS: Optional[list[str]] = None
     VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None
     VLLM_TORCH_PROFILER_DIR: Optional[str] = None
-    VLLM_RPD_PROFILER_DIR: Optional[str] = None
     VLLM_USE_TRITON_AWQ: bool = False
     VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
     VLLM_SKIP_P2P_CHECK: bool = False
@@ -588,12 +587,6 @@ def get_vllm_port() -> Optional[int]:
     lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
              .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
 
-    # Enables rpd profiler if set. Path to the directory where torch profiler
-    # traces are saved. Note that it must be an absolute path.
-    "VLLM_RPD_PROFILER_DIR":
-    lambda: (None if os.getenv("VLLM_RPD_PROFILER_DIR", None) is None else os.
-             path.expanduser(os.getenv("VLLM_RPD_PROFILER_DIR", "."))),
-
     # If set, vLLM will use Triton implementations of AWQ.
     "VLLM_USE_TRITON_AWQ":
     lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))),
diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
@@ -21,7 +21,6 @@
                            CompletionSequenceGroupOutput, Logprob,
                            PromptLogprobs, SampleLogprobs, SequenceOutput)
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
-from vllm.utils import rpd_mark
 
 if envs.VLLM_USE_FLASHINFER_SAMPLER and find_spec("flashinfer"):
     # yapf: disable
@@ -223,7 +222,6 @@ def _init_sampling_tensors(
         self._do_top_p_top_k = do_top_p_top_k
         self._do_min_p = do_min_p
 
-    @rpd_mark(name="Sampler Forward")
     def forward(
         self,
         logits: torch.Tensor,
@@ -793,7 +791,6 @@ def _sample_with_torch(
         )
 
 
-@rpd_mark()
 def _sample(
     probs: torch.Tensor,
     logprobs: torch.Tensor,
diff --git a/vllm/utils.py b/vllm/utils.py
@@ -205,145 +205,6 @@ class _Sentinel:
 ALL_PINNED_SENTINEL = _Sentinel()
 
 
-class rpd_trace:
-
-    def __init__(self,
-                 filename=None,
-                 name=None,
-                 nvtx=False,
-                 args=None,
-                 skip=False):
-        self.skip = skip
-        if not self.skip:
-            self.name = name
-            self.args = args if args else ""
-            self.rpd = self.initialize_rpd_tracer(filename, nvtx)
-
-    def _recreate_cm(self):
-        return self
-
-    def __call__(self, func):
-        if not self.skip:
-            if self.name:
-                self.name += f"{func.__name__}"
-            else:
-                self.name = f"{func.__qualname__}"
-
-            @wraps(func)
-            def inner(*args, **kwds):
-                with self._recreate_cm():
-                    return func(*args, **kwds)
-
-            return inner
-        return func
-
-    def __enter__(self):
-        if not self.skip:
-            self.rpd.__enter__()
-            self.rpd.rangePush("python", f"{self.name}", f"{self.args}")
-        return self
-
-    def __exit__(self, *exc):
-        if not self.skip:
-            self.rpd.rangePop()
-            self.rpd.__exit__(None, None, None)
-        return False
-
-    @staticmethod
-    def setup_environment_variables(filename):
-        os.environ['RPDT_AUTOSTART'] = '0'
-        os.environ['RPDT_FILENAME'] = filename
-
-    def initialize_rpd_tracer(self, filename, nvtx):
-        try:
-            from rpdTracerControl import rpdTracerControl
-            rpd_trace.setup_environment_variables(filename)
-            rpdTracerControl.setFilename(name=filename, append=True)
-            return rpdTracerControl(nvtx=nvtx)
-        except Exception as e:
-            print(f"Error initializing rpdTracerControl: {e}")
-            raise
-
-    @staticmethod
-    def create_file(filename):
-        import sqlite3
-
-        from rocpd.schema import RocpdSchema
-        try:
-            print("Creating empty rpd schema file ...")
-            filename = str(filename)
-            with sqlite3.connect(filename) as connection:
-                schema = RocpdSchema()
-                schema.writeSchema(connection)
-                connection.commit()
-        except sqlite3.OperationalError as e:
-            print(f"SQLite operational error: {e}")
-        except Exception as e:
-            print(f"An error occurred while creating the filename: {e}")
-
-
-@cache
-def is_hipScopedMarker_available():
-    try:
-        from hipScopedMarker import hipScopedMarker
-    except ImportError:
-        hipScopedMarker = None
-    return hipScopedMarker is not None
-
-
-class rpd_mark:
-
-    def __init__(self, name=None):
-        self.name = name
-
-    def __call__(self, func):
-
-        if is_hipScopedMarker_available():
-            from hipScopedMarker import hipScopedMarker
-
-            @wraps(func)
-            def inner(*args, **kwds):
-                marker_name = self.name if self.name else f"{func.__name__}"
-                with hipScopedMarker(f"{marker_name}"):
-                    return func(*args, **kwds)
-
-            return inner
-
-        else:
-            return func
-
-
-class rpd_user_marker:
-
-    def __init__(self, name=None):
-        self.name = name
-        self.marker = None
-
-    def __enter__(self):
-        if is_hipScopedMarker_available():
-            from hipScopedMarker import hipScopedMarker
-            marker_name = self.name if self.name else "UserMarker Undefined"
-            self.marker = hipScopedMarker(f"{marker_name}")
-            self.marker.__enter__()
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if is_hipScopedMarker_available() and self.marker:
-            self.marker.__exit__(exc_type, exc_val, exc_tb)
-
-    def start(self):
-        if is_hipScopedMarker_available():
-            from hipScopedMarker import hipScopedMarker
-            marker_name = self.name if self.name else "UserMarker Undefined"
-            self.marker = hipScopedMarker(f"{marker_name}")
-            self.marker.__enter__()
-        return self
-
-    def end(self, exc_type=0, exc_val=0, exc_tb=0):
-        if is_hipScopedMarker_available() and self.marker:
-            self.marker.__exit__(exc_type, exc_val, exc_tb)
-
-
 class Device(enum.Enum):
     GPU = enum.auto()
     CPU = enum.auto()
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
@@ -2,7 +2,6 @@
 """A GPU worker class."""
 import gc
 import os
-from pathlib import Path
 from typing import Dict, List, Optional, Set, Tuple, Type, Union
 
 import torch
@@ -117,45 +116,18 @@ def __init__(
                 with_stack=True,
                 on_trace_ready=torch.profiler.tensorboard_trace_handler(
                     torch_profiler_trace_dir, use_gzip=True))
-        elif envs.VLLM_RPD_PROFILER_DIR:
-            rpd_profiler_trace_dir = Path(envs.VLLM_RPD_PROFILER_DIR)
-
-            if rpd_profiler_trace_dir.suffix != ".rpd":
-                rpd_profiler_trace_dir = rpd_profiler_trace_dir / "trace.rpd"
-
-            rpd_profiler_trace_dir.parent.mkdir(parents=True, exist_ok=True)
-
-            logger.info("Profiling enabled. Traces will be saved to: %s",
-                        rpd_profiler_trace_dir)
-
-            from vllm.utils import rpd_trace
-
-            if self.rank == 0:
-                rpd_trace.create_file(filename=str(rpd_profiler_trace_dir))
-
-            self.profiler = rpd_trace(filename=str(rpd_profiler_trace_dir),
-                                      name='Worker RPD Enabled',
-                                      nvtx=True)
         else:
             self.profiler = None
 
     def start_profile(self):
         if self.profiler is None:
             raise RuntimeError("Profiler is not enabled.")
-
-        if envs.VLLM_RPD_PROFILER_DIR:
-            self.profiler.__enter__()
-        else:
-            self.profiler.start()
+        self.profiler.start()
 
     def stop_profile(self):
         if self.profiler is None:
             raise RuntimeError("Profiler is not enabled.")
-
-        if envs.VLLM_RPD_PROFILER_DIR:
-            self.profiler.__exit__()
-        else:
-            self.profiler.stop()
+        self.profiler.stop()
 
     def sleep(self, level: int = 1) -> None:
         free_bytes_before_sleep = torch.cuda.mem_get_info()[0]