Skip to content

Commit 8bd76fb

Browse files
Lzy17 and gshtras authored
Enable user marker for vllm profiling (ROCm#357)
* Enable user marker for vllm profiling --------- Co-authored-by: Gregory Shtrasberg <[email protected]>
1 parent 5976f48 commit 8bd76fb

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

vllm/utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,37 @@ def inner(*args, **kwds):
282282
return func
283283

284284

285+
class rpd_user_marker:
    """User-defined profiling range backed by ``hipScopedMarker``.

    The range can be driven either as a context manager::

        with rpd_user_marker(name="Prefill"):
            ...

    or through the explicit ``start()`` / ``end()`` pair when the region
    does not map onto a single lexical scope.

    When ``is_hipScopedMarker_available()`` reports false, opening the range
    does nothing, and closing a range that was never opened is a no-op.

    Args:
        name: Label for the range. A falsy value (``None``, ``""``) falls
            back to ``"UserMarker Undefined"``.
    """

    def __init__(self, name=None):
        self.name = name
        # The currently open hipScopedMarker, or None when no range is open.
        self.marker = None

    def _open(self):
        """Open the range if hipScopedMarker is available; return ``self``."""
        if is_hipScopedMarker_available():
            # Imported lazily: the module is only needed when it is available.
            from hipScopedMarker import hipScopedMarker
            label = self.name or "UserMarker Undefined"
            marker = hipScopedMarker(f"{label}")
            marker.__enter__()
            # Record the marker only once it has been entered, so a failed
            # open never leaves a handle that a later close would exit.
            self.marker = marker
        return self

    def _close(self, exc_type, exc_val, exc_tb):
        """Close the open range, if any, forwarding the exception triple."""
        marker = self.marker
        if marker is None:
            # Nothing was opened (or it was already closed): stay a no-op so
            # repeated end()/__exit__ calls cannot close the range twice.
            return
        # Clear the handle first so the range counts as closed even if the
        # underlying __exit__ raises.
        self.marker = None
        marker.__exit__(exc_type, exc_val, exc_tb)

    def __enter__(self):
        """Open the range on entry to a ``with`` block and return ``self``."""
        return self._open()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the range on ``with`` exit; never suppresses exceptions."""
        self._close(exc_type, exc_val, exc_tb)

    def start(self):
        """Open the range explicitly; pair with ``end()``. Returns ``self``."""
        return self._open()

    def end(self, exc_type=0, exc_val=0, exc_tb=0):
        """Close a range opened with ``start()``.

        The arguments are forwarded unchanged to the underlying marker's
        ``__exit__``. The ``0`` defaults are kept for backward compatibility
        with existing callers.
        """
        self._close(exc_type, exc_val, exc_tb)
314+
315+
285316
class Device(enum.Enum):
286317
GPU = enum.auto()
287318
CPU = enum.auto()

vllm/worker/model_runner.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@
4747
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
4848
from vllm.utils import (DeviceMemoryProfiler, GiB_bytes, PyObjectCache,
4949
async_tensor_h2d, flatten_2d_lists,
50-
is_pin_memory_available, rpd_mark, supports_dynamo,
51-
weak_ref_tensor)
50+
is_pin_memory_available, rpd_mark, rpd_user_marker,
51+
supports_dynamo, weak_ref_tensor)
5252
from vllm.worker.model_runner_base import (
5353
ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
5454
_add_attn_metadata_broadcastable_dict,
@@ -1630,6 +1630,12 @@ def execute_model(
16301630
assert model_input.attn_metadata is not None
16311631
prefill_meta = model_input.attn_metadata.prefill_metadata
16321632
decode_meta = model_input.attn_metadata.decode_metadata
1633+
if prefill_meta:
1634+
marker_instance = rpd_user_marker(name="Prefill")
1635+
else:
1636+
marker_instance = rpd_user_marker(name="Decode")
1637+
1638+
marker_instance.start()
16331639
# TODO(andoorve): We can remove this once all
16341640
# virtual engines share the same kv cache.
16351641
virtual_engine = model_input.virtual_engine
@@ -1765,6 +1771,7 @@ def execute_model(
17651771

17661772
output.hidden_states = hidden_states
17671773

1774+
marker_instance.end()
17681775
return [output]
17691776

17701777
def need_recv_kv(self, model_input, kv_caches) -> bool:

0 commit comments

Comments
 (0)