
Commit df900ce

Add nvtx markers which can be useful in perf profiling (#64)
1 parent ff103c4 commit df900ce

2 files changed (+12 lines, -1 line)

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
@@ -44,6 +44,7 @@ project(tritonpytorchbackend LANGUAGES C CXX)
 
 option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
+option(TRITON_ENABLE_NVTX "Include nvtx markers collection in backend." OFF)
 option(TRITON_PYTORCH_ENABLE_TORCHTRT "Enable TorchTRT support" OFF)
 option(TRITON_PYTORCH_ENABLE_TORCHVISION "Enable Torchvision support" ON)
 

@@ -120,6 +121,10 @@ else()
   endif()
 endif() # TRITON_ENABLE_GPU
 
+if(${TRITON_ENABLE_NVTX})
+  add_definitions(-DTRITON_ENABLE_NVTX=1)
+endif() # TRITON_ENABLE_NVTX
+
 #
 # Shared library implementing the Triton Backend API
 #
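With this change, the markers are opt-in at configure time like any other backend flag. A minimal sketch of the configure step (the build directory and any additional flags are assumptions, not part of this commit):

    cmake -DTRITON_ENABLE_NVTX=ON ..

When the option stays at its OFF default, TRITON_ENABLE_NVTX is never defined, so the NVTX_RANGE calls added in src/libtorch.cc below are expected to compile away (see the sketch after that diff).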

src/libtorch.cc

Lines changed: 7 additions & 1 deletion
@@ -33,6 +33,7 @@
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
 #include "triton/backend/backend_output_responder.h"
+#include "triton/common/nvtx.h"
 #include "triton/core/tritonbackend.h"
 
 #ifdef TRITON_PYTORCH_ENABLE_TORCHVISION

@@ -307,7 +308,6 @@ ModelState::ParseParameters()
       TRITONSERVER_ErrorDelete(err);
     }
   }
-
   LOG_MESSAGE(
       TRITONSERVER_LOG_INFO,
       (std::string("Inference Mode is ") +

@@ -926,6 +926,8 @@ ModelInstanceState::ProcessRequests(
           std::to_string(request_count) + " requests")
          .c_str());
 
+  NVTX_RANGE(nvtx_, "ProcessRequests " + Name());
+
   uint64_t exec_start_ns = 0;
   SET_TIMESTAMP(exec_start_ns);
 

@@ -1188,6 +1190,8 @@ ModelInstanceState::Execute(
     std::vector<torch::jit::IValue>* input_tensors,
     std::vector<torch::jit::IValue>* output_tensors)
 {
+  NVTX_RANGE(nvtx_, "Execute " + Name());
+
   torch::jit::IValue model_outputs_;
 
   try {

@@ -1758,6 +1762,8 @@ ModelInstanceState::ReadOutputTensors(
     TRITONBACKEND_Request** requests, const uint32_t request_count,
     std::vector<TRITONBACKEND_Response*>* responses, uint64_t* compute_end_ns)
 {
+  NVTX_RANGE(nvtx_, "ReadOutputTensors " + Name());
+
   BackendOutputResponder responder(
       requests, request_count, responses, model_state_->TritonMemoryManager(),
       model_state_->MaxBatchSize() > 0, model_state_->EnablePinnedInput(),
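The NVTX_RANGE macro comes from the newly included triton/common/nvtx.h. A minimal sketch of what such a macro typically expands to, assuming a RAII wrapper around the NVTX push/pop API (nvtxRangePushA / nvtxRangePop from NVIDIA's nvToolsExt); the class name and include path here are illustrative, not copied from the header:

    // Minimal sketch, assuming triton/common/nvtx.h wraps the NVTX push/pop API.
    #ifdef TRITON_ENABLE_NVTX
    #include <nvtx3/nvToolsExt.h>  // include path is an assumption
    #include <string>

    class NvtxRange {
     public:
      explicit NvtxRange(const std::string& label)
      {
        nvtxRangePushA(label.c_str());  // open a named range on this thread
      }
      ~NvtxRange() { nvtxRangePop(); }  // close the range when the scope exits
    };

    // Declares a scoped range object; the range spans the enclosing scope.
    #define NVTX_RANGE(V, L) NvtxRange V(L)
    #else
    // Compiles away entirely when NVTX support is disabled.
    #define NVTX_RANGE(V, L)
    #endif  // TRITON_ENABLE_NVTX

Because each NVTX_RANGE above is declared at the top of a function body, it brackets that entire function, so the ProcessRequests, Execute, and ReadOutputTensors phases appear as named ranges on the timeline of an NVTX-aware profiler such as Nsight Systems (e.g. nsys profile --trace=nvtx,cuda ...).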
