 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
 #include "triton/backend/backend_output_responder.h"
+#include "triton/common/nvtx.h"
 #include "triton/core/tritonbackend.h"
 
 #ifdef TRITON_PYTORCH_ENABLE_TORCHVISION
@@ -307,7 +308,6 @@ ModelState::ParseParameters()
       TRITONSERVER_ErrorDelete(err);
     }
   }
-
   LOG_MESSAGE(
       TRITONSERVER_LOG_INFO,
       (std::string("Inference Mode is ") +
@@ -926,6 +926,8 @@ ModelInstanceState::ProcessRequests(
        std::to_string(request_count) + " requests")
           .c_str());
 
+  NVTX_RANGE(nvtx_, "ProcessRequests " + Name());
+
   uint64_t exec_start_ns = 0;
   SET_TIMESTAMP(exec_start_ns);
 
@@ -1188,6 +1190,8 @@ ModelInstanceState::Execute(
     std::vector<torch::jit::IValue>* input_tensors,
     std::vector<torch::jit::IValue>* output_tensors)
 {
+  NVTX_RANGE(nvtx_, "Execute " + Name());
+
   torch::jit::IValue model_outputs_;
 
   try {
@@ -1758,6 +1762,8 @@ ModelInstanceState::ReadOutputTensors(
     TRITONBACKEND_Request** requests, const uint32_t request_count,
     std::vector<TRITONBACKEND_Response*>* responses, uint64_t* compute_end_ns)
 {
+  NVTX_RANGE(nvtx_, "ReadOutputTensors " + Name());
+
   BackendOutputResponder responder(
       requests, request_count, responses, model_state_->TritonMemoryManager(),
       model_state_->MaxBatchSize() > 0, model_state_->EnablePinnedInput(),
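Note: the NVTX_RANGE macro added at the top of ProcessRequests, Execute, and ReadOutputTensors comes from triton/common/nvtx.h. As a rough illustration of the pattern (not the actual implementation), an RAII-style NVTX range pushes a named range when the local object is constructed and pops it when the object goes out of scope, and compiles away to nothing when NVTX support is disabled. In the sketch below, the class name, include path, and TRITON_ENABLE_NVTX guard are assumptions; only nvtxRangePushA/nvtxRangePop are standard NVTX C API calls.

// Hypothetical sketch only -- the real macro lives in triton/common/nvtx.h
// and may differ. Include path and build-flag name are assumptions.
#include <string>
#include <nvToolsExt.h>  // NVTX C API: nvtxRangePushA / nvtxRangePop

class NvtxRangeSketch {
 public:
  // Open a named profiling range when the object is constructed.
  explicit NvtxRangeSketch(const std::string& label)
  {
    nvtxRangePushA(label.c_str());
  }
  // Close the range when the object leaves scope, so the range spans
  // exactly the enclosing block (e.g. ProcessRequests or Execute).
  ~NvtxRangeSketch() { nvtxRangePop(); }

  NvtxRangeSketch(const NvtxRangeSketch&) = delete;
  NvtxRangeSketch& operator=(const NvtxRangeSketch&) = delete;
};

// A macro in this style makes NVTX_RANGE(nvtx_, "Execute " + Name())
// declare a scoped local named nvtx_; when NVTX support is compiled out,
// the macro expands to nothing.
#ifdef TRITON_ENABLE_NVTX  // assumed flag name
#define NVTX_RANGE(VAR, LABEL) NvtxRangeSketch VAR(LABEL)
#else
#define NVTX_RANGE(VAR, LABEL)
#endif

Because the range is tied to the object's lifetime, no explicit "end" call is needed even on early returns or exceptions, which is why a single macro at the start of each instrumented function is enough to cover the whole call.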