@@ -513,8 +513,7 @@ class ModelInstanceState : public BackendModelInstance {
       const uint32_t request_count,
       std::vector<TRITONBACKEND_Response*>* responses,
       BackendInputCollector* collector, std::vector<const char*>* input_names,
-      std::vector<torch::jit::IValue>* input_tensors,
-      std::vector<BackendMemory*>* input_memories, bool* cuda_copy);
+      std::vector<torch::jit::IValue>* input_tensors, bool* cuda_copy);
   TRITONSERVER_Error* ReadOutputTensors(
       size_t total_batch_size,
       const std::vector<torch::jit::IValue>& output_tensors,
@@ -1102,7 +1101,6 @@ ModelInstanceState::ProcessRequests(
 
   std::vector<const char*> input_names;
   std::vector<torch::jit::IValue> input_tensors;
-  std::vector<BackendMemory*> input_memories;
   bool cuda_copy = false;
   std::unique_ptr<BackendInputCollector> collector;
   if (Kind() == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
@@ -1124,8 +1122,7 @@ ModelInstanceState::ProcessRequests(
         responses, request_count, all_response_failed,
         SetInputTensors(
             total_batch_size, requests, request_count, &responses,
-            collector.get(), &input_names, &input_tensors, &input_memories,
-            &cuda_copy));
+            collector.get(), &input_names, &input_tensors, &cuda_copy));
   }
 
 #ifdef TRITON_ENABLE_GPU
@@ -1149,14 +1146,6 @@ ModelInstanceState::ProcessRequests(
     Execute(&responses, request_count, &input_tensors, &output_tensors);
   }
 
-  // Free BackendMemory used for inputs
-  for (BackendMemory* mem : input_memories) {
-    if (mem != nullptr) {
-      delete mem;
-    }
-  }
-  input_memories.clear();
-
   // Verify output indices are valid with number of outputs after execution
   bool invalid_index = false;
   int max_index = output_tensors.size() - 1;
@@ -1718,8 +1707,7 @@ ModelInstanceState::SetInputTensors(
     const uint32_t request_count,
     std::vector<TRITONBACKEND_Response*>* responses,
     BackendInputCollector* collector, std::vector<const char*>* input_names,
-    std::vector<torch::jit::IValue>* input_tensors,
-    std::vector<BackendMemory*>* input_memories, bool* cuda_copy)
+    std::vector<torch::jit::IValue>* input_tensors, bool* cuda_copy)
 {
   // InferenceMode should be used to guard all tensors operations
   torch::InferenceMode infer_guard(model_state_->EnabledInferenceMode());
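
The delete loop removed above is only safe if the lifetime of the input buffers is managed elsewhere. A minimal sketch of one way to do that, assuming the buffer can be handed over to libtorch (the helper name WrapOwnedBuffer and the plain heap buffer are illustrative assumptions, not the backend's actual code): attach a deleter via torch::from_blob so the buffer is released when the last tensor reference goes away, making a separate bookkeeping vector and free loop unnecessary.

    #include <torch/torch.h>

    #include <memory>
    #include <vector>

    // Hedged sketch: wrap a heap buffer in a tensor whose deleter frees it.
    torch::Tensor
    WrapOwnedBuffer(std::unique_ptr<char[]> buffer, std::vector<int64_t> shape)
    {
      char* raw = buffer.release();
      // The deleter runs once the tensor (and all views of it) are destroyed,
      // so no explicit delete is needed after execution.
      return torch::from_blob(
          raw, shape, [raw](void* /*data*/) { delete[] raw; },
          torch::TensorOptions().dtype(torch::kUInt8));
    }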