@@ -665,6 +665,7 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
   ScopedDefer _(
       [this, &execute_response] { SendIPCMessage(execute_response); });
   py::object execute_return;
+  py::object coroutine_return;
   try {
     if (!py::hasattr(model_instance_, "execute")) {
       std::string message = "Python model " + model_context_.PythonModelPath() +
@@ -685,7 +686,7 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
         // Do not wait for async decoupled execute to return.
         RunCoroutine(execute_return, true /* in_background */);
       } else {
-        py::object coroutine_return =
+        coroutine_return =
             RunCoroutine(execute_return, false /* in_background */);
         ProcessReturnedResponses(
             py_request_list, coroutine_return, response_batch);
@@ -733,6 +734,7 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
     }
   } else {
     if (!response_batch) {
+      std::cerr << "===== response_batch is not set" << std::endl;
       response_batch = shm_pool_->Construct<char>(
           sizeof(ResponseBatch) + sizeof(IPCMessageShm));
       ResponseBatch* response_batch_shm_ptr = reinterpret_cast<ResponseBatch*>(
@@ -743,6 +745,8 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
           response_batch.value().data_.get() + sizeof(IPCMessageShm));
       response_batch_shm_ptr->has_error = false;
       response_batch_shm_ptr->is_error_set = false;
+      std::cerr << "===== response_batch_shm_ptr->batch_size: "
+                << response_batch_shm_ptr->batch_size << std::endl;
     }

     execute_response = IPCMessage::Create(
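Both branches above build the response batch as a single shared-memory allocation: `Construct<char>(sizeof(ResponseBatch) + sizeof(IPCMessageShm))` reserves room for an IPC message header followed by the batch, the `ResponseBatch*` is taken at offset `sizeof(IPCMessageShm)`, and `IPCMessage::Create` wraps the start of the same buffer. The self-contained sketch below mirrors only that layout; the structs are stand-ins, not the real python_backend shared-memory definitions.

#include <cstdint>
#include <vector>

// Stand-in structs; the real IPCMessageShm/ResponseBatch carry more fields.
struct IPCMessageShm { uint32_t command; };
struct ResponseBatch {
  uint32_t batch_size;
  bool has_error;
  bool is_error_set;
};

int main()
{
  // One allocation holds the IPC message header followed by the batch, so a
  // single shared-memory region can be handed back to the parent process.
  std::vector<char> shm(sizeof(IPCMessageShm) + sizeof(ResponseBatch));

  auto* ipc_header = reinterpret_cast<IPCMessageShm*>(shm.data());
  auto* batch =
      reinterpret_cast<ResponseBatch*>(shm.data() + sizeof(IPCMessageShm));

  ipc_header->command = 0;
  batch->batch_size = 0;
  batch->has_error = false;
  batch->is_error_set = false;
  return 0;
}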
@@ -779,6 +783,27 @@ Stub::ProcessReturnedResponses(
   }
   // Only non-decoupled may return responses.
   if (IsDecoupled()) {
+    // For decoupled mode, if a response was already sent from the response
+    // sender, along with the complete final flag, before returning from this
+    // error, then use the `is_response_factory_deleted` flag to notify the
+    // backend NOT to delete the response factory again during error
+    // handling.
+    for (py::handle py_request : py_requests) {
+      InferRequest* request = py_request.cast<InferRequest*>();
+      if (request->GetResponseSender()->IsClosed()) {
+        // Notify the backend NOT to delete the response factory again during
+        // error handling.
+        if (!response_batch) {
+          response_batch = std::move(shm_pool_->Construct<char>(
+              sizeof(ResponseBatch) + sizeof(IPCMessageShm)));
+        }
+        ResponseBatch* response_batch_shm_ptr =
+            reinterpret_cast<ResponseBatch*>(
+                response_batch.value().data_.get() + sizeof(IPCMessageShm));
+        response_batch_shm_ptr->is_response_factory_deleted = true;
+      }
+    }
+
     throw PythonBackendException(
         "Python model '" + name_ +
         "' is using the decoupled mode and the execute function must return "
@@ -821,8 +846,31 @@ Stub::ProcessReturnedResponses(
       }

       InferResponse* response = py_responses[i].cast<InferResponse*>();
-      request->GetResponseSender()->UpdateStateAndCounters(
-          response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
+
+      try {
+        request->GetResponseSender()->UpdateStateAndCounters(
+            response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
+      }
+      catch (const PythonBackendException& pb_exception) {
+        // Special case for default (non-decoupled) mode, where the response
+        // factory should already have been cleaned up with the previous
+        // response sent from the response sender, yet the model tries to
+        // return another response from `execute()`. Notify the backend NOT
+        // to delete the response factory again during error handling.
+        std::string error_string = pb_exception.what();
+        if (error_string.find(
+                "Non-decoupled model cannot send more than one response") !=
+            std::string::npos) {
+          response_batch = std::move(shm_pool_->Construct<char>(
+              sizeof(ResponseBatch) + sizeof(IPCMessageShm)));
+          ResponseBatch* response_batch_shm_ptr =
+              reinterpret_cast<ResponseBatch*>(
+                  response_batch.value().data_.get() + sizeof(IPCMessageShm));
+          response_batch_shm_ptr->is_response_factory_deleted = true;
+          LOG_ERROR << "=== caught error: " << pb_exception.what();
+        }
+        throw pb_exception;
+      }
     }
   }
   // Return all the created responses using response_batch. The reason
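The string matched in the catch block above originates from the response sender's state check: in default (non-decoupled) mode, a second response is rejected whether it is sent through the response sender or returned from `execute()`. The toy class below is a simplified, hypothetical model of that check (the real `UpdateStateAndCounters` lives in the stub's response sender and tracks more state); it only shows where the "Non-decoupled model cannot send more than one response" error comes from and why the factory is already gone when it fires.

#include <cstddef>
#include <stdexcept>

// Simplified, hypothetical model of the response sender's state check; the
// real implementation also manages the shared-memory response and flags.
class ToyResponseSender {
 public:
  explicit ToyResponseSender(bool decoupled) : decoupled_(decoupled) {}

  // Mirrors UpdateStateAndCounters: called once per response, with
  // `final_flag` standing in for TRITONSERVER_RESPONSE_COMPLETE_FINAL.
  void UpdateStateAndCounters(bool final_flag)
  {
    if (!decoupled_ && response_count_ > 0) {
      // This is the error text the catch block above searches for.
      throw std::runtime_error(
          "Non-decoupled model cannot send more than one response");
    }
    ++response_count_;
    closed_ = closed_ || final_flag;  // IsClosed() is true after the final response
  }

  bool IsClosed() const { return closed_; }

 private:
  bool decoupled_;
  bool closed_ = false;
  std::size_t response_count_ = 0;
};

int main()
{
  ToyResponseSender sender(/*decoupled=*/false);
  sender.UpdateStateAndCounters(true);    // model sends its one final response
  try {
    sender.UpdateStateAndCounters(true);  // then also returns one from execute()
  }
  catch (const std::runtime_error&) {
    // The factory tied to the first response is already gone here, which is
    // why the stub sets is_response_factory_deleted before rethrowing.
  }
  return 0;
}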