@@ -843,8 +843,6 @@ ModelInstanceState::ProcessCleanupRequest(
     infer_payload_.erase(id);
   } else if (message->Command() == PYTHONSTUB_DecoupledResponseFactoryCleanup) {
     // Delete response factory
-    std::cerr << "=== ResponseFactoryDeleter -> ProcessCleanupRequest ==="
-              << std::endl;
     std::unique_ptr<
         TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
         response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(id));
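Each of these deletions leaves the surrounding RAII pattern intact: the factory is reclaimed not by an explicit `delete`, but by adopting the raw address into a `std::unique_ptr` with a custom deleter, so destruction fires when the pointer leaves scope. A minimal sketch of the same idiom, with a hypothetical `Widget`/`WidgetDeleter` pair standing in for `TRITONBACKEND_ResponseFactory` and `backend::ResponseFactoryDeleter`:

```cpp
#include <cstdint>
#include <iostream>
#include <memory>

struct Widget {};  // stands in for TRITONBACKEND_ResponseFactory

// Stands in for backend::ResponseFactoryDeleter; the real deleter calls the
// C API's destroy function rather than operator delete.
struct WidgetDeleter {
  void operator()(Widget* w) const
  {
    std::cout << "widget destroyed\n";
    delete w;
  }
};

// Mirrors the cleanup path above: the factory arrives as an integer id,
// is adopted by a scoped smart pointer, and is destroyed at end of scope.
void ProcessCleanup(intptr_t id)
{
  std::unique_ptr<Widget, WidgetDeleter> widget(
      reinterpret_cast<Widget*>(id));
}

int main()
{
  intptr_t id = reinterpret_cast<intptr_t>(new Widget);
  ProcessCleanup(id);  // prints "widget destroyed"
}
```

Because the cleanup hangs off the destructor, removing the `std::cerr` lines cannot change when or whether the factory is released.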
@@ -1165,8 +1163,6 @@ ModelInstanceState::ResponseSendDecoupled(
     TRITONBACKEND_ResponseFactory* response_factory =
         reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
             send_message_payload->response_factory_address);
-    std::cerr << "=== ResponseFactoryDeleter -> ResponseSendDecoupled ==="
-              << std::endl;
     std::unique_ptr<
         TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
         lresponse_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
@@ -1366,20 +1362,11 @@ ModelInstanceState::ProcessRequests(
   reporter.SetBatchStatistics(total_batch_size);
 
   if (response_batch_shm_ptr->has_error) {
-    // The "is_response_factory_deleted" flag indicates whether the response
-    // factory has been deleted. The flag is used in a corner case
-    // where after the response sender sends a response and complete final flag,
-    // and closes the response factory, the model returns a response from
-    // `execute()`. For both default and decoupled mode, upon handling that
-    // error, no need to delete the response factory.
     if (!response_batch_shm_ptr->is_response_factory_deleted) {
       for (uint32_t r = 0; r < request_count; r++) {
         TRITONBACKEND_ResponseFactory* response_factory =
             reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
                 pb_infer_requests[r]->GetResponseFactoryAddress());
-        std::cerr << "=== ResponseFactoryDeleter -> "
-                     "response_batch_shm_ptr->has_error ==="
-                  << std::endl;
         std::unique_ptr<
             TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
             lresponse_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
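All of the removed lines write straight to `std::cerr`, which ignores the server's log verbosity settings and cannot be switched off. If this tracing is ever wanted again, going through the backend's logging utilities would be the idiomatic route; a hedged sketch, assuming the `LOG_MESSAGE` macro and `TRITONSERVER_LOG_VERBOSE` level from the Triton backend common headers (the message text is illustrative):

```cpp
#include <cstdint>
#include <string>

#include "triton/backend/backend_common.h"

// Replaces a raw `std::cerr << "=== ... ===" << std::endl;` trace with a
// message that honors --log-verbose and the server's log format.
void TraceFactoryCleanup(uint32_t request_index)
{
  LOG_MESSAGE(
      TRITONSERVER_LOG_VERBOSE,
      (std::string("deleting response factory for request ") +
       std::to_string(request_index))
          .c_str());
}
```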
@@ -1411,7 +1398,6 @@ ModelInstanceState::ProcessRequests(
       // usage of response sender, so only create a TRITONBACKEND_Response
       // object for the valid responses, and skip the None responses later.
       if (response_shm_handle[i] == 0) {
-        std::cerr << "=== PYBE response_shm_handle is 0 ===" << std::endl;
         responses->emplace_back(nullptr);
       } else {
         TRITONBACKEND_Response* response;
@@ -1434,18 +1420,15 @@ ModelInstanceState::ProcessRequests(
       gpu_output_buffers(request_count);
   GPUBuffersHelper gpu_buffer_helper;
 
-  std::cerr << "=== PYBE request_count: " << request_count << std::endl;
   for (uint32_t r = 0; r < request_count; ++r) {
     NVTX_RANGE(nvtx_, "LoadingResponse " + Name());
+    requires_deferred_callback.push_back(false);
     if (response_shm_handle[r] == 0) {
-      std::cerr << "=== PYBE skip the response_shm_handle is 0 ==="
-                << std::endl;
       continue;
     }
     TRITONBACKEND_Response* response = (*responses)[r];
     TRITONBACKEND_Request* request = requests[r];
     uint32_t requested_output_count = 0;
-    requires_deferred_callback.push_back(false);
 
     shm_responses.emplace_back(nullptr);
     std::unique_ptr<InferResponse>& infer_response = shm_responses.back();
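The `requires_deferred_callback.push_back(false)` move is the one behavioral change in this diff: the vector now gains an entry for every request, including the ones skipped because their shared-memory handle is 0. With the old placement after the `continue`, a skipped request produced no entry, and every later index in `requires_deferred_callback` was shifted relative to `requests`. A stripped-down illustration of the misalignment this placement avoids (names are illustrative, not taken from the backend):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
  // Three requests; the middle one is "skipped" (handle == 0), mirroring
  // the `response_shm_handle[r] == 0` check in the loop above.
  std::vector<int> handles = {7, 0, 9};
  std::vector<bool> needs_callback;

  for (std::size_t r = 0; r < handles.size(); ++r) {
    needs_callback.push_back(false);  // fixed placement: record before skipping
    if (handles[r] == 0) {
      continue;  // the old placement pushed after this, losing slot r
    }
    needs_callback[r] = true;  // later per-request work keyed by the same r
  }

  // Every request index owns a slot, so the flags stay aligned with requests.
  assert(needs_callback.size() == handles.size());
  assert(needs_callback[0] && !needs_callback[1] && needs_callback[2]);
}
```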
@@ -1459,21 +1442,10 @@ ModelInstanceState::ProcessRequests(
       (*responses)[r] = nullptr;
       continue;
     }
-
-    // if (response_shm_handle[r] == 0) {
-    //   std::cerr << "=== PYBE response_shm_handle is 0 ===" << std::endl;
-    //   LOG_IF_ERROR(
-    //       TRITONBACKEND_ResponseDelete((*responses)[r]),
-    //       "failed to delete response");
-    //   (*responses)[r] = nullptr;
-    //   continue;
-    // }
     {
       TRITONBACKEND_ResponseFactory* response_factory =
           reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
               pb_infer_requests[r]->GetResponseFactoryAddress());
-      std::cerr << "=== ResponseFactoryDeleter -> regular workflow ==="
-                << std::endl;
       std::unique_ptr<
           TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
           lresponse_factory(
@@ -1522,17 +1494,13 @@ ModelInstanceState::ProcessRequests(
     GUARDED_RESPOND_IF_ERROR(
         responses, r,
         TRITONBACKEND_RequestOutputCount(request, &requested_output_count));
-    std::cerr << "=== PYBE requested_output_count: " << requested_output_count
-              << std::endl;
     std::set<std::string> requested_output_names;
     for (size_t j = 0; j < requested_output_count; ++j) {
       const char* output_name;
       GUARDED_RESPOND_IF_ERROR(
           responses, r,
           TRITONBACKEND_RequestOutputName(request, j, &output_name));
       requested_output_names.insert(output_name);
-      std::cerr << "=== PYBE requested_output_name: " << output_name
-                << std::endl;
     }
 
     bool require_deferred_callback = false;
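For context, the two prints removed here sat in the loop that gathers the set of output names the client actually requested, which the backend later uses to skip outputs the model produced but nobody asked for. A simplified sketch of that enumeration, assuming the documented `TRITONBACKEND_RequestOutputCount` / `TRITONBACKEND_RequestOutputName` calls and the `RETURN_IF_ERROR` helper in place of the `GUARDED_RESPOND_IF_ERROR` macro used above:

```cpp
#include <cstdint>
#include <set>
#include <string>

#include "triton/backend/backend_common.h"

// Collect the names of the outputs requested for one inference request.
TRITONSERVER_Error*
CollectRequestedOutputs(
    TRITONBACKEND_Request* request, std::set<std::string>* names)
{
  uint32_t count = 0;
  RETURN_IF_ERROR(TRITONBACKEND_RequestOutputCount(request, &count));
  for (uint32_t j = 0; j < count; ++j) {
    const char* output_name = nullptr;
    RETURN_IF_ERROR(TRITONBACKEND_RequestOutputName(request, j, &output_name));
    names->insert(output_name);
  }
  return nullptr;  // success
}
```

Outputs not in the resulting set can then be dropped without ever allocating a response buffer for them.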