File tree Expand file tree Collapse file tree 1 file changed +9
-13
lines changed Expand file tree Collapse file tree 1 file changed +9
-13
lines changed Original file line number Diff line number Diff line change @@ -1131,15 +1131,12 @@ ModelInstanceState::ProcessRequests(
11311131 &cuda_copy));
11321132 }
11331133
1134- // If the instance kind is not GPU, we need to synchronize the CUDA stream
1135- if (Kind () != TRITONSERVER_INSTANCEGROUPKIND_GPU) {
11361134#ifdef TRITON_ENABLE_GPU
1137- if (cuda_copy) {
1138- cudaStreamSynchronize (stream_);
1139- cuda_copy = false ;
1140- }
1141- #endif
1135+ if (cuda_copy) {
1136+ cudaStreamSynchronize (stream_);
1137+ cuda_copy = false ;
11421138 }
1139+ #endif
11431140
11441141 std::vector<torch::jit::IValue> output_tensors;
11451142 uint64_t compute_start_ns = 0 ;
@@ -1944,14 +1941,13 @@ ModelInstanceState::ReadOutputTensors(
19441941 // Finalize and wait for any pending buffer copies.
19451942 cuda_copy |= responder.Finalize ();
19461943
1947- if (Kind () != TRITONSERVER_INSTANCEGROUPKIND_GPU) {
19481944#ifdef TRITON_ENABLE_GPU
1949- if (cuda_copy) {
1950- cudaStreamSynchronize (stream_);
1951- cuda_copy = false ;
1952- }
1945+ // Always synchronize the stream here so that every event queued on the
1946+ // CUDA stream has completed before returning. Without this unconditional
1947+ // synchronization, completion is only guaranteed when the model provides
1948+ // its output on GPU.
1949+ cudaStreamSynchronize (stream_);
19531950#endif
1954- }
19551951
19561952 return nullptr ;
19571953}
You can’t perform that action at this time.
0 commit comments