Skip to content

Commit 0220e01

Browse files
authored
Fix stream synchronization (#77)
1 parent bee8fde commit 0220e01

File tree

1 file changed

+9
-13
lines changed

1 file changed

+9
-13
lines changed

src/libtorch.cc

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,15 +1131,12 @@ ModelInstanceState::ProcessRequests(
         &cuda_copy));
   }

-  // If the instance kind is not GPU, we need to synchronize the CUDA stream
-  if (Kind() != TRITONSERVER_INSTANCEGROUPKIND_GPU) {
 #ifdef TRITON_ENABLE_GPU
-    if (cuda_copy) {
-      cudaStreamSynchronize(stream_);
-      cuda_copy = false;
-    }
-#endif
+  if (cuda_copy) {
+    cudaStreamSynchronize(stream_);
+    cuda_copy = false;
   }
+#endif

   std::vector<torch::jit::IValue> output_tensors;
   uint64_t compute_start_ns = 0;
@@ -1944,14 +1941,13 @@ ModelInstanceState::ReadOutputTensors(
   // Finalize and wait for any pending buffer copies.
   cuda_copy |= responder.Finalize();

-  if (Kind() != TRITONSERVER_INSTANCEGROUPKIND_GPU) {
 #ifdef TRITON_ENABLE_GPU
-    if (cuda_copy) {
-      cudaStreamSynchronize(stream_);
-      cuda_copy = false;
-    }
+  // We have to always synchronize the stream. This is to make sure that
+  // the events on the cuda stream are synchronized. Otherwise, the events
+  // are only guaranteed to be synchronized if the model provides the output
+  // on GPU.
+  cudaStreamSynchronize(stream_);
 #endif
-  }

   return nullptr;
 }

0 commit comments

Comments
 (0)