Fix stream synchronization bug in CUDA JPEG encoding

dominikkallusky · dominikkallusky · commit 7df3ee3c4451 · 2025-02-20T14:41:18.000-08:00
diff --git a/test/test_image.py b/test/test_image.py
@@ -622,6 +622,7 @@ def test_encode_jpeg_cuda(img_path, scripted, contiguous):
     abs_mean_diff = (decoded_jpeg_cuda_tv.float() - decoded_image_tv.float()).abs().mean().item()
     assert abs_mean_diff < 3
 
+
 @needs_cuda
 def test_encode_jpeg_cuda_sync():
     """
@@ -636,14 +637,13 @@ def test_encode_jpeg_cuda_sync():
 
     # manual testing shows this bug appearing often in iterations between 50 and 100
     # as a synchronization bug, this can't be reliably reproduced
-    max_iterations = 200
+    max_iterations = 100
     threshold = 5.0  # in [0..255]
 
     device = torch.device("cuda")
 
     for iteration in range(max_iterations):
-        # Randomly pick a small square image size in [1..64]
-        size = np.random.randint(1, 65)
+        size = np.random.randint(4000, 5000)
         height, width = size, size
 
         image = torch.linspace(0, 1, steps=height * width, device=device)
diff --git a/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp
@@ -108,10 +108,7 @@ std::vector<torch::Tensor> encode_jpegs_cuda(
   // do not want to block the host at this particular point
   // (which is what cudaStreamSynchronize would do.) Events allow us to
   // synchronize the streams without blocking the host.
-  event.block(at::cuda::getCurrentCUDAStream(
-      cudaJpegEncoder->original_device.has_index()
-          ? cudaJpegEncoder->original_device.index()
-          : 0));
+  event.block(cudaJpegEncoder->current_stream);
   return encoded_images;
 }
 
@@ -121,7 +118,11 @@ CUDAJpegEncoder::CUDAJpegEncoder(const torch::Device& target_device)
       stream{
           target_device.has_index()
               ? at::cuda::getStreamFromPool(false, target_device.index())
-              : at::cuda::getStreamFromPool(false)} {
+              : at::cuda::getStreamFromPool(false)},
+      current_stream{
+          original_device.has_index()
+              ? at::cuda::getCurrentCUDAStream(original_device.index())
+              : at::cuda::getCurrentCUDAStream()} {
   nvjpegStatus_t status;
   status = nvjpegCreateSimple(&nvjpeg_handle);
   TORCH_CHECK(
@@ -186,12 +187,17 @@ CUDAJpegEncoder::~CUDAJpegEncoder() {
 }
 
 torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) {
+  nvjpegStatus_t status;
+  cudaError_t cudaStatus;
+
+  // Ensure that the incoming src_image is safe to use
+  cudaStatus = cudaStreamSynchronize(current_stream);
+  TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus);
+
   int channels = src_image.size(0);
   int height = src_image.size(1);
   int width = src_image.size(2);
 
-  nvjpegStatus_t status;
-  cudaError_t cudaStatus;
   status = nvjpegEncoderParamsSetSamplingFactors(
       nv_enc_params, NVJPEG_CSS_444, stream);
   TORCH_CHECK(
diff --git a/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.h b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.h
@@ -22,6 +22,7 @@ class CUDAJpegEncoder {
   const torch::Device original_device;
   const torch::Device target_device;
   const c10::cuda::CUDAStream stream;
+  const c10::cuda::CUDAStream current_stream;
 
  protected:
   nvjpegEncoderState_t nv_enc_state;