Skip to content

Commit 4d93fc8

Browse files
jantonguirao authored and JanuszL committed
Fix nvJPEGDecoder cache when using new nvJPEG decoupled API (#748)
Signed-off-by: Joaquin Anton <janton@nvidia.com>
1 parent e9e9390 commit 4d93fc8

File tree

2 files changed

+43
-6
lines changed

2 files changed

+43
-6
lines changed

dali/pipeline/operators/decoder/nvjpeg_decoder_decoupled_api.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -224,19 +224,16 @@ class nvJPEGDecoder : public Operator<MixedBackend>, CachedDecoderImpl {
224224

225225
const auto &in = ws->Input<CPUBackend>(0, i);
226226
const auto file_name = in.GetSourceInfo();
227-
cudaStream_t stream = ws->stream();
228227
auto *output_data = output.mutable_tensor<uint8_t>(i);
229228
auto dims = output_shape_[i];
230229
ImageCache::ImageShape shape = {dims[0], dims[1], dims[2]};
231230
thread_pool_.DoWorkWithID(
232-
[this, i, file_name, stream, &in, output_data, shape](int tid) {
233-
if (CacheLoad(file_name, shape, output_data, stream))
231+
[this, i, file_name, &in, output_data, shape](int tid) {
232+
if (CacheLoad(file_name, shape, output_data, streams_[tid]))
234233
return;
235-
236234
SampleWorker(i, file_name, in.size(), tid,
237235
in.data<uint8_t>(), output_data);
238-
239-
CacheStore(file_name, output_data, shape, stream);
236+
CacheStore(file_name, output_data, shape, streams_[tid]);
240237
});
241238
}
242239

dali/test/python/test_pipeline.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222

2323
caffe_db_folder = "/data/imagenet/train-lmdb-256x256"
2424

25+
def check_batch(batch1, batch2, batch_size, eps = 0.0000001):
26+
for i in range(batch_size):
27+
err = np.mean( np.abs(batch1.at(i) - batch2.at(i)) )
28+
assert(err < eps)
29+
2530
def test_tensor_multiple_uses():
2631
batch_size = 128
2732
class HybridPipe(Pipeline):
@@ -772,3 +777,38 @@ def iter_setup(self):
772777
assert out2.shape == out4.shape
773778
np.testing.assert_array_equal( expected_last, out2 )
774779
np.testing.assert_array_equal( expected_last, out4 )
780+
781+
def test_nvjpegdecoder_cached_vs_non_cached():
782+
"""
783+
Checking that cached nvJPEGDecoder produces the same output as non cached version
784+
"""
785+
batch_size = 26
786+
787+
class ComparePipeline(Pipeline):
788+
def __init__(self, batch_size=batch_size, num_threads=1, device_id=0, num_gpus=10000):
789+
super(ComparePipeline, self).__init__(batch_size, num_threads, device_id, prefetch_queue_depth = 1)
790+
self.input = ops.CaffeReader(path = caffe_db_folder, shard_id = device_id, num_shards = num_gpus, stick_to_shard = True)
791+
self.decode_non_cached = ops.nvJPEGDecoder(device = "mixed", output_type = types.RGB)
792+
self.decode_cached = ops.nvJPEGDecoder(device = "mixed", output_type = types.RGB,
793+
cache_size=8000,
794+
cache_threshold=0,
795+
cache_type='threshold',
796+
cache_debug=False)
797+
798+
def define_graph(self):
799+
self.jpegs, self.labels = self.input()
800+
images_non_cached = self.decode_non_cached(self.jpegs)
801+
images_cached = self.decode_cached(self.jpegs)
802+
return (images_non_cached, images_cached)
803+
804+
def iter_setup(self):
805+
pass
806+
807+
pipe = ComparePipeline()
808+
pipe.build()
809+
N_iterations = 100
810+
for k in range(N_iterations):
811+
pipe_out = pipe.run()
812+
non_cached_data = pipe_out[0].as_cpu()
813+
cached_data = pipe_out[1].as_cpu()
814+
check_batch(non_cached_data, cached_data, batch_size)

0 commit comments

Comments (0)