@@ -146,7 +146,7 @@ gpujpeg_decoder_create(cudaStream_t stream)
146146 }
147147
148148 // Stream
149- decoder -> stream = stream ;
149+ coder -> stream = stream ;
150150
151151 if (result == 0 ) {
152152 gpujpeg_decoder_destroy (decoder );
@@ -214,7 +214,7 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, const struct gpujpeg_param
214214 fprintf (stderr , "[GPUJPEG] [Info] Reinitializing decoder.\n" );
215215 }
216216
217- if (0 == gpujpeg_coder_init_image (coder , param , param_image , decoder -> stream )) {
217+ if (0 == gpujpeg_coder_init_image (coder , param , param_image , coder -> stream )) {
218218 fprintf (stderr , "[GPUJPEG] [Error] Failed to init coder image!\n" );
219219 return -1 ;
220220 }
@@ -241,15 +241,15 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
241241
242242 coder -> start_time = coder -> param .perf_stats ? gpujpeg_get_time () : 0 ;
243243
244- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_stream , coder -> param .perf_stats , decoder -> stream , return - 1 );
244+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_stream , coder -> param .perf_stats , coder -> stream , return - 1 );
245245
246246 // Read JPEG image data
247247 if (0 != (rc = gpujpeg_reader_read_image (decoder , image , image_size ))) {
248248 fprintf (stderr , "[GPUJPEG] [Error] Decoder failed when decoding image data!\n" );
249249 return rc ;
250250 }
251251
252- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_stream , coder -> param .perf_stats , decoder -> stream , return - 1 );
252+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_stream , coder -> param .perf_stats , coder -> stream , return - 1 );
253253
254254 // check if params is ok for GPU decoder
255255 for ( int i = 0 ; i < decoder -> coder .param .comp_count ; ++ i ) {
@@ -273,7 +273,7 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
273273
274274 // Perform huffman decoding on CPU (when there are not enough segments to saturate GPU)
275275 if ( use_cpu_huffman_decoder ) {
276- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_huffman_coder , coder -> param .perf_stats , decoder -> stream , return - 1 );
276+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_huffman_coder , coder -> param .perf_stats , coder -> stream , return - 1 );
277277 if (coder -> data_quantized == NULL ) {
278278 if (gpujpeg_coder_allocate_cpu_huffman_buf (coder ) != 0 ) {
279279 return -1 ;
@@ -283,53 +283,55 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
283283 fprintf (stderr , "[GPUJPEG] [Error] Huffman decoder failed!\n" );
284284 return -1 ;
285285 }
286- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_huffman_coder , coder -> param .perf_stats , decoder -> stream , return - 1 );
286+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_huffman_coder , coder -> param .perf_stats , coder -> stream , return - 1 );
287287
288288 // Copy quantized data to device memory from cpu memory
289- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_to , coder -> param .perf_stats , decoder -> stream , return - 1 );
290- cudaMemcpyAsync (coder -> d_data_quantized , coder -> data_quantized , coder -> data_size * sizeof (int16_t ), cudaMemcpyHostToDevice , decoder -> stream );
291- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_to , coder -> param .perf_stats , decoder -> stream , return - 1 );
289+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_to , coder -> param .perf_stats , coder -> stream , return - 1 );
290+ cudaMemcpyAsync (coder -> d_data_quantized , coder -> data_quantized , coder -> data_size * sizeof (int16_t ),
291+ cudaMemcpyHostToDevice , coder -> stream );
292+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_to , coder -> param .perf_stats , coder -> stream , return - 1 );
292293
293- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_in_gpu , coder -> param .perf_stats , decoder -> stream , return - 1 );
294+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_in_gpu , coder -> param .perf_stats , coder -> stream , return - 1 );
294295 }
295296 // Perform huffman decoding on GPU (when there are enough segments to saturate GPU)
296297 else {
297- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_to , coder -> param .perf_stats , decoder -> stream , return - 1 );
298+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_to , coder -> param .perf_stats , coder -> stream , return - 1 );
298299
299300 // Reset huffman output
300- cudaMemsetAsync (coder -> d_data_quantized , 0 , coder -> data_size * sizeof (int16_t ), decoder -> stream );
301+ cudaMemsetAsync (coder -> d_data_quantized , 0 , coder -> data_size * sizeof (int16_t ), coder -> stream );
301302
302303 // Copy scan data to device memory
303304 gpujpeg_cuda_memcpy_async_partially_pinned (coder -> d_data_compressed , coder -> data_compressed , decoder -> data_compressed_size ,
304- cudaMemcpyHostToDevice , decoder -> stream ,
305+ cudaMemcpyHostToDevice , coder -> stream ,
305306 coder -> data_compressed_pinned_sz );
306307 gpujpeg_cuda_check_error ("Decoder copy compressed data to memory" , return - 1 );
307308
308309 // Copy segments to device memory
309- cudaMemcpyAsync (coder -> d_segment , coder -> segment , decoder -> segment_count * sizeof (struct gpujpeg_segment ), cudaMemcpyHostToDevice , decoder -> stream );
310+ cudaMemcpyAsync (coder -> d_segment , coder -> segment , decoder -> segment_count * sizeof (struct gpujpeg_segment ),
311+ cudaMemcpyHostToDevice , coder -> stream );
310312 gpujpeg_cuda_check_error ("Decoder copy compressed data" , return - 1 );
311313
312- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_to , coder -> param .perf_stats , decoder -> stream , return - 1 );
314+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_to , coder -> param .perf_stats , coder -> stream , return - 1 );
313315
314- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_in_gpu , coder -> param .perf_stats , decoder -> stream , return - 1 );
315- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_huffman_coder , coder -> param .perf_stats , decoder -> stream , return - 1 );
316+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_in_gpu , coder -> param .perf_stats , coder -> stream , return - 1 );
317+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_huffman_coder , coder -> param .perf_stats , coder -> stream , return - 1 );
316318 // Perform huffman decoding
317319 if (0 != gpujpeg_huffman_gpu_decoder_decode (decoder )) {
318320 fprintf (stderr , "[GPUJPEG] [Error] Huffman decoder on GPU failed!\n" );
319321 return -1 ;
320322 }
321- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_huffman_coder , coder -> param .perf_stats , decoder -> stream , return - 1 );
323+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_huffman_coder , coder -> param .perf_stats , coder -> stream , return - 1 );
322324 }
323325
324- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_dct_quantization , coder -> param .perf_stats , decoder -> stream , return - 1 );
326+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_dct_quantization , coder -> param .perf_stats , coder -> stream , return - 1 );
325327
326328 // Perform IDCT and dequantization (own CUDA implementation)
327329 if (0 != gpujpeg_idct_gpu (decoder )) {
328330 return -1 ;
329331 }
330332 // gpujpeg_idct_cpu(decoder);
331333
332- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_dct_quantization , coder -> param .perf_stats , decoder -> stream , return - 1 );
334+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_dct_quantization , coder -> param .perf_stats , coder -> stream , return - 1 );
333335
334336 // Reallocate buffers if not enough size
335337 if ( coder -> data_raw_size > coder -> data_raw_allocated_size ) {
@@ -356,33 +358,33 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
356358 coder -> d_data_raw = output -> data ;
357359 }
358360 else if (output -> type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE && output -> texture -> texture_callback_attach_opengl == NULL ) {
359- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_map , coder -> param .perf_stats , decoder -> stream , return - 1 );
361+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_map , coder -> param .perf_stats , coder -> stream , return - 1 );
360362
361363 // Use OpenGL texture as decoding destination
362364 size_t data_size = 0 ;
363365 uint8_t * d_data = gpujpeg_opengl_texture_map (output -> texture , & data_size );
364366 assert (data_size == coder -> data_raw_size );
365367 coder -> d_data_raw = d_data ;
366368
367- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_map , coder -> param .perf_stats , decoder -> stream , return - 1 );
369+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_map , coder -> param .perf_stats , coder -> stream , return - 1 );
368370 }
369371 else {
370372 // Use internal CUDA buffer as decoding destination
371373 coder -> d_data_raw = coder -> d_data_raw_allocated ;
372374 }
373375
374376 // Postprocessing
375- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_preprocessor , coder -> param .perf_stats , decoder -> stream , return - 1 );
376- rc = gpujpeg_postprocessor_decode (& decoder -> coder , decoder -> stream );
377+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_preprocessor , coder -> param .perf_stats , coder -> stream , return - 1 );
378+ rc = gpujpeg_postprocessor_decode (& decoder -> coder , coder -> stream );
377379 if (rc != GPUJPEG_NOERR ) {
378380 return rc ;
379381 }
380- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_preprocessor , coder -> param .perf_stats , decoder -> stream , return - 1 );
382+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_preprocessor , coder -> param .perf_stats , coder -> stream , return - 1 );
381383
382384 // Wait for async operations before copying from the device
383- cudaStreamSynchronize (decoder -> stream );
385+ cudaStreamSynchronize (coder -> stream );
384386
385- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_in_gpu , coder -> param .perf_stats , decoder -> stream , return - 1 );
387+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_in_gpu , coder -> param .perf_stats , coder -> stream , return - 1 );
386388
387389 // Set decompressed image size
388390 output -> data_size = coder -> data_raw_size * sizeof (uint8_t );
@@ -393,52 +395,52 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
393395
394396 // Set decompressed image
395397 if (output -> type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER ) {
396- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_from , coder -> param .perf_stats , decoder -> stream , return - 1 );
398+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_from , coder -> param .perf_stats , coder -> stream , return - 1 );
397399
398400 // Copy decompressed image to host memory
399401 cudaMemcpy (coder -> data_raw , coder -> d_data_raw , coder -> data_raw_size * sizeof (uint8_t ), cudaMemcpyDeviceToHost );
400402
401- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_from , coder -> param .perf_stats , decoder -> stream , return - 1 );
403+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_from , coder -> param .perf_stats , coder -> stream , return - 1 );
402404
403405 // Set output to internal buffer
404406 output -> data = coder -> data_raw ;
405407 }
406408 else if (output -> type == GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER ) {
407- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_from , coder -> param .perf_stats , decoder -> stream , return - 1 );
409+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_from , coder -> param .perf_stats , coder -> stream , return - 1 );
408410
409411 assert (output -> data != NULL );
410412
411413 // Copy decompressed image to host memory
412414 cudaMemcpy (output -> data , coder -> d_data_raw , coder -> data_raw_size * sizeof (uint8_t ), cudaMemcpyDeviceToHost );
413415
414- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_from , coder -> param .perf_stats , decoder -> stream , return - 1 );
416+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_from , coder -> param .perf_stats , coder -> stream , return - 1 );
415417 }
416418 else if (output -> type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) {
417419 // If OpenGL texture wasn't mapped and used directly for decoding into it
418420 if (output -> texture -> texture_callback_attach_opengl != NULL ) {
419- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_map , coder -> param .perf_stats , decoder -> stream , return - 1 );
421+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_map , coder -> param .perf_stats , coder -> stream , return - 1 );
420422
421423 // Map OpenGL texture
422424 size_t data_size = 0 ;
423425 uint8_t * d_data = gpujpeg_opengl_texture_map (output -> texture , & data_size );
424426 assert (data_size == coder -> data_raw_size );
425427
426- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_map , coder -> param .perf_stats , decoder -> stream , return - 1 );
428+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_map , coder -> param .perf_stats , coder -> stream , return - 1 );
427429
428- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_from , coder -> param .perf_stats , decoder -> stream , return - 1 );
430+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_from , coder -> param .perf_stats , coder -> stream , return - 1 );
429431
430432 // Copy decompressed image to texture pixel buffer object device data
431433 cudaMemcpy (d_data , coder -> d_data_raw , coder -> data_raw_size * sizeof (uint8_t ), cudaMemcpyDeviceToDevice );
432434
433- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_from , coder -> param .perf_stats , decoder -> stream , return - 1 );
435+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_from , coder -> param .perf_stats , coder -> stream , return - 1 );
434436 }
435437
436- GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_unmap , coder -> param .perf_stats , decoder -> stream , return - 1 );
438+ GPUJPEG_CUSTOM_TIMER_START (coder -> duration_memory_unmap , coder -> param .perf_stats , coder -> stream , return - 1 );
437439
438440 // Unmap OpenGL texture
439441 gpujpeg_opengl_texture_unmap (output -> texture );
440442
441- GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_unmap , coder -> param .perf_stats , decoder -> stream , return - 1 );
443+ GPUJPEG_CUSTOM_TIMER_STOP (coder -> duration_memory_unmap , coder -> param .perf_stats , coder -> stream , return - 1 );
442444 }
443445 else if (output -> type == GPUJPEG_DECODER_OUTPUT_CUDA_BUFFER ) {
444446 // Copy decompressed image to texture pixel buffer object device data
0 commit comments