Skip to content

Commit 6a79a22

Browse files
committed
pre/postprocessor: add state
Instead of keeping just the kernel pointer, use a state struct. The metadata for the kernel no longer needs to be recomputed with every image.
1 parent ee3aec8 commit 6a79a22

File tree

5 files changed

+68
-50
lines changed

5 files changed

+68
-50
lines changed

src/gpujpeg_common.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,6 @@ gpujpeg_coder_init(struct gpujpeg_coder * coder)
560560
coder->param.segment_info = -1;
561561
coder->param.color_space_internal = GPUJPEG_NONE;
562562
coder->param_image.color_space = GPUJPEG_NONE;
563-
coder->preprocessor = NULL;
564563
coder->component = NULL;
565564
coder->d_component = NULL;
566565
coder->component_allocated_size = 0;

src/gpujpeg_common_internal.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <stdlib.h>
4242
#include "../libgpujpeg/gpujpeg_common.h"
4343
#include "../libgpujpeg/gpujpeg_type.h"
44+
#include "gpujpeg_preprocessor_common.cuh"
4445
#include "gpujpeg_util.h"
4546

4647
// static_assert compat
@@ -328,8 +329,8 @@ struct gpujpeg_coder
328329
/// Number of allocated segments
329330
int segment_allocated_size;
330331

331-
/// Preprocessor data (kernel function pointer)
332-
void* preprocessor;
332+
/// Preprocessor or postprocessor data
333+
struct gpujpeg_preprocessor preprocessor;
333334

334335
/// Maximum sampling factor from components
335336
struct gpujpeg_component_sampling_factor sampling_factor;

src/gpujpeg_postprocessor.cu

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@
3333
* to raw image. It also does color space transformations.
3434
*/
3535

36-
#include "gpujpeg_colorspace.h"
36+
#define PREPROCESSOR_INTERNAL_API
3737
#include "gpujpeg_preprocessor_common.cuh"
38+
39+
#include "gpujpeg_colorspace.h"
3840
#include "gpujpeg_postprocessor.h"
3941
#include "gpujpeg_util.h"
4042

@@ -410,7 +412,18 @@ gpujpeg_preprocessor_decode_no_transform(struct gpujpeg_coder* coder)
410412
int
411413
gpujpeg_postprocessor_decoder_init(struct gpujpeg_coder* coder)
412414
{
413-
coder->preprocessor = NULL;
415+
coder->preprocessor.kernel = NULL;
416+
417+
struct gpujpeg_preprocessor_data *data = &coder->preprocessor.data;;
418+
*data = {};
419+
for ( int comp = 0; comp < coder->param.comp_count; comp++ ) {
420+
assert(coder->sampling_factor.horizontal % coder->component[comp].sampling_factor.horizontal == 0);
421+
assert(coder->sampling_factor.vertical % coder->component[comp].sampling_factor.vertical == 0);
422+
data->comp[comp].d_data = coder->component[comp].d_data;
423+
data->comp[comp].sampling_factor.horizontal = coder->sampling_factor.horizontal / coder->component[comp].sampling_factor.horizontal;
424+
data->comp[comp].sampling_factor.vertical = coder->sampling_factor.vertical / coder->component[comp].sampling_factor.vertical;
425+
data->comp[comp].data_width = coder->component[comp].data_width;
426+
}
414427

415428
if (!gpujpeg_pixel_format_is_interleaved(coder->param_image.pixel_format) &&
416429
gpujpeg_preprocessor_decode_no_transform(coder)) {
@@ -421,24 +434,24 @@ gpujpeg_postprocessor_decoder_init(struct gpujpeg_coder* coder)
421434
// assert(coder->param.comp_count == 3 || coder->param.comp_count == 4);
422435

423436
if (coder->param.color_space_internal == coder->param_image.color_space) {
424-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_NONE>(coder);
437+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_NONE>(coder);
425438
}
426439
else if (coder->param.color_space_internal == GPUJPEG_RGB) {
427-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_RGB>(coder);
440+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_RGB>(coder);
428441
}
429442
else if (coder->param.color_space_internal == GPUJPEG_YCBCR_BT601) {
430-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_YCBCR_BT601>(coder);
443+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_YCBCR_BT601>(coder);
431444
}
432445
else if (coder->param.color_space_internal == GPUJPEG_YCBCR_BT601_256LVLS) {
433-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_YCBCR_BT601_256LVLS>(coder);
446+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_YCBCR_BT601_256LVLS>(coder);
434447
}
435448
else if (coder->param.color_space_internal == GPUJPEG_YCBCR_BT709) {
436-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_YCBCR_BT709>(coder);
449+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_decode_kernel<GPUJPEG_YCBCR_BT709>(coder);
437450
}
438451
else {
439452
assert(false);
440453
}
441-
if (coder->preprocessor == NULL) {
454+
if (coder->preprocessor.kernel == NULL) {
442455
return -1;
443456
}
444457
return 0;
@@ -487,12 +500,12 @@ gpujpeg_preprocessor_decoder_copy_planar_data(struct gpujpeg_coder * coder, cuda
487500
int
488501
gpujpeg_postprocessor_decode(struct gpujpeg_coder* coder, cudaStream_t stream)
489502
{
490-
if (!coder->preprocessor) {
503+
if (coder->preprocessor.kernel == nullptr) {
491504
return gpujpeg_preprocessor_decoder_copy_planar_data(coder, stream);
492505
}
493506

494507
// Select kernel
495-
gpujpeg_preprocessor_decode_kernel kernel = (gpujpeg_preprocessor_decode_kernel)coder->preprocessor;
508+
gpujpeg_preprocessor_decode_kernel kernel = (gpujpeg_preprocessor_decode_kernel)coder->preprocessor.kernel;
496509
assert(kernel != NULL);
497510

498511
int image_width = coder->param_image.width + coder->param_image.width_padding;
@@ -521,17 +534,8 @@ gpujpeg_postprocessor_decode(struct gpujpeg_coder* coder, cudaStream_t stream)
521534
}
522535

523536
// Run kernel
524-
struct gpujpeg_preprocessor_data data = {};
525-
for ( int comp = 0; comp < coder->param.comp_count; comp++ ) {
526-
assert(coder->sampling_factor.horizontal % coder->component[comp].sampling_factor.horizontal == 0);
527-
assert(coder->sampling_factor.vertical % coder->component[comp].sampling_factor.vertical == 0);
528-
data.comp[comp].d_data = coder->component[comp].d_data;
529-
data.comp[comp].sampling_factor.horizontal = coder->sampling_factor.horizontal / coder->component[comp].sampling_factor.horizontal;
530-
data.comp[comp].sampling_factor.vertical = coder->sampling_factor.vertical / coder->component[comp].sampling_factor.vertical;
531-
data.comp[comp].data_width = coder->component[comp].data_width;
532-
}
533537
kernel<<<grid, threads, 0, stream>>>(
534-
data,
538+
coder->preprocessor.data,
535539
coder->d_data_raw,
536540
image_width,
537541
image_height

src/gpujpeg_preprocessor.cu

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2011-2024, CESNET
2+
* Copyright (c) 2011-2025, CESNET
33
* Copyright (c) 2011, Silicon Genome, LLC.
44
*
55
* All rights reserved.
@@ -33,8 +33,10 @@
3333
* computational kernels. It also does color space transformations.
3434
*/
3535

36-
#include "gpujpeg_colorspace.h"
36+
#define PREPROCESSOR_INTERNAL_API
3737
#include "gpujpeg_preprocessor_common.cuh"
38+
39+
#include "gpujpeg_colorspace.h"
3840
#include "gpujpeg_preprocessor.h"
3941
#include "gpujpeg_util.h"
4042

@@ -325,30 +327,39 @@ gpujpeg_preprocessor_encode_no_transform(struct gpujpeg_coder *coder)
325327
int
326328
gpujpeg_preprocessor_encoder_init(struct gpujpeg_coder* coder)
327329
{
328-
coder->preprocessor = NULL;
330+
coder->preprocessor.kernel = nullptr;
331+
struct gpujpeg_preprocessor_data *data = &coder->preprocessor.data;
332+
for ( int comp = 0; comp < coder->param.comp_count; comp++ ) {
333+
assert(coder->sampling_factor.horizontal % coder->component[comp].sampling_factor.horizontal == 0);
334+
assert(coder->sampling_factor.vertical % coder->component[comp].sampling_factor.vertical == 0);
335+
data->comp[comp].d_data = coder->component[comp].d_data;
336+
data->comp[comp].sampling_factor.horizontal = coder->sampling_factor.horizontal / coder->component[comp].sampling_factor.horizontal;
337+
data->comp[comp].sampling_factor.vertical = coder->sampling_factor.vertical / coder->component[comp].sampling_factor.vertical;
338+
data->comp[comp].data_width = coder->component[comp].data_width;
339+
}
329340

330341
if ( gpujpeg_preprocessor_encode_no_transform(coder) ) {
331342
DEBUG_MSG(coder->param.verbose, "Matching format detected - not using preprocessor, using memcpy instead.\n");
332343
return 0;
333344
}
334345

335346
if (coder->param.color_space_internal == GPUJPEG_NONE) {
336-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_NONE>(coder);
347+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_NONE>(coder);
337348
}
338349
else if (coder->param.color_space_internal == GPUJPEG_RGB) {
339-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_RGB>(coder);
350+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_RGB>(coder);
340351
}
341352
else if (coder->param.color_space_internal == GPUJPEG_YCBCR_BT601) {
342-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_YCBCR_BT601>(coder);
353+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_YCBCR_BT601>(coder);
343354
}
344355
else if (coder->param.color_space_internal == GPUJPEG_YCBCR_BT601_256LVLS) {
345-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_YCBCR_BT601_256LVLS>(coder);
356+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_YCBCR_BT601_256LVLS>(coder);
346357
}
347358
else if (coder->param.color_space_internal == GPUJPEG_YCBCR_BT709) {
348-
coder->preprocessor = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_YCBCR_BT709>(coder);
359+
coder->preprocessor.kernel = (void*)gpujpeg_preprocessor_select_encode_kernel<GPUJPEG_YCBCR_BT709>(coder);
349360
}
350361

351-
if ( coder->preprocessor == NULL ) {
362+
if ( coder->preprocessor.kernel == NULL ) {
352363
return -1;
353364
}
354365

@@ -361,7 +372,7 @@ gpujpeg_preprocessor_encode_interlaced(struct gpujpeg_encoder * encoder)
361372
struct gpujpeg_coder* coder = &encoder->coder;
362373

363374
// Select kernel
364-
gpujpeg_preprocessor_encode_kernel kernel = (gpujpeg_preprocessor_encode_kernel) coder->preprocessor;
375+
gpujpeg_preprocessor_encode_kernel kernel = (gpujpeg_preprocessor_encode_kernel) coder->preprocessor.kernel;
365376
assert(kernel != NULL);
366377

367378
int image_width = coder->param_image.width;
@@ -395,17 +406,8 @@ gpujpeg_preprocessor_encode_interlaced(struct gpujpeg_encoder * encoder)
395406
gpujpeg_const_div_prepare(image_width, width_div_mul, width_div_shift);
396407

397408
// Run kernel
398-
struct gpujpeg_preprocessor_data data;
399-
for ( int comp = 0; comp < coder->param.comp_count; comp++ ) {
400-
assert(coder->sampling_factor.horizontal % coder->component[comp].sampling_factor.horizontal == 0);
401-
assert(coder->sampling_factor.vertical % coder->component[comp].sampling_factor.vertical == 0);
402-
data.comp[comp].d_data = coder->component[comp].d_data;
403-
data.comp[comp].sampling_factor.horizontal = coder->sampling_factor.horizontal / coder->component[comp].sampling_factor.horizontal;
404-
data.comp[comp].sampling_factor.vertical = coder->sampling_factor.vertical / coder->component[comp].sampling_factor.vertical;
405-
data.comp[comp].data_width = coder->component[comp].data_width;
406-
}
407409
kernel<<<grid, threads, 0, encoder->stream>>>(
408-
data,
410+
coder->preprocessor.data,
409411
coder->d_data_raw,
410412
coder->param_image.width_padding,
411413
image_width,
@@ -465,8 +467,10 @@ gpujpeg_preprocessor_encode(struct gpujpeg_encoder * encoder)
465467
{
466468
struct gpujpeg_coder * coder = &encoder->coder;
467469
/// @todo support padding for other formats
468-
assert(!coder->param_image.width_padding || (coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && coder->preprocessor));
469-
if (coder->preprocessor) {
470+
assert(!coder->param_image.width_padding ||
471+
(coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && coder->preprocessor.kernel != nullptr));
472+
473+
if (coder->preprocessor.kernel != nullptr) {
470474
return gpujpeg_preprocessor_encode_interlaced(encoder);
471475
} else {
472476
return gpujpeg_preprocessor_encoder_copy_planar_data(encoder);

src/gpujpeg_preprocessor_common.cuh

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2011-2024, CESNET
2+
* Copyright (c) 2011-2025, CESNET
33
* Copyright (c) 2011, Silicon Genome, LLC.
44
*
55
* All rights reserved.
@@ -27,12 +27,11 @@
2727
* POSSIBILITY OF SUCH DAMAGE.
2828
*/
2929

30+
#ifndef GPUJPEG_PREPROCESSOR_COMMON_CUH_DCC657E3_2EDF_47E2_90F4_F7CA26829E81
31+
#define GPUJPEG_PREPROCESSOR_COMMON_CUH_DCC657E3_2EDF_47E2_90F4_F7CA26829E81
32+
3033
#include "../libgpujpeg/gpujpeg_common.h"
3134
#include "../libgpujpeg/gpujpeg_type.h"
32-
#include "gpujpeg_common_internal.h"
33-
34-
#include <cassert>
35-
#include <cstdint>
3635

3736
#define RGB_8BIT_THREADS 256
3837

@@ -54,6 +53,15 @@ struct gpujpeg_preprocessor_data
5453
struct gpujpeg_preprocessor_data_component comp[GPUJPEG_MAX_COMPONENT_COUNT];
5554
};
5655

56+
struct gpujpeg_preprocessor {
57+
void* kernel; // function pointer
58+
struct gpujpeg_preprocessor_data data;
59+
};
60+
61+
#ifdef PREPROCESSOR_INTERNAL_API
62+
#include "gpujpeg_common_internal.h"
63+
#include <cassert>
64+
#include <cstdint>
5765
/** Value that means that sampling factor has dynamic value */
5866
#define GPUJPEG_DYNAMIC 16
5967

@@ -122,5 +130,7 @@ gpujpeg_preprocessor_make_sampling_factor_i(int comp_count, int numerator_h, int
122130
coder->component[1].sampling_factor.horizontal, coder->component[1].sampling_factor.vertical, \
123131
coder->component[2].sampling_factor.horizontal, coder->component[2].sampling_factor.vertical, \
124132
coder->component[3].sampling_factor.horizontal, coder->component[3].sampling_factor.vertical)
133+
#endif // defined PREPROCESSOR_INTERNAL_API
125134

135+
#endif // defined GPUJPEG_PREPROCESSOR_COMMON_CUH_DCC657E3_2EDF_47E2_90F4_F7CA26829E81
126136
/* vi: set expandtab sw=4: */

0 commit comments

Comments
 (0)