Skip to content

Commit 50b2904

Browse files
committed
channel_remap: implement for all pixfmts
Template implementation using raw_to_comp_load+comp_to_raw_store, the later implementations must have been moved to a header.
1 parent fd3c305 commit 50b2904

File tree

4 files changed

+114
-117
lines changed

4 files changed

+114
-117
lines changed

libgpujpeg/gpujpeg_encoder.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,6 @@ gpujpeg_encoder_suggest_restart_interval(const struct gpujpeg_image_parameters*
242242
*
243243
* The number of image channes must equal the length of the string. Set to "" (empty string) to revert this setting.
244244
* Letters 'Z' or 'F' can be used instead of indices to fill given output channel with zeros or all-ones.
245-
*
246-
* Currently, the remapping is implemented for GPUJPEG_4444_U8_P0123 and GPUJPEG_444_U8_P012.
247245
*/
248246
#define GPUJPEG_ENC_OPT_CHANNEL_REMAP "enc_opt_channel_remap"
249247

src/gpujpeg_postprocessor.cu

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -112,70 +112,6 @@ struct gpujpeg_preprocessor_comp_to_raw_load
112112
}
113113
};
114114

115-
template<enum gpujpeg_pixel_format pixel_format>
116-
inline __device__ void gpujpeg_comp_to_raw_store(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r);
117-
118-
template<>
119-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_U8>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
120-
{
121-
d_data_raw[image_position] = r.x;
122-
}
123-
124-
template<>
125-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_444_U8_P012>(uint8_t *d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
126-
{
127-
d_data_raw[offset + 0] = r.x;
128-
d_data_raw[offset + 1] = r.y;
129-
d_data_raw[offset + 2] = r.z;
130-
}
131-
132-
template<>
133-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_4444_U8_P0123>(uint8_t *d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
134-
{
135-
d_data_raw[offset + 0] = r.x;
136-
d_data_raw[offset + 1] = r.y;
137-
d_data_raw[offset + 2] = r.z;
138-
d_data_raw[offset + 3] = r.w;
139-
}
140-
141-
template<>
142-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_444_U8_P0P1P2>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
143-
{
144-
d_data_raw[image_position] = r.x;
145-
d_data_raw[image_width * image_height + image_position] = r.y;
146-
d_data_raw[2 * image_width * image_height + image_position] = r.z;
147-
}
148-
149-
template<>
150-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P0P1P2>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
151-
{
152-
d_data_raw[image_position] = r.x;
153-
if ( (x % 2) == 0 ) {
154-
d_data_raw[image_width * image_height + image_position / 2] = r.y;
155-
d_data_raw[image_width * image_height + image_height * ((image_width + 1) / 2) + image_position / 2] = r.z;
156-
}
157-
}
158-
159-
template<>
160-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P1020>(uint8_t *d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
161-
{
162-
d_data_raw[offset + 1] = r.x;
163-
if ( (x % 2) == 0 )
164-
d_data_raw[offset + 0] = r.y;
165-
else
166-
d_data_raw[offset + 0] = r.z;
167-
}
168-
169-
template<>
170-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_420_U8_P0P1P2>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
171-
{
172-
d_data_raw[image_position] = r.x;
173-
if ( (image_position % 2) == 0 && (y % 2) == 0 ) {
174-
d_data_raw[image_width * image_height + y / 2 * ((image_width + 1) / 2) + x / 2] = r.y;
175-
d_data_raw[image_width * image_height + ((image_height + 1) / 2 + y / 2) * ((image_width + 1) / 2) + x / 2] = r.z;
176-
}
177-
}
178-
179115
/// set alpha that may not be included in input data but may be read by the CS conv
180116
template <enum gpujpeg_pixel_format pixel_format>
181117
struct pre_load {

src/gpujpeg_preprocessor.cu

Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -485,9 +485,9 @@ flip_lines(struct gpujpeg_encoder* encoder)
485485
return 0;
486486
}
487487

488-
template<bool aligned>
488+
template <enum gpujpeg_pixel_format pixel_format>
489489
__global__ void
490-
channel_remap_kernel_4444_u8_p0123(uint8_t* data, int width, int pitch, int height, unsigned int byte_map)
490+
channel_remap_kernel(uint8_t* data, int width, int pitch, int height, unsigned int byte_map)
491491
{
492492
int x = blockIdx.x * blockDim.x + threadIdx.x; // column index
493493
int y = blockIdx.y * blockDim.y + threadIdx.y; // row index
@@ -496,37 +496,21 @@ channel_remap_kernel_4444_u8_p0123(uint8_t* data, int width, int pitch, int heig
496496
return;
497497
}
498498

499-
if (aligned) {
500-
uint32_t* data32 = (uint32_t*)(data + y * pitch + x * sizeof(uint32_t));
501-
uint32_t val = *data32;
502-
*data32 = __byte_perm(val, 0xFF, byte_map);
503-
} else {
504-
data += y * pitch + x * sizeof(uint32_t);
505-
uint32_t val = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
506-
val = __byte_perm(val, 0xFF, byte_map);
507-
data[0] = val & 0xFF;
508-
data[1] = (val >> 8) & 0xFF;
509-
data[2] = (val >> 16) & 0xFF;
510-
data[3] = val >> 24;
511-
}
512-
}
513-
514-
__global__ void
515-
channel_remap_kernel_444_u8_p012(uint8_t* data, int width, int pitch, int height, unsigned int byte_map)
516-
{
517-
int x = blockIdx.x * blockDim.x + threadIdx.x; // column index
518-
int y = blockIdx.y * blockDim.y + threadIdx.y; // row index
519-
520-
if ( x >= width || y >= height ) {
521-
return;
522-
}
499+
// Load
500+
uchar4 r;
501+
int offset = y * pitch + x * unit_size<pixel_format>();
502+
raw_to_comp_load<pixel_format>(data, width, height, offset, x, y, r);
523503

524-
data += y * pitch + x * 3;
525-
uint32_t val = data[2] << 16 | data[1] << 8 | data[0];
504+
// Permutation
505+
uint32_t val = r.w << 24 | r.z << 16 | r.y << 8 | r.x;
526506
val = __byte_perm(val, 0xFF, byte_map);
527-
data[0] = val & 0xFF;
528-
data[1] = (val >> 8) & 0xFF;
529-
data[2] = val >> 16;
507+
r.w = val >> 24;
508+
r.z = (val >> 16) & 0xFF;
509+
r.y = (val >> 8) & 0xFF;
510+
r.x = val & 0xFF;
511+
512+
// Store
513+
gpujpeg_comp_to_raw_store<pixel_format>(data, width, height, offset, x, y, r);
530514
}
531515

532516
static int
@@ -549,26 +533,25 @@ channel_remap(struct gpujpeg_encoder* encoder)
549533
int pitch = (width * gpujpeg_pixel_format_get_comp_count(coder->param_image.pixel_format)) +
550534
coder->param_image.width_padding;
551535
dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);
552-
if ( coder->param_image.pixel_format == GPUJPEG_4444_U8_P0123 ) {
553-
bool aligned = coder->param_image.width_padding % sizeof(uint32_t) == 0;
554-
if (aligned) {
555-
channel_remap_kernel_4444_u8_p0123<true><<<grid, block, 0, encoder->stream>>>(encoder->coder.d_data_raw, width,
556-
pitch, height, mapping);
557-
} else {
558-
channel_remap_kernel_4444_u8_p0123<false><<<grid, block, 0, encoder->stream>>>(encoder->coder.d_data_raw, width,
559-
pitch, height, mapping);
560-
}
561-
}
562-
else if ( coder->param_image.pixel_format == GPUJPEG_444_U8_P012 ) {
563-
channel_remap_kernel_444_u8_p012<<<grid, block, 0, encoder->stream>>>(encoder->coder.d_data_raw, width, pitch,
564-
height, mapping);
565-
}
566-
else {
567-
ERROR_MSG("Pixel format %s currently not supported for channel remap!\n"
568-
"444-u8-p012 and 4444-u8-p0123 are currently supported.\n",
569-
gpujpeg_pixel_format_get_name(coder->param_image.pixel_format));
570-
return -1;
571-
}
536+
537+
decltype(channel_remap_kernel<GPUJPEG_U8>)* kernel = nullptr;
538+
#define SWITCH_KERNEL(pf) \
539+
case pf: \
540+
kernel = channel_remap_kernel<pf>; \
541+
break
542+
switch ( coder->param_image.pixel_format ) {
543+
SWITCH_KERNEL(GPUJPEG_444_U8_P012);
544+
SWITCH_KERNEL(GPUJPEG_4444_U8_P0123);
545+
SWITCH_KERNEL(GPUJPEG_422_U8_P1020);
546+
SWITCH_KERNEL(GPUJPEG_444_U8_P0P1P2);
547+
SWITCH_KERNEL(GPUJPEG_422_U8_P0P1P2);
548+
SWITCH_KERNEL(GPUJPEG_420_U8_P0P1P2);
549+
SWITCH_KERNEL(GPUJPEG_U8);
550+
case GPUJPEG_PIXFMT_NONE:
551+
GPUJPEG_ASSERT(0 && "Preprocess from GPUJPEG_PIXFMT_NONE not allowed");
552+
}
553+
#undef SWITDH_KERNEL
554+
kernel<<<grid, block, 0, encoder->stream>>>(encoder->coder.d_data_raw, width, pitch, height, mapping);
572555
gpujpeg_cuda_check_error("channel_remap_kernel failed", return -1);
573556
return 0;
574557
}

src/gpujpeg_preprocessor_common.cuh

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,86 @@ inline __device__ int unit_size<GPUJPEG_4444_U8_P0123>() { return 4; }
120120
template<>
121121
inline __device__ int unit_size<GPUJPEG_422_U8_P1020>() { return 2; }
122122

123+
template <enum gpujpeg_pixel_format pixel_format>
124+
inline __device__ void
125+
gpujpeg_comp_to_raw_store(uint8_t* d_data_raw, int& image_width, int& image_height, int& image_position, int& x, int& y,
126+
uchar4& r);
127+
128+
template <>
129+
inline __device__ void
130+
gpujpeg_comp_to_raw_store<GPUJPEG_U8>(uint8_t* d_data_raw, int& image_width, int& image_height, int& image_position,
131+
int& x, int& y, uchar4& r)
132+
{
133+
d_data_raw[image_position] = r.x;
134+
}
135+
136+
template <>
137+
inline __device__ void
138+
gpujpeg_comp_to_raw_store<GPUJPEG_444_U8_P012>(uint8_t* d_data_raw, int& image_width, int& image_height, int& offset,
139+
int& x, int& y, uchar4& r)
140+
{
141+
d_data_raw[offset + 0] = r.x;
142+
d_data_raw[offset + 1] = r.y;
143+
d_data_raw[offset + 2] = r.z;
144+
}
145+
146+
template <>
147+
inline __device__ void
148+
gpujpeg_comp_to_raw_store<GPUJPEG_4444_U8_P0123>(uint8_t* d_data_raw, int& image_width, int& image_height, int& offset,
149+
int& x, int& y, uchar4& r)
150+
{
151+
d_data_raw[offset + 0] = r.x;
152+
d_data_raw[offset + 1] = r.y;
153+
d_data_raw[offset + 2] = r.z;
154+
d_data_raw[offset + 3] = r.w;
155+
}
156+
157+
template <>
158+
inline __device__ void
159+
gpujpeg_comp_to_raw_store<GPUJPEG_444_U8_P0P1P2>(uint8_t* d_data_raw, int& image_width, int& image_height,
160+
int& image_position, int& x, int& y, uchar4& r)
161+
{
162+
d_data_raw[image_position] = r.x;
163+
d_data_raw[image_width * image_height + image_position] = r.y;
164+
d_data_raw[2 * image_width * image_height + image_position] = r.z;
165+
}
166+
167+
template <>
168+
inline __device__ void
169+
gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P0P1P2>(uint8_t* d_data_raw, int& image_width, int& image_height,
170+
int& image_position, int& x, int& y, uchar4& r)
171+
{
172+
d_data_raw[image_position] = r.x;
173+
if ( (x % 2) == 0 ) {
174+
d_data_raw[image_width * image_height + image_position / 2] = r.y;
175+
d_data_raw[image_width * image_height + image_height * ((image_width + 1) / 2) + image_position / 2] = r.z;
176+
}
177+
}
178+
179+
template <>
180+
inline __device__ void
181+
gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P1020>(uint8_t* d_data_raw, int& image_width, int& image_height, int& offset,
182+
int& x, int& y, uchar4& r)
183+
{
184+
d_data_raw[offset + 1] = r.x;
185+
if ( (x % 2) == 0 )
186+
d_data_raw[offset + 0] = r.y;
187+
else
188+
d_data_raw[offset + 0] = r.z;
189+
}
190+
191+
template <>
192+
inline __device__ void
193+
gpujpeg_comp_to_raw_store<GPUJPEG_420_U8_P0P1P2>(uint8_t* d_data_raw, int& image_width, int& image_height,
194+
int& image_position, int& x, int& y, uchar4& r)
195+
{
196+
d_data_raw[image_position] = r.x;
197+
if ( (image_position % 2) == 0 && (y % 2) == 0 ) {
198+
d_data_raw[image_width * image_height + y / 2 * ((image_width + 1) / 2) + x / 2] = r.y;
199+
d_data_raw[image_width * image_height + ((image_height + 1) / 2 + y / 2) * ((image_width + 1) / 2) + x / 2] =
200+
r.z;
201+
}
202+
}
123203

124204
#endif // defined GPUJPEG_PREPROCESSOR_COMMON_CUH_DCC657E3_2EDF_47E2_90F4_F7CA26829E81
125205
/* vi: set expandtab sw=4: */

0 commit comments

Comments
 (0)