Skip to content

Commit ede68fc

Browse files
committed
channel_remap 4444: support width_padding
1 parent 6c1d330 commit ede68fc

File tree

1 file changed

+24
-11
lines changed

1 file changed

+24
-11
lines changed

src/gpujpeg_preprocessor.cu

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -497,13 +497,9 @@ flip_lines(struct gpujpeg_encoder* encoder)
497497
return 0;
498498
}
499499

500-
template <enum gpujpeg_pixel_format>
500+
template<bool aligned>
501501
__global__ void
502-
channel_remap_kernel(uint8_t* data, int width, int height, unsigned int byte_map);
503-
504-
template <>
505-
__global__ void
506-
channel_remap_kernel<GPUJPEG_4444_U8_P0123>(uint8_t* data, int width, int height, unsigned int byte_map)
502+
channel_remap_kernel_4444_u8_p0123(uint8_t* data, int width, int pitch, int height, unsigned int byte_map)
507503
{
508504
int x = blockIdx.x * blockDim.x + threadIdx.x; // column index
509505
int y = blockIdx.y * blockDim.y + threadIdx.y; // row index
@@ -512,9 +508,19 @@ channel_remap_kernel<GPUJPEG_4444_U8_P0123>(uint8_t* data, int width, int height
512508
return;
513509
}
514510

515-
uint32_t* data32 = (uint32_t*)data;
516-
uint32_t val = data32[y * width + x];
517-
data32[y * width + x] = __byte_perm(val, 0xFF, byte_map);
511+
if (aligned) {
512+
uint32_t* data32 = (uint32_t*)(data + y * pitch + x * sizeof(uint32_t));
513+
uint32_t val = *data32;
514+
*data32 = __byte_perm(val, 0xFF, byte_map);
515+
} else {
516+
data += y * pitch + x * sizeof(uint32_t);
517+
uint32_t val = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
518+
val = __byte_perm(val, 0xFF, byte_map);
519+
data[0] = val & 0xFF;
520+
data[1] = (val >> 8) & 0xFF;
521+
data[2] = (val >> 16) & 0xFF;
522+
data[3] = val >> 24;
523+
}
518524
}
519525

520526
static int
@@ -536,8 +542,15 @@ channel_remap(struct gpujpeg_encoder* encoder)
536542
int width = coder->param_image.width;
537543
int height = coder->param_image.height;
538544
dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);
539-
channel_remap_kernel<GPUJPEG_4444_U8_P0123><<<grid, block, 0, encoder->stream>>>(
540-
encoder->coder.d_data_raw, width, height, mapping);
545+
int pitch = width * sizeof(uint32_t) + coder->param_image.width_padding;
546+
bool aligned = coder->param_image.width_padding % sizeof(uint32_t) == 0;
547+
if (aligned) {
548+
channel_remap_kernel_4444_u8_p0123<true><<<grid, block, 0, encoder->stream>>>(encoder->coder.d_data_raw, width,
549+
pitch, height, mapping);
550+
} else {
551+
channel_remap_kernel_4444_u8_p0123<false><<<grid, block, 0, encoder->stream>>>(encoder->coder.d_data_raw, width,
552+
pitch, height, mapping);
553+
}
541554
}
542555
else {
543556
ERROR_MSG("Pixel format %s currently unsupported for channel remap!\n",

0 commit comments

Comments
 (0)