@@ -497,13 +497,9 @@ flip_lines(struct gpujpeg_encoder* encoder)
497497 return 0 ;
498498}
499499
500- template < enum gpujpeg_pixel_format >
500+ template < bool aligned >
501501__global__ void
502- channel_remap_kernel (uint8_t * data, int width, int height, unsigned int byte_map);
503-
504- template <>
505- __global__ void
506- channel_remap_kernel<GPUJPEG_4444_U8_P0123>(uint8_t * data, int width, int height, unsigned int byte_map)
502+ channel_remap_kernel_4444_u8_p0123 (uint8_t * data, int width, int pitch, int height, unsigned int byte_map)
507503{
508504 int x = blockIdx .x * blockDim .x + threadIdx .x ; // column index
509505 int y = blockIdx .y * blockDim .y + threadIdx .y ; // row index
@@ -512,9 +508,19 @@ channel_remap_kernel<GPUJPEG_4444_U8_P0123>(uint8_t* data, int width, int height
512508 return ;
513509 }
514510
515- uint32_t * data32 = (uint32_t *)data;
516- uint32_t val = data32[y * width + x];
517- data32[y * width + x] = __byte_perm (val, 0xFF , byte_map);
511+ if (aligned) {
512+ uint32_t * data32 = (uint32_t *)(data + y * pitch + x * sizeof (uint32_t ));
513+ uint32_t val = *data32;
514+ *data32 = __byte_perm (val, 0xFF , byte_map);
515+ } else {
516+ data += y * pitch + x * sizeof (uint32_t );
517+ uint32_t val = data[3 ] << 24 | data[2 ] << 16 | data[1 ] << 8 | data[0 ];
518+ val = __byte_perm (val, 0xFF , byte_map);
519+ data[0 ] = val & 0xFF ;
520+ data[1 ] = (val >> 8 ) & 0xFF ;
521+ data[2 ] = (val >> 16 ) & 0xFF ;
522+ data[3 ] = val >> 24 ;
523+ }
518524}
519525
520526static int
@@ -536,8 +542,15 @@ channel_remap(struct gpujpeg_encoder* encoder)
536542 int width = coder->param_image .width ;
537543 int height = coder->param_image .height ;
538544 dim3 grid ((width + block.x - 1 ) / block.x , (height + block.y - 1 ) / block.y );
539- channel_remap_kernel<GPUJPEG_4444_U8_P0123><<<grid, block, 0 , encoder->stream>>> (
540- encoder->coder .d_data_raw , width, height, mapping);
545+ int pitch = width * sizeof (uint32_t ) + coder->param_image .width_padding ;
546+ bool aligned = coder->param_image .width_padding % sizeof (uint32_t ) == 0 ;
547+ if (aligned) {
548+ channel_remap_kernel_4444_u8_p0123<true ><<<grid, block, 0 , encoder->stream>>> (encoder->coder .d_data_raw , width,
549+ pitch, height, mapping);
550+ } else {
551+ channel_remap_kernel_4444_u8_p0123<false ><<<grid, block, 0 , encoder->stream>>> (encoder->coder .d_data_raw , width,
552+ pitch, height, mapping);
553+ }
541554 }
542555 else {
543556 ERROR_MSG (" Pixel format %s currently unsupported for channel remap!\n " ,
0 commit comments