@@ -461,20 +461,57 @@ gpujpeg_preprocessor_encoder_copy_planar_data(struct gpujpeg_encoder * encoder)
461461 return 0 ;
462462}
463463
/**
 * Flips an image plane vertically in place by swapping row y with row
 * (height - 1 - y). Every thread swaps a single 32-bit word.
 *
 * Expected launch layout: a 2D grid whose x dimension covers `width` words and
 * whose y dimension covers the upper `height / 2` rows. Threads that fall
 * outside of that range return without touching memory. No shared memory is
 * used.
 *
 * @param data   buffer holding the plane, addressed as rows of `width` 32-bit words
 * @param width  row length in 32-bit words (image linesize / 4)
 * @param height number of rows (image height in pixels)
 */
static __global__ void
vertical_flip_kernel(uint32_t* data,
                     int width, // image linesize/4
                     int height // image height in pixels
)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // column index (in 32-bit words)
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // row index

    // Only rows of the upper half take part in the swap; the middle row of an
    // odd-height plane stays where it is. Checking y here keeps the kernel
    // correct for any grid size: an unguarded thread with y >= height / 2
    // would either swap a pair of rows back or compute a negative row index.
    if ( x >= width || y >= height / 2 ) {
        return;
    }

    // Offsets are computed in size_t so that y * width cannot overflow int.
    const size_t top = (size_t)y * width + x;
    const size_t bottom = (size_t)(height - 1 - y) * width + x; // flipped row
    const uint32_t tmp = data[top];
    data[top] = data[bottom];
    data[bottom] = tmp;
}
481+
/**
 * Flips every component plane of the coder vertically in place by launching
 * vertical_flip_kernel on encoder->stream once per component.
 *
 * Each plane is treated as rows of 32-bit words.
 * NOTE(review): assumes data_width is a multiple of 4 - trailing bytes of a
 * row would otherwise not be flipped; confirm against the allocation code.
 *
 * @param encoder encoder whose coder components are flipped
 * @return 0 on success; `return -1` is the statement handed to
 *         gpujpeg_cuda_check_error for the failure case
 */
static int
flip_lines(struct gpujpeg_encoder* encoder)
{
    struct gpujpeg_coder* coder = &encoder->coder;
    const dim3 block(RGB_8BIT_THREADS, 1); // same block shape for all components
    for ( int i = 0; i < coder->param.comp_count; ++i ) {
        const int width = coder->component[i].data_width / 4; // row length in 32-bit words
        const int height = coder->component[i].data_height;
        // A plane with fewer than two rows (or without a single full word per
        // row) has nothing to swap. Launching it anyway would request a grid
        // with a zero dimension, which is an invalid launch configuration and
        // would be reported as a failure below.
        if ( width <= 0 || height < 2 ) {
            continue;
        }
        const dim3 grid((width + block.x - 1) / block.x, height / 2); // only half of height
        vertical_flip_kernel<<<grid, block, 0, encoder->stream>>>((uint32_t*)coder->component[i].d_data, width, height);
    }
    gpujpeg_cuda_check_error(" Preprocessor flip failed", return -1);
    return 0;
}
496+
/* Documented at declaration */
int
gpujpeg_preprocessor_encode(struct gpujpeg_encoder* encoder)
{
    struct gpujpeg_coder* coder = &encoder->coder;
    const bool has_kernel = coder->preprocessor.kernel != nullptr;

    /// @todo support padding for other formats
    // Non-zero width padding is accepted only for GPUJPEG_444_U8_P012 input
    // that is processed by a preprocessor kernel.
    assert(coder->param_image.width_padding == 0 ||
           (coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && has_kernel));

    // With a preprocessor kernel the interlaced path is used; otherwise the
    // planar data are copied.
    int rc;
    if ( has_kernel ) {
        rc = gpujpeg_preprocessor_encode_interlaced(encoder);
    }
    else {
        rc = gpujpeg_preprocessor_encoder_copy_planar_data(encoder);
    }
    if ( rc != 0 ) {
        return rc;
    }

    // Input that is not flagged as flipped needs no further processing.
    if ( !coder->preprocessor.input_flipped ) {
        return 0;
    }
    return flip_lines(encoder);
}
479516
480517/* vi: set expandtab sw=4: */
0 commit comments