Skip to content

Commit 03e07d1

Browse files
committed
fix width_padding with planar data + rm assert
This may now work for other pixel formats as well - packed should be ok; for planar the semantic is perhaps bytes per every plane line (which should be fine for 4:4:4, maybe not for other subsamplings, but it has defined semantics; ok e.g. for grayscale).
1 parent 616691a commit 03e07d1

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

src/gpujpeg_postprocessor.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ gpujpeg_preprocessor_decoder_copy_planar_data(struct gpujpeg_coder * coder, cuda
486486
} else {
487487
for ( int i = 0; i < coder->param.comp_count; ++i ) {
488488
int spitch = coder->component[i].data_width;
489-
int dpitch = coder->component[i].width;
489+
int dpitch = coder->component[i].width + coder->param_image.width_padding;
490490
size_t component_size = dpitch * coder->component[i].height;
491491
cudaMemcpy2DAsync(coder->d_data_raw + data_raw_offset, dpitch, coder->component[i].d_data, spitch, coder->component[i].width, coder->component[i].height, cudaMemcpyDeviceToDevice, stream);
492492
data_raw_offset += component_size;

src/gpujpeg_preprocessor.cu

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,8 @@ gpujpeg_preprocessor_encoder_copy_planar_data(struct gpujpeg_encoder * encoder)
440440
size_t data_raw_offset = 0;
441441
bool needs_stride = false; // true if width is not divisible by MCU width
442442
for ( int i = 0; i < coder->param.comp_count; ++i ) {
443-
needs_stride = needs_stride || coder->component[i].width != coder->component[i].data_width;
443+
int component_width = coder->component[i].width + coder->param_image.width_padding;
444+
needs_stride = needs_stride || component_width != coder->component[i].data_width;
444445
}
445446
if (!needs_stride) {
446447
for ( int i = 0; i < coder->param.comp_count; ++i ) {
@@ -450,10 +451,12 @@ gpujpeg_preprocessor_encoder_copy_planar_data(struct gpujpeg_encoder * encoder)
450451
}
451452
} else {
452453
for ( int i = 0; i < coder->param.comp_count; ++i ) {
453-
int spitch = coder->component[i].width;
454+
int spitch = coder->component[i].width + coder->param_image.width_padding;
454455
int dpitch = coder->component[i].data_width;
455456
size_t component_size = spitch * coder->component[i].height;
456-
cudaMemcpy2DAsync(coder->component[i].d_data, dpitch, coder->d_data_raw + data_raw_offset, spitch, spitch, coder->component[i].height, cudaMemcpyDeviceToDevice, encoder->stream);
457+
cudaMemcpy2DAsync(coder->component[i].d_data, dpitch, coder->d_data_raw + data_raw_offset, spitch,
458+
coder->component[i].width, coder->component[i].height, cudaMemcpyDeviceToDevice,
459+
encoder->stream);
457460
data_raw_offset += component_size;
458461
}
459462
}
@@ -540,9 +543,9 @@ int
540543
gpujpeg_preprocessor_encode(struct gpujpeg_encoder * encoder)
541544
{
542545
struct gpujpeg_coder * coder = &encoder->coder;
543-
/// @todo support padding for other formats
544-
assert(coder->param_image.width_padding == 0 ||
545-
(coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && coder->preprocessor.kernel != nullptr));
546+
/// @todo ensure that all combinations work so the assert is really unneeded
547+
// assert(coder->param_image.width_padding == 0 ||
548+
// (coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && coder->preprocessor.kernel != nullptr));
546549

547550
if ( coder->preprocessor.channel_remap != 0 ) {
548551
const int ret = channel_remap(encoder);

0 commit comments

Comments
 (0)