Skip to content

Commit cc12b8d

Browse files
committed
postprocessor: fix width_padding handling
Align with the preprocessor implementation; the previous handling was not entirely correct.
1 parent 5c9ee47 commit cc12b8d

File tree

3 files changed

+34
-32
lines changed

3 files changed

+34
-32
lines changed

src/gpujpeg_postprocessor.cu

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -122,22 +122,20 @@ inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_U8>(uint8_t *d_data_raw
122122
}
123123

124124
template<>
125-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_444_U8_P012>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
125+
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_444_U8_P012>(uint8_t *d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
126126
{
127-
image_position = image_position * 3;
128-
d_data_raw[image_position + 0] = r.x;
129-
d_data_raw[image_position + 1] = r.y;
130-
d_data_raw[image_position + 2] = r.z;
127+
d_data_raw[offset + 0] = r.x;
128+
d_data_raw[offset + 1] = r.y;
129+
d_data_raw[offset + 2] = r.z;
131130
}
132131

133132
template<>
134-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_4444_U8_P0123>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
133+
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_4444_U8_P0123>(uint8_t *d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
135134
{
136-
image_position = image_position * 4;
137-
d_data_raw[image_position + 0] = r.x;
138-
d_data_raw[image_position + 1] = r.y;
139-
d_data_raw[image_position + 2] = r.z;
140-
d_data_raw[image_position + 3] = r.w;
135+
d_data_raw[offset + 0] = r.x;
136+
d_data_raw[offset + 1] = r.y;
137+
d_data_raw[offset + 2] = r.z;
138+
d_data_raw[offset + 3] = r.w;
141139
}
142140

143141
template<>
@@ -159,14 +157,13 @@ inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P0P1P2>(uint8_t
159157
}
160158

161159
template<>
162-
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P1020>(uint8_t *d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
160+
inline __device__ void gpujpeg_comp_to_raw_store<GPUJPEG_422_U8_P1020>(uint8_t *d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
163161
{
164-
image_position = image_position * 2;
165-
d_data_raw[image_position + 1] = r.x;
162+
d_data_raw[offset + 1] = r.x;
166163
if ( (x % 2) == 0 )
167-
d_data_raw[image_position + 0] = r.y;
164+
d_data_raw[offset + 0] = r.y;
168165
else
169-
d_data_raw[image_position + 0] = r.z;
166+
d_data_raw[offset + 0] = r.z;
170167
}
171168

172169
template<>
@@ -244,7 +241,8 @@ struct post_load<in_is_rgb, GPUJPEG_DYNAMIC>
244241
* @param pixel_count Number of pixels to copy
245242
* @return void
246243
*/
247-
typedef void (*gpujpeg_preprocessor_decode_kernel)(struct gpujpeg_preprocessor_data data, uint8_t* d_data_raw, int image_width, int image_height);
244+
typedef void (*gpujpeg_preprocessor_decode_kernel)(struct gpujpeg_preprocessor_data data, uint8_t* d_data_raw,
245+
int width_padding, int image_width, int image_height);
248246

249247
template<
250248
enum gpujpeg_color_space color_space_internal,
@@ -256,7 +254,8 @@ template<
256254
uint8_t s_comp4_samp_factor_h, uint8_t s_comp4_samp_factor_v
257255
>
258256
__global__ void
259-
gpujpeg_preprocessor_comp_to_raw_kernel(struct gpujpeg_preprocessor_data data, uint8_t* d_data_raw, int image_width, int image_height)
257+
gpujpeg_preprocessor_comp_to_raw_kernel(struct gpujpeg_preprocessor_data data, uint8_t* d_data_raw,
258+
int image_width_padding, int image_width, int image_height)
260259
{
261260
int x = threadIdx.x;
262261
int gX = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x;
@@ -276,7 +275,8 @@ gpujpeg_preprocessor_comp_to_raw_kernel(struct gpujpeg_preprocessor_data data, u
276275
gpujpeg_color_transform<color_space_internal, color_space>::perform(r);
277276

278277
// Save
279-
gpujpeg_comp_to_raw_store<pixel_format>(d_data_raw, image_width, image_height, image_position, image_position_x, image_position_y, r);
278+
int offset = image_position * unit_size<pixel_format>() + image_width_padding * image_position_y;
279+
gpujpeg_comp_to_raw_store<pixel_format>(d_data_raw, image_width, image_height, offset, image_position_x, image_position_y, r);
280280
}
281281

282282
/**
@@ -508,7 +508,7 @@ gpujpeg_postprocessor_decode(struct gpujpeg_coder* coder, cudaStream_t stream)
508508
gpujpeg_preprocessor_decode_kernel kernel = (gpujpeg_preprocessor_decode_kernel)coder->preprocessor.kernel;
509509
assert(kernel != NULL);
510510

511-
int image_width = coder->param_image.width + coder->param_image.width_padding;
511+
int image_width = coder->param_image.width;
512512
int image_height = coder->param_image.height;
513513

514514
// When saving 4:2:2 data of odd width, the data should have even width, so round it
@@ -537,6 +537,7 @@ gpujpeg_postprocessor_decode(struct gpujpeg_coder* coder, cudaStream_t stream)
537537
kernel<<<grid, threads, 0, stream>>>(
538538
coder->preprocessor.data,
539539
coder->d_data_raw,
540+
coder->param_image.width_padding,
540541
image_width,
541542
image_height
542543
);

src/gpujpeg_preprocessor.cu

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,6 @@ struct gpujpeg_preprocessor_raw_to_comp_store {
8787
template<enum gpujpeg_pixel_format>
8888
inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r);
8989

90-
template<enum gpujpeg_pixel_format>
91-
inline __device__ int unit_size() { return 1; }
92-
9390
template<>
9491
inline __device__ void raw_to_comp_load<GPUJPEG_U8>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
9592
{
@@ -122,9 +119,6 @@ inline __device__ void raw_to_comp_load<GPUJPEG_420_U8_P0P1P2>(const uint8_t* d_
122119
r.z = d_data_raw[image_width * image_height + ((image_height + 1) / 2 + y / 2) * ((image_width + 1) / 2) + x / 2];
123120
}
124121

125-
template<>
126-
inline __device__ int unit_size<GPUJPEG_444_U8_P012>() { return 3; }
127-
128122
template<>
129123
inline __device__ void raw_to_comp_load<GPUJPEG_444_U8_P012>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
130124
{
@@ -133,9 +127,6 @@ inline __device__ void raw_to_comp_load<GPUJPEG_444_U8_P012>(const uint8_t* d_da
133127
r.z = d_data_raw[offset + 2];
134128
}
135129

136-
template<>
137-
inline __device__ int unit_size<GPUJPEG_4444_U8_P0123>() { return 4; }
138-
139130
template<>
140131
inline __device__ void raw_to_comp_load<GPUJPEG_4444_U8_P0123>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
141132
{
@@ -145,9 +136,6 @@ inline __device__ void raw_to_comp_load<GPUJPEG_4444_U8_P0123>(const uint8_t* d_
145136
r.w = d_data_raw[offset + 3];
146137
}
147138

148-
template<>
149-
inline __device__ int unit_size<GPUJPEG_422_U8_P1020>() { return 2; }
150-
151139
template<>
152140
inline __device__ void raw_to_comp_load<GPUJPEG_422_U8_P1020>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
153141
{

src/gpujpeg_preprocessor_common.cuh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,18 @@ gpujpeg_preprocessor_make_sampling_factor_i(int comp_count, int numerator_h, int
108108
coder->component[2].sampling_factor.horizontal, coder->component[2].sampling_factor.vertical, \
109109
coder->component[3].sampling_factor.horizontal, coder->component[3].sampling_factor.vertical)
110110

111+
template<enum gpujpeg_pixel_format>
112+
inline __device__ int unit_size() { return 1; }
113+
114+
template<>
115+
inline __device__ int unit_size<GPUJPEG_444_U8_P012>() { return 3; }
116+
117+
template<>
118+
inline __device__ int unit_size<GPUJPEG_4444_U8_P0123>() { return 4; }
119+
120+
template<>
121+
inline __device__ int unit_size<GPUJPEG_422_U8_P1020>() { return 2; }
122+
123+
111124
#endif // defined GPUJPEG_PREPROCESSOR_COMMON_CUH_DCC657E3_2EDF_47E2_90F4_F7CA26829E81
112125
/* vi: set expandtab sw=4: */

0 commit comments

Comments
 (0)