@@ -44,110 +44,174 @@ inline void bcdec_bc6h_half_u(const void *compressedBlock, void *decompressedBlo
4444 bcdec_bc6h_half (compressedBlock, decompressedBlock, destinationPitch, false );
4545}
4646
47- static void decompress_image (BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
48- const uint8_t *src_blocks = reinterpret_cast <const uint8_t *>(src);
49- uint8_t *dec_blocks = reinterpret_cast <uint8_t *>(dst);
// Decodes a block-compressed image whose width and/or height may not be a multiple of 4,
// copying only the pixels that lie inside the width x height destination.
//
// Template parameters:
//   decompress_func - decoder invoked once per block as (source block, destination, pitch).
//   block_size      - bytes consumed from `src` per block.
//   pixel_size      - bytes per decoded pixel in `dst`.
//   component_size  - divisor applied to every byte pitch before it is handed to
//                     decompress_func (the pitch is passed in units of component_size bytes).
//
// Parameters:
//   width, height - destination image size in pixels.
//   src           - compressed blocks, read sequentially; one block is also consumed for
//                   every partial block on the right edge and on the bottom edge.
//   dst           - output pixels, addressed with a row pitch of width * pixel_size bytes.
//
// Complete blocks are decoded straight into `dst`. Partial blocks are decoded into a
// scratch buffer first, and only the rows/bytes that fall inside the image are copied out.
47+ template <void (*decompress_func)(const void *, void *, int ), int block_size, int pixel_size, int component_size>
48+ static inline void _safe_decompress_mipmap (int width, int height, const uint8_t *src, uint8_t *dst) {
49+ // A stack-allocated output buffer large enough to contain an entire uncompressed block.
// NOTE(review): this is a plain uint8_t array; if decompress_func stores elements wider
// than one byte (component_size > 1), confirm that its alignment is sufficient.
50+ uint8_t temp_buf[4 * 4 * pixel_size];
51+
52+ // The number of leftover pixels past the last complete block on each axis (0 to 3).
53+ const int width_diff = width - (width & ~0x03 );
54+ const int height_diff = height - (height & ~0x03 );
55+
56+ // The number of complete 4x4 blocks on each axis.
57+ const int width_blocks = (width & ~0x03 ) / 4 ;
58+ const int height_blocks = (height & ~0x03 ) / 4 ;
59+
60+ // The pitch of the image in bytes.
61+ const int image_pitch = width * pixel_size;
62+ // The pitch of a block in bytes.
63+ const int block_pitch = 4 * pixel_size;
64+ // The number of bytes per row that are kept from a partial block on the right edge.
65+ const int odd_pitch = width_diff * pixel_size;
66+
67+ size_t src_pos = 0 ;
68+ size_t dst_pos = 0 ;
69+
70+ // Decompress the blocks, starting from the top.
71+ for (int y = 0 ; y < height_blocks; y += 1 ) {
72+ // Decompress the blocks, starting from the left.
73+ for (int x = 0 ; x < width_blocks; x += 1 ) {
74+ decompress_func (&src[src_pos], &dst[dst_pos], image_pitch / component_size);
75+ src_pos += block_size;
76+ dst_pos += block_pitch;
77+ }
78+
79+ // Decompress the block on the right.
80+ if (width_diff > 0 ) {
// The scratch buffer is exactly one block wide, so the block pitch is used as its row pitch.
81+ decompress_func (&src[src_pos], temp_buf, block_pitch / component_size);
82+
83+ // Copy the data from the temporary buffer to the output.
// All 4 rows are inside the image here; only the first odd_pitch bytes of each row are kept.
84+ for (int i = 0 ; i < 4 ; i++) {
85+ memcpy (&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch);
86+ }
87+
88+ src_pos += block_size;
89+ dst_pos += odd_pitch;
90+ }
91+
92+ // Skip to the next row of blocks, the current one has already been filled.
// dst_pos has advanced by exactly one image row above, so 3 more rows complete the 4-row step.
93+ dst_pos += 3 * image_pitch;
94+ }
95+
96+ // Decompress the blocks at the bottom of the image.
97+ if (height_diff > 0 ) {
98+ // Decompress the blocks at the bottom.
99+ for (int x = 0 ; x < width_blocks; x += 1 ) {
100+ decompress_func (&src[src_pos], temp_buf, block_pitch / component_size);
101+
102+ // Copy the data from the temporary buffer to the output.
// Only the first height_diff rows of the block are inside the image; each is copied at full block width.
103+ for (int i = 0 ; i < height_diff; i++) {
104+ memcpy (&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], block_pitch);
105+ }
106+
107+ src_pos += block_size;
108+ dst_pos += block_pitch;
109+ }
110+
111+ // Decompress the block in the lower-right corner.
112+ if (width_diff > 0 ) {
113+ decompress_func (&src[src_pos], temp_buf, block_pitch / component_size);
114+
115+ // Copy the data from the temporary buffer to the output.
// The corner block is clipped on both axes: height_diff rows of odd_pitch bytes each.
116+ for (int i = 0 ; i < height_diff; i++) {
117+ memcpy (&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch);
118+ }
50119
51- #define DECOMPRESS_LOOP (func, block_size, color_bytesize, color_components ) \
52- for (uint64_t y = 0 ; y < height; y += 4 ) { \
53- for (uint64_t x = 0 ; x < width; x += 4 ) { \
54- func (&src_blocks[src_pos], &dec_blocks[dst_pos], width * color_components); \
55- src_pos += block_size; \
56- dst_pos += 4 * color_bytesize; \
57- } \
58- dst_pos += 3 * width * color_bytesize; \
120+ src_pos += block_size;
121+ dst_pos += odd_pitch;
122+ }
59123 }
124+ }
125+
// Decodes a block-compressed image by handing every block's final position in `dst`
// directly to the decoder. No clipping is applied here, so partial blocks are not handled;
// decompress_image only selects this path when width and height are multiples of 4.
//
// Template parameters:
//   decompress_func - decoder invoked once per block as (source block, destination, pitch).
//   block_size      - bytes consumed from `src` per block.
//   pixel_size      - bytes per decoded pixel in `dst`.
//   component_size  - divisor applied to the byte pitch before it is handed to
//                     decompress_func (the pitch is passed in units of component_size bytes).
//
// Parameters:
//   width, height - destination image size in pixels.
//   src           - compressed blocks, read sequentially.
//   dst           - output pixels, addressed with a row pitch of width * pixel_size bytes.
126+ template <void (*decompress_func)(const void *, void *, int ), int block_size, int pixel_size, int component_size>
127+ static inline void _decompress_mipmap (int width, int height, const uint8_t *src, uint8_t *dst) {
128+ size_t src_pos = 0 ;
129+ size_t dst_pos = 0 ;
130+
131+ // The pitch of a single decoded block row in bytes (4 pixels).
132+ const int block_pitch = 4 * pixel_size;
133+ // The pitch of the image in bytes.
134+ const int image_pitch = width * pixel_size;
135+
// Walk the image in steps of 4 pixels on each axis, one source block per step.
136+ for (int y = 0 ; y < height; y += 4 ) {
137+ for (int x = 0 ; x < width; x += 4 ) {
138+ decompress_func (&src[src_pos], &dst[dst_pos], image_pitch / component_size);
139+ src_pos += block_size;
140+ dst_pos += block_pitch;
141+ }
60142
61- #define DECOMPRESS_LOOP_SAFE (func, block_size, color_bytesize, color_components, output ) \
62- for (uint64_t y = 0 ; y < height; y += 4 ) { \
63- for (uint64_t x = 0 ; x < width; x += 4 ) { \
64- const uint32_t yblock = MIN (height - y, 4ul ); \
65- const uint32_t xblock = MIN (width - x, 4ul ); \
66- \
67- const bool incomplete = yblock < 4 || xblock < 4 ; \
68- uint8_t *dec_out = incomplete ? output : &dec_blocks[y * 4 * width + x * color_bytesize]; \
69- \
70- func (&src_blocks[src_pos], dec_out, 4 * color_components); \
71- src_pos += block_size; \
72- \
73- if (incomplete) { \
74- for (uint32_t cy = 0 ; cy < yblock; cy++) { \
75- for (uint32_t cx = 0 ; cx < xblock; cx++) { \
76- memcpy (&dec_blocks[(y + cy) * 4 * width + (x + cx) * color_bytesize], &output[cy * 4 + cx * color_bytesize], color_bytesize); \
77- } \
78- } \
79- } \
80- } \
143+ // Skip to the next row of blocks, the current one has already been filled.
144+ dst_pos += 3 * image_pitch;
81145 }
146+ }
82147
83- if (width % 4 != 0 || height % 4 != 0 ) {
84- uint64_t src_pos = 0 ;
// Decodes one block-compressed image (or mipmap level) from `src` into `dst`.
//
// Parameters:
//   format        - selects the block decoder, the compressed block size and the output pixel layout.
//   src           - compressed block data.
//   dst           - destination for the decoded pixels.
//   width, height - size of the decoded image in pixels.
//
// When either dimension is not a multiple of 4, the clipping helper (_safe_decompress_mipmap)
// is used; otherwise the direct helper (_decompress_mipmap) is used.
// The helper template arguments are <decoder, block size, bytes per pixel, component size>:
// BC1/BC2/BC3/BC7 use 4 bytes per pixel, BC4 uses 1, BC5 uses 2 (all with component size 1),
// and BC6U/BC6S use 6 bytes per pixel with component size 2.
// Neither switch has a default case, so a format matching none of the cases decodes nothing.
// NOTE(review): width and height are uint64_t here, but both helpers take int parameters,
// so the values are implicitly narrowed at the calls - confirm callers never exceed INT_MAX.
148+ static void decompress_image (BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
149+ const uint8_t *src_blocks = reinterpret_cast <const uint8_t *>(src);
150+ uint8_t *dec_blocks = reinterpret_cast <uint8_t *>(dst);
85151
86- uint8_t r8_output[4 * 4 ];
87- uint8_t rg8_output[4 * 4 * 2 ];
88- uint8_t rgba8_output[4 * 4 * 4 ];
89- uint8_t rgbh_output[4 * 4 * 6 ];
// Round each dimension up to the next multiple of 4; a mismatch means the image has partial blocks.
152+ const uint64_t aligned_width = (width + 3 ) & ~0x03 ;
153+ const uint64_t aligned_height = (height + 3 ) & ~0x03 ;
90154
155+ if (width != aligned_width || height != aligned_height) {
156+ // Decompress the mipmap in a 'safe' way, which involves starting from the top left.
157+ // For each block row, decompress all of the 'full' blocks, then the misaligned one (on the x axis).
158+ // Then, decompress the final misaligned block row at the bottom.
159+ // Finally, decompress the misaligned block at the bottom right.
91160 switch (format) {
92161 case BCdec_BC1: {
93- DECOMPRESS_LOOP_SAFE ( bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4 , 4 , rgba8_output)
162+ _safe_decompress_mipmap< bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
94163 } break ;
95164 case BCdec_BC2: {
96- DECOMPRESS_LOOP_SAFE ( bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4 , 4 , rgba8_output)
165+ _safe_decompress_mipmap< bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
97166 } break ;
98167 case BCdec_BC3: {
99- DECOMPRESS_LOOP_SAFE ( bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4 , 4 , rgba8_output)
168+ _safe_decompress_mipmap< bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
100169 } break ;
101170 case BCdec_BC4: {
102- DECOMPRESS_LOOP_SAFE ( bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1 , 1 , r8_output)
171+ _safe_decompress_mipmap< bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1 , 1 >(width, height, src_blocks, dec_blocks);
103172 } break ;
104173 case BCdec_BC5: {
105- DECOMPRESS_LOOP_SAFE ( bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2 , 2 , rg8_output)
174+ _safe_decompress_mipmap< bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2 , 1 >(width, height, src_blocks, dec_blocks);
106175 } break ;
107176 case BCdec_BC6U: {
108- DECOMPRESS_LOOP_SAFE ( bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6 , 3 , rgbh_output)
177+ _safe_decompress_mipmap< bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6 , 2 >(width, height, src_blocks, dec_blocks);
109178 } break ;
110179 case BCdec_BC6S: {
111- DECOMPRESS_LOOP_SAFE ( bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6 , 3 , rgbh_output)
180+ _safe_decompress_mipmap< bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6 , 2 >(width, height, src_blocks, dec_blocks);
112181 } break ;
113182 case BCdec_BC7: {
114- DECOMPRESS_LOOP_SAFE ( bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4 , 4 , rgba8_output)
183+ _safe_decompress_mipmap< bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
115184 } break ;
116185 }
117-
118186 } else {
119- uint64_t src_pos = 0 , dst_pos = 0 ;
120-
187+ // Just decompress as usual, as fast as possible.
121188 switch (format) {
122189 case BCdec_BC1: {
123- DECOMPRESS_LOOP ( bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4 , 4 )
190+ _decompress_mipmap< bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
124191 } break ;
125192 case BCdec_BC2: {
126- DECOMPRESS_LOOP ( bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4 , 4 )
193+ _decompress_mipmap< bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
127194 } break ;
128195 case BCdec_BC3: {
129- DECOMPRESS_LOOP ( bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4 , 4 )
196+ _decompress_mipmap< bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
130197 } break ;
131198 case BCdec_BC4: {
132- DECOMPRESS_LOOP ( bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1 , 1 )
199+ _decompress_mipmap< bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1 , 1 >(width, height, src_blocks, dec_blocks);
133200 } break ;
134201 case BCdec_BC5: {
135- DECOMPRESS_LOOP ( bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2 , 2 )
202+ _decompress_mipmap< bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2 , 1 >(width, height, src_blocks, dec_blocks);
136203 } break ;
137204 case BCdec_BC6U: {
138- DECOMPRESS_LOOP ( bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6 , 3 )
205+ _decompress_mipmap< bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6 , 2 >(width, height, src_blocks, dec_blocks);
139206 } break ;
140207 case BCdec_BC6S: {
141- DECOMPRESS_LOOP ( bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6 , 3 )
208+ _decompress_mipmap< bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6 , 2 >(width, height, src_blocks, dec_blocks);
142209 } break ;
143210 case BCdec_BC7: {
144- DECOMPRESS_LOOP ( bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4 , 4 )
211+ _decompress_mipmap< bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4 , 1 >(width, height, src_blocks, dec_blocks);
145212 } break ;
146213 }
147214 }
148-
149- #undef DECOMPRESS_LOOP
150- #undef DECOMPRESS_LOOP_SAFE
151215}
152216
153217void image_decompress_bcdec (Image *p_image) {
@@ -156,21 +220,6 @@ void image_decompress_bcdec(Image *p_image) {
156220 int width = p_image->get_width ();
157221 int height = p_image->get_height ();
158222
159- // Compressed images' dimensions should be padded to the upper multiple of 4.
160- // If they aren't, they need to be realigned (the actual data is correctly padded though).
161- const bool need_width_realign = width % 4 != 0 ;
162- const bool need_height_realign = height % 4 != 0 ;
163-
164- if (need_width_realign || need_height_realign) {
165- int new_width = need_width_realign ? width + (4 - (width % 4 )) : width;
166- int new_height = need_height_realign ? height + (4 - (height % 4 )) : height;
167-
168- print_verbose (vformat (" Compressed image's dimensions are not multiples of 4 (%dx%d), aligning to (%dx%d)" , width, height, new_width, new_height));
169-
170- width = new_width;
171- height = new_height;
172- }
173-
174223 Image::Format source_format = p_image->get_format ();
175224 Image::Format target_format = Image::FORMAT_MAX;
176225
@@ -237,8 +286,8 @@ void image_decompress_bcdec(Image *p_image) {
237286 // Decompress mipmaps.
238287 for (int i = 0 ; i <= mm_count; i++) {
239288 int mipmap_w = 0 , mipmap_h = 0 ;
240- int64_t src_ofs = Image::get_image_mipmap_offset_and_dimensions (width, height, source_format, i, mipmap_w, mipmap_h );
241- int64_t dst_ofs = Image::get_image_mipmap_offset (width, height, target_format, i);
289+ int64_t src_ofs = Image::get_image_mipmap_offset (width, height, source_format, i);
290+ int64_t dst_ofs = Image::get_image_mipmap_offset_and_dimensions (width, height, target_format, i, mipmap_w, mipmap_h );
242291 decompress_image (bcdec_format, rb + src_ofs, wb + dst_ofs, mipmap_w, mipmap_h);
243292 }
244293
0 commit comments