Skip to content

Commit 8402a1d

Browse files
committed
bcdec: Fix decompressing mipmaps of non-power-of-2 textures
1 parent 8f78e75 commit 8402a1d

File tree

1 file changed

+125
-76
lines changed

1 file changed

+125
-76
lines changed

modules/bcdec/image_decompress_bcdec.cpp

Lines changed: 125 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -44,110 +44,174 @@ inline void bcdec_bc6h_half_u(const void *compressedBlock, void *decompressedBlo
4444
bcdec_bc6h_half(compressedBlock, decompressedBlock, destinationPitch, false);
4545
}
4646

47-
static void decompress_image(BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
48-
const uint8_t *src_blocks = reinterpret_cast<const uint8_t *>(src);
49-
uint8_t *dec_blocks = reinterpret_cast<uint8_t *>(dst);
47+
template <void (*decompress_func)(const void *, void *, int), int block_size, int pixel_size, int component_size>
48+
static inline void _safe_decompress_mipmap(int width, int height, const uint8_t *src, uint8_t *dst) {
49+
// A stack-allocated output buffer large enough to contain an entire uncompressed block.
50+
uint8_t temp_buf[4 * 4 * pixel_size];
51+
52+
// The number of leftover pixels on each axis, past the last full 4x4 block.
53+
const int width_diff = width - (width & ~0x03);
54+
const int height_diff = height - (height & ~0x03);
55+
56+
// The number of full 4x4 blocks on each axis.
57+
const int width_blocks = (width & ~0x03) / 4;
58+
const int height_blocks = (height & ~0x03) / 4;
59+
60+
// The pitch of the image in bytes.
61+
const int image_pitch = width * pixel_size;
62+
// The pitch of a block in bytes.
63+
const int block_pitch = 4 * pixel_size;
64+
// The pitch of the partial block on the right edge, in bytes.
65+
const int odd_pitch = width_diff * pixel_size;
66+
67+
size_t src_pos = 0;
68+
size_t dst_pos = 0;
69+
70+
// Decompress the blocks, starting from the top.
71+
for (int y = 0; y < height_blocks; y += 1) {
72+
// Decompress the blocks, starting from the left.
73+
for (int x = 0; x < width_blocks; x += 1) {
74+
decompress_func(&src[src_pos], &dst[dst_pos], image_pitch / component_size);
75+
src_pos += block_size;
76+
dst_pos += block_pitch;
77+
}
78+
79+
// Decompress the block on the right.
80+
if (width_diff > 0) {
81+
decompress_func(&src[src_pos], temp_buf, block_pitch / component_size);
82+
83+
// Copy the data from the temporary buffer to the output.
84+
for (int i = 0; i < 4; i++) {
85+
memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch);
86+
}
87+
88+
src_pos += block_size;
89+
dst_pos += odd_pitch;
90+
}
91+
92+
// Skip to the next row of blocks, the current one has already been filled.
93+
dst_pos += 3 * image_pitch;
94+
}
95+
96+
// Decompress the blocks at the bottom of the image.
97+
if (height_diff > 0) {
98+
// Decompress the full-width blocks of the partial bottom row.
99+
for (int x = 0; x < width_blocks; x += 1) {
100+
decompress_func(&src[src_pos], temp_buf, block_pitch / component_size);
101+
102+
// Copy the data from the temporary buffer to the output.
103+
for (int i = 0; i < height_diff; i++) {
104+
memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], block_pitch);
105+
}
106+
107+
src_pos += block_size;
108+
dst_pos += block_pitch;
109+
}
110+
111+
// Decompress the block in the lower-right corner.
112+
if (width_diff > 0) {
113+
decompress_func(&src[src_pos], temp_buf, block_pitch / component_size);
114+
115+
// Copy the data from the temporary buffer to the output.
116+
for (int i = 0; i < height_diff; i++) {
117+
memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch);
118+
}
50119

51-
#define DECOMPRESS_LOOP(func, block_size, color_bytesize, color_components) \
52-
for (uint64_t y = 0; y < height; y += 4) { \
53-
for (uint64_t x = 0; x < width; x += 4) { \
54-
func(&src_blocks[src_pos], &dec_blocks[dst_pos], width * color_components); \
55-
src_pos += block_size; \
56-
dst_pos += 4 * color_bytesize; \
57-
} \
58-
dst_pos += 3 * width * color_bytesize; \
120+
src_pos += block_size;
121+
dst_pos += odd_pitch;
122+
}
59123
}
124+
}
125+
126+
template <void (*decompress_func)(const void *, void *, int), int block_size, int pixel_size, int component_size>
127+
static inline void _decompress_mipmap(int width, int height, const uint8_t *src, uint8_t *dst) {
128+
size_t src_pos = 0;
129+
size_t dst_pos = 0;
130+
131+
// The pitch of a block in bytes.
132+
const int block_pitch = 4 * pixel_size;
133+
// The pitch of the image in bytes.
134+
const int image_pitch = width * pixel_size;
135+
136+
for (int y = 0; y < height; y += 4) {
137+
for (int x = 0; x < width; x += 4) {
138+
decompress_func(&src[src_pos], &dst[dst_pos], image_pitch / component_size);
139+
src_pos += block_size;
140+
dst_pos += block_pitch;
141+
}
60142

61-
#define DECOMPRESS_LOOP_SAFE(func, block_size, color_bytesize, color_components, output) \
62-
for (uint64_t y = 0; y < height; y += 4) { \
63-
for (uint64_t x = 0; x < width; x += 4) { \
64-
const uint32_t yblock = MIN(height - y, 4ul); \
65-
const uint32_t xblock = MIN(width - x, 4ul); \
66-
\
67-
const bool incomplete = yblock < 4 || xblock < 4; \
68-
uint8_t *dec_out = incomplete ? output : &dec_blocks[y * 4 * width + x * color_bytesize]; \
69-
\
70-
func(&src_blocks[src_pos], dec_out, 4 * color_components); \
71-
src_pos += block_size; \
72-
\
73-
if (incomplete) { \
74-
for (uint32_t cy = 0; cy < yblock; cy++) { \
75-
for (uint32_t cx = 0; cx < xblock; cx++) { \
76-
memcpy(&dec_blocks[(y + cy) * 4 * width + (x + cx) * color_bytesize], &output[cy * 4 + cx * color_bytesize], color_bytesize); \
77-
} \
78-
} \
79-
} \
80-
} \
143+
// Skip to the next row of blocks, the current one has already been filled.
144+
dst_pos += 3 * image_pitch;
81145
}
146+
}
82147

83-
if (width % 4 != 0 || height % 4 != 0) {
84-
uint64_t src_pos = 0;
148+
static void decompress_image(BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
149+
const uint8_t *src_blocks = reinterpret_cast<const uint8_t *>(src);
150+
uint8_t *dec_blocks = reinterpret_cast<uint8_t *>(dst);
85151

86-
uint8_t r8_output[4 * 4];
87-
uint8_t rg8_output[4 * 4 * 2];
88-
uint8_t rgba8_output[4 * 4 * 4];
89-
uint8_t rgbh_output[4 * 4 * 6];
152+
const uint64_t aligned_width = (width + 3) & ~0x03;
153+
const uint64_t aligned_height = (height + 3) & ~0x03;
90154

155+
if (width != aligned_width || height != aligned_height) {
156+
// Decompress the mipmap in a 'safe' way, which involves starting from the top left.
157+
// For each block row, decompress all of the 'full' blocks, then the misaligned one (on the x axis).
158+
// Then, decompress the final misaligned block row at the bottom.
159+
// Finally, decompress the misaligned block at the bottom right.
91160
switch (format) {
92161
case BCdec_BC1: {
93-
DECOMPRESS_LOOP_SAFE(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4, rgba8_output)
162+
_safe_decompress_mipmap<bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
94163
} break;
95164
case BCdec_BC2: {
96-
DECOMPRESS_LOOP_SAFE(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4, rgba8_output)
165+
_safe_decompress_mipmap<bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
97166
} break;
98167
case BCdec_BC3: {
99-
DECOMPRESS_LOOP_SAFE(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4, rgba8_output)
168+
_safe_decompress_mipmap<bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
100169
} break;
101170
case BCdec_BC4: {
102-
DECOMPRESS_LOOP_SAFE(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1, r8_output)
171+
_safe_decompress_mipmap<bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1>(width, height, src_blocks, dec_blocks);
103172
} break;
104173
case BCdec_BC5: {
105-
DECOMPRESS_LOOP_SAFE(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2, rg8_output)
174+
_safe_decompress_mipmap<bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 1>(width, height, src_blocks, dec_blocks);
106175
} break;
107176
case BCdec_BC6U: {
108-
DECOMPRESS_LOOP_SAFE(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3, rgbh_output)
177+
_safe_decompress_mipmap<bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
109178
} break;
110179
case BCdec_BC6S: {
111-
DECOMPRESS_LOOP_SAFE(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3, rgbh_output)
180+
_safe_decompress_mipmap<bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
112181
} break;
113182
case BCdec_BC7: {
114-
DECOMPRESS_LOOP_SAFE(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4, rgba8_output)
183+
_safe_decompress_mipmap<bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
115184
} break;
116185
}
117-
118186
} else {
119-
uint64_t src_pos = 0, dst_pos = 0;
120-
187+
// Just decompress as usual, as fast as possible.
121188
switch (format) {
122189
case BCdec_BC1: {
123-
DECOMPRESS_LOOP(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4)
190+
_decompress_mipmap<bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
124191
} break;
125192
case BCdec_BC2: {
126-
DECOMPRESS_LOOP(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4)
193+
_decompress_mipmap<bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
127194
} break;
128195
case BCdec_BC3: {
129-
DECOMPRESS_LOOP(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4)
196+
_decompress_mipmap<bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
130197
} break;
131198
case BCdec_BC4: {
132-
DECOMPRESS_LOOP(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1)
199+
_decompress_mipmap<bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1>(width, height, src_blocks, dec_blocks);
133200
} break;
134201
case BCdec_BC5: {
135-
DECOMPRESS_LOOP(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2)
202+
_decompress_mipmap<bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 1>(width, height, src_blocks, dec_blocks);
136203
} break;
137204
case BCdec_BC6U: {
138-
DECOMPRESS_LOOP(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3)
205+
_decompress_mipmap<bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
139206
} break;
140207
case BCdec_BC6S: {
141-
DECOMPRESS_LOOP(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3)
208+
_decompress_mipmap<bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
142209
} break;
143210
case BCdec_BC7: {
144-
DECOMPRESS_LOOP(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4)
211+
_decompress_mipmap<bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
145212
} break;
146213
}
147214
}
148-
149-
#undef DECOMPRESS_LOOP
150-
#undef DECOMPRESS_LOOP_SAFE
151215
}
152216

153217
void image_decompress_bcdec(Image *p_image) {
@@ -156,21 +220,6 @@ void image_decompress_bcdec(Image *p_image) {
156220
int width = p_image->get_width();
157221
int height = p_image->get_height();
158222

159-
// Compressed images' dimensions should be padded to the upper multiple of 4.
160-
// If they aren't, they need to be realigned (the actual data is correctly padded though).
161-
const bool need_width_realign = width % 4 != 0;
162-
const bool need_height_realign = height % 4 != 0;
163-
164-
if (need_width_realign || need_height_realign) {
165-
int new_width = need_width_realign ? width + (4 - (width % 4)) : width;
166-
int new_height = need_height_realign ? height + (4 - (height % 4)) : height;
167-
168-
print_verbose(vformat("Compressed image's dimensions are not multiples of 4 (%dx%d), aligning to (%dx%d)", width, height, new_width, new_height));
169-
170-
width = new_width;
171-
height = new_height;
172-
}
173-
174223
Image::Format source_format = p_image->get_format();
175224
Image::Format target_format = Image::FORMAT_MAX;
176225

@@ -237,8 +286,8 @@ void image_decompress_bcdec(Image *p_image) {
237286
// Decompress mipmaps.
238287
for (int i = 0; i <= mm_count; i++) {
239288
int mipmap_w = 0, mipmap_h = 0;
240-
int64_t src_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, source_format, i, mipmap_w, mipmap_h);
241-
int64_t dst_ofs = Image::get_image_mipmap_offset(width, height, target_format, i);
289+
int64_t src_ofs = Image::get_image_mipmap_offset(width, height, source_format, i);
290+
int64_t dst_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, target_format, i, mipmap_w, mipmap_h);
242291
decompress_image(bcdec_format, rb + src_ofs, wb + dst_ofs, mipmap_w, mipmap_h);
243292
}
244293

0 commit comments

Comments
 (0)