Skip to content

Commit 350ac5b

Browse files
committed
Merge pull request #104575 from bruvzg/cvtt_mul_4
Force multiple of 4 sizes for CVTT compressor.
2 parents c5e36a9 + 6f50511 commit 350ac5b

File tree

1 file changed

+55
-15
lines changed

1 file changed

+55
-15
lines changed

modules/cvtt/image_compress_cvtt.cpp

Lines changed: 55 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ struct CVTTCompressionJobParams {
4646
};
4747

4848
struct CVTTCompressionRowTask {
49-
const uint8_t *in_mm_bytes = nullptr;
49+
Vector<uint8_t> in_mm;
5050
uint8_t *out_mm_bytes = nullptr;
5151
int y_start = 0;
5252
int width = 0;
@@ -61,7 +61,7 @@ struct CVTTCompressionJobQueue {
6161
};
6262

6363
static void _digest_row_task(const CVTTCompressionJobParams &p_job_params, const CVTTCompressionRowTask &p_row_task) {
64-
const uint8_t *in_bytes = p_row_task.in_mm_bytes;
64+
const uint8_t *in_bytes = p_row_task.in_mm.ptr();
6565
uint8_t *out_bytes = p_row_task.out_mm_bytes;
6666
int w = p_row_task.width;
6767
int h = p_row_task.height;
@@ -151,6 +151,11 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
151151
int w = p_image->get_width();
152152
int h = p_image->get_height();
153153

154+
if (w % 4 != 0 || h % 4 != 0) {
155+
w = w <= 2 ? w : (w + 3) & ~3;
156+
h = h <= 2 ? h : (h + 3) & ~3;
157+
}
158+
154159
bool is_ldr = (p_image->get_format() <= Image::FORMAT_RGBA8);
155160
bool is_hdr = (p_image->get_format() >= Image::FORMAT_RF) && (p_image->get_format() <= Image::FORMAT_RGBE9995);
156161

@@ -180,8 +185,6 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
180185
p_image->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
181186
}
182187

183-
const uint8_t *rb = p_image->get_data().ptr();
184-
185188
Vector<uint8_t> data;
186189
int64_t target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
187190
int mm_count = p_image->has_mipmaps() ? Image::get_image_required_mipmaps(w, h, target_format) : 0;
@@ -209,20 +212,59 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
209212
Vector<CVTTCompressionRowTask> tasks;
210213

211214
for (int i = 0; i <= mm_count; i++) {
212-
int bw = w % 4 != 0 ? w + (4 - w % 4) : w;
213-
int bh = h % 4 != 0 ? h + (4 - h % 4) : h;
215+
Vector<uint8_t> in_data;
216+
int width, height;
217+
Image::get_image_mipmap_offset_and_dimensions(w, h, target_format, i, width, height);
218+
219+
int bw = width % 4 != 0 ? width + (4 - width % 4) : width;
220+
int bh = height % 4 != 0 ? height + (4 - height % 4) : height;
221+
222+
int64_t src_mip_ofs, src_mip_size;
223+
int src_mip_w, src_mip_h;
224+
p_image->get_mipmap_offset_size_and_dimensions(i, src_mip_ofs, src_mip_size, src_mip_w, src_mip_h);
225+
226+
// Pad textures to nearest block by smearing.
227+
if (width != src_mip_w || height != src_mip_h) {
228+
const uint8_t *src_mip_read = p_image->ptr() + src_mip_ofs;
229+
230+
// Reserve the buffer for padded image data.
231+
int px_size = Image::get_format_pixel_size(p_image->get_format());
232+
in_data.resize(width * height * px_size);
233+
uint8_t *ptrw = in_data.ptrw();
234+
235+
int x = 0, y = 0;
236+
for (y = 0; y < src_mip_h; y++) {
237+
for (x = 0; x < src_mip_w; x++) {
238+
memcpy(ptrw + (width * y + x) * px_size, src_mip_read + (src_mip_w * y + x) * px_size, px_size);
239+
}
214240

215-
int64_t src_ofs = p_image->get_mipmap_offset(i);
241+
// First, smear in x.
242+
for (; x < width; x++) {
243+
memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - 1) * px_size, px_size);
244+
}
245+
}
216246

217-
const uint8_t *in_bytes = &rb[src_ofs];
247+
// Then, smear in y.
248+
for (; y < height; y++) {
249+
for (x = 0; x < width; x++) {
250+
memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - width) * px_size, px_size);
251+
}
252+
}
253+
} else {
254+
// Create a buffer filled with the source mip layer data.
255+
in_data.resize(src_mip_size);
256+
memcpy(in_data.ptrw(), p_image->ptr() + src_mip_ofs, src_mip_size);
257+
}
258+
259+
//const uint8_t *in_bytes = &rb[src_ofs];
218260
uint8_t *out_bytes = &wb[dst_ofs];
219261

220-
for (int y_start = 0; y_start < h; y_start += 4) {
262+
for (int y_start = 0; y_start < height; y_start += 4) {
221263
CVTTCompressionRowTask row_task;
222-
row_task.width = w;
223-
row_task.height = h;
264+
row_task.width = width;
265+
row_task.height = height;
224266
row_task.y_start = y_start;
225-
row_task.in_mm_bytes = in_bytes;
267+
row_task.in_mm = in_data;
226268
row_task.out_mm_bytes = out_bytes;
227269

228270
tasks.push_back(row_task);
@@ -231,8 +273,6 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
231273
}
232274

233275
dst_ofs += (MAX(4, bw) * MAX(4, bh)) >> shift;
234-
w = MAX(w / 2, 1);
235-
h = MAX(h / 2, 1);
236276
}
237277

238278
const CVTTCompressionRowTask *tasks_rb = tasks.ptr();
@@ -242,7 +282,7 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
242282
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_native_group_task(&_digest_job_queue, &job_queue, WorkerThreadPool::get_singleton()->get_thread_count(), -1, true, SNAME("CVTT Compress"));
243283
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
244284

245-
p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
285+
p_image->set_data(w, h, p_image->has_mipmaps(), target_format, data);
246286

247287
print_verbose(vformat("CVTT: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
248288
}

0 commit comments

Comments
 (0)