Skip to content

Commit 70a2ce9

Browse files
committed
Merge pull request godotengine#110060 from BlueCube3310/betsy-rgb
Betsy: Convert RGB to RGBA on the GPU for faster compression
2 parents da3bdac + 885904e commit 70a2ce9

File tree

4 files changed

+254
-24
lines changed

4 files changed

+254
-24
lines changed

modules/betsy/SCsub

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ env_betsy.GLSL_HEADER("bc6h.glsl")
1111
env_betsy.GLSL_HEADER("bc1.glsl")
1212
env_betsy.GLSL_HEADER("bc4.glsl")
1313
env_betsy.GLSL_HEADER("alpha_stitch.glsl")
14+
env_betsy.GLSL_HEADER("rgb_to_rgba.glsl")
1415

1516
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])
1617

modules/betsy/image_compress_betsy.cpp

Lines changed: 119 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "bc1.glsl.gen.h"
3939
#include "bc4.glsl.gen.h"
4040
#include "bc6h.glsl.gen.h"
41+
#include "rgb_to_rgba.glsl.gen.h"
4142
#include "servers/display/display_server.h"
4243

4344
static Mutex betsy_mutex;
@@ -220,6 +221,44 @@ void BetsyCompressor::_init() {
220221
cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
221222
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
222223
}
224+
225+
{
226+
Ref<RDShaderFile> rgb_to_rgba_shader;
227+
rgb_to_rgba_shader.instantiate();
228+
Error err = rgb_to_rgba_shader->parse_versions_from_text(rgb_to_rgba_shader_glsl);
229+
230+
if (err != OK) {
231+
rgb_to_rgba_shader->print_errors("Betsy RGB to RGBA shader");
232+
}
233+
234+
// Float32.
235+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_float"));
236+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled.is_null());
237+
238+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled);
239+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline.is_null());
240+
241+
// Float16.
242+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_half"));
243+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled.is_null());
244+
245+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled);
246+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline.is_null());
247+
248+
// Unorm8.
249+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm8"));
250+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled.is_null());
251+
252+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled);
253+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline.is_null());
254+
255+
// Unorm16.
256+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm16"));
257+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled.is_null());
258+
259+
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled);
260+
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline.is_null());
261+
}
223262
}
224263

225264
void BetsyCompressor::init() {
@@ -284,7 +323,9 @@ static int get_next_multiple(int n, int m) {
284323
return n + (m - (n % m));
285324
}
286325

287-
static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
326+
static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format, bool &r_is_rgb) {
327+
r_is_rgb = false;
328+
288329
switch (r_img->get_format()) {
289330
case Image::FORMAT_L8:
290331
r_img->convert(Image::FORMAT_RGBA8);
@@ -305,7 +346,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
305346
break;
306347

307348
case Image::FORMAT_RGB8:
308-
r_img->convert(Image::FORMAT_RGBA8);
349+
r_is_rgb = true;
309350
r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
310351
break;
311352

@@ -322,7 +363,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
322363
break;
323364

324365
case Image::FORMAT_RGBH:
325-
r_img->convert(Image::FORMAT_RGBAH);
366+
r_is_rgb = true;
326367
r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
327368
break;
328369

@@ -339,7 +380,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
339380
break;
340381

341382
case Image::FORMAT_RGBF:
342-
r_img->convert(Image::FORMAT_RGBAF);
383+
r_is_rgb = true;
343384
r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
344385
break;
345386

@@ -360,31 +401,14 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
360401
break;
361402

362403
case Image::FORMAT_RGB16:
363-
r_img->convert(Image::FORMAT_RGBA16);
404+
r_is_rgb = true;
364405
r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
365406
break;
366407

367408
case Image::FORMAT_RGBA16:
368409
r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
369410
break;
370411

371-
case Image::FORMAT_R16I:
372-
r_format = RD::DATA_FORMAT_R16_UINT;
373-
break;
374-
375-
case Image::FORMAT_RG16I:
376-
r_format = RD::DATA_FORMAT_R16G16_UINT;
377-
break;
378-
379-
case Image::FORMAT_RGB16I:
380-
r_img->convert(Image::FORMAT_RGBA16I);
381-
r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
382-
break;
383-
384-
case Image::FORMAT_RGBA16I:
385-
r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
386-
break;
387-
388412
default: {
389413
return ERR_UNAVAILABLE;
390414
}
@@ -445,7 +469,8 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
445469
src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
446470
}
447471

448-
err = get_src_texture_format(r_img, src_texture_format.format);
472+
bool needs_rgb_to_rgba = false;
473+
err = get_src_texture_format(r_img, src_texture_format.format, needs_rgb_to_rgba);
449474

450475
if (err != OK) {
451476
return err;
@@ -546,9 +571,79 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
546571
}
547572

548573
// Create the textures on the GPU.
549-
RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
574+
RID src_texture;
550575
RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
551576

577+
if (needs_rgb_to_rgba) {
578+
// RGB textures cannot be sampled directly on most hardware, so we do a little trick involving a compute shader
579+
// which takes the input data as an SSBO and converts it directly into an RGBA image.
580+
BetsyShaderType rgb_shader_type = BETSY_SHADER_MAX;
581+
582+
switch (r_img->get_format()) {
583+
case Image::FORMAT_RGB8:
584+
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM8;
585+
break;
586+
case Image::FORMAT_RGBH:
587+
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_HALF;
588+
break;
589+
case Image::FORMAT_RGBF:
590+
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_FLOAT;
591+
break;
592+
case Image::FORMAT_RGB16:
593+
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM16;
594+
break;
595+
default:
596+
break;
597+
}
598+
599+
// The source 'RGB' buffer.
600+
RID source_buffer = compress_rd->storage_buffer_create(src_image_ptr[0].size(), src_image_ptr[0].span());
601+
602+
RD::TextureFormat rgba_texture_format = src_texture_format;
603+
rgba_texture_format.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
604+
src_texture = compress_rd->texture_create(rgba_texture_format, RD::TextureView());
605+
606+
Vector<RD::Uniform> uniforms;
607+
{
608+
{
609+
RD::Uniform u;
610+
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
611+
u.binding = 0;
612+
u.append_id(source_buffer);
613+
uniforms.push_back(u);
614+
}
615+
{
616+
RD::Uniform u;
617+
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
618+
u.binding = 1;
619+
u.append_id(src_texture);
620+
uniforms.push_back(u);
621+
}
622+
}
623+
624+
BetsyShader &rgb_shader = cached_shaders[rgb_shader_type];
625+
626+
RID uniform_set = compress_rd->uniform_set_create(uniforms, rgb_shader.compiled, 0);
627+
RD::ComputeListID compute_list = compress_rd->compute_list_begin();
628+
629+
compress_rd->compute_list_bind_compute_pipeline(compute_list, rgb_shader.pipeline);
630+
compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
631+
632+
// Prepare the push constant with the mipmap's resolution.
633+
RGBToRGBAPushConstant push_constant;
634+
push_constant.width = width;
635+
push_constant.height = height;
636+
637+
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RGBToRGBAPushConstant));
638+
compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 8) / 8, get_next_multiple(height, 8) / 8, 1);
639+
640+
compress_rd->compute_list_end();
641+
642+
compress_rd->free_rid(source_buffer);
643+
} else {
644+
src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
645+
}
646+
552647
{
553648
Vector<RD::Uniform> uniforms;
554649
{

modules/betsy/image_compress_betsy.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ enum BetsyShaderType {
6666
BETSY_SHADER_BC6_SIGNED,
6767
BETSY_SHADER_BC6_UNSIGNED,
6868
BETSY_SHADER_ALPHA_STITCH,
69+
BETSY_SHADER_RGB_TO_RGBA_FLOAT,
70+
BETSY_SHADER_RGB_TO_RGBA_HALF,
71+
BETSY_SHADER_RGB_TO_RGBA_UNORM8,
72+
BETSY_SHADER_RGB_TO_RGBA_UNORM16,
6973
BETSY_SHADER_MAX,
7074
};
7175

@@ -85,6 +89,12 @@ struct BC4PushConstant {
8589
uint32_t padding[3] = { 0 };
8690
};
8791

92+
// Push constant block consumed by the rgb_to_rgba compute shader (see the
// `Params` block in rgb_to_rgba.glsl, which has the same four-uint layout).
//
// All members carry default initializers, in line with BC4PushConstant's
// zeroed padding: the whole struct is uploaded byte-for-byte through
// compute_list_set_push_constant(), so leaving the padding uninitialized would
// hand indeterminate memory to the driver.
struct RGBToRGBAPushConstant {
	uint32_t width = 0; // Width of the image being converted, in texels.
	uint32_t height = 0; // Height of the image being converted, in texels.
	uint32_t padding[2] = { 0 }; // Not read by the shader; brings the block to 16 bytes.
};
97+
8898
void free_device();
8999

90100
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);

modules/betsy/rgb_to_rgba.glsl

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#[versions]
2+
3+
version_float = "#define VER_FLOAT";
4+
version_half = "#define VER_HALF";
5+
version_unorm8 = "#define VER_UINT8";
6+
version_unorm16 = "#define VER_UINT16";
7+
8+
#[compute]
9+
#version 450
10+
11+
#VERSION_DEFINES
12+
13+
// Each invocation converts a single texel; invocations are grouped into 8x8 workgroups.
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
14+
15+
// Source RGB texel data. It is exposed as floats for VER_FLOAT and as raw 32-bit
// words for every other version; main() unpacks the narrower components itself.
layout(std430, binding = 0) buffer Source {
16+
#if defined(VER_FLOAT)
17+
float data[];
18+
#else
19+
uint data[];
20+
#endif
21+
}
22+
source;
23+
24+
// Destination RGBA image. The format qualifier is selected by the active version define.
#if defined(VER_FLOAT)
25+
layout(binding = 1, rgba32f) uniform writeonly image2D dest;
26+
#elif defined(VER_HALF)
27+
layout(binding = 1, rgba16f) uniform writeonly image2D dest;
28+
#elif defined(VER_UINT8)
29+
layout(binding = 1, rgba8) uniform writeonly image2D dest;
30+
#elif defined(VER_UINT16)
31+
layout(binding = 1, rgba16) uniform writeonly image2D dest;
32+
#endif
33+
34+
// Image dimensions in texels, used by main() for the bounds check and for indexing.
// p_padding is not read by this shader.
layout(push_constant, std430) uniform Params {
35+
uint p_width;
36+
uint p_height;
37+
uint p_padding[2];
38+
}
39+
params;
40+
41+
void main() {
42+
// gl_GlobalInvocationID is equivalent to the current texel coordinates.
43+
if (gl_GlobalInvocationID.x >= params.p_width || gl_GlobalInvocationID.y >= params.p_height) {
44+
return;
45+
}
46+
47+
// The index of a texel in the source buffer, NOT an index of source.data[]
48+
const int texel_index = int(gl_GlobalInvocationID.y * params.p_width + gl_GlobalInvocationID.x);
49+
50+
#if defined(VER_FLOAT)
51+
// Since 32-bit floats are aligned with RGBF texel data, just retrieve the values from the array.
52+
// Multiply by 3 to align with the components.
53+
54+
int data_index = texel_index * 3;
55+
vec3 color_rgb = vec3(source.data[data_index], source.data[data_index + 1], source.data[data_index + 2]);
56+
57+
#elif defined(VER_UINT8)
58+
// RGB8 texel data and 32-bit uints are not aligned, so we have to use a bit of magic.
59+
// The source texel can be in either of 4 alignment 'states':
60+
// 0 - [ XYZ_-____ ]
61+
// 1 - [ _YZW-____ ]
62+
// 2 - [ __ZW-X___ ]
63+
// 3 - [ ___W-XY__ ]
64+
// The texel index additionally needs to be decremented after every 'cycle' in order to properly fit into the source array.
65+
66+
vec3 color_rgb = vec3(0.0);
67+
int data_index = texel_index - (texel_index / 4);
68+
69+
switch ((texel_index * 3) % 4) {
70+
case 0:
71+
color_rgb = unpackUnorm4x8(source.data[data_index]).xyz;
72+
break;
73+
case 1:
74+
color_rgb = unpackUnorm4x8(source.data[data_index - 1]).yzw;
75+
break;
76+
case 2:
77+
color_rgb.rg = unpackUnorm4x8(source.data[data_index - 1]).zw;
78+
color_rgb.b = unpackUnorm4x8(source.data[data_index]).x;
79+
break;
80+
case 3:
81+
color_rgb.r = unpackUnorm4x8(source.data[data_index - 1]).w;
82+
color_rgb.gb = unpackUnorm4x8(source.data[data_index]).xy;
83+
break;
84+
default:
85+
break;
86+
}
87+
88+
#else
89+
// In a similar vein to RGB8, the RGBH/RGB16 source texel can be in either of 2 alignment 'states':
90+
// 0 - [ XY-X_ ]
91+
// 1 - [ _Y-XY ]
92+
// The texel index has to be incremented this time, as the size of a texel (6 bytes) is greater than that of a 32-bit uint (4 bytes).
93+
94+
vec3 color_rgb = vec3(0.0);
95+
int data_index = texel_index + (texel_index / 2);
96+
97+
switch ((texel_index * 3) % 2) {
98+
#if defined(VER_HALF)
99+
case 0:
100+
color_rgb.xy = unpackHalf2x16(source.data[data_index]);
101+
color_rgb.z = unpackHalf2x16(source.data[data_index + 1]).x;
102+
break;
103+
case 1:
104+
color_rgb.x = unpackHalf2x16(source.data[data_index]).y;
105+
color_rgb.yz = unpackHalf2x16(source.data[data_index + 1]);
106+
break;
107+
#elif defined(VER_UINT16)
108+
case 0:
109+
color_rgb.xy = unpackUnorm2x16(source.data[data_index]);
110+
color_rgb.z = unpackUnorm2x16(source.data[data_index + 1]).x;
111+
break;
112+
case 1:
113+
color_rgb.x = unpackUnorm2x16(source.data[data_index]).y;
114+
color_rgb.yz = unpackUnorm2x16(source.data[data_index + 1]);
115+
break;
116+
#endif
117+
default:
118+
break;
119+
}
120+
#endif
121+
122+
// Store the resulting RGBA color.
123+
imageStore(dest, ivec2(gl_GlobalInvocationID.xy), vec4(color_rgb, 1.0));
124+
}

0 commit comments

Comments
 (0)