Skip to content

Commit 394ea65

Browse files
committed
Add Betsy to speed up BC6 compression
1 parent 96be44c commit 394ea65

17 files changed

+1325
-2
lines changed

core/io/image.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
#include "image.h"
3232

33+
#include "core/config/project_settings.h"
3334
#include "core/error/error_list.h"
3435
#include "core/error/error_macros.h"
3536
#include "core/io/image_loader.h"
@@ -2734,6 +2735,27 @@ Error Image::compress(CompressMode p_mode, CompressSource p_source, ASTCFormat p
27342735
Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels, ASTCFormat p_astc_format) {
27352736
ERR_FAIL_COND_V(data.is_empty(), ERR_INVALID_DATA);
27362737

2738+
// RenderingDevice only.
2739+
if (GLOBAL_GET("rendering/textures/vram_compression/compress_with_gpu")) {
2740+
switch (p_mode) {
2741+
case COMPRESS_BPTC: {
2742+
// BC7 is unsupported currently.
2743+
if ((format >= FORMAT_RF && format <= FORMAT_RGBE9995) && _image_compress_bptc_rd_func) {
2744+
Error result = _image_compress_bptc_rd_func(this, p_channels);
2745+
2746+
// If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme.
2747+
if (result == OK) {
2748+
return OK;
2749+
}
2750+
}
2751+
2752+
} break;
2753+
2754+
default: {
2755+
}
2756+
}
2757+
}
2758+
27372759
switch (p_mode) {
27382760
case COMPRESS_S3TC: {
27392761
ERR_FAIL_NULL_V(_image_compress_bc_func, ERR_UNAVAILABLE);
@@ -3115,6 +3137,7 @@ void (*Image::_image_compress_bptc_func)(Image *, Image::UsedChannels) = nullptr
31153137
void (*Image::_image_compress_etc1_func)(Image *) = nullptr;
31163138
void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr;
31173139
void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr;
3140+
Error (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr;
31183141
void (*Image::_image_decompress_bc)(Image *) = nullptr;
31193142
void (*Image::_image_decompress_bptc)(Image *) = nullptr;
31203143
void (*Image::_image_decompress_etc1)(Image *) = nullptr;

core/io/image.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ class Image : public Resource {
159159
static void (*_image_compress_etc2_func)(Image *, UsedChannels p_channels);
160160
static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format);
161161

162+
static Error (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels);
163+
162164
static void (*_image_decompress_bc)(Image *);
163165
static void (*_image_decompress_bptc)(Image *);
164166
static void (*_image_decompress_etc1)(Image *);

doc/classes/ProjectSettings.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2875,6 +2875,11 @@
28752875
<member name="rendering/textures/lossless_compression/force_png" type="bool" setter="" getter="" default="false">
28762876
If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP.
28772877
</member>
2878+
<member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true">
2879+
If [code]true[/code], the texture importer will use the GPU to compress textures, which makes importing large textures significantly faster.
2880+
[b]Note:[/b] This setting requires either Vulkan or D3D12 to be available as a rendering backend.
2881+
[b]Note:[/b] Currently, this only affects BC6H compression, which is used on desktop and console platforms for HDR images.
2882+
</member>
28782883
<member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false">
28792884
If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size).
28802885
[b]Note:[/b] This setting is an override. The texture importer will always import the format the host platform needs, even if this is set to [code]false[/code].

editor/import/resource_importer_layered_texture.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
341341
}
342342

343343
if (compress_mode == COMPRESS_VRAM_COMPRESSED) {
344-
mipmaps = true;
345-
346344
//if using video ram, optimize
347345
if (channel_pack == 0) {
348346
//remove alpha if not needed, so compression is more efficient
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
2+
#define min3(a, b, c) min(a, min(b, c))
3+
#define max3(a, b, c) max(a, max(b, c))
4+
5+
#define float2 vec2
6+
#define float3 vec3
7+
#define float4 vec4
8+
9+
#define int2 ivec2
10+
#define int3 ivec3
11+
#define int4 ivec4
12+
13+
#define uint2 uvec2
14+
#define uint3 uvec3
15+
#define uint4 uvec4
16+
17+
#define float2x2 mat2
18+
#define float3x3 mat3
19+
#define float4x4 mat4
20+
#define ogre_float4x3 mat3x4
21+
22+
#define ushort uint
23+
#define ushort3 uint3
24+
#define ushort4 uint4
25+
26+
// Short used for read operations. It's an int in GLSL & HLSL, and a ushort in Metal.
27+
#define rshort int
28+
#define rshort2 int2
29+
#define rint int
30+
// Short used for write operations. It's an int in GLSL, and a ushort in HLSL & Metal.
31+
#define wshort2 int2
32+
#define wshort3 int3
33+
34+
#define toFloat3x3(x) mat3(x)
35+
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)
36+
37+
#define mul(x, y) ((x) * (y))
38+
#define saturate(x) clamp((x), 0.0, 1.0)
39+
#define lerp mix
40+
#define rsqrt inversesqrt
41+
#define INLINE
42+
#define NO_INTERPOLATION_PREFIX flat
43+
#define NO_INTERPOLATION_SUFFIX
44+
45+
#define PARAMS_ARG_DECL
46+
#define PARAMS_ARG
47+
48+
#define reversebits bitfieldReverse
49+
50+
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
51+
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
52+
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
53+
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
54+
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
55+
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
56+
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
57+
#define OGRE_ddx(val) dFdx(val)
58+
#define OGRE_ddy(val) dFdy(val)
59+
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
60+
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
61+
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)
62+
63+
#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)
64+
65+
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
66+
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
67+
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)
68+
69+
#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x
70+
71+
#define OGRE_SAMPLER_ARG_DECL(samplerName)
72+
#define OGRE_SAMPLER_ARG(samplerName)
73+
74+
#define OGRE_Texture3D_float4 sampler3D
75+
#define OGRE_OUT_REF(declType, variableName) out declType variableName
76+
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName

modules/betsy/SCsub

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
Import("env")
3+
Import("env_modules")
4+
5+
env_betsy = env_modules.Clone()
6+
env_betsy.GLSL_HEADER("bc6h.glsl")
7+
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])
8+
9+
# Thirdparty source files
10+
thirdparty_obj = []
11+
thirdparty_dir = "#thirdparty/betsy/"
12+
env_betsy.Prepend(CPPPATH=[thirdparty_dir])
13+
14+
env_thirdparty = env_betsy.Clone()
15+
env_thirdparty.disable_warnings()
16+
env.modules_sources += thirdparty_obj
17+
18+
# Godot source files
19+
module_obj = []
20+
env_betsy.add_source_files(module_obj, "*.cpp")
21+
env.modules_sources += module_obj
22+
23+
# Needed to force rebuilding the module files when the thirdparty library is updated.
24+
env.Depends(module_obj, thirdparty_obj)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv))
3+
#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw))
4+
5+
#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0))
6+
#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0))
7+
#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value)
8+
9+
#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv))
10+
11+
#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
12+
#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
13+
14+
#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
15+
#define OGRE_imageWrite2DArray4(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
16+
17+
//#define sharedOnlyBarrier memoryBarrierShared();barrier();

0 commit comments

Comments
 (0)