Skip to content
This repository was archived by the owner on May 22, 2025. It is now read-only.

Commit 8624ed6

Browse files
committed
BC6HBC7EncoderCS updated with latest fixes to shaders from DirectXTex
1 parent c959120 commit 8624ed6

File tree

7 files changed

+9944
-9519
lines changed

7 files changed

+9944
-9519
lines changed

BC6HBC7EncoderCS/BC6HEncoderCS10.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ HRESULT CGPUBC6HEncoder::GPU_Encode( ID3D11Device* pDevice, ID3D11DeviceContext*
7171
ID3D11UnorderedAccessView* pErrBestModeUAV[2] = { nullptr, nullptr };
7272
ID3D11ShaderResourceView* pErrBestModeSRV[2] = { nullptr, nullptr };
7373
ID3D11Buffer* pCBCS = nullptr;
74+
D3D11_BUFFER_DESC sbOutDesc = {};
7475

7576
if ( !(dstFormat == DXGI_FORMAT_BC6H_SF16 || dstFormat == DXGI_FORMAT_BC6H_UF16) ||
7677
!ppDstTextureAsBufOut )
@@ -96,7 +97,6 @@ HRESULT CGPUBC6HEncoder::GPU_Encode( ID3D11Device* pDevice, ID3D11DeviceContext*
9697
}
9798

9899
// Create output buffer with its size identical to input texture
99-
D3D11_BUFFER_DESC sbOutDesc = {};
100100
{
101101
sbOutDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
102102
sbOutDesc.CPUAccessFlags = 0;

BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// The Compute Shader for BC6H Encoder
55
//
66
// Copyright (c) Microsoft Corporation. All rights reserved.
7+
// Licensed under the MIT License.
78
//--------------------------------------------------------------------------------------
89

910
#define REF_DEVICE

BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl

Lines changed: 54 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
// The Compute Shader for BC7 Encoder
55
//
66
// Copyright (c) Microsoft Corporation. All rights reserved.
7+
// Licensed under the MIT License.
78
//--------------------------------------------------------------------------------------
89

9-
//#define REF_DEVICE
10+
#define REF_DEVICE
1011

1112
#define CHAR_LENGTH 8
1213
#define NCHANNELS 4
@@ -679,16 +680,16 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
679680
if (1 == g_mode_id)
680681
{
681682
// in mode 1, there is only one p bit per subset
682-
max_p = 4;
683+
max_p = 2;
683684
}
684685
else
685686
{
686687
// in modes 3 and 7, there are two p bits per subset, one for each endpoint
687-
max_p = 16;
688+
max_p = 4;
688689
}
689690

690-
uint rotation = 0;
691-
uint error = MAX_UINT;
691+
uint final_p[2] = { 0, 0 };
692+
uint error[2] = { MAX_UINT, MAX_UINT };
692693
for ( uint p = 0; p < max_p; p ++ )
693694
{
694695
endPoint[0] = endPointBackup[0];
@@ -698,15 +699,15 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
698699
{
699700
if (g_mode_id == 1)
700701
{
701-
compress_endpoints1( endPoint[i], (p >> i) & 1 );
702+
compress_endpoints1( endPoint[i], p );
702703
}
703704
else if (g_mode_id == 3)
704705
{
705-
compress_endpoints3( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
706+
compress_endpoints3( endPoint[i], uint2(p, p >> 1) & 1 );
706707
}
707708
else if (g_mode_id == 7)
708709
{
709-
compress_endpoints7( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
710+
compress_endpoints7( endPoint[i], uint2(p, p >> 1) & 1 );
710711
}
711712
}
712713

@@ -747,10 +748,12 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
747748
step_selector = 1; // mode 1 has 3 bit index
748749
}
749750

750-
uint p_error = 0;
751+
uint p_error[2] = { 0, 0 };
751752
for ( i = 0; i < 16; i ++ )
752753
{
753-
if (((bits >> i) & 0x01) == 1)
754+
uint subset_index = (bits >> i) & 0x01;
755+
756+
if (subset_index == 1)
754757
{
755758
dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] );
756759
color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0
@@ -763,8 +766,6 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
763766
: ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]);
764767
}
765768

766-
uint subset_index = (bits >> i) & 0x01;
767-
768769
pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0]
769770
+ aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6;
770771
if (g_mode_id != 7)
@@ -775,20 +776,32 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
775776
uint4 pixel = shared_temp[threadBase + i].pixel;
776777
Ensure_A_Is_Larger( pixel_r, pixel );
777778
pixel_r -= pixel;
778-
p_error += ComputeError(pixel_r, pixel_r);
779+
uint pixel_error = ComputeError(pixel_r, pixel_r);
780+
if ( subset_index == 1 )
781+
p_error[1] += pixel_error;
782+
else
783+
p_error[0] += pixel_error;
779784
}
780785

781-
if (p_error < error)
786+
for ( i = 0; i < 2; i++ )
782787
{
783-
error = p_error;
784-
rotation = p;
788+
if (p_error[i] < error[i])
789+
{
790+
error[i] = p_error[i];
791+
final_p[i] = p;
792+
}
785793
}
786794
}
787795

788-
shared_temp[GI].error = error;
796+
shared_temp[GI].error = error[0] + error[1];
789797
shared_temp[GI].mode = g_mode_id;
790798
shared_temp[GI].partition = partition;
791-
shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits
799+
800+
// modes 1, 3 and 7 don't have rotation, so we reuse the rotation field to store the p bits
801+
if ( g_mode_id == 1 )
802+
shared_temp[GI].rotation = (final_p[1] << 1) | final_p[0];
803+
else
804+
shared_temp[GI].rotation = (final_p[1] << 2) | final_p[0];
792805
}
793806
GroupMemoryBarrierWithGroupSync();
794807

@@ -954,15 +967,15 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
954967
uint max_p;
955968
if (0 == g_mode_id)
956969
{
957-
max_p = 64; // changed from 32 to 64
970+
max_p = 4;
958971
}
959972
else
960973
{
961974
max_p = 1;
962975
}
963976

964-
uint rotation = 0;
965-
uint error = MAX_UINT;
977+
uint final_p[3] = { 0, 0, 0 };
978+
uint error[3] = { MAX_UINT, MAX_UINT, MAX_UINT };
966979
for ( uint p = 0; p < max_p; p ++ )
967980
{
968981
endPoint[0] = endPointBackup[0];
@@ -973,7 +986,7 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
973986
{
974987
if (0 == g_mode_id)
975988
{
976-
compress_endpoints0( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
989+
compress_endpoints0( endPoint[i], uint2(p, p >> 1) & 1 );
977990
}
978991
else
979992
{
@@ -1005,7 +1018,7 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
10051018
}
10061019
}
10071020

1008-
uint p_error = 0;
1021+
uint p_error[3] = { 0, 0, 0 };
10091022
for ( i = 0; i < 16; i ++ )
10101023
{
10111024
uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
@@ -1035,19 +1048,30 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
10351048
uint4 pixel = shared_temp[threadBase + i].pixel;
10361049
Ensure_A_Is_Larger( pixel_r, pixel );
10371050
pixel_r -= pixel;
1038-
p_error += ComputeError(pixel_r, pixel_r);
1051+
1052+
uint pixel_error = ComputeError(pixel_r, pixel_r);
1053+
1054+
if ( subset_index == 2 )
1055+
p_error[2] += pixel_error;
1056+
else if ( subset_index == 1 )
1057+
p_error[1] += pixel_error;
1058+
else
1059+
p_error[0] += pixel_error;
10391060
}
10401061

1041-
if (p_error < error)
1062+
for ( i = 0; i < 3; i++ )
10421063
{
1043-
error = p_error;
1044-
rotation = p; // Borrow rotation for p
1064+
if (p_error[i] < error[i])
1065+
{
1066+
error[i] = p_error[i];
1067+
final_p[i] = p; // Borrow rotation for p
1068+
}
10451069
}
10461070
}
10471071

1048-
shared_temp[GI].error = error;
1072+
shared_temp[GI].error = error[0] + error[1] + error[2];
10491073
shared_temp[GI].partition = partition;
1050-
shared_temp[GI].rotation = rotation;
1074+
shared_temp[GI].rotation = (final_p[2] << 4) | (final_p[1] << 2) | final_p[0];
10511075
}
10521076
GroupMemoryBarrierWithGroupSync();
10531077

@@ -1561,8 +1585,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
15611585

15621586
uint4 quantize( uint4 color, uint uPrec )
15631587
{
1564-
uint4 rnd = min(255, color + (1 << (7 - uPrec)));
1565-
return rnd >> (8 - uPrec);
1588+
return (((color << 8) + color) * ((1 << uPrec) - 1) + 32768) >> 16;
15661589
}
15671590

15681591
uint4 unquantize( uint4 color, uint uPrec )

0 commit comments

Comments
 (0)