44// The Compute Shader for BC7 Encoder
55//
66// Copyright (c) Microsoft Corporation. All rights reserved.
7+ // Licensed under the MIT License.
78//--------------------------------------------------------------------------------------
89
// Compile-time switches and constants for the encoder.
// This change turns REF_DEVICE on: the removed line had it commented out, the
// added line defines it.
// NOTE(review): the code guarded by REF_DEVICE is not visible in this excerpt —
// confirm what it enables before relying on it.
9- // #define REF_DEVICE
10+ #define REF_DEVICE
1011
// NOTE(review): presumably bits per channel and channels per pixel — confirm
// against their uses, which are outside this excerpt.
1112#define CHAR_LENGTH 8
1213#define NCHANNELS 4
@@ -679,16 +680,16 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
679680 if (1 == g_mode_id)
680681 {
681682 // in mode 1, there is only one p bit per subset
682- max_p = 4 ;
683+ max_p = 2 ;
683684 }
684685 else
685686 {
686687 // in mode 3 7, there are two p bits per subset, one for each end point
687- max_p = 16 ;
688+ max_p = 4 ;
688689 }
689690
690- uint rotation = 0 ;
691- uint error = MAX_UINT;
691+ uint final_p[ 2 ] = { 0 , 0 } ;
692+ uint error[ 2 ] = { MAX_UINT, MAX_UINT } ;
692693 for ( uint p = 0 ; p < max_p; p ++ )
693694 {
694695 endPoint[0 ] = endPointBackup[0 ];
@@ -698,15 +699,15 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
698699 {
699700 if (g_mode_id == 1 )
700701 {
701- compress_endpoints1 ( endPoint[i], (p >> i) & 1 );
702+ compress_endpoints1 ( endPoint[i], p );
702703 }
703704 else if (g_mode_id == 3 )
704705 {
705- compress_endpoints3 ( endPoint[i], uint2 (p >> (i * 2 + 0 ) , p >> (i * 2 + 1 ) ) & 1 );
706+ compress_endpoints3 ( endPoint[i], uint2 (p, p >> 1 ) & 1 );
706707 }
707708 else if (g_mode_id == 7 )
708709 {
709- compress_endpoints7 ( endPoint[i], uint2 (p >> (i * 2 + 0 ) , p >> (i * 2 + 1 ) ) & 1 );
710+ compress_endpoints7 ( endPoint[i], uint2 (p, p >> 1 ) & 1 );
710711 }
711712 }
712713
@@ -747,10 +748,12 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
747748 step_selector = 1 ; // mode 1 has 3 bit index
748749 }
749750
750- uint p_error = 0 ;
751+ uint p_error[ 2 ] = { 0 , 0 };
751752 for ( i = 0 ; i < 16 ; i ++ )
752753 {
753- if (((bits >> i) & 0x01 ) == 1 )
754+ uint subset_index = (bits >> i) & 0x01 ;
755+
756+ if (subset_index == 1 )
754757 {
755758 dotProduct = dot ( span[1 ], shared_temp[threadBase + i].pixel - endPoint[1 ][0 ] );
756759 color_index = (span_norm_sqr[1 ] <= 0 || dotProduct <= 0 ) ? 0
@@ -763,8 +766,6 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
763766 : ((dotProduct < span_norm_sqr[0 ]) ? aStep[step_selector][uint (dotProduct * 63.49999 / span_norm_sqr[0 ])] : aStep[step_selector][63 ]);
764767 }
765768
766- uint subset_index = (bits >> i) & 0x01 ;
767-
768769 pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0 ]
769770 + aWeight[step_selector][color_index] * endPoint[subset_index][1 ] + 32 ) >> 6 ;
770771 if (g_mode_id != 7 )
@@ -775,20 +776,32 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
775776 uint4 pixel = shared_temp[threadBase + i].pixel;
776777 Ensure_A_Is_Larger ( pixel_r, pixel );
777778 pixel_r -= pixel;
778- p_error += ComputeError (pixel_r, pixel_r);
779+ uint pixel_error = ComputeError (pixel_r, pixel_r);
780+ if ( subset_index == 1 )
781+ p_error[1 ] += pixel_error;
782+ else
783+ p_error[0 ] += pixel_error;
779784 }
780785
781- if (p_error < error )
786+ for ( i = 0 ; i < 2 ; i++ )
782787 {
783- error = p_error;
784- rotation = p;
788+ if (p_error[i] < error[i])
789+ {
790+ error[i] = p_error[i];
791+ final_p[i] = p;
792+ }
785793 }
786794 }
787795
788- shared_temp[GI].error = error;
796+ shared_temp[GI].error = error[ 0 ] + error[ 1 ] ;
789797 shared_temp[GI].mode = g_mode_id;
790798 shared_temp[GI].partition = partition;
791- shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits
799+
800+ // mode 1 3 7 don't have rotation, we use rotation for p bits
801+ if ( g_mode_id == 1 )
802+ shared_temp[GI].rotation = (final_p[1 ] << 1 ) | final_p[0 ];
803+ else
804+ shared_temp[GI].rotation = (final_p[1 ] << 2 ) | final_p[0 ];
792805 }
793806 GroupMemoryBarrierWithGroupSync ();
794807
@@ -954,15 +967,15 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
954967 uint max_p;
955968 if (0 == g_mode_id)
956969 {
957- max_p = 64 ; // changed from 32 to 64
970+ max_p = 4 ;
958971 }
959972 else
960973 {
961974 max_p = 1 ;
962975 }
963976
964- uint rotation = 0 ;
965- uint error = MAX_UINT;
977+ uint final_p[ 3 ] = { 0 , 0 , 0 } ;
978+ uint error[ 3 ] = { MAX_UINT, MAX_UINT, MAX_UINT } ;
966979 for ( uint p = 0 ; p < max_p; p ++ )
967980 {
968981 endPoint[0 ] = endPointBackup[0 ];
@@ -973,7 +986,7 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
973986 {
974987 if (0 == g_mode_id)
975988 {
976- compress_endpoints0 ( endPoint[i], uint2 (p >> (i * 2 + 0 ) , p >> (i * 2 + 1 ) ) & 1 );
989+ compress_endpoints0 ( endPoint[i], uint2 (p, p >> 1 ) & 1 );
977990 }
978991 else
979992 {
@@ -1005,7 +1018,7 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
10051018 }
10061019 }
10071020
1008- uint p_error = 0 ;
1021+ uint p_error[ 3 ] = { 0 , 0 , 0 } ;
10091022 for ( i = 0 ; i < 16 ; i ++ )
10101023 {
10111024 uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03 ;
@@ -1035,19 +1048,30 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
10351048 uint4 pixel = shared_temp[threadBase + i].pixel;
10361049 Ensure_A_Is_Larger ( pixel_r, pixel );
10371050 pixel_r -= pixel;
1038- p_error += ComputeError (pixel_r, pixel_r);
1051+
1052+ uint pixel_error = ComputeError (pixel_r, pixel_r);
1053+
1054+ if ( subset_index == 2 )
1055+ p_error[2 ] += pixel_error;
1056+ else if ( subset_index == 1 )
1057+ p_error[1 ] += pixel_error;
1058+ else
1059+ p_error[0 ] += pixel_error;
10391060 }
10401061
1041- if (p_error < error )
1062+ for ( i = 0 ; i < 3 ; i++ )
10421063 {
1043- error = p_error;
1044- rotation = p; // Borrow rotation for p
1064+ if (p_error[i] < error[i])
1065+ {
1066+ error[i] = p_error[i];
1067+ final_p[i] = p; // Borrow rotation for p
1068+ }
10451069 }
10461070 }
10471071
1048- shared_temp[GI].error = error;
1072+ shared_temp[GI].error = error[ 0 ] + error[ 1 ] + error[ 2 ] ;
10491073 shared_temp[GI].partition = partition;
1050- shared_temp[GI].rotation = rotation ;
1074+ shared_temp[GI].rotation = (final_p[ 2 ] << 4 ) | (final_p[ 1 ] << 2 ) | final_p[ 0 ] ;
10511075 }
10521076 GroupMemoryBarrierWithGroupSync ();
10531077
@@ -1561,8 +1585,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
15611585
// quantize: maps each component of `color` to a value derived from the
// precision `uPrec` and returns the result as a uint4.
//
// This change replaces the body:
//   - removed form: add a bias of 1 << (7 - uPrec), clamp to 255, then shift
//     right by (8 - uPrec), i.e. keep the top uPrec bits after biasing;
//   - added form: ((color * 257) * ((1 << uPrec) - 1) + 32768) >> 16, where
//     (color << 8) + color is color * 257 and 32768 is half of 1 << 16.
// NOTE(review): presumably each component of `color` is an 8-bit value (0..255)
// and uPrec is at most 8; under that assumption the added form approximates
// round(color * (2^uPrec - 1) / 255) — confirm against the callers.
15621586uint4 quantize ( uint4 color, uint uPrec )
15631587{
1564- uint4 rnd = min (255 , color + (1 << (7 - uPrec)));
1565- return rnd >> (8 - uPrec);
1588+ return (((color << 8 ) + color) * ((1 << uPrec) - 1 ) + 32768 ) >> 16 ;
15661589}
15671590
15681591uint4 unquantize ( uint4 color, uint uPrec )
0 commit comments