@@ -34,7 +34,7 @@ float3 BlurPixels( float3 a, float3 b, float3 c, float3 d, float3 e, float3 f, f
3434 return Weights[0 ]*e + Weights[1 ]*(d+f) + Weights[2 ]*(c+g) + Weights[3 ]*(b+h) + Weights[4 ]*(a+i);
3535}
3636
37- // 16x16 pixels with an 8x8 center that we will be blurring writing out. Each uint is two color channels packed together
37+ // 16x16 pixels with an 8x8 center that we will be blurring and writing out. Each uint is two color channels packed together
3838groupshared uint CacheR[128 ];
3939groupshared uint CacheG[128 ];
4040groupshared uint CacheB[128 ];
@@ -67,7 +67,7 @@ void Load1Pixel( uint index, out float3 pixel )
6767 pixel = asfloat ( uint3 (CacheR[index], CacheG[index], CacheB[index]) );
6868}
6969
70- // Blur two pixels horizontally. This reduces LDS reads and pixel unpacking.
70+ // Blur two pixels horizontally. This reduces LDS reads and pixel unpacking.
7171void BlurHorizontally ( uint outIndex, uint leftMostIndex )
7272{
7373 float3 s0, s1, s2, s3, s4, s5, s6, s7, s8, s9;
@@ -77,6 +77,9 @@ void BlurHorizontally( uint outIndex, uint leftMostIndex )
7777 Load2Pixels ( leftMostIndex + 3 , s6, s7 );
7878 Load2Pixels ( leftMostIndex + 4 , s8, s9 );
7979
80+ // Be sure to finish loading values before we rewrite them.
81+ GroupMemoryBarrierWithGroupSync ();
82+
8083 Store1Pixel (outIndex , BlurPixels (s0, s1, s2, s3, s4, s5, s6, s7, s8));
8184 Store1Pixel (outIndex+1 , BlurPixels (s1, s2, s3, s4, s5, s6, s7, s8, s9));
8285}
@@ -104,8 +107,8 @@ void main( uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : S
104107 //
105108 // Load 4 pixels per thread into LDS
106109 //
107- int2 GroupUL = (Gid.xy << 3 ) - 4 ; // Upper-left pixel coordinate of group read location
108- int2 ThreadUL = (GTid.xy << 1 ) + GroupUL; // Upper-left pixel coordinate of quad that this thread will read
110+ int2 GroupUL = (Gid.xy << 3 ) - 4 ; // Upper-left pixel coordinate of group read location
111+ int2 ThreadUL = (GTid.xy << 1 ) + GroupUL; // Upper-left pixel coordinate of quad that this thread will read
109112
110113 //
111114 // Store 4 unblurred pixels in LDS
0 commit comments