Fix pending; want to merge master

Fletterio · Fletterio · commit ee32a83550d0 · 2024-06-26T20:47:49.000-03:00
diff --git a/examples_tests b/examples_tests
@@ -1 +1 @@
-Subproject commit b822d8e4ada94f7410b4d710a2f70bdeb54f63dd
+Subproject commit bfae67a41b01d178f254dcb9a26f154c248a0045
diff --git a/include/nbl/builtin/hlsl/workgroup/fft.hlsl b/include/nbl/builtin/hlsl/workgroup/fft.hlsl
@@ -92,7 +92,7 @@ struct FFT<2,false, Scalar, device_capabilities>
         complex_t<Scalar> hi = {hiVec.x, hiVec.y};
 
         // special first iteration - only if workgroupsize > subgroupsize
-        if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize())
+        if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize())
             fft::DIF<Scalar>::radix2(fft::twiddle<false, Scalar>(threadID, _NBL_HLSL_WORKGROUP_SIZE_), lo, hi); 
 
         // Run bigger steps until Subgroup-sized
@@ -105,7 +105,7 @@ struct FFT<2,false, Scalar, device_capabilities>
         }
 
         // special last workgroup-shuffle - only if workgroupsize > subgroupsize
-        if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize()) 
+        if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize()) 
         {
             // Wait for all threads to be done with reads in the last loop before writing to shared mem      
             sharedmemAdaptor.workgroupExecutionAndMemoryBarrier(); 
@@ -168,7 +168,7 @@ struct FFT<2,true, Scalar, device_capabilities>
         subgroup::FFT<true, Scalar, device_capabilities>::__call(lo, hi);
         
         // special first workgroup-shuffle - only if workgroupsize > subgroupsize
-        if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize()) 
+        if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize()) 
         { 
             exchangeValues<SharedMemoryAccessor, Scalar>(lo, hi, threadID, glsl::gl_SubgroupSize(), sharedmemAdaptor);
         }
@@ -182,7 +182,7 @@ struct FFT<2,true, Scalar, device_capabilities>
         }
 
         // special last iteration - only if workgroupsize > subgroupsize
-        if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize())
+        if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize())
         {
             fft::DIT<Scalar>::radix2(fft::twiddle<true, Scalar>(threadID, _NBL_HLSL_WORKGROUP_SIZE_), lo, hi); 
             divides_assign< complex_t<Scalar> > divAss;

Original file line number	Diff line number	Diff line change
`@@ -92,7 +92,7 @@ struct FFT<2,false, Scalar, device_capabilities>`
`92`	`92`	`complex_t<Scalar> hi = {hiVec.x, hiVec.y};`
`93`	`93`
`94`	`94`	`// special first iteration - only if workgroupsize > subgroupsize`
`95`		`- if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize())`
	`95`	`+ if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize())`
`96`	`96`	`fft::DIF<Scalar>::radix2(fft::twiddle<false, Scalar>(threadID, _NBL_HLSL_WORKGROUP_SIZE_), lo, hi);`
`97`	`97`
`98`	`98`	`// Run bigger steps until Subgroup-sized`
`@@ -105,7 +105,7 @@ struct FFT<2,false, Scalar, device_capabilities>`
`105`	`105`	`}`
`106`	`106`
`107`	`107`	`// special last workgroup-shuffle - only if workgroupsize > subgroupsize`
`108`		`- if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize())`
	`108`	`+ if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize())`
`109`	`109`	`{`
`110`	`110`	`// Wait for all threads to be done with reads in the last loop before writing to shared mem`
`111`	`111`	`sharedmemAdaptor.workgroupExecutionAndMemoryBarrier();`
`@@ -168,7 +168,7 @@ struct FFT<2,true, Scalar, device_capabilities>`
`168`	`168`	`subgroup::FFT<true, Scalar, device_capabilities>::__call(lo, hi);`
`169`	`169`
`170`	`170`	`// special first workgroup-shuffle - only if workgroupsize > subgroupsize`
`171`		`- if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize())`
	`171`	`+ if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize())`
`172`	`172`	`{`
`173`	`173`	`exchangeValues<SharedMemoryAccessor, Scalar>(lo, hi, threadID, glsl::gl_SubgroupSize(), sharedmemAdaptor);`
`174`	`174`	`}`
`@@ -182,7 +182,7 @@ struct FFT<2,true, Scalar, device_capabilities>`
`182`	`182`	`}`
`183`	`183`
`184`	`184`	`// special last iteration - only if workgroupsize > subgroupsize`
`185`		`- if (_NBL_HLSL_WORKGROUP_SIZE_ ^ glsl::gl_SubgroupSize())`
	`185`	`+ if (_NBL_HLSL_WORKGROUP_SIZE_ > glsl::gl_SubgroupSize())`
`186`	`186`	`{`
`187`	`187`	`fft::DIT<Scalar>::radix2(fft::twiddle<true, Scalar>(threadID, _NBL_HLSL_WORKGROUP_SIZE_), lo, hi);`
`188`	`188`	`divides_assign< complex_t<Scalar> > divAss;`