@@ -81,6 +81,17 @@ struct max
81
81
82
82
_NBL_STATIC_INLINE_CONSTEXPR const char * name = " max" ;
83
83
};
84
+ template <typename T>
85
+ struct bitcount
86
+ {
87
+ using type_t = T;
88
+ _NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = T(0 );
89
+
90
+ inline T operator ()(T left, T right) { return T (0 ); }
91
+
92
+ _NBL_STATIC_INLINE_CONSTEXPR const char * name = " bitcount" ;
93
+ };
94
+
84
95
85
96
86
97
// subgroup method emulations on the CPU, to verify the results of the GPU methods
@@ -250,7 +261,7 @@ bool validateResults(video::IVideoDriver* driver, const uint32_t* inputData, con
250
261
251
262
}
252
263
template <template <class > class Arithmetic >
253
- bool runTest (video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline, const video::IGPUDescriptorSet* ds, const uint32_t * inputData, const uint32_t workgroupSize, core::smart_refctd_ptr<IGPUBuffer>* const buffers)
264
+ bool runTest (video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline, const video::IGPUDescriptorSet* ds, const uint32_t * inputData, const uint32_t workgroupSize, core::smart_refctd_ptr<IGPUBuffer>* const buffers, bool is_workgroup_test = false )
254
265
{
255
266
driver->bindComputePipeline (pipeline);
256
267
driver->bindDescriptorSets (video::EPBP_COMPUTE,pipeline->getLayout (),0u ,1u ,&ds,nullptr );
@@ -265,6 +276,9 @@ bool runTest(video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline,
265
276
passed = validateResults<Arithmetic,mul>(driver, inputData, workgroupSize, workgroupCount, buffers[4 ].get ())&&passed;
266
277
passed = validateResults<Arithmetic,::min>(driver, inputData, workgroupSize, workgroupCount, buffers[5 ].get ())&&passed;
267
278
passed = validateResults<Arithmetic,::max>(driver, inputData, workgroupSize, workgroupCount, buffers[6 ].get ())&&passed;
279
+ if (is_workgroup_test)
280
+ passed = validateResults<Arithmetic,bitcount>(driver, inputData, workgroupSize, workgroupCount, buffers[7 ].get ()) && passed;
281
+
268
282
return passed;
269
283
}
270
284
@@ -300,43 +314,41 @@ int main()
300
314
}
301
315
auto gpuinputDataBuffer = driver->createFilledDeviceLocalGPUBufferOnDedMem (kBufferSize , inputData);
302
316
303
- // create 7 buffers.
304
- core::smart_refctd_ptr<IGPUBuffer> buffers[7 ];
305
- for (size_t i = 0 ; i < 7 ; i++)
317
+ // create 8 buffers.
318
+ constexpr const int outputBufferCount = 8 ;
319
+ constexpr const int totalBufferCount = outputBufferCount+1 ;
320
+
321
+ core::smart_refctd_ptr<IGPUBuffer> buffers[outputBufferCount];
322
+ for (size_t i = 0 ; i < outputBufferCount; i++)
306
323
{
307
324
buffers[i] = driver->createDeviceLocalGPUBufferOnDedMem (kBufferSize );
308
325
}
309
326
310
- IGPUDescriptorSetLayout::SBinding binding[8 ] = {
311
- {0u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr }, // input with randomized numbers
312
- {1u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
313
- {2u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
314
- {3u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
315
- {4u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
316
- {5u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
317
- {6u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
318
- {7u ,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr },
319
- };
320
- auto gpuDSLayout = driver->createGPUDescriptorSetLayout (binding, binding + 8 );
321
- constexpr uint32_t pushconstantSize = 64u ;
327
+ IGPUDescriptorSetLayout::SBinding binding[totalBufferCount];
328
+ for (uint32_t i = 0u ; i < totalBufferCount; i++)
329
+ {
330
+ binding[i] = { i,EDT_STORAGE_BUFFER,1u ,IGPUSpecializedShader::ESS_COMPUTE,nullptr };
331
+ }
332
+ auto gpuDSLayout = driver->createGPUDescriptorSetLayout (binding, binding + totalBufferCount);
333
+ constexpr uint32_t pushconstantSize = 8u * totalBufferCount;
322
334
SPushConstantRange pcRange[1 ] = { IGPUSpecializedShader::ESS_COMPUTE,0u ,pushconstantSize };
323
335
auto pipelineLayout = driver->createGPUPipelineLayout (pcRange, pcRange + pushconstantSize, core::smart_refctd_ptr (gpuDSLayout));
324
336
325
337
auto descriptorSet = driver->createGPUDescriptorSet (core::smart_refctd_ptr (gpuDSLayout));
326
338
{
327
- IGPUDescriptorSet::SDescriptorInfo infos[8 ];
339
+ IGPUDescriptorSet::SDescriptorInfo infos[totalBufferCount ];
328
340
infos[0 ].desc = gpuinputDataBuffer;
329
341
infos[0 ].buffer = { 0u ,kBufferSize };
330
- for (uint32_t i=1u ; i<=7u ; i++)
342
+ for (uint32_t i=1u ; i<= outputBufferCount ; i++)
331
343
{
332
344
infos[i].desc = buffers[i - 1 ];
333
345
infos[i].buffer = { 0u ,kBufferSize };
334
346
335
347
}
336
- IGPUDescriptorSet::SWriteDescriptorSet writes[8 ];
337
- for (uint32_t i=0u ; i<8u ; i++)
348
+ IGPUDescriptorSet::SWriteDescriptorSet writes[totalBufferCount ];
349
+ for (uint32_t i=0u ; i< totalBufferCount ; i++)
338
350
writes[i] = { descriptorSet.get (),i,0u ,1u ,EDT_STORAGE_BUFFER,infos + i };
339
- driver->updateDescriptorSets (8 , writes, 0u , nullptr );
351
+ driver->updateDescriptorSets (totalBufferCount , writes, 0u , nullptr );
340
352
}
341
353
struct GLSLCodeWithWorkgroup {
342
354
uint32_t workgroup_definition_position;
@@ -391,9 +403,9 @@ int main()
391
403
passed = runTest<emulatedSubgroupReduction>(driver,pipelines[0u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers)&&passed;
392
404
passed = runTest<emulatedSubgroupScanExclusive>(driver,pipelines[1u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers)&&passed;
393
405
passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[2u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers)&&passed;
394
- passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[3u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers)&&passed;
395
- passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[4u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers)&&passed;
396
- passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[5u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers)&&passed;
406
+ passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[3u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers, true )&&passed;
407
+ passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[4u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers, true )&&passed;
408
+ passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[5u ].get (),descriptorSet.get (),inputData,workgroupSize,buffers, true )&&passed;
397
409
398
410
if (passed)
399
411
os::Printer::log (" Passed test #" + std::to_string (workgroupSize), ELL_INFORMATION);
0 commit comments