@@ -79,6 +79,9 @@ IFrameBuffer* createDepthOnlyFrameBuffer(video::IVideoDriver* driver)
79
79
return frameBuffer;
80
80
}
81
81
82
// Number of test/benchmark cases. In the visible code it sizes the per-case
// arrays (shaders, drawDataBuffer, gpuPipelineLayout, gpuPipeline,
// descriptorSet, push-constant ranges) and bounds the per-case loops.
+ constexpr uint32_t TEST_CASE_COUNT = 5u ;
83
// Index of the "subgroups" case. It is used to index the per-case arrays, so
// it must stay below TEST_CASE_COUNT. In the visible code this case reuses
// drawDataBuffer[0], gets two descriptor bindings instead of one, and is
// grouped with the cases that push no constants.
+ constexpr uint32_t TEST_CASE_SUBGROUPS = 4u ;
84
+
82
85
int main ()
83
86
{
84
87
// create device with full flexibility over creation parameters
@@ -117,14 +120,16 @@ int main()
117
120
auto vertexShaderBundle_2 = am->getAsset (" ../test_2.vert" , lp);
118
121
auto vertexShaderBundle_3 = am->getAsset (" ../test_3.vert" , lp);
119
122
auto vertexShaderBundle_4 = am->getAsset (" ../test_4.vert" , lp);
123
+ auto vertexShaderBundle_5 = am->getAsset (" ../test_5.vert" , lp);
120
124
#else
121
125
auto vertexShaderBundle_1 = am->getAsset (" ../benchmark_1.vert" , lp);
122
126
auto vertexShaderBundle_2 = am->getAsset (" ../benchmark_2.vert" , lp);
123
127
auto vertexShaderBundle_3 = am->getAsset (" ../benchmark_3.vert" , lp);
124
128
auto vertexShaderBundle_4 = am->getAsset (" ../benchmark_4.vert" , lp);
129
+ auto vertexShaderBundle_5 = am->getAsset (" ../benchmark_5.vert" , lp);
125
130
#endif
126
131
auto fragShaderBundle = am->getAsset (" ../dirLight.frag" , lp);
127
- ICPUSpecializedShader* shaders[4 ][2 ];
132
+ ICPUSpecializedShader* shaders[TEST_CASE_COUNT ][2 ];
128
133
shaders[0 ][0 ] = IAsset::castDown<ICPUSpecializedShader>(vertexShaderBundle_1.getContents ().begin ()->get ());
129
134
shaders[0 ][1 ] = IAsset::castDown<ICPUSpecializedShader>(fragShaderBundle.getContents ().begin ()->get ());
130
135
shaders[1 ][0 ] = IAsset::castDown<ICPUSpecializedShader>(vertexShaderBundle_2.getContents ().begin ()->get ());
@@ -133,6 +138,8 @@ int main()
133
138
shaders[2 ][1 ] = IAsset::castDown<ICPUSpecializedShader>(fragShaderBundle.getContents ().begin ()->get ());
134
139
shaders[3 ][0 ] = IAsset::castDown<ICPUSpecializedShader>(vertexShaderBundle_4.getContents ().begin ()->get ());
135
140
shaders[3 ][1 ] = IAsset::castDown<ICPUSpecializedShader>(fragShaderBundle.getContents ().begin ()->get ());
141
+ shaders[4 ][0 ] = IAsset::castDown<ICPUSpecializedShader>(vertexShaderBundle_5.getContents ().begin ()->get ());
142
+ shaders[4 ][1 ] = IAsset::castDown<ICPUSpecializedShader>(fragShaderBundle.getContents ().begin ()->get ());
136
143
137
144
core::vector<uint16_t > boneMatMaxCnt;
138
145
@@ -320,7 +327,7 @@ int main()
320
327
core::matrix4SIMD boneMatrix;
321
328
core::matrix3x4SIMD normalMatrix;
322
329
};
323
- core::smart_refctd_ptr<IGPUBuffer> drawDataBuffer[4 ];
330
+ core::smart_refctd_ptr<IGPUBuffer> drawDataBuffer[TEST_CASE_COUNT ];
324
331
vector<core::matrix3x4SIMD> translationMatrices_2 (diskCount);
325
332
core::vector<core::matrix4SIMD> boneMatrices (boneMatrixCnt);
326
333
core::vector<core::matrix3x4SIMD> normalMatrices (boneMatrixCnt);
@@ -350,6 +357,8 @@ int main()
350
357
351
358
// as floats
352
359
drawDataBuffer[3 ] = driver->createDeviceLocalGPUBufferOnDedMem ((BONE_COMP_MAX_CNT + NORM_COMP_MAX_CNT) * sizeof (float ));
360
+
361
+ drawDataBuffer[TEST_CASE_SUBGROUPS] = drawDataBuffer[0 ];
353
362
}
354
363
355
364
@@ -367,9 +376,10 @@ int main()
367
376
uint32_t matrixOffsets[16 ];
368
377
};
369
378
370
- core::smart_refctd_ptr<IGPUPipelineLayout> gpuPipelineLayout[4 ];
371
- core::smart_refctd_ptr<IGPURenderpassIndependentPipeline> gpuPipeline[4 ];
372
- core::smart_refctd_ptr<IGPUDescriptorSet> descriptorSet[4 ];
379
+ // TODO
380
+ core::smart_refctd_ptr<IGPUPipelineLayout> gpuPipelineLayout[TEST_CASE_COUNT];
381
+ core::smart_refctd_ptr<IGPURenderpassIndependentPipeline> gpuPipeline[TEST_CASE_COUNT];
382
+ core::smart_refctd_ptr<IGPUDescriptorSet> descriptorSet[TEST_CASE_COUNT];
373
383
374
384
Shader3PushConstants s3pc;
375
385
s3pc.matrixOffsets = core::vector4du32_SIMD (0u , boneMatrixCnt, boneMatrixCnt * 2 , boneMatrixCnt * 3 );
@@ -379,42 +389,50 @@ int main()
379
389
s4pc.matrixOffsets [i] = i * boneMatrixCnt;
380
390
381
391
{
382
- asset::SPushConstantRange range[4 ] = {
392
+ asset::SPushConstantRange range[TEST_CASE_COUNT ] = {
383
393
asset::ISpecializedShader::ESS_UNKNOWN, 0u , 0u ,
384
394
asset::ISpecializedShader::ESS_UNKNOWN, 0u , 0u ,
385
395
asset::ISpecializedShader::ESS_VERTEX, 0u , sizeof (Shader3PushConstants),
386
- asset::ISpecializedShader::ESS_VERTEX, 0u , sizeof (Shader4PushConstants)
396
+ asset::ISpecializedShader::ESS_VERTEX, 0u , sizeof (Shader4PushConstants),
397
+ asset::ISpecializedShader::ESS_UNKNOWN, 0u , 0u
387
398
};
388
399
389
- for (uint32_t i = 0u ; i < 4u ; i++)
400
+ // TODO
401
+ for (uint32_t i = 0u ; i < TEST_CASE_COUNT; i++)
390
402
{
391
403
core::smart_refctd_ptr<IGPUDescriptorSetLayout> layout;
392
404
{
393
- video::IGPUDescriptorSetLayout::SBinding b[1 ];
405
+ video::IGPUDescriptorSetLayout::SBinding b[2 ];
394
406
b[0 ].binding = 0u ;
395
407
b[0 ].count = 1u ;
396
408
b[0 ].type = EDT_STORAGE_BUFFER;
409
+ b[1 ] = b[0 ];
410
+ b[1 ].binding = 1u ;
397
411
398
- layout = driver->createGPUDescriptorSetLayout (b, b + 1 );
412
+ uint32_t count = i == TEST_CASE_SUBGROUPS ? 2u : 1u ;
413
+ layout = driver->createGPUDescriptorSetLayout (b, b + count);
399
414
}
400
415
401
416
descriptorSet[i] = driver->createGPUDescriptorSet (core::smart_refctd_ptr (layout));
402
417
{
403
- video::IGPUDescriptorSet::SWriteDescriptorSet w;
404
- w.binding = 0u ;
405
- w.arrayElement = 0u ;
406
- w.count = 1u ;
407
- w.descriptorType = EDT_STORAGE_BUFFER;
408
- w.dstSet = descriptorSet[i].get ();
418
+ video::IGPUDescriptorSet::SWriteDescriptorSet w[2 ];
419
+ w[0 ].binding = 0u ;
420
+ w[0 ].arrayElement = 0u ;
421
+ w[0 ].count = 1u ;
422
+ w[0 ].descriptorType = EDT_STORAGE_BUFFER;
423
+ w[0 ].dstSet = descriptorSet[i].get ();
424
+ w[1 ] = w[0 ];
409
425
410
426
video::IGPUDescriptorSet::SDescriptorInfo info;
411
427
info.buffer .offset = 0u ;
412
428
info.buffer .size = drawDataBuffer[i]->getSize ();
413
429
info.desc = drawDataBuffer[i];
414
430
415
- w.info = &info;
431
+ w[0 ].info = &info;
432
+ w[1 ].info = &info;
416
433
417
- driver->updateDescriptorSets (1u , &w, 0u , nullptr );
434
+ uint32_t count = i == TEST_CASE_SUBGROUPS ? 2u : 1u ;
435
+ driver->updateDescriptorSets (count, w, 0u , nullptr );
418
436
}
419
437
420
438
auto gpuShaders = driver->getGPUObjectsFromAssets (shaders[i], shaders[i] + 2 );
@@ -520,8 +538,9 @@ int main()
520
538
{
521
539
switch (caseID)
522
540
{
523
- case 0 :
524
- case 1 :
541
+ case 0 : [[fallthrough]];
542
+ case 1 : [[fallthrough]];
543
+ case TEST_CASE_SUBGROUPS:
525
544
break ;
526
545
case 2 :
527
546
driver->pushConstants (gpuPipelineLayout[2 ].get (), asset::ISpecializedShader::ESS_VERTEX, 0u , sizeof (Shader3PushConstants), &s3pc);
@@ -538,7 +557,8 @@ int main()
538
557
{
539
558
switch (caseID)
540
559
{
541
- case 0 :
560
+ case 0 : [[fallthrough]];
561
+ case TEST_CASE_SUBGROUPS:
542
562
{
543
563
const size_t matricesByteSize = sizeof (BoneNormalMatPair) * boneAndNormalMatrices.size ();
544
564
@@ -591,7 +611,7 @@ int main()
591
611
592
612
constexpr uint32_t iterationCnt = 1000u ;
593
613
constexpr uint32_t warmupIterationCnt = iterationCnt / 10u ;
594
- for (uint32_t caseID = 0u ; caseID < 4u ; caseID++)
614
+ for (uint32_t caseID = 0u ; caseID < TEST_CASE_COUNT ; caseID++)
595
615
{
596
616
os::Printer::print (std::string (" Benchmark for case nr. " + std::to_string (caseID)));
597
617
0 commit comments