@@ -88,9 +88,9 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
88
88
}
89
89
{
90
90
#ifndef DISABLE_NEE
91
- constexpr auto additionalGlobalDescriptorCount = 6u ;
91
+ constexpr auto additionalGlobalDescriptorCount = 5u ;
92
92
#else
93
- constexpr auto additionalGlobalDescriptorCount = 4u ;
93
+ constexpr auto additionalGlobalDescriptorCount = 3u ;
94
94
#endif
95
95
IGPUDescriptorSetLayout::SBinding bindings[additionalGlobalDescriptorCount];
96
96
fillIotaDescriptorBindingDeclarations (bindings,ISpecializedShader::ESS_COMPUTE|ISpecializedShader::ESS_VERTEX|ISpecializedShader::ESS_FRAGMENT,additionalGlobalDescriptorCount,asset::EDT_STORAGE_BUFFER);
@@ -209,7 +209,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
209
209
info.buffer .offset = 0u ;
210
210
info.desc = std::move (buf);
211
211
};
212
- constexpr uint32_t writeBound = 4u ;
212
+ constexpr uint32_t writeBound = 3u ;
213
213
IGPUDescriptorSet::SWriteDescriptorSet writes[writeBound];
214
214
auto recordSSBOWrite = [](IGPUDescriptorSet::SWriteDescriptorSet& write, IGPUDescriptorSet::SDescriptorInfo* infos, uint32_t binding, uint32_t count=1u ) -> void
215
215
{
@@ -250,15 +250,18 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
250
250
251
251
constexpr uint8_t kIndicesPerTriangle = 3u ;
252
252
constexpr uint16_t minIndicesBatch = minTrisBatch*kIndicesPerTriangle ;
253
-
253
+
254
254
CPUMeshPacker::AllocationParams allocParams;
255
255
allocParams.vertexBuffSupportedByteSize = 1u <<31u ;
256
256
allocParams.vertexBufferMinAllocByteSize = minTrisBatch*minVertexSize;
257
257
allocParams.indexBuffSupportedCnt = (allocParams.vertexBuffSupportedByteSize /allocParams.vertexBufferMinAllocByteSize )*minIndicesBatch;
258
258
allocParams.indexBufferMinAllocCnt = minIndicesBatch;
259
259
allocParams.MDIDataBuffSupportedCnt = allocParams.indexBuffSupportedCnt /minIndicesBatch;
260
260
allocParams.MDIDataBuffMinAllocCnt = 1u ; // so structs from different meshbuffers are adjacent in memory
261
-
261
+
262
+ constexpr auto combinedNormalUVAttributeIx = 1 ;
263
+ constexpr auto newEnabledAttributeMask = (0x1u <<combinedNormalUVAttributeIx)|0b1 ;
264
+
262
265
auto cpump = core::make_smart_refctd_ptr<CCPUMeshPackerV2<>>(allocParams,minTrisBatch,maxTrisBatch);
263
266
uint32_t mdiBoundMax=0u ,batchInstanceBoundTotal=0u ;
264
267
core::vector<CPUMeshPacker::ReservedAllocationMeshBuffers> allocData;
@@ -274,14 +277,49 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
274
277
assert (!meshBuffers.empty ());
275
278
const uint32_t instanceCount = (*meshBuffers.begin ())->getInstanceCount ();
276
279
for (auto mbIt=meshBuffers.begin (); mbIt!=meshBuffers.end (); mbIt++)
277
- assert ((*mbIt)->getInstanceCount ()==instanceCount);
280
+ {
281
+ auto meshBuffer = *mbIt;
282
+ assert (meshBuffer->getInstanceCount ()==instanceCount);
283
+ // We'll disable certain attributes to ensure we only copy the position, normal and UV attributes
284
+ SVertexInputParams& vertexInput = meshBuffer->getPipeline ()->getVertexInputParams ();
285
+ // but we'll pack normals and UVs together to save one SSBO binding (and quantize UVs to half floats)
286
+ constexpr auto freeBinding = 15u ;
287
+ vertexInput.attributes [combinedNormalUVAttributeIx].binding = freeBinding;
288
+ vertexInput.attributes [combinedNormalUVAttributeIx].format = EF_R32G32_UINT;
289
+ vertexInput.attributes [combinedNormalUVAttributeIx].relativeOffset = 0u ;
290
+ vertexInput.enabledBindingFlags |= 0x1u <<freeBinding;
291
+ vertexInput.bindings [freeBinding].inputRate = EVIR_PER_VERTEX;
292
+ vertexInput.bindings [freeBinding].stride = 0u ;
293
+ const auto approxVxCount = IMeshManipulator::upperBoundVertexID (meshBuffer)+meshBuffer->getBaseVertex ();
294
+ struct CombinedNormalUV
295
+ {
296
+ uint32_t nml;
297
+ uint16_t u,v;
298
+ };
299
+ auto newBuff = core::make_smart_refctd_ptr<ICPUBuffer>(sizeof (CombinedNormalUV)*approxVxCount);
300
+ auto * dst = reinterpret_cast <CombinedNormalUV*>(newBuff->getPointer ())+meshBuffer->getBaseVertex ();
301
+ meshBuffer->setVertexBufferBinding ({0u ,newBuff},freeBinding);
302
+ // copy and pack data
303
+ const auto normalAttr = meshBuffer->getNormalAttributeIx ();
304
+ vertexInput.attributes [normalAttr].format = EF_R32_UINT;
305
+ for (auto i=0u ; i<approxVxCount; i++)
306
+ {
307
+ meshBuffer->getAttribute (&dst[i].nml ,normalAttr,i);
308
+ core::vectorSIMDf uv;
309
+ meshBuffer->getAttribute (uv,2u ,i);
310
+ dst[i].u = core::Float16Compressor::compress (uv.x );
311
+ dst[i].v = core::Float16Compressor::compress (uv.y );
312
+ }
313
+ }
278
314
279
315
const uint32_t mdiBound = cpump->calcMDIStructMaxCount (meshBuffers.begin (),meshBuffers.end ());
280
316
mdiBoundMax = core::max (mdiBound,mdiBoundMax);
281
317
batchInstanceBoundTotal += mdiBound*instanceCount;
282
318
283
319
meshBuffersToProcess.insert (meshBuffersToProcess.end (),meshBuffers.begin (),meshBuffers.end ());
284
320
}
321
+ for (auto meshBuffer : meshBuffersToProcess)
322
+ const_cast <ICPUMeshBuffer*>(meshBuffer)->getPipeline ()->getVertexInputParams ().enabledAttribFlags = newEnabledAttributeMask;
285
323
allocData.resize (meshBuffersToProcess.size ());
286
324
287
325
cpump->alloc (allocData.data (),meshBuffersToProcess.begin (),meshBuffersToProcess.end ());
@@ -356,22 +394,20 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
356
394
indexCount/kIndicesPerTriangle
357
395
);
358
396
359
- const auto normalAttrID = mb->getNormalAttributeIx ();
360
397
const auto thisShapeInstancesBeginIx = rrInstances.size ();
361
398
const auto & batchAABB = mb->getBoundingBox ();// TODO: replace with batch AABB
362
399
for (auto auxIt=instanceAuxData.begin (); auxIt!=instanceAuxData.end (); auxIt++)
363
400
{
364
- constexpr auto UVAttributeIx = 2 ;
365
401
const auto batchInstanceGUID = cullData.size ();
366
402
367
403
const auto instanceID = std::distance (instanceAuxData.begin (),auxIt);
368
404
*newInstanceData = mbInstanceData[instanceID];
369
405
assert (instanceData.begin ()[instanceID].worldTform ==newInstanceData->tform );
370
- newInstanceData->padding0 = reinterpret_cast < const uint32_t &>(cdotIt-> attribInfo [posAttrID]) ;
371
- newInstanceData->padding1 = reinterpret_cast <const uint32_t &>(cdotIt->attribInfo [normalAttrID ]);
406
+ newInstanceData->padding0 = firstIndex ;
407
+ newInstanceData->padding1 = reinterpret_cast <const uint32_t &>(cdotIt->attribInfo [posAttrID ]);
372
408
newInstanceData->determinantSignBit = core::bitfieldInsert (
373
409
newInstanceData->determinantSignBit ,
374
- reinterpret_cast <const uint32_t &>(cdotIt->attribInfo [UVAttributeIx ]),
410
+ reinterpret_cast <const uint32_t &>(cdotIt->attribInfo [combinedNormalUVAttributeIx ]),
375
411
0u ,31u
376
412
);
377
413
if (frontFaceIsCCW) // compensate for Nabla's default camera being left handed
@@ -446,7 +482,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
446
482
recordInfoBuffer (infos[i],core::smart_refctd_ptr (dataStore.vertexBuffer ));
447
483
recordSSBOWrite (writes[i],infos+i,i);
448
484
}
449
- recordInfoBuffer (infos[2 ],core::smart_refctd_ptr (m_indexBuffer));
485
+ recordInfoBuffer (infos[1 ],core::smart_refctd_ptr (m_indexBuffer));
450
486
451
487
setDstSetOnAllWrites (m_additionalGlobalDS.get ());
452
488
m_driver->updateDescriptorSets (writeBound,writes,0u ,nullptr );
@@ -866,7 +902,7 @@ void Renderer::init(const SAssetBundle& meshes, core::smart_refctd_ptr<ICPUBuffe
866
902
createFilledBufferAndSetUpInfoFromVector (infos+0 ,initData.lightCDF );
867
903
createFilledBufferAndSetUpInfoFromVector (infos+1 ,initData.lights );
868
904
869
- setDstSetAndDescTypesOnWrites (m_additionalGlobalDS.get (),writes,infos,{EDT_STORAGE_BUFFER,EDT_STORAGE_BUFFER},4u );
905
+ setDstSetAndDescTypesOnWrites (m_additionalGlobalDS.get (),writes,infos,{EDT_STORAGE_BUFFER,EDT_STORAGE_BUFFER},3u );
870
906
}
871
907
m_driver->updateDescriptorSets (descriptorUpdateCounts[0 ],writes,0u ,nullptr );
872
908
#endif
0 commit comments