Skip to content

Commit 80c3f4c

Browse files
Merge pull request #119 from Devsh-Graphics-Programming/failed_pt_refactor2
Pack UV and Normal into one attribute
2 parents 643ca4f + 2a72149 commit 80c3f4c

File tree

4 files changed

+65
-29
lines changed

4 files changed

+65
-29
lines changed

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 49 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -88,9 +88,9 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
8888
}
8989
{
9090
#ifndef DISABLE_NEE
91-
constexpr auto additionalGlobalDescriptorCount = 6u;
91+
constexpr auto additionalGlobalDescriptorCount = 5u;
9292
#else
93-
constexpr auto additionalGlobalDescriptorCount = 4u;
93+
constexpr auto additionalGlobalDescriptorCount = 3u;
9494
#endif
9595
IGPUDescriptorSetLayout::SBinding bindings[additionalGlobalDescriptorCount];
9696
fillIotaDescriptorBindingDeclarations(bindings,ISpecializedShader::ESS_COMPUTE|ISpecializedShader::ESS_VERTEX|ISpecializedShader::ESS_FRAGMENT,additionalGlobalDescriptorCount,asset::EDT_STORAGE_BUFFER);
@@ -209,7 +209,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
209209
info.buffer.offset = 0u;
210210
info.desc = std::move(buf);
211211
};
212-
constexpr uint32_t writeBound = 4u;
212+
constexpr uint32_t writeBound = 3u;
213213
IGPUDescriptorSet::SWriteDescriptorSet writes[writeBound];
214214
auto recordSSBOWrite = [](IGPUDescriptorSet::SWriteDescriptorSet& write, IGPUDescriptorSet::SDescriptorInfo* infos, uint32_t binding, uint32_t count=1u) -> void
215215
{
@@ -250,15 +250,18 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
250250

251251
constexpr uint8_t kIndicesPerTriangle = 3u;
252252
constexpr uint16_t minIndicesBatch = minTrisBatch*kIndicesPerTriangle;
253-
253+
254254
CPUMeshPacker::AllocationParams allocParams;
255255
allocParams.vertexBuffSupportedByteSize = 1u<<31u;
256256
allocParams.vertexBufferMinAllocByteSize = minTrisBatch*minVertexSize;
257257
allocParams.indexBuffSupportedCnt = (allocParams.vertexBuffSupportedByteSize/allocParams.vertexBufferMinAllocByteSize)*minIndicesBatch;
258258
allocParams.indexBufferMinAllocCnt = minIndicesBatch;
259259
allocParams.MDIDataBuffSupportedCnt = allocParams.indexBuffSupportedCnt/minIndicesBatch;
260260
allocParams.MDIDataBuffMinAllocCnt = 1u; //so structs from different meshbuffers are adjacent in memory
261-
261+
262+
constexpr auto combinedNormalUVAttributeIx = 1;
263+
constexpr auto newEnabledAttributeMask = (0x1u<<combinedNormalUVAttributeIx)|0b1;
264+
262265
auto cpump = core::make_smart_refctd_ptr<CCPUMeshPackerV2<>>(allocParams,minTrisBatch,maxTrisBatch);
263266
uint32_t mdiBoundMax=0u,batchInstanceBoundTotal=0u;
264267
core::vector<CPUMeshPacker::ReservedAllocationMeshBuffers> allocData;
@@ -274,14 +277,49 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
274277
assert(!meshBuffers.empty());
275278
const uint32_t instanceCount = (*meshBuffers.begin())->getInstanceCount();
276279
for (auto mbIt=meshBuffers.begin(); mbIt!=meshBuffers.end(); mbIt++)
277-
assert((*mbIt)->getInstanceCount()==instanceCount);
280+
{
281+
auto meshBuffer = *mbIt;
282+
assert(meshBuffer->getInstanceCount()==instanceCount);
283+
// We'll disable certain attributes to ensure we only copy position, normal and uv attribute
284+
SVertexInputParams& vertexInput = meshBuffer->getPipeline()->getVertexInputParams();
285+
// but we'll pack normals and UVs together to save one SSBO binding (and quantize UVs to half floats)
286+
constexpr auto freeBinding = 15u;
287+
vertexInput.attributes[combinedNormalUVAttributeIx].binding = freeBinding;
288+
vertexInput.attributes[combinedNormalUVAttributeIx].format = EF_R32G32_UINT;
289+
vertexInput.attributes[combinedNormalUVAttributeIx].relativeOffset = 0u;
290+
vertexInput.enabledBindingFlags |= 0x1u<<freeBinding;
291+
vertexInput.bindings[freeBinding].inputRate = EVIR_PER_VERTEX;
292+
vertexInput.bindings[freeBinding].stride = 0u;
293+
const auto approxVxCount = IMeshManipulator::upperBoundVertexID(meshBuffer)+meshBuffer->getBaseVertex();
294+
struct CombinedNormalUV
295+
{
296+
uint32_t nml;
297+
uint16_t u,v;
298+
};
299+
auto newBuff = core::make_smart_refctd_ptr<ICPUBuffer>(sizeof(CombinedNormalUV)*approxVxCount);
300+
auto* dst = reinterpret_cast<CombinedNormalUV*>(newBuff->getPointer())+meshBuffer->getBaseVertex();
301+
meshBuffer->setVertexBufferBinding({0u,newBuff},freeBinding);
302+
// copy and pack data
303+
const auto normalAttr = meshBuffer->getNormalAttributeIx();
304+
vertexInput.attributes[normalAttr].format = EF_R32_UINT;
305+
for (auto i=0u; i<approxVxCount; i++)
306+
{
307+
meshBuffer->getAttribute(&dst[i].nml,normalAttr,i);
308+
core::vectorSIMDf uv;
309+
meshBuffer->getAttribute(uv,2u,i);
310+
dst[i].u = core::Float16Compressor::compress(uv.x);
311+
dst[i].v = core::Float16Compressor::compress(uv.y);
312+
}
313+
}
278314

279315
const uint32_t mdiBound = cpump->calcMDIStructMaxCount(meshBuffers.begin(),meshBuffers.end());
280316
mdiBoundMax = core::max(mdiBound,mdiBoundMax);
281317
batchInstanceBoundTotal += mdiBound*instanceCount;
282318

283319
meshBuffersToProcess.insert(meshBuffersToProcess.end(),meshBuffers.begin(),meshBuffers.end());
284320
}
321+
for (auto meshBuffer : meshBuffersToProcess)
322+
const_cast<ICPUMeshBuffer*>(meshBuffer)->getPipeline()->getVertexInputParams().enabledAttribFlags = newEnabledAttributeMask;
285323
allocData.resize(meshBuffersToProcess.size());
286324

287325
cpump->alloc(allocData.data(),meshBuffersToProcess.begin(),meshBuffersToProcess.end());
@@ -356,22 +394,20 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
356394
indexCount/kIndicesPerTriangle
357395
);
358396

359-
const auto normalAttrID = mb->getNormalAttributeIx();
360397
const auto thisShapeInstancesBeginIx = rrInstances.size();
361398
const auto& batchAABB = mb->getBoundingBox();// TODO: replace with batch AABB
362399
for (auto auxIt=instanceAuxData.begin(); auxIt!=instanceAuxData.end(); auxIt++)
363400
{
364-
constexpr auto UVAttributeIx = 2;
365401
const auto batchInstanceGUID = cullData.size();
366402

367403
const auto instanceID = std::distance(instanceAuxData.begin(),auxIt);
368404
*newInstanceData = mbInstanceData[instanceID];
369405
assert(instanceData.begin()[instanceID].worldTform==newInstanceData->tform);
370-
newInstanceData->padding0 = reinterpret_cast<const uint32_t&>(cdotIt->attribInfo[posAttrID]);
371-
newInstanceData->padding1 = reinterpret_cast<const uint32_t&>(cdotIt->attribInfo[normalAttrID]);
406+
newInstanceData->padding0 = firstIndex;
407+
newInstanceData->padding1 = reinterpret_cast<const uint32_t&>(cdotIt->attribInfo[posAttrID]);
372408
newInstanceData->determinantSignBit = core::bitfieldInsert(
373409
newInstanceData->determinantSignBit,
374-
reinterpret_cast<const uint32_t&>(cdotIt->attribInfo[UVAttributeIx]),
410+
reinterpret_cast<const uint32_t&>(cdotIt->attribInfo[combinedNormalUVAttributeIx]),
375411
0u,31u
376412
);
377413
if (frontFaceIsCCW) // compensate for Nabla's default camera being left handed
@@ -446,7 +482,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
446482
recordInfoBuffer(infos[i],core::smart_refctd_ptr(dataStore.vertexBuffer));
447483
recordSSBOWrite(writes[i],infos+i,i);
448484
}
449-
recordInfoBuffer(infos[2],core::smart_refctd_ptr(m_indexBuffer));
485+
recordInfoBuffer(infos[1],core::smart_refctd_ptr(m_indexBuffer));
450486

451487
setDstSetOnAllWrites(m_additionalGlobalDS.get());
452488
m_driver->updateDescriptorSets(writeBound,writes,0u,nullptr);
@@ -866,7 +902,7 @@ void Renderer::init(const SAssetBundle& meshes, core::smart_refctd_ptr<ICPUBuffe
866902
createFilledBufferAndSetUpInfoFromVector(infos+0,initData.lightCDF);
867903
createFilledBufferAndSetUpInfoFromVector(infos+1,initData.lights);
868904

869-
setDstSetAndDescTypesOnWrites(m_additionalGlobalDS.get(),writes,infos,{EDT_STORAGE_BUFFER,EDT_STORAGE_BUFFER},4u);
905+
setDstSetAndDescTypesOnWrites(m_additionalGlobalDS.get(),writes,infos,{EDT_STORAGE_BUFFER,EDT_STORAGE_BUFFER},3u);
870906
}
871907
m_driver->updateDescriptorSets(descriptorUpdateCounts[0],writes,0u,nullptr);
872908
#endif

examples_tests/22.RaytracedAO/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,7 @@ int main()
234234

235235
if (generateNewSamples)
236236
{
237-
/** TODO: move into the renderer and redo the sampling
237+
/** TODO: move into the renderer and redo the sampling (compress into R21G21B21_UINT)
238238
Locality Level 0: the 3 dimensions consumed for a BxDF or NEE sample
239239
Locality Level 1: the k = 3 (1 + NEE) samples which will be consumed in the same invocation
240240
Locality Level 2-COMP: the N = k dispatchSPP Resolution samples consumed by a raygen dispatch (another TODO: would be order CS and everything in a morton curve)

examples_tests/22.RaytracedAO/raytraceCommon.glsl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,11 @@ layout(push_constant, row_major) uniform PushConstants
1111

1212
#ifndef DISABLE_NEE
1313
// lights
14-
layout(set = 1, binding = 4, std430) restrict readonly buffer CumulativeLightPDF
14+
layout(set = 1, binding = 3, std430) restrict readonly buffer CumulativeLightPDF
1515
{
1616
uint lightCDF[];
1717
};
18-
layout(set = 1, binding = 5, std430, row_major) restrict readonly buffer Lights
18+
layout(set = 1, binding = 4, std430, row_major) restrict readonly buffer Lights
1919
{
2020
SLight light[];
2121
};
@@ -262,7 +262,7 @@ for (uint i=1u; i!=vertex_depth; i++)
262262
atomicMax(traceIndirect[vertex_depth_mod_2_inv].params.num_groups_x,(baseOutputID+raysToAllocate-1u)/WORKGROUP_SIZE+1u);
263263

264264
// TODO: improve ray offset (maybe using smooth normal wouldn't be a sin)
265-
vec3 geomNormal = cross(dPdBary[0], dPdBary[1]);
265+
vec3 geomNormal = cross(dPdBary[0],dPdBary[1]);
266266
const vec3 absGeomNormal = abs(geomNormal);
267267
geomNormal /= max(max(absGeomNormal.x,absGeomNormal.y),max(absGeomNormal.z,0.001f))*96.f;
268268
uint offset = 0u;

examples_tests/22.RaytracedAO/virtualGeometry.glsl

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,13 @@
55

66
#define _NBL_VG_USE_SSBO
77
#define _NBL_VG_SSBO_DESCRIPTOR_SET 1
8-
#define _NBL_VG_USE_SSBO_UINT
9-
#define _NBL_VG_SSBO_UINT_BINDING 0
10-
#define _NBL_VG_USE_SSBO_UVEC3
11-
#define _NBL_VG_SSBO_UVEC3_BINDING 1
12-
#define _NBL_VG_USE_SSBO_INDEX
13-
#define _NBL_VG_SSBO_INDEX_BINDING 2
14-
// TODO: remove after all quantization optimizations in CSerializedLoader and the like
158
#define _NBL_VG_USE_SSBO_UVEC2
16-
#define _NBL_VG_SSBO_UVEC2_BINDING 3
9+
#define _NBL_VG_SSBO_UVEC2_BINDING 0
10+
#define _NBL_VG_USE_SSBO_INDEX
11+
#define _NBL_VG_SSBO_INDEX_BINDING 1
12+
// TODO: remove after Doom Eternal position quantization trick
13+
#define _NBL_VG_USE_SSBO_UVEC3
14+
#define _NBL_VG_SSBO_UVEC3_BINDING 2
1715
#include <nbl/builtin/glsl/virtual_geometry/virtual_attribute_fetch.glsl>
1816

1917

@@ -22,20 +20,22 @@
2220

2321
vec3 nbl_glsl_fetchVtxPos(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData)
2422
{
25-
nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.padding0;
23+
nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.padding1;
2624
return nbl_glsl_VG_attribFetch_RGB32_SFLOAT(va,vtxID);
2725
}
2826

2927
vec3 nbl_glsl_fetchVtxNormal(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData)
3028
{
31-
nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.padding1;
32-
return normalize(nbl_glsl_VG_attribFetch_RGB10A2_SNORM(va,vtxID).xyz);
29+
nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit;
30+
const uint codedNormal = nbl_glsl_VG_attribFetch2u(va,vtxID)[0];
31+
return normalize(nbl_glsl_decodeRGB10A2_SNORM(codedNormal).xyz);
3332
}
3433

3534
vec2 nbl_glsl_fetchVtxUV(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData)
3635
{
3736
nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit;
38-
return nbl_glsl_VG_attribFetch_RG32_SFLOAT(va,vtxID);
37+
const uint codedUV = nbl_glsl_VG_attribFetch2u(va,vtxID)[1];
38+
return unpackHalf2x16(codedUV).xy;
3939
}
4040

4141

0 commit comments

Comments
 (0)