Skip to content

Commit 3d38260

Browse files
Merge pull request #111 from Przemog1/mp
Mesh packer V2
2 parents 4ba1fb2 + 1234d1b commit 3d38260

37 files changed

+2031
-1560
lines changed

3rdparty/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,8 @@ if (NBL_BUILD_RADEON_RAYS)
416416
option(RR_NO_TESTS "RADEON_RAYS: don't do tests" ON)
417417
option(RR_ENABLE_STATIC "RADEON_RAYS: compile into a static library" ON)
418418
option(RR_EMBED_KERNELS "RADEON_RAYS: so we don't have to go looking for OpenCL kernel code" ON)
419-
option(RR_USE_VULKAN "RADEON_RAYS: use Vulkan (only if you have RR 3.0)" OFF)
419+
set(RR_USE_VULKAN OFF CACHE BOOL "RADEON_RAYS: Cannot use Vulkan for RR 2.0 the backend is broken" FORCE)
420+
set(RR_ENABLE_BACKFACE_CULL OFF CACHE BOOL "RADEON_RAYS: Don't enable backface culling (we use the memory of the ray struct for something else there)." FORCE)
420421
set(MSVC_USE_STATIC_CRT ON CACHE BOOL "Use /MT flag (static CRT) when compiling in MSVC")
421422
add_subdirectory(radeonrays EXCLUDE_FROM_ALL)
422423

examples_tests/18.MitsubaLoader/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ void main()
105105
mat2 dUV = mat2(dFdx(UV),dFdy(UV));
106106
107107
// "The sign of this computation is negated when the value of GL_CLIP_ORIGIN (the clip volume origin, set with glClipControl) is GL_UPPER_LEFT."
108-
const bool front = (!gl_FrontFacing) != (PC.camTformDeterminant*InstData.data[InstanceIndex].determinant < 0.0);
108+
const bool front = bool((InstData.data[InstanceIndex].determinantSignBit^mix(~0u,0u,gl_FrontFacing!=PC.camTformDeterminant<0.0))&0x80000000u);
109109
precomp = nbl_glsl_MC_precomputeData(front);
110110
material = nbl_glsl_MC_material_data_t_getOriented(InstData.data[InstanceIndex].material,precomp.frontface);
111111
#ifdef TEX_PREFETCH_STREAM

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 577 additions & 489 deletions
Large diffs are not rendered by default.

examples_tests/22.RaytracedAO/Renderer.h

Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,18 @@
1919
class Renderer : public nbl::core::IReferenceCounted, public nbl::core::InterfaceUnmovable
2020
{
2121
public:
22-
#include "drawCommon.glsl"
23-
#include "raytraceCommon.glsl"
22+
#include "rasterizationCommon.h"
23+
#include "raytraceCommon.h"
2424
#ifdef __cplusplus
2525
#undef uint
26+
#undef vec4
2627
#undef mat4
2728
#undef mat4x3
2829
#endif
2930

30-
// No 8k yet, too many rays to store
31-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MaxResolution[2] = {7680/2,4320/2};
32-
33-
3431
Renderer(nbl::video::IVideoDriver* _driver, nbl::asset::IAssetManager* _assetManager, nbl::scene::ISceneManager* _smgr, bool useDenoiser = true);
3532

36-
void init( const nbl::asset::SAssetBundle& meshes, nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer>&& sampleSequence,
37-
uint32_t rayBufferSize=(sizeof(::RadeonRays::ray)+sizeof(::RadeonRays::Intersection))*2u*MaxResolution[0]*MaxResolution[1]); // 2 samples for MIS, TODO: compute default buffer size
33+
void init(const nbl::asset::SAssetBundle& meshes, nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer>&& sampleSequence);
3834

3935
void deinit();
4036

@@ -47,7 +43,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
4743
uint64_t getTotalSamplesComputed() const
4844
{
4945
const auto samplesPerDispatch = static_cast<uint64_t>(m_staticViewData.samplesPerRowPerDispatch*m_staticViewData.imageDimensions.y);
50-
const auto framesDispatched = static_cast<uint64_t>(m_raytraceCommonData.framesDispatched);
46+
const auto framesDispatched = static_cast<uint64_t>(m_framesDispatched);
5147
return framesDispatched*samplesPerDispatch;
5248
}
5349

@@ -59,7 +55,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
5955

6056
struct InitializationData
6157
{
62-
InitializationData() : lights(),lightRadiances(),lightCDF(),globalMeta(nullptr) {}
58+
InitializationData() : mdiFirstIndices(), lights(),lightRadiances(),lightCDF(),globalMeta(nullptr) {}
6359
InitializationData(InitializationData&& other) : InitializationData()
6460
{
6561
operator=(std::move(other));
@@ -68,14 +64,15 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
6864

6965
inline InitializationData& operator=(InitializationData&& other)
7066
{
67+
mdiFirstIndices = std::move(other.mdiFirstIndices);
7168
lights = std::move(other.lights);
7269
lightRadiances = std::move(other.lightRadiances);
7370
lightCDF = std::move(other.lightCDF);
7471
globalMeta = other.globalMeta;
7572
return *this;
7673
}
7774

78-
75+
nbl::core::vector<uint32_t> mdiFirstIndices;
7976
nbl::core::vector<SLight> lights;
8077
nbl::core::vector<nbl::core::vectorSIMDf> lightRadiances;
8178
union
@@ -89,7 +86,9 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
8986
void initSceneNonAreaLights(InitializationData& initData);
9087
void finalizeScene(InitializationData& initData);
9188

92-
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> createScreenSizedTexture(nbl::asset::E_FORMAT format);
89+
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers = 0u);
90+
91+
void traceBounce();
9392

9493

9594
// "constants"
@@ -111,33 +110,27 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
111110

112111
// persistent (initialized in constructor)
113112
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSetLayout> m_cullDSLayout;
114-
115-
nbl::core::smart_refctd_ptr<nbl::asset::ICPUSpecializedShader> m_visibilityBufferFillShaders[2];
116-
nbl::core::smart_refctd_ptr<nbl::asset::ICPUPipelineLayout> m_visibilityBufferFillPipelineLayoutCPU;
117-
nbl::core::smart_refctd_ptr<nbl::video::IGPUPipelineLayout> m_visibilityBufferFillPipelineLayoutGPU;
118-
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSetLayout> m_perCameraRasterDSLayout;
119-
120-
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSetLayout> m_commonRaytracingDSLayout, m_raygenDSLayout, m_resolveDSLayout;
113+
nbl::core::smart_refctd_ptr<const nbl::video::IGPUDescriptorSetLayout> m_perCameraRasterDSLayout;
114+
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSetLayout> m_rasterInstanceDataDSLayout,m_additionalGlobalDSLayout,m_commonRaytracingDSLayout,m_raygenDSLayout,m_resolveDSLayout;
115+
nbl::core::smart_refctd_ptr<nbl::video::IGPURenderpassIndependentPipeline> m_visibilityBufferFillPipeline;
121116

122117

123118
// scene specific data
124-
nbl::ext::RadeonRays::MockSceneManager m_mock_smgr;
125-
nbl::ext::RadeonRays::Manager::MeshBufferRRShapeCache rrShapeCache;
126-
nbl::ext::RadeonRays::Manager::NblInstanceRRInstanceCache rrInstances;
119+
nbl::core::vector<::RadeonRays::Shape*> rrShapes;
120+
nbl::core::vector<::RadeonRays::Shape*> rrInstances;
127121

128122
nbl::core::matrix3x4SIMD m_prevView;
129123
nbl::core::aabbox3df m_sceneBound;
130124
uint32_t m_maxRaysPerDispatch;
125+
uint32_t m_framesDispatched;
131126
StaticViewData_t m_staticViewData;
132127
RaytraceShaderCommonData_t m_raytraceCommonData;
133128

129+
nbl::core::smart_refctd_ptr<nbl::video::IGPUBuffer> m_indexBuffer;
134130
nbl::core::smart_refctd_ptr<nbl::video::IGPUBuffer> m_indirectDrawBuffers[2];
135131
struct MDICall
136132
{
137-
nbl::asset::SBufferBinding<const nbl::video::IGPUBuffer> vertexBindings[nbl::video::IGPUMeshBuffer::MAX_ATTR_BUF_BINDING_COUNT];
138-
nbl::core::smart_refctd_ptr<const nbl::video::IGPUBuffer> indexBuffer;
139-
nbl::core::smart_refctd_ptr<const nbl::video::IGPURenderpassIndependentPipeline> pipeline;
140-
uint32_t mdiOffset, mdiCount;
133+
uint32_t mdiOffset,mdiCount;
141134
};
142135
nbl::core::vector<MDICall> m_mdiDrawCalls;
143136
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSet> m_cullDS;
@@ -148,21 +141,21 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
148141

149142
nbl::core::smart_refctd_ptr<nbl::video::IGPUPipelineLayout> m_cullPipelineLayout, m_raygenPipelineLayout, m_resolvePipelineLayout;
150143
nbl::core::smart_refctd_ptr<nbl::video::IGPUComputePipeline> m_cullPipeline, m_raygenPipeline, m_resolvePipeline;
151-
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSet> m_globalBackendDataDS,m_commonRaytracingDS,m_raygenDS;
144+
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSet> m_globalBackendDataDS,m_rasterInstanceDataDS,m_additionalGlobalDS,m_commonRaytracingDS,m_raygenDS;
152145
uint32_t m_raygenWorkGroups[2];
153146

154147
struct InteropBuffer
155148
{
156149
nbl::core::smart_refctd_ptr<nbl::video::IGPUBuffer> buffer;
157150
std::pair<::RadeonRays::Buffer*, cl_mem> asRRBuffer = { nullptr,0u };
158151
};
159-
InteropBuffer m_rayCountBuffer,m_rayBuffer,m_intersectionBuffer;
152+
InteropBuffer m_rayCountBuffer[2];
153+
InteropBuffer m_rayBuffer,m_intersectionBuffer;
160154

161155
nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSet> m_resolveDS;
162-
uint32_t m_resolveWorkGroups[2];
163156

164157
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> m_accumulation,m_tonemapOutput;
165-
nbl::video::IFrameBuffer* m_visibilityBuffer,* m_colorBuffer,* tmpTonemapBuffer;
158+
nbl::video::IFrameBuffer* m_visibilityBuffer,* m_colorBuffer;
166159

167160
#ifdef _NBL_BUILD_OPTIX_
168161
nbl::core::smart_refctd_ptr<nbl::ext::OptiX::IDenoiser> m_denoiser;

examples_tests/22.RaytracedAO/common.glsl renamed to examples_tests/22.RaytracedAO/common.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
#ifndef _COMMON_INCLUDED_
22
#define _COMMON_INCLUDED_
33

4-
#define MAX_ACCUMULATED_SAMPLES (1024*1024)
4+
5+
#define MAX_TRIANGLES_IN_BATCH 512
6+
#define MAX_ACCUMULATED_SAMPLES 0x10000
7+
58

69
#define WORKGROUP_SIZE 256
710

11+
812
#ifdef __cplusplus
913
#define uint uint32_t
1014
struct uvec2
1115
{
12-
uint32_t x,y;
16+
uint x,y;
1317
};
1418
struct vec2
1519
{
@@ -24,4 +28,5 @@
2428
#define mat4x3 nbl::core::matrix3x4SIMD
2529
#endif
2630

31+
2732
#endif

examples_tests/22.RaytracedAO/cull.comp

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#version 430 core
22

3-
4-
#include "drawCommon.glsl"
3+
#include "rasterizationCommon.h"
54
layout(local_size_x = WORKGROUP_SIZE) in;
65

76
#include <nbl/builtin/glsl/ext/MitsubaLoader/instance_data_descriptor.glsl>
@@ -37,36 +36,39 @@ layout(push_constant, row_major) uniform PushConstants
3736
// we just do atomic add on the instance count
3837
void main()
3938
{
40-
if (gl_GlobalInvocationID.x<pc.data.maxDrawCount)
41-
commandBuff[pc.data.currentCommandBufferIx^0x1u].draws[gl_GlobalInvocationID.x].instanceCount = 0u;
39+
for (uint drawCommandGUID=gl_GlobalInvocationID.x; drawCommandGUID<pc.data.maxDrawCommandCount; drawCommandGUID+=gl_NumWorkGroups.x*WORKGROUP_SIZE)
40+
commandBuff[pc.data.currentCommandBufferIx^0x1u].draws[drawCommandGUID].instanceCount = 0u;
4241

43-
uint instanceMeshBufferID = gl_GlobalInvocationID.x;
44-
if (instanceMeshBufferID>=pc.data.maxObjectCount)
42+
uint batchInstanceID = gl_GlobalInvocationID.x;
43+
if (batchInstanceID>=pc.data.maxGlobalInstanceCount)
4544
return;
4645

4746
// fetch instance data
48-
const CullData_t instanceMeshBufferData = cullData[instanceMeshBufferID];
49-
const uint globalObjectID = instanceMeshBufferData.globalObjectID;
47+
const CullData_t batchInstanceData = cullData[batchInstanceID];
48+
const uint batchInstanceGUID = batchInstanceData.batchInstanceGUID;
5049

51-
const mat4x3 worldMatrix = InstData.data[globalObjectID].tform;
50+
const nbl_glsl_ext_Mitsuba_Loader_instance_data_t instanceData = InstData.data[batchInstanceGUID];
51+
const mat4x3 worldMatrix = InstData.data[batchInstanceGUID].tform;
5252
const mat4 MVP = nbl_glsl_pseudoMul4x4with4x3(pc.data.viewProjMatrix,worldMatrix);
5353

5454
// cull
55-
bool notCulled;
55+
bool notCulled = true;
56+
if (false)
5657
{
57-
const mat2x3 bbox = mat2x3(instanceMeshBufferData.aabbMinEdge,instanceMeshBufferData.aabbMaxEdge);
58+
const mat2x3 bbox = mat2x3(batchInstanceData.aabbMinEdge,batchInstanceData.aabbMaxEdge);
5859
notCulled = nbl_glsl_couldBeVisible(MVP,bbox);
5960
}
6061

6162
// set up MDI
6263
if (notCulled)
6364
{
64-
const uint drawID = instanceMeshBufferData.drawID;
65-
const uint drawInstanceID = atomicAdd(commandBuff[pc.data.currentCommandBufferIx].draws[drawID].instanceCount,1u)
66-
+commandBuff[pc.data.currentCommandBufferIx].draws[drawID].baseInstance;
65+
const uint drawCommandGUID = batchInstanceData.drawCommandGUID;
66+
const uint drawInstanceID = commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].baseInstance+
67+
atomicAdd(commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].instanceCount,1u);
6768

6869
instanceDataPerCamera.data[drawInstanceID].MVP = MVP;
69-
float detMVP = pc.data.viewProjDeterminant*InstData.data[globalObjectID].determinant;
70-
instanceDataPerCamera.data[drawInstanceID].backfacingBit_objectID = globalObjectID|(floatBitsToUint(detMVP)&0x80000000u); // use MSB to denote if face orientation should be flipped
70+
// use the MSB to denote if face orientation should be flipped
71+
instanceDataPerCamera.data[drawInstanceID].backfacingBit_batchInstanceGUID = batchInstanceGUID|((instanceData.determinantSignBit^floatBitsToUint(pc.data.viewProjDeterminant))&0x80000000u);
72+
instanceDataPerCamera.data[drawInstanceID].firstIndex = commandBuff[pc.data.currentCommandBufferIx].draws[drawCommandGUID].firstIndex;
7173
}
7274
}

examples_tests/22.RaytracedAO/drawCommon.glsl

Lines changed: 0 additions & 32 deletions
This file was deleted.
Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,36 @@
11
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
22
// This file is part of the "Nabla Engine".
33
// For conditions of distribution and use, see copyright notice in nabla.h
4-
54
#version 430 core
5+
#extension GL_EXT_shader_16bit_storage : require
6+
67

7-
#include <nbl/builtin/glsl/utils/normal_encode.glsl>
8+
#define _NBL_GLSL_EXT_MITSUBA_LOADER_INSTANCE_DATA_BINDING_ 0
9+
#include "virtualGeometry.glsl"
810

911

10-
layout(location = 0) flat in uint BackfacingBit_ObjectID;
11-
layout(location = 1) in vec3 Normal;
12-
layout(location = 2) in vec2 UV;
12+
#include <nbl/builtin/glsl/barycentric/frag.glsl>
13+
layout(location = 2) flat in uint BackfacingBit_BatchInstanceGUID;
14+
layout(location = 3) flat in uint drawCmdFirstIndex;
15+
16+
uint nbl_glsl_barycentric_frag_getDrawID() {return BackfacingBit_BatchInstanceGUID&0x7fffffffu;}
17+
vec3 nbl_glsl_barycentric_frag_getVertexPos(in uint drawID, in uint primID, in uint primsVx)
18+
{
19+
const uint ix = nbl_glsl_VG_fetchTriangleVertexIndex(primID*3u+drawCmdFirstIndex,primsVx);
20+
return nbl_glsl_fetchVtxPos(ix,drawID);
21+
}
22+
23+
24+
layout(location = 0) out uvec4 frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2; // should it be called backfacing or frontfacing?
1325

14-
layout(location = 0) out uvec2 frontFacing_Object_Triangle; // should it be called backfacing or frontfacing?
15-
layout(location = 1) out vec2 encodedNormal;
16-
layout(location = 2) out vec2 uv;
1726

1827
void main()
19-
{
20-
frontFacing_Object_Triangle = uvec2(BackfacingBit_ObjectID^(gl_FrontFacing ? 0x0u:0x80000000u),gl_PrimitiveID);
21-
// TODO: these will disappear once we finally have MeshPackerV2 and settle on a way to obtain barycentrics
22-
encodedNormal = nbl_glsl_NormalEncode_signedSpherical(normalize(Normal));
23-
uv = UV;
28+
{
29+
vec2 bary = nbl_glsl_barycentric_frag_get();
30+
31+
const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1;
32+
frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(gl_PrimitiveID,BackfacingBit_BatchInstanceGUID^(gl_FrontFacing ? 0x0u:0x80000000u),triangleIDBitcount,32-triangleIDBitcount);
33+
frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[1] = packUnorm2x16(bary);
34+
frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[2] = packHalf2x16(dFdx(bary));
35+
frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[3] = packHalf2x16(dFdy(bary));
2436
}
Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,33 @@
1+
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
2+
// This file is part of the "Nabla Engine".
3+
// For conditions of distribution and use, see copyright notice in nabla.h
14
#version 430 core
5+
#extension GL_EXT_shader_16bit_storage : require
26

3-
#include "drawCommon.glsl"
4-
layout(set=1, binding=0, row_major) readonly restrict buffer PerInstancePerCamera
7+
#include "rasterizationCommon.h"
8+
9+
#define _NBL_GLSL_EXT_MITSUBA_LOADER_INSTANCE_DATA_BINDING_ 0
10+
#include "virtualGeometry.glsl"
11+
12+
layout(set=2, binding=0, row_major) readonly restrict buffer PerInstancePerCamera
513
{
614
DrawData_t data[];
715
} instanceDataPerCamera;
816

9-
layout(location = 0) in vec3 vPosition;
10-
layout(location = 2) in vec2 vUV;
11-
layout(location = 3) in vec3 vNormal;
12-
13-
layout(location = 0) flat out uint BackfacingBit_ObjectID;
14-
layout(location = 1) out vec3 Normal;
15-
layout(location = 2) out vec2 UV;
17+
#include <nbl/builtin/glsl/barycentric/vert.glsl>
18+
layout(location = 2) flat out uint BackfacingBit_BatchInstanceGUID;
19+
layout(location = 3) flat out uint drawCmdFirstIndex;
1620

1721
#include <nbl/builtin/glsl/utils/transform.glsl>
18-
1922
void main()
2023
{
2124
DrawData_t self = instanceDataPerCamera.data[gl_InstanceIndex];
22-
BackfacingBit_ObjectID = self.backfacingBit_objectID;
25+
BackfacingBit_BatchInstanceGUID = self.backfacingBit_batchInstanceGUID;
26+
drawCmdFirstIndex = self.firstIndex;
2327

24-
gl_Position = nbl_glsl_pseudoMul4x4with3x1(self.MVP,vPosition);
28+
const uint batchInstanceGUID = self.backfacingBit_batchInstanceGUID&0x7fffffffu;
2529

26-
Normal = normalize(vNormal);
27-
28-
UV = vUV;
30+
const vec3 modelPos = nbl_glsl_fetchVtxPos(gl_VertexIndex,batchInstanceGUID);
31+
nbl_glsl_barycentric_vert_set(modelPos);
32+
gl_Position = nbl_glsl_pseudoMul4x4with3x1(self.MVP,modelPos);
2933
}

0 commit comments

Comments
 (0)