@@ -21,7 +21,7 @@ constexpr const char* SHADER_OVERRIDES =
21
21
R"(
22
22
#define _NBL_VERT_INPUTS_DEFINED_
23
23
24
- // #define nbl_glsl_VirtualAttribute_t uint
24
+ #define nbl_glsl_VirtualAttribute_t uint
25
25
26
26
vec4 nbl_glsl_decodeRGB10A2_UNORM(in uint x)
27
27
{
@@ -39,28 +39,18 @@ vec4 nbl_glsl_decodeRGB10A2_SNORM(in uint x)
39
39
}
40
40
41
41
//pos
42
- layout(set = 0, binding = 0) uniform samplerBuffer MeshPackedData_R32G32B32_SFLOAT ;
42
+ layout(set = 0, binding = 0) uniform samplerBuffer MeshPackedDataFloat[2] ;
43
43
44
44
//uv
45
- layout(set = 0, binding = 1) uniform samplerBuffer MeshPackedData_R32G32_SFLOAT ;
45
+ layout(set = 0, binding = 1) uniform isamplerBuffer MeshPackedDataInt[1] ;
46
46
47
47
//normal
48
- layout(set = 0, binding = 2) uniform usamplerBuffer MeshPackedData_A2B10G10R10_SNORM_PACK32 ;
48
+ layout(set = 0, binding = 2) uniform usamplerBuffer MeshPackedDataUint[1] ;
49
49
50
- layout(set = 0, binding = 3) readonly buffer VertexDataOffsetTable
50
+ layout(set = 0, binding = 3) readonly buffer VirtualAttributes
51
51
{
52
- int dataOffsetTable[];
53
- } vertexPosition;
54
-
55
- layout(set = 0, binding = 4) readonly buffer VertexUVOffsetTable
56
- {
57
- int dataOffsetTable[];
58
- } vertexUV;
59
-
60
- layout(set = 0, binding = 5) readonly buffer VertexNormalOffsetTable
61
- {
62
- int dataOffsetTable[];
63
- } vertexNormal;
52
+ nbl_glsl_VirtualAttribute_t vAttr[][3];
53
+ } virtualAttribTable;
64
54
65
55
#define _NBL_BASIC_VTX_ATTRIB_FETCH_FUCTIONS_DEFINED_
66
56
#define _NBL_POS_FETCH_FUNCTION_DEFINED
@@ -73,31 +63,40 @@ layout(set = 0, binding = 5) readonly buffer VertexNormalOffsetTable
73
63
//vec3 nbl_glsl_readAttrib(uint offset)
74
64
//..
75
65
76
- vec3 nbl_glsl_fetchVtxPos()
66
+ struct VirtualAttribute
77
67
{
78
- int vtxPosOffset = int(gl_VertexIndex) + vertexPosition.dataOffsetTable[gl_DrawID] ;
79
- return texelFetch(MeshPackedData_R32G32B32_SFLOAT, vtxPosOffset).xyz ;
80
- }
68
+ uint binding ;
69
+ int offset ;
70
+ };
81
71
82
- vec2 nbl_glsl_fetchVtxUV( )
72
+ VirtualAttribute unpackVirtualAttribute(in nbl_glsl_VirtualAttribute_t vaPacked )
83
73
{
84
- int vtxUVOffset = int(gl_VertexIndex) + vertexUV.dataOffsetTable[gl_DrawID];
85
- return texelFetch(MeshPackedData_R32G32_SFLOAT, vtxUVOffset).xy;
74
+ VirtualAttribute result;
75
+ result.binding = bitfieldExtract(vaPacked, 0, 4);
76
+ result.offset = int(bitfieldExtract(vaPacked, 4, 28));
77
+
78
+ return result;
86
79
}
87
80
88
- vec3 nbl_glsl_fetchVtxNormal( )
81
+ vec3 nbl_glsl_fetchVtxPos(in uint vtxID )
89
82
{
90
- int vtxNormOffset = int(gl_VertexIndex) + vertexNormal.dataOffsetTable [gl_DrawID];
91
- return normalize(nbl_glsl_decodeRGB10A2_SNORM( texelFetch(MeshPackedData_A2B10G10R10_SNORM_PACK32, vtxNormOffset).x) .xyz) ;
83
+ VirtualAttribute va = unpackVirtualAttribute(virtualAttribTable.vAttr [gl_DrawID][0]) ;
84
+ return texelFetch(MeshPackedDataFloat[va.binding], va.offset + int(vtxID)) .xyz;
92
85
}
93
86
94
- )" ;
87
+ vec2 nbl_glsl_fetchVtxUV(in uint vtxID)
88
+ {
89
+ VirtualAttribute va = unpackVirtualAttribute(virtualAttribTable.vAttr[gl_DrawID][1]);
90
+ return texelFetch(MeshPackedDataFloat[va.binding], va.offset + int(vtxID)).xy;
91
+ }
95
92
96
- struct DataOffsetTable
93
+ vec3 nbl_glsl_fetchVtxNormal(in uint vtxID)
97
94
{
98
- uint32_t binding;
99
- asset::SBufferBinding<IGPUBuffer> offsetBuffer;
100
- };
95
+ VirtualAttribute va = unpackVirtualAttribute(virtualAttribTable.vAttr[gl_DrawID][2]);
96
+ return nbl_glsl_decodeRGB10A2_SNORM(texelFetch(MeshPackedDataUint[va.binding], va.offset + int(vtxID)).x).xyz;
97
+ }
98
+
99
+ )" ;
101
100
102
101
core::smart_refctd_ptr<asset::ICPUSpecializedShader> createModifiedVertexShader (const asset::ICPUSpecializedShader* _fs)
103
102
{
@@ -136,8 +135,7 @@ struct DrawIndexedIndirectInput
136
135
size_t countOffset = 0u ;
137
136
};
138
137
139
-
140
- void packMeshBuffers (video::IVideoDriver* driver, core::vector<ICPUMeshBuffer*>& meshBuffers, DrawIndexedIndirectInput& output, std::array<DataOffsetTable, 3 >& offsetTable)
138
+ void packMeshBuffers (video::IVideoDriver* driver, core::vector<ICPUMeshBuffer*>& meshBuffers, DrawIndexedIndirectInput& output, core::smart_refctd_ptr<IGPUBuffer>& virtualAttribTableOut)
141
139
{
142
140
using MeshPacker = CCPUMeshPackerV2<DrawElementsIndirectCommand_t>;
143
141
@@ -155,63 +153,65 @@ void packMeshBuffers(video::IVideoDriver* driver, core::vector<ICPUMeshBuffer*>&
155
153
156
154
bool allocSuccessfull = mp.alloc (allocData.data (), meshBuffers.begin (), meshBuffers.end ());
157
155
if (!allocSuccessfull)
156
+ {
158
157
std::cout << " Alloc failed \n " ;
158
+ _NBL_DEBUG_BREAK_IF (true );
159
+ }
160
+
159
161
160
162
mp.instantiateDataStorage ();
161
163
MeshPacker::PackerDataStore packerDataStore = mp.getPackerDataStore ();
162
164
163
165
core::vector<IMeshPackerBase::PackedMeshBufferData> pmbd (meshBuffers.size ());
164
-
165
- const uint32_t offsetTableSz = mp. calcDataTableNeededSize ( meshBuffers.begin (), meshBuffers. end ()) ;
166
+
167
+ const uint32_t offsetTableSz = meshBuffers.size () * 3u ;
166
168
core::vector<MeshPacker::CombinedDataOffsetTable> cdot (offsetTableSz);
167
169
168
- mp.commit (pmbd.data (), cdot.data (), allocData.data (), meshBuffers.begin (), meshBuffers.end ());
170
+ bool commitSuccessfull = mp.commit (pmbd.data (), cdot.data (), allocData.data (), meshBuffers.begin (), meshBuffers.end ());
171
+ if (!commitSuccessfull)
172
+ {
173
+ std::cout << " Commit failed \n " ;
174
+ _NBL_DEBUG_BREAK_IF (true );
175
+ }
169
176
170
177
output.vtxBuffer = { 0ull , driver->createFilledDeviceLocalGPUBufferOnDedMem (packerDataStore.vertexBuffer ->getSize (), packerDataStore.vertexBuffer ->getPointer ()) };
171
178
output.idxBuff = driver->createFilledDeviceLocalGPUBufferOnDedMem (packerDataStore.indexBuffer ->getSize (), packerDataStore.indexBuffer ->getPointer ());
172
179
output.indirectDrawBuff = driver->createFilledDeviceLocalGPUBufferOnDedMem (packerDataStore.MDIDataBuffer ->getSize (), packerDataStore.MDIDataBuffer ->getPointer ());
173
180
174
- output.maxCount = offsetTableSz;
181
+ output.maxCount = meshBuffers. size (); // TODO
175
182
output.stride = sizeof (DrawElementsIndirectCommand_t);
176
183
177
- auto glsl = mp.generateGLSLBufferDefinitions (0u );
184
+ // auto glsl = mp.generateGLSLBufferDefinitions(0u);
185
+
186
+ // setOffsetTables
187
+
188
+ core::vector<MeshPacker::VirtualAttribute> offsetTableLocal;
189
+ offsetTableLocal.reserve (meshBuffers.size () * 3u );
190
+ for (uint32_t i = 0u ; i < meshBuffers.size (); i++)
191
+ {
192
+ MeshPacker::CombinedDataOffsetTable& virtualAttribTable = cdot[i];
193
+
194
+ offsetTableLocal.push_back (virtualAttribTable.attribInfo [0 ]);
195
+ offsetTableLocal.push_back (virtualAttribTable.attribInfo [2 ]);
196
+ offsetTableLocal.push_back (virtualAttribTable.attribInfo [3 ]);
197
+ }
178
198
179
- /* DrawElementsIndirectCommand_t* mdiPtr = static_cast<DrawElementsIndirectCommand_t*>(packerDataStore.MDIDataBuffer->getPointer());
199
+ /* DrawElementsIndirectCommand_t* mdiPtr = static_cast<DrawElementsIndirectCommand_t*>(packerDataStore.MDIDataBuffer->getPointer()) + 99u ;
180
200
uint16_t* idxBuffPtr = static_cast<uint16_t*>(packerDataStore.indexBuffer->getPointer());
181
201
float* vtxBuffPtr = static_cast<float*>(packerDataStore.vertexBuffer->getPointer());
182
202
183
- for (uint32_t i = 0u; i < 1188 ; i++)
203
+ for (uint32_t i = 0u; i < 264 ; i++)
184
204
{
185
- float* firstCoord = vtxBuffPtr + (*(idxBuffPtr + i) * 3u);
205
+ float* firstCoord = vtxBuffPtr + (( *(idxBuffPtr + i) + cdot[99].attribInfo[0].offset ) * 3u);
186
206
std::cout << "vtx: " << i << " idx: " << *(idxBuffPtr + i) << " ";
187
207
std::cout << *firstCoord << ' ' << *(firstCoord + 1u) << ' ' << *(firstCoord + 2u) << std::endl;
188
208
}*/
189
209
190
- // setOffsetTables
191
-
192
- core::vector<uint32_t > offsetTableLocal (offsetTableSz);
193
-
194
- for (uint32_t i = 0u ; i < offsetTableLocal.size (); i++)
195
- offsetTableLocal[i] = cdot[i].attribOffset [0 ];
196
-
197
- offsetTable[0 ].offsetBuffer .offset = 0u ;
198
- offsetTable[0 ].offsetBuffer .buffer = driver->createFilledDeviceLocalGPUBufferOnDedMem (sizeof (uint32_t ) * offsetTableLocal.size (), static_cast <void *>(offsetTableLocal.data ()));
199
-
200
- for (uint32_t i = 0u ; i < offsetTableLocal.size (); i++)
201
- offsetTableLocal[i] = cdot[i].attribOffset [1 ];
202
-
203
- offsetTable[1 ].offsetBuffer .offset = 0u ;
204
- offsetTable[1 ].offsetBuffer .buffer = driver->createFilledDeviceLocalGPUBufferOnDedMem (sizeof (uint32_t ) * offsetTableLocal.size (), static_cast <void *>(offsetTableLocal.data ()));
205
-
206
- for (uint32_t i = 0u ; i < offsetTableLocal.size (); i++)
207
- offsetTableLocal[i] = cdot[i].attribOffset [2 ];
208
-
209
- offsetTable[2 ].offsetBuffer .offset = 0u ;
210
- offsetTable[2 ].offsetBuffer .buffer = driver->createFilledDeviceLocalGPUBufferOnDedMem (sizeof (uint32_t ) * offsetTableLocal.size (), static_cast <void *>(offsetTableLocal.data ()));
210
+ virtualAttribTableOut = driver->createFilledDeviceLocalGPUBufferOnDedMem(sizeof(MeshPacker::VirtualAttribute) * offsetTableLocal.size(), offsetTableLocal.data()); // size argument is a byte count, not an element count
211
211
}
212
212
213
213
void setPipeline (IVideoDriver* driver, ICPUSpecializedShader* vs, ICPUSpecializedShader* fs,
214
- core::smart_refctd_ptr<IGPUBuffer>& vtxBuffer, core::smart_refctd_ptr<IGPUBuffer>& outputUBO, std::array<DataOffsetTable, 3 >& dataOffsetBuffers ,
214
+ core::smart_refctd_ptr<IGPUBuffer>& vtxBuffer, core::smart_refctd_ptr<IGPUBuffer>& outputUBO, core::smart_refctd_ptr<IGPUBuffer >& virtualAttribBuffer ,
215
215
core::smart_refctd_ptr<IGPUDescriptorSet>& outputGPUDescriptorSet0,
216
216
core::smart_refctd_ptr<IGPUDescriptorSet>& outputGPUDescriptorSet1,
217
217
core::smart_refctd_ptr<IGPURenderpassIndependentPipeline>& outputGpuPipeline)
@@ -222,16 +222,19 @@ void setPipeline(IVideoDriver* driver, ICPUSpecializedShader* vs, ICPUSpecialize
222
222
core::smart_refctd_ptr<IGPUDescriptorSetLayout> ds0Layout;
223
223
core::smart_refctd_ptr<IGPUDescriptorSetLayout> ds1Layout;
224
224
{
225
- IGPUDescriptorSetLayout::SBinding b[6 ];
226
- b[0 ].binding = 0u ; b[1 ].binding = 1u ; b[2 ].binding = 2u ; b[3 ].binding = 3u ; b[ 4 ]. binding = 4u ; b[ 5 ]. binding = 5u ;
225
+ IGPUDescriptorSetLayout::SBinding b[4 ];
226
+ b[0 ].binding = 0u ; b[1 ].binding = 1u ; b[2 ].binding = 2u ; b[3 ].binding = 3u ;
227
227
b[0 ].type = b[1 ].type = b[2 ].type = EDT_UNIFORM_TEXEL_BUFFER;
228
- b[3 ].type = b[4 ].type = b[5 ].type = EDT_STORAGE_BUFFER;
229
- b[0 ].stageFlags = b[1 ].stageFlags = b[2 ].stageFlags = b[3 ].stageFlags = b[4 ].stageFlags = b[5 ].stageFlags = ISpecializedShader::ESS_VERTEX;
230
- b[0 ].count = b[1 ].count = b[2 ].count = b[3 ].count = b[4 ].count = b[5 ].count = 1u ;
231
- ds0Layout = driver->createGPUDescriptorSetLayout (b, b + 6u );
228
+ b[3 ].type = EDT_STORAGE_BUFFER;
229
+ b[0 ].stageFlags = b[1 ].stageFlags = b[2 ].stageFlags = b[3 ].stageFlags = ISpecializedShader::ESS_VERTEX;
230
+ b[0 ].count = 2u ;
231
+ b[1 ].count = 1u ;
232
+ b[2 ].count = 1u ;
233
+ b[3 ].count = 1u ;
234
+ ds0Layout = driver->createGPUDescriptorSetLayout (b, b + 4u );
232
235
233
236
IGPUDescriptorSetLayout::SBinding b2;
234
- b2.binding = 0 ;
237
+ b2.binding = 0u ;
235
238
b2.type = EDT_UNIFORM_BUFFER;
236
239
b2.stageFlags = ISpecializedShader::ESS_VERTEX;
237
240
b2.count = 1u ;
@@ -244,48 +247,46 @@ void setPipeline(IVideoDriver* driver, ICPUSpecializedShader* vs, ICPUSpecialize
244
247
outputGPUDescriptorSet0 = driver->createGPUDescriptorSet (std::move (ds0Layout));
245
248
outputGPUDescriptorSet1 = driver->createGPUDescriptorSet (std::move (ds1Layout));
246
249
{
247
- IGPUDescriptorSet::SWriteDescriptorSet w[6 ];
248
- w[0 ].arrayElement = w[1 ].arrayElement = w[2 ].arrayElement = w[3 ].arrayElement = w[4 ].arrayElement = w[5 ].arrayElement = 0u ;
249
- w[0 ].count = w[1 ].count = w[2 ].count = w[3 ].count = w[4 ].count = w[5 ].count = 1u ;
250
- w[0 ].binding = 0u ; w[1 ].binding = 1u ; w[2 ].binding = 2u ; w[3 ].binding = 3u ; w[4 ].binding = 4u ; w[5 ].binding = 4u ;
251
- w[0 ].descriptorType = w[1 ].descriptorType = w[2 ].descriptorType = EDT_UNIFORM_TEXEL_BUFFER;
252
- w[3 ].descriptorType = w[4 ].descriptorType = w[5 ].descriptorType = EDT_STORAGE_BUFFER;
250
+ IGPUDescriptorSet::SWriteDescriptorSet w[5 ];
251
+ w[0 ].arrayElement = 0u ;
252
+ w[1 ].arrayElement = 1u ;
253
+ w[2 ].arrayElement = 0u ;
254
+ w[3 ].arrayElement = 0u ;
255
+ w[4 ].arrayElement = 0u ;
256
+ w[0 ].count = w[1 ].count = w[2 ].count = w[3 ].count = w[4 ].count = 1u ;
257
+ w[0 ].binding = 0u ; w[1 ].binding = 0u ; w[2 ].binding = 1u ; w[3 ].binding = 2u ; w[4 ].binding = 3u ;
258
+ w[0 ].descriptorType = w[1 ].descriptorType = w[2 ].descriptorType = w[3 ].descriptorType = EDT_UNIFORM_TEXEL_BUFFER;
259
+ w[4 ].descriptorType = EDT_STORAGE_BUFFER;
253
260
w[0].dstSet = w[1].dstSet = w[2].dstSet = w[3].dstSet = w[4].dstSet = outputGPUDescriptorSet0.get(); // w[] holds exactly 5 writes; indexing w[5] would write past the array
254
261
255
- IGPUDescriptorSet::SDescriptorInfo info[6 ];
262
+ IGPUDescriptorSet::SDescriptorInfo info[5 ];
256
263
257
264
info[0 ].buffer .offset = 0u ;
258
265
info[0 ].buffer .size = vtxBuffer->getSize ();
259
266
info[0 ].desc = driver->createGPUBufferView (vtxBuffer.get (), EF_R32G32B32_SFLOAT);
260
267
info[1 ].buffer .offset = 0u ;
261
268
info[1 ].buffer .size = vtxBuffer->getSize ();
262
- info[1 ].desc = driver->createGPUBufferView (vtxBuffer.get (), EF_R32G32_SFLOAT );
269
+ info[1 ].desc = driver->createGPUBufferView (vtxBuffer.get (), EF_R32G32B32_SFLOAT );
263
270
info[2 ].buffer .offset = 0u ;
264
271
info[2 ].buffer .size = vtxBuffer->getSize ();
265
- info[2 ].desc = driver->createGPUBufferView (vtxBuffer.get (), EF_R32_UINT);
272
+ info[2 ].desc = driver->createGPUBufferView (vtxBuffer.get (), EF_R32G32_SFLOAT);
273
+ info[3 ].buffer .offset = 0u ;
274
+ info[3 ].buffer .size = vtxBuffer->getSize ();
275
+ info[3 ].desc = driver->createGPUBufferView (vtxBuffer.get (), EF_R32_UINT);
266
276
267
277
// sampler buffers
268
278
w[0 ].info = &info[0 ];
269
279
w[1 ].info = &info[1 ];
270
280
w[2 ].info = &info[2 ];
271
-
272
- // offset tables
273
- info[3 ].buffer .offset = dataOffsetBuffers[0 ].offsetBuffer .offset ;
274
- info[3 ].buffer .size = dataOffsetBuffers[0 ].offsetBuffer .buffer ->getSize ();
275
- info[3 ].desc = core::smart_refctd_ptr (dataOffsetBuffers[0 ].offsetBuffer .buffer );
276
281
w[3 ].info = &info[3 ];
277
282
278
- info[4 ].buffer .offset = dataOffsetBuffers[1 ].offsetBuffer .offset ;
279
- info[4 ].buffer .size = dataOffsetBuffers[1 ].offsetBuffer .buffer ->getSize ();
280
- info[4 ].desc = core::smart_refctd_ptr (dataOffsetBuffers[1 ].offsetBuffer .buffer );
283
+ // offset tables
284
+ info[4 ].buffer .offset = 0u ;
285
+ info[4 ].buffer .size = virtualAttribBuffer->getSize ();
286
+ info[4 ].desc = core::smart_refctd_ptr (virtualAttribBuffer);
281
287
w[4 ].info = &info[4 ];
282
288
283
- info[5 ].buffer .offset = dataOffsetBuffers[2 ].offsetBuffer .offset ;
284
- info[5 ].buffer .size = dataOffsetBuffers[2 ].offsetBuffer .buffer ->getSize ();
285
- info[5 ].desc = core::smart_refctd_ptr (dataOffsetBuffers[2 ].offsetBuffer .buffer );
286
- w[5 ].info = &info[5 ];
287
-
288
- driver->updateDescriptorSets (6u , w, 0u , nullptr );
289
+ driver->updateDescriptorSets (5u , w, 0u , nullptr );
289
290
290
291
IGPUDescriptorSet::SWriteDescriptorSet w2;
291
292
w2.arrayElement = 0u ;
@@ -381,11 +382,11 @@ int main()
381
382
auto * vtxShader = pipeline->getShaderAtIndex (asset::ICPURenderpassIndependentPipeline::ESSI_VERTEX_SHADER_IX);
382
383
core::smart_refctd_ptr<ICPUSpecializedShader> vs = createModifiedVertexShader (vtxShader);
383
384
ICPUSpecializedShader* fs = IAsset::castDown<ICPUSpecializedShader>(am->getAsset (" ../shader.frag" , lp).getContents ().begin ()->get ());
384
- std::array<DataOffsetTable, 3 > offsetTable ;
385
+ core::smart_refctd_ptr<IGPUBuffer> virtualAttribTable ;
385
386
386
- packMeshBuffers (driver, meshBuffers, mdiCallParams, offsetTable );
387
+ packMeshBuffers (driver, meshBuffers, mdiCallParams, virtualAttribTable );
387
388
388
- setPipeline (driver, vs.get (), fs, mdiCallParams.vtxBuffer .buffer , ubo, offsetTable , ds0, ds1, gpuPipeline);
389
+ setPipeline (driver, vs.get (), fs, mdiCallParams.vtxBuffer .buffer , ubo, virtualAttribTable , ds0, ds1, gpuPipeline);
389
390
}
390
391
391
392
// ! we want to move around the scene and view it from different angles
0 commit comments