@@ -1095,10 +1095,9 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
PushResult push(const ImDrawList* list)
{
drawIndirectCount += list->CmdBuffer.Size;
- const PushResult retval = {
- .indexByteOffset = geoAllocator.alloc_addr(sizeof(ImDrawIdx)*list->IdxBuffer.size(),sizeof(ImDrawIdx)),
- .vertexByteOffset = geoAllocator.alloc_addr(sizeof(ImDrawVert)*list->VtxBuffer.size(),sizeof(ImDrawVert))
- };
+ PushResult retval = {};
+ retval.indexByteOffset = geoAllocator.alloc_addr(list->IdxBuffer.size_in_bytes(),sizeof(ImDrawIdx));
+ retval.vertexByteOffset = geoAllocator.alloc_addr(list->VtxBuffer.size_in_bytes(),sizeof(ImDrawVert));
// should never happen, the linear address allocator space is enormous
const auto InvalidAddress = suballocator_t::invalid_address;
assert(retval.indexByteOffset!=InvalidAddress && retval.vertexByteOffset!=InvalidAddress);
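A note for readers unfamiliar with Nabla's allocators: `geoAllocator` is a linear address allocator, so each `alloc_addr` call just bumps an aligned cursor over an abstract address range and returns the offset. A minimal sketch of that behaviour, using a hypothetical `BumpAllocator` that stands in for the real `suballocator_t` (names and members are illustrative, not the engine's API):

```cpp
#include <cstdint>

// Hypothetical stand-in for a linear address suballocator: it only bumps an
// aligned cursor forward and never frees individual allocations.
struct BumpAllocator
{
	static constexpr uint64_t invalid_address = ~0ull;

	explicit BumpAllocator(const uint64_t capacity) : m_capacity(capacity) {}

	// hand out the next offset aligned to `alignment` (any value, not just PoT),
	// or `invalid_address` if the range is exhausted
	uint64_t alloc_addr(const uint64_t bytes, const uint64_t alignment)
	{
		const uint64_t addr = (m_cursor+alignment-1)/alignment*alignment;
		if (addr+bytes>m_capacity)
			return invalid_address;
		m_cursor = addr+bytes;
		return addr;
	}

	uint64_t get_allocated_size() const { return m_cursor; }
	void reset() { m_cursor = 0; }

	uint64_t m_cursor = 0;
	uint64_t m_capacity;
};
```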
@@ -1117,12 +1116,11 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
FinalizeResult finalize() const
{
suballocator_t imaginaryChunk(nullptr,memBlockOffset,0,roundUpToPoT(MaxAlignment),ImaginarySizeUpperBound);
- FinalizeResult retval = {
- .drawIndirectByteOffset = imaginaryChunk.alloc_addr(sizeof(VkDrawIndexedIndirectCommand)*drawIndirectCount,sizeof(VkDrawIndexedIndirectCommand)),
- .perDrawByteOffset = imaginaryChunk.alloc_addr(sizeof(PerObjectData)*drawIndirectCount,sizeof(PerObjectData)),
- .geometryByteOffset = imaginaryChunk.alloc_addr(geoAllocator.get_allocated_size(),GeoAlignment),
- .totalSize = imaginaryChunk.get_allocated_size()
- };
+ FinalizeResult retval = {};
+ retval.drawIndirectByteOffset = imaginaryChunk.alloc_addr(sizeof(VkDrawIndexedIndirectCommand)*drawIndirectCount,sizeof(VkDrawIndexedIndirectCommand));
+ retval.perDrawByteOffset = imaginaryChunk.alloc_addr(sizeof(PerObjectData)*drawIndirectCount,sizeof(PerObjectData));
+ retval.geometryByteOffset = imaginaryChunk.alloc_addr(geoAllocator.get_allocated_size(),GeoAlignment);
+ retval.totalSize = imaginaryChunk.get_allocated_size();
// should never happen, the linear address allocator space is enormous
const auto InvalidAddress = suballocator_t::invalid_address;
assert(retval.drawIndirectByteOffset!=InvalidAddress && retval.perDrawByteOffset!=InvalidAddress && retval.geometryByteOffset!=InvalidAddress);
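`finalize()` is a dry run: it replays the batch's three sub-allocations into an "imaginary" chunk purely to learn their relative offsets and the total block size to request, without touching real memory. Sketched below with the hypothetical `BumpAllocator` from above; the 20-byte indirect-command size matches `sizeof(VkDrawIndexedIndirectCommand)`, while the `PerObjectData` size and `GeoAlignment` values are made-up placeholders:

```cpp
#include <cstdint>

// reuses the hypothetical BumpAllocator defined earlier
uint64_t computeBatchLayout(const uint32_t drawIndirectCount, const uint64_t geometryBytes)
{
	// dry-run the same allocation sequence as `finalize()` just to measure
	BumpAllocator imaginaryChunk(1ull<<30); // "enormous" imaginary space
	const auto drawIndirectByteOffset = imaginaryChunk.alloc_addr(20ull*drawIndirectCount,4ull); // 20 == sizeof(VkDrawIndexedIndirectCommand)
	const auto perDrawByteOffset = imaginaryChunk.alloc_addr(16ull*drawIndirectCount,16ull); // pretend PerObjectData is 16 bytes
	const auto geometryByteOffset = imaginaryChunk.alloc_addr(geometryBytes,4ull); // pretend GeoAlignment == 4
	// a single real streaming-buffer allocation of this size backs all three ranges
	return imaginaryChunk.get_allocated_size();
}
```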
@@ -1150,7 +1148,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
}

// allocate max sized chunk and set up our linear allocators
- offset_t memBlockOffset = streaming_buffer_t::invalid_value;
+ offset_t memBlockOffset;
offset_t memBlockSize = 0;

private:
@@ -1207,14 +1205,18 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
// actual lambda
auto beginDrawBatch = [&]()->bool
{
- // just a conservative lower bound, we will check again if allocation is hopeless to record a draw later
- constexpr uint32_t SmallestAlloc = 3*sizeof(ImDrawIdx)+3*sizeof(ImDrawVert)+sizeof(VkDrawIndexedIndirectCommand)+sizeof(PerObjectData);
+ // push first item, because we need to fit at least one draw
+ metaAlloc.push(*(listIt++));
+ metaAlloc.memBlockOffset = 0;
+ const uint32_t SmallestAlloc = metaAlloc.finalize().totalSize;
+ metaAlloc.memBlockOffset = streaming_buffer_t::invalid_value;
// 2 tries
for (auto t=0; t<2; t++)
{
// Allocate a chunk as large as possible, a bit of trivia, `max_size` pessimizes the size assuming you're going to ask for allocation with Allocator's Max Alignment
// There's a bit of a delay/inaccuracy with `max_size` so try many sizes before giving up
- metaAlloc.memBlockSize = streaming->max_size();
+ // Also `max_size` doesn't defragment, meaning it cannot be trusted to be accurate, so force at least one try with the size we need
+ metaAlloc.memBlockSize = core::max(streaming->max_size(),SmallestAlloc);
while (metaAlloc.memBlockSize>=SmallestAlloc)
{
// first time don't wait and require block ASAP, second time wait till timeout point
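The replaced constant lower bound (3 indices, 3 vertices, one indirect command, one per-draw entry) could undershoot once alignment padding and a real first command list are accounted for; the new code measures the exact cost of the first list instead. The surrounding acquisition loop, as far as this hunk shows, then works roughly like the sketch below; the shrink step is outside the visible context, so halving is an assumption, and `tryAllocate` is a hypothetical stand-in for the streaming buffer's allocation call:

```cpp
#include <algorithm>
#include <cstdint>
#include <functional>

// Two-try acquisition sketch: first pass polls without waiting, second pass
// blocks until the timeout point; within each pass, shrink the request until
// the allocator can satisfy it (halving is an assumed policy, not confirmed).
bool acquireBlock(const uint64_t maxSizeHint, const uint64_t smallestAlloc,
	const std::function<bool(uint64_t,bool)>& tryAllocate)
{
	for (int attempt=0; attempt<2; attempt++)
	{
		// `max_size()` may be stale and doesn't defragment, so never start
		// below the size we know a single draw needs
		uint64_t size = std::max(maxSizeHint,smallestAlloc);
		while (size>=smallestAlloc)
		{
			if (tryAllocate(size,/*wait=*/attempt!=0))
				return true;
			size >>= 1; // assumed shrink policy
		}
	}
	return false;
}
```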
@@ -1240,20 +1242,23 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
auto endDrawBatch = [&]()->void
{
const auto offsets = metaAlloc.finalize();
+ const auto endOffset = offsets.drawIndirectByteOffset+offsets.totalSize;
+ uint32_t drawID = 0;
auto* drawIndirectIt = reinterpret_cast<VkDrawIndexedIndirectCommand*>(streamingPtr+offsets.drawIndirectByteOffset);
auto* elementIt = reinterpret_cast<PerObjectData*>(streamingPtr+offsets.perDrawByteOffset);
// replay allocations and this time actually memcpy
{
+ const auto endByte = streamingPtr+endOffset;
metaAlloc.reset();
- // we use base instance as `gl_DrawID` in case GPU is missing it
- uint32_t drawID = 0;
- for (auto localListIt=lastBatchEnd; localListIt!=listIt; localListIt++)
+ for (; lastBatchEnd!=listIt; lastBatchEnd++)
{
- const auto* list = *localListIt;
+ const auto* list = *lastBatchEnd;
auto geo = metaAlloc.push(list);
// now add the global offsets
geo.indexByteOffset += offsets.geometryByteOffset;
geo.vertexByteOffset += offsets.geometryByteOffset;
+ assert(geo.indexByteOffset<endOffset);
+ assert(geo.vertexByteOffset<endOffset);
// alignments should match
assert((geo.indexByteOffset%sizeof(ImDrawIdx))==0);
assert((geo.vertexByteOffset%sizeof(ImDrawVert))==0);
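The "replay" comment is the key to this pass: a linear allocator is deterministic, so after `reset()` the identical sequence of pushes reproduces the exact offsets computed during the sizing pass, and the copy pass can `memcpy` straight to them. A self-contained sketch of the idea, reusing the hypothetical `BumpAllocator` from earlier:

```cpp
#include <cstring>
#include <vector>

// pass 1: run the allocation sequence only to measure the total size
uint64_t sizingPass(BumpAllocator& alloc, const std::vector<std::vector<char>>& lists)
{
	alloc.reset();
	for (const auto& l : lists)
		alloc.alloc_addr(l.size(),4ull);
	return alloc.get_allocated_size();
}

// pass 2: reset and replay the identical sequence, so every `alloc_addr`
// returns the same offset as in pass 1, then actually copy the bytes
void copyPass(BumpAllocator& alloc, const std::vector<std::vector<char>>& lists, char* const dst)
{
	alloc.reset();
	for (const auto& l : lists)
		std::memcpy(dst+alloc.alloc_addr(l.size(),4ull),l.data(),l.size());
}
```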
@@ -1265,6 +1270,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
for (auto j=0; j!=imCmdBuf.size(); j++)
{
const auto& cmd = imCmdBuf[j];
+ // we use base instance as `gl_DrawID` in case GPU is missing it
drawIndirectIt->firstInstance = drawID++;
drawIndirectIt->indexCount = cmd.ElemCount;
drawIndirectIt->instanceCount = 1u;
@@ -1294,8 +1300,17 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
}
memcpy(streamingPtr+geo.indexByteOffset,list->IdxBuffer.Data,list->IdxBuffer.size_in_bytes());
memcpy(streamingPtr+geo.vertexByteOffset,list->VtxBuffer.Data,list->VtxBuffer.size_in_bytes());
+ // not writing past the end
+ assert(streamingPtr+geo.indexByteOffset<endByte);
+ assert(streamingPtr+geo.vertexByteOffset<endByte);
}
}
+ // the offsets were enough and allocations should not overlap
+ assert(reinterpret_cast<VkDrawIndexedIndirectCommand*>(streamingPtr+offsets.perDrawByteOffset)>=drawIndirectIt);
+ assert(reinterpret_cast<PerObjectData*>(streamingPtr+offsets.geometryByteOffset)>=elementIt);
+
+ // flush the used range
+ assert(!streaming->needsManualFlushOrInvalidate());

// record draw call
{
@@ -1307,7 +1322,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
PushConstants constants
{
.elementBDA = streamingBaseAddress+offsets.perDrawByteOffset,
- .elementCount = metaAlloc.getElementCount(),
+ .elementCount = drawID,
.scale = { trs.scale[0u], trs.scale[1u] },
.translate = { trs.translate[0u], trs.translate[1u] },
.viewport = { viewport.x, viewport.y, viewport.width, viewport.height }
@@ -1319,7 +1334,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
{
auto mdiBinding = binding;
mdiBinding.offset = offsets.drawIndirectByteOffset;
- commandBuffer->drawIndexedIndirect(binding,metaAlloc.getElementCount(),sizeof(VkDrawIndexedIndirectCommand));
+ commandBuffer->drawIndexedIndirect(binding,drawID,sizeof(VkDrawIndexedIndirectCommand));
}
}
@@ -1328,14 +1343,15 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
if (offsets.totalSize>=minBlockSize)
if (const offset_t unusedStart=metaAlloc.memBlockOffset+offsets.totalSize, unusedSize=metaAlloc.memBlockSize-offsets.totalSize; unusedSize>=minBlockSize)
{
+ assert(unusedStart==endOffset);
+ assert(unusedStart+unusedSize==metaAlloc.memBlockOffset+metaAlloc.memBlockSize);
streaming->multi_deallocate(1,&unusedStart,&unusedSize);
- // trime the leftover actually used block
+ // trim the leftover actually used block
metaAlloc.memBlockSize = offsets.totalSize;
}
// latch our used chunk free
streaming->multi_deallocate(1,&metaAlloc.memBlockOffset,&metaAlloc.memBlockSize,waitInfo);
// reset to initial state
- metaAlloc.memBlockOffset = streaming_buffer_t::invalid_value;
metaAlloc.reset();
};
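The tail-trim above is worth spelling out: the batch grabbed the largest block the streaming buffer would give, so the untouched suffix can be returned to the pool immediately (nothing was recorded into it), while the used prefix goes to `multi_deallocate` with `waitInfo` so it is only recycled once the GPU semaphore signals. An illustrative memory map (names mirror the hunk above):

```cpp
// memBlockOffset                             memBlockOffset+memBlockSize
// |<-------------- totalSize --------------->|<------ unusedSize ------>|
// [ indirect cmds | per-draw data | geometry |       never written      ]
//                                             ^ unusedStart == endOffset
//
// suffix: multi_deallocate without waitInfo -> reusable right away
// prefix: multi_deallocate with waitInfo    -> reusable after the GPU is done
```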
@@ -1346,11 +1362,6 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
{
if (!metaAlloc.tryPush(*listIt))
{
- if (listIt==lastBatchEnd)
- {
- logger.log("Obtained maximum allocation from streaming buffer isn't even enough to fit a single IMGUI commandlist",system::ILogger::ELL_ERROR);
- return false;
- }
endDrawBatch();
if (!beginDrawBatch())
return false;