Skip to content

Commit aaac792

Browse files
author
devsh
committed
I was being super dumb with not advancing lastBatchEnd (huge overwrites upon breaks); also spotted my issues with beginDrawBatch
P.S. Also unsure whether aggregate initialization has a well-defined evaluation order
1 parent 3ae9e9e commit aaac792

File tree

1 file changed

+38
-27
lines changed

1 file changed

+38
-27
lines changed

src/nbl/ext/ImGui/ImGui.cpp

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,10 +1095,9 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
10951095
PushResult push(const ImDrawList* list)
10961096
{
10971097
drawIndirectCount += list->CmdBuffer.Size;
1098-
const PushResult retval = {
1099-
.indexByteOffset = geoAllocator.alloc_addr(sizeof(ImDrawIdx)*list->IdxBuffer.size(),sizeof(ImDrawIdx)),
1100-
.vertexByteOffset = geoAllocator.alloc_addr(sizeof(ImDrawVert)*list->VtxBuffer.size(),sizeof(ImDrawVert))
1101-
};
1098+
PushResult retval = {};
1099+
retval.indexByteOffset = geoAllocator.alloc_addr(list->IdxBuffer.size_in_bytes(),sizeof(ImDrawIdx));
1100+
retval.vertexByteOffset = geoAllocator.alloc_addr(list->VtxBuffer.size_in_bytes(),sizeof(ImDrawVert));
11021101
// should never happen, the linear address allocator space is enormous
11031102
const auto InvalidAddress = suballocator_t::invalid_address;
11041103
assert(retval.indexByteOffset!=InvalidAddress && retval.vertexByteOffset!=InvalidAddress);
@@ -1117,12 +1116,11 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
11171116
FinalizeResult finalize() const
11181117
{
11191118
suballocator_t imaginaryChunk(nullptr,memBlockOffset,0,roundUpToPoT(MaxAlignment),ImaginarySizeUpperBound);
1120-
FinalizeResult retval = {
1121-
.drawIndirectByteOffset = imaginaryChunk.alloc_addr(sizeof(VkDrawIndexedIndirectCommand)*drawIndirectCount,sizeof(VkDrawIndexedIndirectCommand)),
1122-
.perDrawByteOffset = imaginaryChunk.alloc_addr(sizeof(PerObjectData)*drawIndirectCount,sizeof(PerObjectData)),
1123-
.geometryByteOffset = imaginaryChunk.alloc_addr(geoAllocator.get_allocated_size(),GeoAlignment),
1124-
.totalSize = imaginaryChunk.get_allocated_size()
1125-
};
1119+
FinalizeResult retval = {};
1120+
retval.drawIndirectByteOffset = imaginaryChunk.alloc_addr(sizeof(VkDrawIndexedIndirectCommand)*drawIndirectCount,sizeof(VkDrawIndexedIndirectCommand));
1121+
retval.perDrawByteOffset = imaginaryChunk.alloc_addr(sizeof(PerObjectData)*drawIndirectCount,sizeof(PerObjectData));
1122+
retval.geometryByteOffset = imaginaryChunk.alloc_addr(geoAllocator.get_allocated_size(),GeoAlignment);
1123+
retval.totalSize = imaginaryChunk.get_allocated_size();
11261124
// should never happen, the linear address allocator space is enormous
11271125
const auto InvalidAddress = suballocator_t::invalid_address;
11281126
assert(retval.drawIndirectByteOffset!=InvalidAddress && retval.perDrawByteOffset!=InvalidAddress && retval.geometryByteOffset!=InvalidAddress);
@@ -1150,7 +1148,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
11501148
}
11511149

11521150
// allocate max sized chunk and set up our linear allocators
1153-
offset_t memBlockOffset = streaming_buffer_t::invalid_value;
1151+
offset_t memBlockOffset;
11541152
offset_t memBlockSize = 0;
11551153

11561154
private:
@@ -1207,14 +1205,18 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
12071205
// actual lambda
12081206
auto beginDrawBatch = [&]()->bool
12091207
{
1210-
// just a conservative lower bound, we will check again if allocation is hopeless to record a draw later
1211-
constexpr uint32_t SmallestAlloc = 3*sizeof(ImDrawIdx)+3*sizeof(ImDrawVert)+sizeof(VkDrawIndexedIndirectCommand)+sizeof(PerObjectData);
1208+
// push first item, because we need to fit at least one draw
1209+
metaAlloc.push(*(listIt++));
1210+
metaAlloc.memBlockOffset = 0;
1211+
const uint32_t SmallestAlloc = metaAlloc.finalize().totalSize;
1212+
metaAlloc.memBlockOffset = streaming_buffer_t::invalid_value;
12121213
// 2 tries
12131214
for (auto t=0; t<2; t++)
12141215
{
12151216
// Allocate a chunk as large as possible, a bit of trivia, `max_size` pessimizes the size assuming you're going to ask for allocation with Allocator's Max Alignment
12161217
// There's a bit of a delay/inaccuracy with `max_size` so try many sizes before giving up
1217-
metaAlloc.memBlockSize = streaming->max_size();
1218+
// Also `max_size` doesn't defragment, meaning it cannot be trusted to be accurate, so force at least one try with the size we need
1219+
metaAlloc.memBlockSize = core::max(streaming->max_size(),SmallestAlloc);
12181220
while (metaAlloc.memBlockSize>=SmallestAlloc)
12191221
{
12201222
// first time don't wait and require block ASAP, second time wait till timeout point
@@ -1240,20 +1242,23 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
12401242
auto endDrawBatch = [&]()->void
12411243
{
12421244
const auto offsets = metaAlloc.finalize();
1245+
const auto endOffset = offsets.drawIndirectByteOffset+offsets.totalSize;
1246+
uint32_t drawID = 0;
12431247
auto* drawIndirectIt = reinterpret_cast<VkDrawIndexedIndirectCommand*>(streamingPtr+offsets.drawIndirectByteOffset);
12441248
auto* elementIt = reinterpret_cast<PerObjectData*>(streamingPtr+offsets.perDrawByteOffset);
12451249
// replay allocations and this time actually memcpy
12461250
{
1251+
const auto endByte = streamingPtr+endOffset;
12471252
metaAlloc.reset();
1248-
// we use base instance as `gl_DrawID` in case GPU is missing it
1249-
uint32_t drawID = 0;
1250-
for (auto localListIt=lastBatchEnd; localListIt!=listIt; localListIt++)
1253+
for (; lastBatchEnd!=listIt; lastBatchEnd++)
12511254
{
1252-
const auto* list = *localListIt;
1255+
const auto* list = *lastBatchEnd;
12531256
auto geo = metaAlloc.push(list);
12541257
// now add the global offsets
12551258
geo.indexByteOffset += offsets.geometryByteOffset;
12561259
geo.vertexByteOffset += offsets.geometryByteOffset;
1260+
assert(geo.indexByteOffset<endOffset);
1261+
assert(geo.vertexByteOffset<endOffset);
12571262
// alignments should match
12581263
assert((geo.indexByteOffset%sizeof(ImDrawIdx))==0);
12591264
assert((geo.vertexByteOffset%sizeof(ImDrawVert))==0);
@@ -1265,6 +1270,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
12651270
for (auto j=0; j!=imCmdBuf.size(); j++)
12661271
{
12671272
const auto& cmd = imCmdBuf[j];
1273+
// we use base instance as `gl_DrawID` in case GPU is missing it
12681274
drawIndirectIt->firstInstance = drawID++;
12691275
drawIndirectIt->indexCount = cmd.ElemCount;
12701276
drawIndirectIt->instanceCount = 1u;
@@ -1294,8 +1300,17 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
12941300
}
12951301
memcpy(streamingPtr+geo.indexByteOffset,list->IdxBuffer.Data,list->IdxBuffer.size_in_bytes());
12961302
memcpy(streamingPtr+geo.vertexByteOffset,list->VtxBuffer.Data,list->VtxBuffer.size_in_bytes());
1303+
// not writing past the end
1304+
assert(streamingPtr+geo.indexByteOffset<endByte);
1305+
assert(streamingPtr+geo.vertexByteOffset<endByte);
12971306
}
12981307
}
1308+
// the offsets were enough and allocations should not overlap
1309+
assert(reinterpret_cast<VkDrawIndexedIndirectCommand*>(streamingPtr+offsets.perDrawByteOffset)>=drawIndirectIt);
1310+
assert(reinterpret_cast<PerObjectData*>(streamingPtr+offsets.geometryByteOffset)>=elementIt);
1311+
1312+
// flush the used range
1313+
assert(!streaming->needsManualFlushOrInvalidate());
12991314

13001315
// record draw call
13011316
{
@@ -1307,7 +1322,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
13071322
PushConstants constants
13081323
{
13091324
.elementBDA = streamingBaseAddress+offsets.perDrawByteOffset,
1310-
.elementCount = metaAlloc.getElementCount(),
1325+
.elementCount = drawID,
13111326
.scale = { trs.scale[0u], trs.scale[1u] },
13121327
.translate = { trs.translate[0u], trs.translate[1u] },
13131328
.viewport = { viewport.x, viewport.y, viewport.width, viewport.height }
@@ -1319,7 +1334,7 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
13191334
{
13201335
auto mdiBinding = binding;
13211336
mdiBinding.offset = offsets.drawIndirectByteOffset;
1322-
commandBuffer->drawIndexedIndirect(binding,metaAlloc.getElementCount(),sizeof(VkDrawIndexedIndirectCommand));
1337+
commandBuffer->drawIndexedIndirect(binding,drawID,sizeof(VkDrawIndexedIndirectCommand));
13231338
}
13241339
}
13251340

@@ -1328,14 +1343,15 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
13281343
if (offsets.totalSize>=minBlockSize)
13291344
if (const offset_t unusedStart=metaAlloc.memBlockOffset+offsets.totalSize, unusedSize=metaAlloc.memBlockSize-offsets.totalSize; unusedSize>=minBlockSize)
13301345
{
1346+
assert(unusedStart==endOffset);
1347+
assert(unusedStart+unusedSize==metaAlloc.memBlockOffset+metaAlloc.memBlockSize);
13311348
streaming->multi_deallocate(1,&unusedStart,&unusedSize);
1332-
// trime the leftover actually used block
1349+
// trim the leftover actually used block
13331350
metaAlloc.memBlockSize = offsets.totalSize;
13341351
}
13351352
// latch our used chunk free
13361353
streaming->multi_deallocate(1,&metaAlloc.memBlockOffset,&metaAlloc.memBlockSize,waitInfo);
13371354
// reset to initial state
1338-
metaAlloc.memBlockOffset = streaming_buffer_t::invalid_value;
13391355
metaAlloc.reset();
13401356
};
13411357

@@ -1346,11 +1362,6 @@ bool UI::render(IGPUCommandBuffer* const commandBuffer, ISemaphore::SWaitInfo wa
13461362
{
13471363
if (!metaAlloc.tryPush(*listIt))
13481364
{
1349-
if (listIt==lastBatchEnd)
1350-
{
1351-
logger.log("Obtained maximum allocation from streaming buffer isn't even enough to fit a single IMGUI commandlist",system::ILogger::ELL_ERROR);
1352-
return false;
1353-
}
13541365
endDrawBatch();
13551366
if (!beginDrawBatch())
13561367
return false;

0 commit comments

Comments
 (0)