You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: src/nbl/ext/ImGui/ImGui.cpp
+82-46Lines changed: 82 additions & 46 deletions
Original file line number
Diff line number
Diff line change
@@ -1143,24 +1143,60 @@ namespace nbl::ext::imgui
1143
1143
1144
1144
struct
1145
1145
{
1146
-
mdi_size_t offset, size;
1146
+
std::vector<mdi_size_t> offsets, sizes;
1147
+
float memoryBlockFactor = 1.f;
1147
1148
} bigChunkRequestState;
1148
1149
1149
1150
//! we try to upload the entire MDI buffer with all available indirect data to our streaming buffer, but we cannot guarantee the allocation can be done in a single request, nor that all of totalIndirectDrawCount gets allocated at all - we can hit a timeout, in which case not all of totalIndirectDrawCount will be uploaded
1150
1151
for (mdi_size_t uploadedSize = 0ull; uploadedSize < mdiLimits.totalByteSizeRequest;)
1151
1152
{
1152
-
bigChunkRequestState.offset = InvalidAddress;
1153
-
bigChunkRequestState.size = streamingBuffer->max_size(); // TODO: divide by 2 request strategy on fail
// with bigChunkRequestState.memoryBlockFactor being divided by 2 because we will always have at least one offset which cannot be suballocated by linear allocator, this will be too tight for the suballocator to respect alignments - to make it work this delta would need a little factor which would add something to this difference I guess
1156
+
// tests:
1157
+
1158
+
#defineALLOC_STRATEGY_1
1159
+
//#define ALLOC_STRATEGY_2
1160
+
//#define ALLOC_STRATEGY_3
1161
+
1162
+
#ifdef ALLOC_STRATEGY_1
1163
+
mdi_size_t chunkOffset = InvalidAddress, chunkSize = min(streamingBuffer->max_size(), (mdiLimits.totalByteSizeRequest * bigChunkRequestState.memoryBlockFactor)); // we divide requests, delta has space for suballocator's padding - we trying to add another block with the fixed size, but if not posible we divide the block by 2
1164
+
1165
+
constexprauto StreamingAllocationCount = 1u;
1166
+
constsize_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), StreamingAllocationCount, &chunkOffset, &chunkSize, &MdiMaxAlignment); //! (*) note we request single tight chunk of memory with fixed max alignment - big address space from which we fill try to suballocate to fill data
1167
+
1168
+
if (chunkOffset == InvalidAddress)
1169
+
{
1170
+
bigChunkRequestState.memoryBlockFactor *= 0.5f;
1171
+
continue;
1172
+
}
1173
+
#endif
1174
+
#ifdef ALLOC_STRATEGY_2
1175
+
mdi_size_t chunkOffset = InvalidAddress, chunkSize = min(streamingBuffer->max_size(), (mdiLimits.totalByteSizeRequest - uploadedSize) * 2u/* we request twice the delta with respect to max_size UB */);
1154
1176
1155
1177
constexprauto StreamingAllocationCount = 1u;
1156
-
constsize_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), StreamingAllocationCount, &bigChunkRequestState.offset, &bigChunkRequestState.size, &MdiMaxAlignment); //! (*) note we request single tight chunk of memory with fixed max alignment - big address space from which we fill try to suballocate to fill data
1178
+
constsize_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), StreamingAllocationCount, &chunkOffset, &chunkSize, &MdiMaxAlignment); //! (*) note we request single tight chunk of memory with fixed max alignment - big address space from which we fill try to suballocate to fill data
1157
1179
1158
-
if (bigChunkRequestState.offset == InvalidAddress)
1180
+
if (chunkOffset == InvalidAddress)
1159
1181
continue;
1182
+
#endif
1183
+
#ifdef ALLOC_STRATEGY_3
1184
+
mdi_size_t chunkOffset = InvalidAddress, chunkSize = streamingBuffer->max_size(); // take all whats available <- dumbie I guess
1185
+
1186
+
constexprauto StreamingAllocationCount = 1u;
1187
+
constsize_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), StreamingAllocationCount, &chunkOffset, &chunkSize, &MdiMaxAlignment); //! (*) note we request single tight chunk of memory with fixed max alignment - big address space from which we fill try to suballocate to fill data
SMdiBuffer::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, bigChunkRequestState.offset, alignOffsetNeeded, MdiMaxAlignment, bigChunkRequestState.size); //! (*) we create linear suballocator to fill the allocated chunk of memory (some of at least)
1194
+
// chunk allocated? put state onto stack & keep alive for suballocator to fill it as required
SMdiBuffer::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, chunkOffset, alignOffsetNeeded, MdiMaxAlignment, chunkSize); //! (*) we create linear suballocator to fill the allocated chunk of memory (some of at least)
1164
1200
SMdiBuffer::suballocator_traits_t::multi_alloc_addr(fillSubAllocator, allocation.offsets.size(), allocation.offsets.data(), mdiLimits.sizes.data(), mdiLimits.alignments.data()); //! (*) we suballocate memory regions from the allocated chunk with required alignments - multi request all with single traits call
1165
1201
1166
1202
auto upload = [&]() -> size_t
@@ -1181,16 +1217,18 @@ namespace nbl::ext::imgui
1181
1217
return0u;
1182
1218
};
1183
1219
1184
-
// they are very small & negligible in size compared to buffers, but this small pool which we will conditionally fill on successful object buffer suballocations is required to not complicate things (if we cannot allocate all mdiLimits.totalIndirectDrawCount object buffers then simply those corresponding structures will be filled with dummy params making it an invocation with 0u indices, we treat both components as arrays)
1220
+
// they are *very* small (<1% of the total request size) & negligible in size compared to buffers - at the end we must have them all anyway (explained in following comment)
if (structuresSuballocated) // note that suballocated only means we have valid address(es) we can work on, it doesn't mean we filled anything
1223
+
if (structuresSuballocated) // note that suballocated only means we have valid address(es) we can work on, it doesn't mean we filled anything (suballocated -> *can* fill)
// I make an assumption here since I can access them later but I don't guarantee all of them will be present,
1193
-
// we can fail other suballocations which are required for the struct, note that in reality we fill them below & conditionally
1228
+
// I make an assumption here since I can access them later but I don't guarantee all of them will be present at the first run, we can fail buffer
1229
+
// suballocations from the current memory block chunk which makes a command list invalid for the iteration! Because of that we fill them conditionally
1230
+
// once buffers are correctly suballocated for handled command list - at the end we must have them all filled regardless what chunk their data come from due
1231
+
// to the fact we cannot submit an overflow, we don't have dynamic rendering allowing us to stop recording the subpass, submit work to queue & start recording again
assert(validateObjectOffsets()); // debug check only
1239
1277
1240
-
// we consider buffers valid if we suballocated them (under the hood filled) - if buffers are valid then subindirect call referencing them is too
1278
+
// we consider buffers valid for command list if we suballocated them (under the hood filled at first time then skipped to not repeat memcpy) - if buffers are valid then command list with indirects is as well
for (uint32_t j = 0u; j < commandList->CmdBuffer.Size; j++)
1282
+
if (buffersSuballocated)
1245
1283
{
1246
-
constauto* cmd = &commandList->CmdBuffer[j];
1247
-
auto* indirect = indirectStructures + drawID;
1248
-
auto* element = elementStructures + drawID;
1284
+
for (uint32_t j = 0u; j < commandList->CmdBuffer.Size; j++)
1285
+
{
1286
+
constauto* cmd = &commandList->CmdBuffer[j];
1287
+
auto* indirect = indirectStructures + drawID;
1288
+
auto* element = elementStructures + drawID;
1249
1289
1250
-
// we make a trick to keep indirect & element structs in the mdi iteration but explicitly execute dummy null invocation if we don't have vertex or index buffer for the struct (suballocation failed for any of those 2 buffers).
1251
-
// TODO: we could make the current structs pool "dynamic" in size and treat as simple stack instead (trying it first to make things easier)
// starting to wonder, for some reason imgui decided to keep single vertex & index shared between cmds within cmd list
1258
-
// but maybe we should cut current [vertexBuffer, indexBuffer] with respect to cmd->IdxOffset & cmd->VtxOffset (therefore we could have even smaller alloc requests, now a few structs can point to the same buffer but with different offsets [indirect])
1259
-
// though not sure if I don't double some data then <- EDIT: YES, turns out we may double some data
1260
-
indirect->vertexOffset = vtxGlobalObjectOffset + cmd->VtxOffset; // safe to assume due to indirect->indexCount depending on buffersSuballocated
1261
-
indirect->firstIndex = idxGlobalObjectOffset + cmd->IdxOffset; // safe to assume due to indirect->indexCount depending on buffersSuballocated
return std::round<int16_t>(std::clamp(ndc, -1.0f, 1.0f) * 32767.0f); // TODO: ok encodePixels<EF_R16_SNORM, double>(void* _pix, const double* _input) but iirc we have issues with our encode/decode utils
1304
+
};
1265
1305
1266
-
auto packSnorm16 = [](float ndc) -> int16_t
1267
-
{
1268
-
return std::round<int16_t>(std::clamp(ndc, -1.0f, 1.0f) * 32767.0f); // TODO: ok encodePixels<EF_R16_SNORM, double>(void* _pix, const double* _input) but iirc we have issues with our encode/decode utils
streamingBuffer->multi_deallocate(StreamingAllocationCount, &bigChunkRequestState.offset, &bigChunkRequestState.size, waitInfo); //! (*) block allocated, we just latch offsets deallocation to keep it alive as long as required
streamingBuffer->multi_deallocate(bigChunkRequestState.offsets.size(), bigChunkRequestState.offsets.data(), bigChunkRequestState.sizes.data(), waitInfo); //! (*) blocks allocated, we just latch offsets deallocation to keep them alive as long as required
1308
1344
}
1309
1345
1310
1346
auto mdiBuffer = smart_refctd_ptr<IGPUBuffer>(m_mdi.compose->getBuffer());
0 commit comments