Skip to content

Commit 83991ac

Browse files
committed
clean code, add more debug validation, update align offset required for suballocator
1 parent 7eb21ac commit 83991ac

File tree

2 files changed

+55
-41
lines changed

2 files changed

+55
-41
lines changed

include/nbl/core/math/intutil.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ NBL_FORCE_INLINE constexpr bool isPoT(INT_TYPE value)
4242
template<typename INT_TYPE>
4343
NBL_FORCE_INLINE constexpr INT_TYPE roundUpToPoT(INT_TYPE value)
4444
{
45-
return INT_TYPE(0x1u)<<INT_TYPE(1+hlsl::findMSB<INT_TYPE>(value-INT_TYPE(1)));
45+
return INT_TYPE(0x1u)<<INT_TYPE(1+hlsl::findMSB<INT_TYPE>(value-INT_TYPE(1))); // this wont result in constexpr because findMSB is not one
4646
}
4747

4848
template<typename INT_TYPE>

src/nbl/ext/ImGui/ImGui.cpp

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,9 @@ namespace nbl::ext::imgui
2828
using mdi_size_t = compose_t::size_type;
2929

3030
static constexpr auto InvalidAddress = compose_t::invalid_value;
31-
static constexpr auto MdiAlignments = std::to_array<mdi_size_t>({ alignof(VkDrawIndexedIndirectCommand), alignof(PerObjectData), alignof(ImDrawIdx), alignof(ImDrawVert) });
3231
static constexpr auto MdiSizes = std::to_array<mdi_size_t>({ sizeof(VkDrawIndexedIndirectCommand), sizeof(PerObjectData), sizeof(ImDrawIdx), sizeof(ImDrawVert) });
33-
static constexpr auto MdiMaxAlignment = *std::max_element(MdiAlignments.begin(), MdiAlignments.end());
3432
static constexpr auto MdiMaxSize = *std::max_element(MdiSizes.begin(), MdiSizes.end());
33+
static const auto MdiMaxAlignment = roundUpToPoT(MdiMaxSize);
3534

3635
// those must be allocated within single block for content - even though its possible to tell vkCmdDrawIndexedIndirect about stride we cannot guarantee each one will be the same size with our allocation strategy
3736
enum class TightContent : uint16_t
@@ -1151,46 +1150,53 @@ namespace nbl::ext::imgui
11511150
for (mdi_size_t uploadedSize = 0ull; uploadedSize < mdiLimits.totalByteSizeRequest;)
11521151
{
11531152
bigChunkRequestState.offset = InvalidAddress;
1154-
bigChunkRequestState.size = streamingBuffer->max_size(); // request available block memory size which we can try to allocate
1153+
bigChunkRequestState.size = streamingBuffer->max_size(); // TODO: divide by 2 request strategy on fail
11551154

11561155
constexpr auto StreamingAllocationCount = 1u;
11571156
const size_t unallocatedSize = m_mdi.compose->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(100u), StreamingAllocationCount, &bigChunkRequestState.offset, &bigChunkRequestState.size, &MdiMaxAlignment); //! (*) note we request single tight chunk of memory with fixed max alignment - big address space from which we fill try to suballocate to fill data
11581157

11591158
if (bigChunkRequestState.offset == InvalidAddress)
1160-
continue; // failed? lets try again, TODO: should I here have my "blockMemoryFactor =* 0.5" and apply to bigChunkRequestState.size?
1159+
continue;
11611160
else
11621161
{
1163-
constexpr auto AlignOffsetNeeded = 0u;
1164-
SMdiBuffer::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, bigChunkRequestState.offset, AlignOffsetNeeded, 32u /* we care about vertex struct which is MdiMaxSize (20) bytes */, bigChunkRequestState.size); //! (*) we create linear suballocator to fill the allocated chunk of memory (some of at least)
1162+
const auto alignOffsetNeeded = MdiMaxSize - (bigChunkRequestState.offset % MdiMaxSize);
1163+
SMdiBuffer::suballocator_traits_t::allocator_type fillSubAllocator(mdiData, bigChunkRequestState.offset, alignOffsetNeeded, MdiMaxAlignment, bigChunkRequestState.size); //! (*) we create linear suballocator to fill the allocated chunk of memory (some of at least)
11651164
SMdiBuffer::suballocator_traits_t::multi_alloc_addr(fillSubAllocator, allocation.offsets.size(), allocation.offsets.data(), mdiLimits.sizes.data(), mdiLimits.alignments.data()); //! (*) we suballocate memory regions from the allocated chunk with required alignments - multi request all with single traits call
11661165

11671166
auto upload = [&]() -> size_t
11681167
{
11691168
size_t uploaded = {};
11701169

1171-
// they are very small & negligible in size compared to buffers, but this small pool which we will conditionally fill on successfull object buffer suballocations is required to not complicate things (if we cannot allocate all mdiLimits.totalIndirectDrawCount object buffers then simply those coresponding structures wont be filled, we treat both components as arrays)
1170+
auto updateSuballocation = [&](const uint32_t allocationIx) -> size_t
1171+
{
1172+
const bool isFilled = allocation.filled[allocationIx];
1173+
1174+
if (!isFilled)
1175+
{
1176+
const auto bytesToFill = mdiLimits.sizes[allocationIx];
1177+
uploaded += bytesToFill;
1178+
allocation.filled[allocationIx] = true;
1179+
return bytesToFill;
1180+
}
1181+
return 0u;
1182+
};
1183+
1184+
// they are very small & negligible in size compared to buffers, but this small pool which we will conditionally fill on successfull object buffer suballocations is required to not complicate things (if we cannot allocate all mdiLimits.totalIndirectDrawCount object buffers then simply those coresponding structures will be filled with dummy params making it an invocation with 0u indices, we treat both components as arrays)
11721185
const bool structuresSuballocated = allocation.offsets[(uint32_t)TightContent::INDIRECT_STRUCTURES] != InvalidAddress && allocation.offsets[(uint32_t)TightContent::ELEMENT_STRUCTURES] != InvalidAddress;
11731186

11741187
if (structuresSuballocated) // note that suballocated only means we have valid address(es) we can work on, it doesn't mean we filled anything
11751188
{
11761189
auto* const indirectStructures = reinterpret_cast<VkDrawIndexedIndirectCommand*>(mdiData + allocation.offsets[(uint32_t)TightContent::INDIRECT_STRUCTURES]);
11771190
auto* const elementStructures = reinterpret_cast<PerObjectData*>(mdiData + allocation.offsets[(uint32_t)TightContent::ELEMENT_STRUCTURES]);
11781191
{
1179-
if (!allocation.filled[(uint32_t)TightContent::INDIRECT_STRUCTURES])
1180-
{
1181-
uploaded += mdiLimits.sizes[(uint32_t)TightContent::INDIRECT_STRUCTURES];
1182-
allocation.filled[(uint32_t)TightContent::INDIRECT_STRUCTURES] = true; // I make a assumption here since I can access them later - but I don't guarantee all of them will be present since we can fail other suballocations which are required for the struct, note that in reality we fill them below & conditionally
1183-
}
1184-
1185-
if (!allocation.filled[(uint32_t)TightContent::ELEMENT_STRUCTURES])
1186-
{
1187-
uploaded += mdiLimits.sizes[(uint32_t)TightContent::ELEMENT_STRUCTURES];
1188-
allocation.filled[(uint32_t)TightContent::ELEMENT_STRUCTURES] = true; // I make a assumption here since I can access them later - but I don't guarantee all of them will be present since we can fail other suballocations which are required for the struct, note that in reality we fill them below & conditionally
1189-
}
1192+
// I make a assumption here since I can access them later but I don't guarantee all of them will be present,
1193+
// we can fail other suballocations which are required for the struct, note that in reality we fill them below & conditionally
1194+
updateSuballocation((uint32_t)TightContent::INDIRECT_STRUCTURES);
1195+
updateSuballocation((uint32_t)TightContent::ELEMENT_STRUCTURES);
11901196
}
11911197

11921198
uint32_t drawID = {};
1193-
for (uint32_t i = 0; i < drawData->CmdListsCount; i++)
1199+
for (uint32_t i = 0u; i < drawData->CmdListsCount; i++)
11941200
{
11951201
const auto* commandList = drawData->CmdLists[i];
11961202
const auto& [vertexBuffer, indexBuffer] = std::make_tuple(commandList->VtxBuffer, commandList->IdxBuffer);
@@ -1203,37 +1209,45 @@ namespace nbl::ext::imgui
12031209
if (offset == InvalidAddress)
12041210
return false;
12051211
else
1206-
if (!allocation.filled[allocationIx])
1207-
{
1212+
{
1213+
const auto bytesToFill = updateSuballocation(allocationIx);
1214+
1215+
if (bytesToFill != 0u)
12081216
::memcpy(mdiData + offset, in, bytesToFill);
1209-
uploaded += bytesToFill;
1210-
allocation.filled[allocationIx] = true;
1211-
}
1217+
}
12121218

12131219
return true;
12141220
};
12151221

12161222
auto validateObjectOffsets = [&]() -> bool
12171223
{
1218-
const auto vtxModulo = allocation.offsets[vtxAllocationIx] % sizeof(ImDrawVert);
1219-
const auto ixModulo = allocation.offsets[idxAllocationIx] % sizeof(ImDrawIdx);
1224+
const auto [vtxOffset, idxOffset] = std::make_tuple(allocation.offsets[vtxAllocationIx], allocation.offsets[idxAllocationIx]);
1225+
bool ok = true;
1226+
1227+
if (vtxOffset != InvalidAddress)
1228+
ok &= ((vtxOffset % sizeof(ImDrawVert)) == 0u);
1229+
1230+
if (idxOffset != InvalidAddress)
1231+
ok &= ((idxOffset % sizeof(ImDrawIdx)) == 0u);
1232+
1233+
_NBL_BREAK_IF(!ok);
12201234

1221-
return (vtxModulo == 0u && ixModulo == 0u); // we must be aligned!
1235+
return ok; // if offsets are valid then must be aligned properly!
12221236
};
12231237

1224-
assert(validateObjectOffsets()); // debug only
1225-
1226-
// we consider buffers valid if we suballocated them (under the hood filled)
1238+
assert(validateObjectOffsets()); // debug check only
1239+
1240+
// we consider buffers valid if we suballocated them (under the hood filled) - if buffers are valid then subindirect call referencing them is too
12271241
const auto buffersSuballocated = fillBuffer(vertexBuffer.Data, vtxAllocationIx) && fillBuffer(indexBuffer.Data, idxAllocationIx);
12281242
const auto [vtxGlobalObjectOffset, idxGlobalObjectOffset] = buffersSuballocated ? std::make_tuple(allocation.offsets[vtxAllocationIx] / sizeof(ImDrawVert), allocation.offsets[idxAllocationIx] / sizeof(ImDrawIdx)) : std::make_tuple((size_t)0u, (size_t)0u);
1229-
1230-
for (uint32_t j = 0; j < commandList->CmdBuffer.Size; j++)
1243+
1244+
for (uint32_t j = 0u; j < commandList->CmdBuffer.Size; j++)
12311245
{
12321246
const auto* cmd = &commandList->CmdBuffer[j];
12331247
auto* indirect = indirectStructures + drawID;
12341248
auto* element = elementStructures + drawID;
12351249

1236-
// we will make a trick to keep indirect & element structs in the mdi iteration but explicitly execute dummy null invocation if we don't have vertex or index buffer for the struct (suballocation failed for any of those 2 buffers).
1250+
// we make a trick to keep indirect & element structs in the mdi iteration but explicitly execute dummy null invocation if we don't have vertex or index buffer for the struct (suballocation failed for any of those 2 buffers).
12371251
// TODO: we could make the current structs pool "dynamic" in size and treat as simple stack instead (trying it first to make things easier)
12381252
indirect->indexCount = buffersSuballocated ? cmd->ElemCount /* valid invocation */ : 0u /* null invocation */;
12391253

@@ -1242,7 +1256,7 @@ namespace nbl::ext::imgui
12421256

12431257
// starting to wonder, for some reason imgui decided to keep single vertex & index shared between cmds within cmd list
12441258
// but maybe we should cut current [vertexBuffer, indexBuffer] with respect to cmd->IdxOffset & cmd->VtxOffset (therefore we could have even smaller alloc requests, now a few structs can point to the same buffer but with different offsets [indirect])
1245-
// though not sure if I don't double some data then
1259+
// though not sure if I don't double some data then <- EDIT: YES, turns out we may double some data
12461260
indirect->vertexOffset = vtxGlobalObjectOffset + cmd->VtxOffset; // safe to assume due to indirect->indexCount depending on buffersSuballocated
12471261
indirect->firstIndex = idxGlobalObjectOffset + cmd->IdxOffset; // safe to assume due to indirect->indexCount depending on buffersSuballocated
12481262

@@ -1272,15 +1286,14 @@ namespace nbl::ext::imgui
12721286
}
12731287
}
12741288
}
1275-
12761289
return uploaded;
12771290
};
12781291

12791292
uploadedSize += upload();
12801293
}
12811294
streamingBuffer->multi_deallocate(StreamingAllocationCount, &bigChunkRequestState.offset, &bigChunkRequestState.size, waitInfo); //! (*) block allocated, we just latch offsets deallocation to keep it alive as long as required
1282-
1283-
// we let to run it at least once
1295+
1296+
// we let it run at least once
12841297
const bool timeout = std::chrono::steady_clock::now() >= waitPoint;
12851298

12861299
if (timeout)
@@ -1303,7 +1316,8 @@ namespace nbl::ext::imgui
13031316
.buffer = smart_refctd_ptr(mdiBuffer)
13041317
};
13051318

1306-
if (!commandBuffer->bindIndexBuffer(binding, sizeof(ImDrawIdx) == 2 ? EIT_16BIT : EIT_32BIT))
1319+
constexpr auto IndexType = sizeof(ImDrawIdx) == 2u ? EIT_16BIT : EIT_32BIT;
1320+
if (!commandBuffer->bindIndexBuffer(binding, IndexType))
13071321
{
13081322
m_cachedCreationParams.utilities->getLogger()->log("Could not bind index buffer!", ILogger::ELL_ERROR);
13091323
assert(false);
@@ -1317,7 +1331,7 @@ namespace nbl::ext::imgui
13171331
.buffer = smart_refctd_ptr(mdiBuffer)
13181332
}};
13191333

1320-
if(!commandBuffer->bindVertexBuffers(0, 1, bindings))
1334+
if(!commandBuffer->bindVertexBuffers(0u, 1u, bindings))
13211335
{
13221336
m_cachedCreationParams.utilities->getLogger()->log("Could not bind vertex buffer!", ILogger::ELL_ERROR);
13231337
assert(false);
@@ -1334,7 +1348,7 @@ namespace nbl::ext::imgui
13341348
.maxDepth = 1.0f,
13351349
};
13361350

1337-
commandBuffer->setViewport(0, 1, &viewport);
1351+
commandBuffer->setViewport(0u, 1u, &viewport);
13381352
{
13391353
if (scissors.empty())
13401354
{

0 commit comments

Comments
 (0)