Skip to content

Commit f085ccf

Browse files
devshkeptsecret
authored and committed
change the deferred TLAS write container to a set
1 parent 28d75be commit f085ccf

File tree

2 files changed

+45
-46
lines changed

2 files changed

+45
-46
lines changed

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1057,6 +1057,19 @@ class CAssetConverter : public core::IReferenceCounted
10571057
return enqueueSuccess;
10581058
}
10591059

1060+
// public only because `GetDependantVisit<ICPUDescriptorSet>` needs it
1061+
struct SDeferredTLASWrite
1062+
{
1063+
inline bool operator==(const SDeferredTLASWrite& other) const
1064+
{
1065+
return dstSet == other.dstSet && binding == other.binding && arrayElement == other.arrayElement;
1066+
}
1067+
1068+
IGPUDescriptorSet* dstSet;
1069+
uint32_t binding;
1070+
uint32_t arrayElement;
1071+
core::smart_refctd_ptr<IGPUTopLevelAccelerationStructure> tlas;
1072+
};
10601073
private:
10611074
friend class CAssetConverter;
10621075

@@ -1127,19 +1140,6 @@ class CAssetConverter : public core::IReferenceCounted
11271140
};
11281141
using cpu_to_gpu_blas_map_t = core::unordered_map<const asset::ICPUBottomLevelAccelerationStructure*,BLASUsedInTLASBuild>;
11291142
cpu_to_gpu_blas_map_t m_blasBuildMap;
1130-
//
1131-
struct SDeferredTLASWrite
1132-
{
1133-
inline bool operator==(const SDeferredTLASWrite& other) const
1134-
{
1135-
return dstSet==other.dstSet && binding==other.binding && arrayElement==other.arrayElement;
1136-
}
1137-
1138-
IGPUDescriptorSet* dstSet;
1139-
uint32_t binding;
1140-
uint32_t arrayElement;
1141-
core::smart_refctd_ptr<IGPUTopLevelAccelerationStructure> tlas;
1142-
};
11431143
struct SDeferredTLASWriteHasher
11441144
{
11451145
inline size_t operator()(const SDeferredTLASWrite& write) const

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 32 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2528,19 +2528,19 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
25282528

25292529
MetaDeviceMemoryAllocator deferredAllocator(inputs.allocator ? inputs.allocator:device,inputs.logger);
25302530

2531-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
25322531
// BLAS and TLAS creation is somewhat delayed by buffer creation and allocation
25332532
struct DeferredASCreationParams
25342533
{
25352534
asset_cached_t<ICPUBuffer> storage;
25362535
size_t scratchSize : 62 = 0;
25372536
size_t motionBlur : 1 = false;
25382537
size_t compactAfterBuild : 1 = false;
2538+
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
25392539
size_t inputSize = 0;
25402540
uint32_t maxInstanceCount = 0;
2541+
#endif
25412542
};
25422543
core::vector<DeferredASCreationParams> accelerationStructureParams[2];
2543-
#endif
25442544
// Deduplication, Creation and Propagation
25452545
auto dedupCreateProp = [&]<Asset AssetType>()->void
25462546
{
@@ -2709,7 +2709,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
27092709
assign(entry.first,entry.second.firstCopyIx,i,device->createBuffer(std::move(params)));
27102710
}
27112711
}
2712-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
27132712
if constexpr (std::is_same_v<AssetType,ICPUBottomLevelAccelerationStructure> || std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure>)
27142713
{
27152714
using mem_prop_f = IDeviceMemoryAllocation::E_MEMORY_PROPERTY_FLAGS;
@@ -2724,9 +2723,10 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
27242723
const auto* as = entry.second.canonicalAsset;
27252724
const auto& patch = dfsCache.nodes[entry.second.patchIndex.value].patch;
27262725
const bool motionBlur = as->usesMotion();
2726+
ILogicalDevice::AccelerationStructureBuildSizes sizes = {};
2727+
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
27272728
// we will need to temporarily store the build input buffers somewhere
27282729
size_t inputSize = 0;
2729-
ILogicalDevice::AccelerationStructureBuildSizes sizes = {};
27302730
{
27312731
const auto buildFlags = patch.getBuildFlags(as);
27322732
if constexpr (IsTLAS)
@@ -2813,31 +2813,30 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
28132813
}
28142814
if (!sizes)
28152815
continue;
2816-
// this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2817-
IGPUBuffer::SCreationParams params = {};
2818-
constexpr size_t MinASBufferAlignment = 256u;
2819-
params.size = core::roundUp(sizes.accelerationStructureSize,MinASBufferAlignment);
2820-
params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
2821-
// concurrent ownership if any
2822-
const auto outIx = i+entry.second.firstCopyIx;
2823-
const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx];
2824-
const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies(uniqueCopyGroupID,as,patch);
2825-
params.queueFamilyIndexCount = queueFamilies.size();
2826-
params.queueFamilyIndices = queueFamilies.data();
2816+
#endif
28272817
// we need to save the buffer in a side-channel for later
2828-
auto& out = accelerationStructureParams[IsTLAS][baseOffset+entry.second.firstCopyIx+i];
2829-
out = {
2830-
.storage = device->createBuffer(std::move(params)),
2831-
.scratchSize = sizes.buildScratchSize,
2832-
.motionBlur = motionBlur,
2833-
.compactAfterBuild = patch.compactAfterBuild,
2834-
.inputSize = inputSize
2835-
};
2836-
if (out.storage)
2837-
requestAllocation(&out.storage,patch.hostBuild ? hostBuildMemoryTypes:deviceBuildMemoryTypes);
2818+
auto& out = accelerationStructureParams[IsTLAS][entry.second.firstCopyIx+i];
2819+
// this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2820+
{
2821+
IGPUBuffer::SCreationParams params = {};
2822+
constexpr size_t MinASBufferAlignment = 256u;
2823+
params.size = core::roundUp(sizes.accelerationStructureSize,MinASBufferAlignment);
2824+
params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
2825+
// concurrent ownership if any
2826+
const auto outIx = i + entry.second.firstCopyIx;
2827+
const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx];
2828+
const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies(uniqueCopyGroupID,as,patch);
2829+
params.queueFamilyIndexCount = queueFamilies.size();
2830+
params.queueFamilyIndices = queueFamilies.data();
2831+
out.storage.value = device->createBuffer(std::move(params));
2832+
}
2833+
out.scratchSize = sizes.buildScratchSize;
2834+
out.motionBlur = motionBlur;
2835+
out.compactAfterBuild = patch.compactAfterBuild;
2836+
if (out.storage && !deferredAllocator.request(&out.storage,patch.hostBuild ? hostBuildMemoryTypes:deviceBuildMemoryTypes))
2837+
out.storage.value = nullptr;
28382838
}
28392839
}
2840-
#endif
28412840
if constexpr (std::is_same_v<AssetType,ICPUImage>)
28422841
{
28432842
for (auto& entry : conversionRequests)
@@ -3392,9 +3391,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33923391
if (!retval.m_bufferConversions.empty())
33933392
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
33943393
}
3395-
3396-
3397-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
33983394
// Deal with Deferred Creation of Acceleration structures
33993395
{
34003396
for (auto asLevel=0; asLevel<2; asLevel++)
@@ -3406,8 +3402,9 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34063402
for (const auto& deferredParams : accelerationStructureParams[asLevel])
34073403
{
34083404
// buffer failed to create/allocate
3409-
if (!deferredParams.storage.get())
3405+
if (!deferredParams.storage)
34103406
continue;
3407+
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
34113408
IGPUAccelerationStructure::SCreationParams baseParams;
34123409
{
34133410
auto* buf = deferredParams.storage.get();
@@ -3434,15 +3431,16 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34343431
retval.m_minASBuildScratchSize = core::max(buildSize,retval.m_minASBuildScratchSize);
34353432
scratchSizeFullParallelBuild += buildSize;
34363433
// triangles, AABBs or Instance Transforms will need to be supplied from VRAM
3434+
#endif
34373435
}
34383436
//
3439-
retval.m_maxASBuildScratchSize[0] = core::max(scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize);
3437+
// retval.m_maxASBuildScratchSize[0] = core::max(scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize);
34403438
}
34413439
//
34423440
if (retval.willDeviceASBuild())
34433441
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
34443442
}
3445-
#endif
3443+
34463444
dedupCreateProp.operator()<ICPUBufferView>();
34473445
dedupCreateProp.operator()<ICPUImageView>();
34483446
dedupCreateProp.operator()<ICPUShader>();
@@ -4813,8 +4811,9 @@ if (worstSize>minScratchSize)
48134811
const auto* oldBuffer = as->getCreationParams().bufferRange.buffer.get();
48144812
assert(oldBuffer);
48154813
//
4814+
constexpr size_t MinASBufferAlignment = 256u;
48164815
using usage_f = IGPUBuffer::E_USAGE_FLAGS;
4817-
IGPUBuffer::SCreationParams creationParams = { {.size=sizes[i],.usage=usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT},{} };
4816+
IGPUBuffer::SCreationParams creationParams = { {.size=core::roundUp(sizes[i],MinASBufferAlignment),.usage = usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT},{}};
48184817
creationParams.queueFamilyIndexCount = oldBuffer->getCachedCreationParams().queueFamilyIndexCount;
48194818
creationParams.queueFamilyIndices = oldBuffer->getCachedCreationParams().queueFamilyIndices;
48204819
auto buf = device->createBuffer(std::move(creationParams));

0 commit comments

Comments
 (0)