@@ -2528,19 +2528,19 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
25282528
25292529 MetaDeviceMemoryAllocator deferredAllocator (inputs.allocator ? inputs.allocator :device,inputs.logger );
25302530
2531- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
25322531 // BLAS and TLAS creation is somewhat delayed by buffer creation and allocation, so the per-acceleration-structure parameters are saved in this side-channel struct until then
25332532 struct DeferredASCreationParams
25342533 {
25352534 asset_cached_t <ICPUBuffer> storage; // buffer created to back the acceleration structure; left/reset to null when buffer creation or the allocation request fails
25362535 size_t scratchSize : 62 = 0 ; // filled from the `buildScratchSize` of the computed build sizes
25372536 size_t motionBlur : 1 = false ; // filled from `usesMotion()` of the canonical asset
25382537 size_t compactAfterBuild : 1 = false ; // copied from the asset patch's `compactAfterBuild`
2538+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
25392539 size_t inputSize = 0 ; // presumably the size needed to temporarily store the build input buffers - TODO confirm against the writer of this field
25402540 uint32_t maxInstanceCount = 0 ; // NOTE(review): no write to this field is visible in this chunk - confirm where it gets set
2541+ #endif
25412542 };
25422543 core::vector<DeferredASCreationParams> accelerationStructureParams[2 ];
2543- #endif
25442544 // Deduplication, Creation and Propagation
25452545 auto dedupCreateProp = [&]<Asset AssetType>()->void
25462546 {
@@ -2709,7 +2709,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
27092709 assign (entry.first ,entry.second .firstCopyIx ,i,device->createBuffer (std::move (params)));
27102710 }
27112711 }
2712- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
27132712 if constexpr (std::is_same_v<AssetType,ICPUBottomLevelAccelerationStructure> || std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure>)
27142713 {
27152714 using mem_prop_f = IDeviceMemoryAllocation::E_MEMORY_PROPERTY_FLAGS;
@@ -2724,9 +2723,10 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
27242723 const auto * as = entry.second .canonicalAsset ;
27252724 const auto & patch = dfsCache.nodes [entry.second .patchIndex .value ].patch ;
27262725 const bool motionBlur = as->usesMotion ();
2726+ ILogicalDevice::AccelerationStructureBuildSizes sizes = {};
2727+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
27272728 // we will need to temporarily store the build input buffers somewhere
27282729 size_t inputSize = 0 ;
2729- ILogicalDevice::AccelerationStructureBuildSizes sizes = {};
27302730 {
27312731 const auto buildFlags = patch.getBuildFlags (as);
27322732 if constexpr (IsTLAS)
@@ -2813,31 +2813,30 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
28132813 }
28142814 if (!sizes)
28152815 continue ;
2816- // this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2817- IGPUBuffer::SCreationParams params = {};
2818- constexpr size_t MinASBufferAlignment = 256u ;
2819- params.size = core::roundUp (sizes.accelerationStructureSize ,MinASBufferAlignment);
2820- params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
2821- // concurrent ownership if any
2822- const auto outIx = i+entry.second .firstCopyIx ;
2823- const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx];
2824- const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies (uniqueCopyGroupID,as,patch);
2825- params.queueFamilyIndexCount = queueFamilies.size ();
2826- params.queueFamilyIndices = queueFamilies.data ();
2816+ #endif
28272817 // we need to save the buffer in a side-channel for later
2828- auto & out = accelerationStructureParams[IsTLAS][baseOffset+entry.second .firstCopyIx +i];
2829- out = {
2830- .storage = device->createBuffer (std::move (params)),
2831- .scratchSize = sizes.buildScratchSize ,
2832- .motionBlur = motionBlur,
2833- .compactAfterBuild = patch.compactAfterBuild ,
2834- .inputSize = inputSize
2835- };
2836- if (out.storage )
2837- requestAllocation (&out.storage ,patch.hostBuild ? hostBuildMemoryTypes:deviceBuildMemoryTypes);
2818+ auto & out = accelerationStructureParams[IsTLAS][entry.second .firstCopyIx +i];
2819+ // this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2820+ {
2821+ IGPUBuffer::SCreationParams params = {};
2822+ constexpr size_t MinASBufferAlignment = 256u ;
2823+ params.size = core::roundUp (sizes.accelerationStructureSize ,MinASBufferAlignment);
2824+ params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
2825+ // concurrent ownership if any
2826+ const auto outIx = i + entry.second .firstCopyIx ;
2827+ const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx];
2828+ const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies (uniqueCopyGroupID,as,patch);
2829+ params.queueFamilyIndexCount = queueFamilies.size ();
2830+ params.queueFamilyIndices = queueFamilies.data ();
2831+ out.storage .value = device->createBuffer (std::move (params));
2832+ }
2833+ out.scratchSize = sizes.buildScratchSize ;
2834+ out.motionBlur = motionBlur;
2835+ out.compactAfterBuild = patch.compactAfterBuild ;
2836+ if (out.storage && !deferredAllocator.request (&out.storage ,patch.hostBuild ? hostBuildMemoryTypes:deviceBuildMemoryTypes))
2837+ out.storage .value = nullptr ;
28382838 }
28392839 }
2840- #endif
28412840 if constexpr (std::is_same_v<AssetType,ICPUImage>)
28422841 {
28432842 for (auto & entry : conversionRequests)
@@ -3392,9 +3391,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33923391 if (!retval.m_bufferConversions .empty ())
33933392 retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
33943393 }
3395-
3396-
3397- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
33983394 // Deal with Deferred Creation of Acceleration structures
33993395 {
34003396 for (auto asLevel=0 ; asLevel<2 ; asLevel++)
@@ -3406,8 +3402,9 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34063402 for (const auto & deferredParams : accelerationStructureParams[asLevel])
34073403 {
34083404 // buffer failed to create/allocate
3409- if (!deferredParams.storage . get () )
3405+ if (!deferredParams.storage )
34103406 continue ;
3407+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
34113408 IGPUAccelerationStructure::SCreationParams baseParams;
34123409 {
34133410 auto * buf = deferredParams.storage .get ();
@@ -3434,15 +3431,16 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34343431 retval.m_minASBuildScratchSize = core::max (buildSize,retval.m_minASBuildScratchSize );
34353432 scratchSizeFullParallelBuild += buildSize;
34363433 // triangles, AABBs or Instance Transforms will need to be supplied from VRAM
3434+ #endif
34373435 }
34383436 //
3439- retval.m_maxASBuildScratchSize [0 ] = core::max (scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize );
3437+ // retval.m_maxASBuildScratchSize[0] = core::max(scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize);
34403438 }
34413439 //
34423440 if (retval.willDeviceASBuild ())
34433441 retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
34443442 }
3445- # endif
3443+
34463444 dedupCreateProp.operator ()<ICPUBufferView>();
34473445 dedupCreateProp.operator ()<ICPUImageView>();
34483446 dedupCreateProp.operator ()<ICPUShader>();
@@ -4813,8 +4811,9 @@ if (worstSize>minScratchSize)
48134811 const auto * oldBuffer = as->getCreationParams ().bufferRange .buffer .get ();
48144812 assert (oldBuffer);
48154813 //
4814+ constexpr size_t MinASBufferAlignment = 256u ;
48164815 using usage_f = IGPUBuffer::E_USAGE_FLAGS;
4817- IGPUBuffer::SCreationParams creationParams = { {.size =sizes[i],.usage = usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT},{} };
4816+ IGPUBuffer::SCreationParams creationParams = { {.size =core::roundUp ( sizes[i],MinASBufferAlignment), .usage = usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT },{}};
48184817 creationParams.queueFamilyIndexCount = oldBuffer->getCachedCreationParams ().queueFamilyIndexCount ;
48194818 creationParams.queueFamilyIndices = oldBuffer->getCachedCreationParams ().queueFamilyIndices ;
48204819 auto buf = device->createBuffer (std::move (creationParams));
0 commit comments