@@ -2528,19 +2528,19 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2528
2528
2529
2529
MetaDeviceMemoryAllocator deferredAllocator (inputs.allocator ? inputs.allocator :device,inputs.logger );
2530
2530
2531
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
2532
2531
// BLAS and TLAS creation is somewhat delayed by buffer creation and allocation
2533
2532
struct DeferredASCreationParams // side-channel record: filled while the AS backing buffer is created, read back by the deferred AS creation pass
2534
2533
{
2535
2534
asset_cached_t <ICPUBuffer> storage; // buffer backing the acceleration structure; left/reset to null when buffer creation or the memory allocation request fails
2536
2535
size_t scratchSize : 62 = 0 ; // taken from the queried build sizes (`buildScratchSize`); 62+1+1 bits, presumably so it packs with the two flags below into one size_t
2537
2536
size_t motionBlur : 1 = false ; // set from the canonical asset's `usesMotion()`
2538
2537
size_t compactAfterBuild : 1 = false ; // copied from the patch's `compactAfterBuild`
2538
+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
2539
2539
size_t inputSize = 0 ; // only exists when NBL_ACCELERATION_STRUCTURE_CONVERSION is defined; presumably the size of the temporary build-input storage - NOTE(review): confirm where this gets assigned
2540
2540
uint32_t maxInstanceCount = 0 ; // only exists when NBL_ACCELERATION_STRUCTURE_CONVERSION is defined; presumably only meaningful for TLAS - TODO confirm
2541
+ #endif
2541
2542
};
2542
2543
core::vector<DeferredASCreationParams> accelerationStructureParams[2 ];
2543
- #endif
2544
2544
// Deduplication, Creation and Propagation
2545
2545
auto dedupCreateProp = [&]<Asset AssetType>()->void
2546
2546
{
@@ -2709,7 +2709,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2709
2709
assign (entry.first ,entry.second .firstCopyIx ,i,device->createBuffer (std::move (params)));
2710
2710
}
2711
2711
}
2712
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
2713
2712
if constexpr (std::is_same_v<AssetType,ICPUBottomLevelAccelerationStructure> || std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure>)
2714
2713
{
2715
2714
using mem_prop_f = IDeviceMemoryAllocation::E_MEMORY_PROPERTY_FLAGS;
@@ -2724,9 +2723,10 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2724
2723
const auto * as = entry.second .canonicalAsset ;
2725
2724
const auto & patch = dfsCache.nodes [entry.second .patchIndex .value ].patch ;
2726
2725
const bool motionBlur = as->usesMotion ();
2726
+ ILogicalDevice::AccelerationStructureBuildSizes sizes = {};
2727
+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
2727
2728
// we will need to temporarily store the build input buffers somewhere
2728
2729
size_t inputSize = 0 ;
2729
- ILogicalDevice::AccelerationStructureBuildSizes sizes = {};
2730
2730
{
2731
2731
const auto buildFlags = patch.getBuildFlags (as);
2732
2732
if constexpr (IsTLAS)
@@ -2813,31 +2813,30 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2813
2813
}
2814
2814
if (!sizes)
2815
2815
continue ;
2816
- // this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2817
- IGPUBuffer::SCreationParams params = {};
2818
- constexpr size_t MinASBufferAlignment = 256u ;
2819
- params.size = core::roundUp (sizes.accelerationStructureSize ,MinASBufferAlignment);
2820
- params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
2821
- // concurrent ownership if any
2822
- const auto outIx = i+entry.second .firstCopyIx ;
2823
- const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx];
2824
- const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies (uniqueCopyGroupID,as,patch);
2825
- params.queueFamilyIndexCount = queueFamilies.size ();
2826
- params.queueFamilyIndices = queueFamilies.data ();
2816
+ #endif
2827
2817
// we need to save the buffer in a side-channel for later
2828
- auto & out = accelerationStructureParams[IsTLAS][baseOffset+entry.second .firstCopyIx +i];
2829
- out = {
2830
- .storage = device->createBuffer (std::move (params)),
2831
- .scratchSize = sizes.buildScratchSize ,
2832
- .motionBlur = motionBlur,
2833
- .compactAfterBuild = patch.compactAfterBuild ,
2834
- .inputSize = inputSize
2835
- };
2836
- if (out.storage )
2837
- requestAllocation (&out.storage ,patch.hostBuild ? hostBuildMemoryTypes:deviceBuildMemoryTypes);
2818
+ auto & out = accelerationStructureParams[IsTLAS][entry.second .firstCopyIx +i];
2819
+ // this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2820
+ {
2821
+ IGPUBuffer::SCreationParams params = {};
2822
+ constexpr size_t MinASBufferAlignment = 256u ;
2823
+ params.size = core::roundUp (sizes.accelerationStructureSize ,MinASBufferAlignment);
2824
+ params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
2825
+ // concurrent ownership if any
2826
+ const auto outIx = i + entry.second .firstCopyIx ;
2827
+ const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx];
2828
+ const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies (uniqueCopyGroupID,as,patch);
2829
+ params.queueFamilyIndexCount = queueFamilies.size ();
2830
+ params.queueFamilyIndices = queueFamilies.data ();
2831
+ out.storage .value = device->createBuffer (std::move (params));
2832
+ }
2833
+ out.scratchSize = sizes.buildScratchSize ;
2834
+ out.motionBlur = motionBlur;
2835
+ out.compactAfterBuild = patch.compactAfterBuild ;
2836
+ if (out.storage && !deferredAllocator.request (&out.storage ,patch.hostBuild ? hostBuildMemoryTypes:deviceBuildMemoryTypes))
2837
+ out.storage .value = nullptr ;
2838
2838
}
2839
2839
}
2840
- #endif
2841
2840
if constexpr (std::is_same_v<AssetType,ICPUImage>)
2842
2841
{
2843
2842
for (auto & entry : conversionRequests)
@@ -3392,9 +3391,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3392
3391
if (!retval.m_bufferConversions .empty ())
3393
3392
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
3394
3393
}
3395
-
3396
-
3397
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
3398
3394
// Deal with Deferred Creation of Acceleration structures
3399
3395
{
3400
3396
for (auto asLevel=0 ; asLevel<2 ; asLevel++)
@@ -3406,8 +3402,9 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3406
3402
for (const auto & deferredParams : accelerationStructureParams[asLevel])
3407
3403
{
3408
3404
// buffer failed to create/allocate
3409
- if (!deferredParams.storage . get () )
3405
+ if (!deferredParams.storage )
3410
3406
continue ;
3407
+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
3411
3408
IGPUAccelerationStructure::SCreationParams baseParams;
3412
3409
{
3413
3410
auto * buf = deferredParams.storage .get ();
@@ -3434,15 +3431,16 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3434
3431
retval.m_minASBuildScratchSize = core::max (buildSize,retval.m_minASBuildScratchSize );
3435
3432
scratchSizeFullParallelBuild += buildSize;
3436
3433
// triangles, AABBs or Instance Transforms will need to be supplied from VRAM
3434
+ #endif
3437
3435
}
3438
3436
//
3439
- retval.m_maxASBuildScratchSize [0 ] = core::max (scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize );
3437
+ // retval.m_maxASBuildScratchSize[0] = core::max(scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize);
3440
3438
}
3441
3439
//
3442
3440
if (retval.willDeviceASBuild ())
3443
3441
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
3444
3442
}
3445
- # endif
3443
+
3446
3444
dedupCreateProp.operator ()<ICPUBufferView>();
3447
3445
dedupCreateProp.operator ()<ICPUImageView>();
3448
3446
dedupCreateProp.operator ()<ICPUShader>();
@@ -4813,8 +4811,9 @@ if (worstSize>minScratchSize)
4813
4811
const auto * oldBuffer = as->getCreationParams ().bufferRange .buffer .get ();
4814
4812
assert (oldBuffer);
4815
4813
//
4814
+ constexpr size_t MinASBufferAlignment = 256u ;
4816
4815
using usage_f = IGPUBuffer::E_USAGE_FLAGS;
4817
- IGPUBuffer::SCreationParams creationParams = { {.size =sizes[i],.usage = usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT},{} };
4816
+ IGPUBuffer::SCreationParams creationParams = { {.size =core::roundUp ( sizes[i],MinASBufferAlignment), .usage = usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT },{}};
4818
4817
creationParams.queueFamilyIndexCount = oldBuffer->getCachedCreationParams ().queueFamilyIndexCount ;
4819
4818
creationParams.queueFamilyIndices = oldBuffer->getCachedCreationParams ().queueFamilyIndices ;
4820
4819
auto buf = device->createBuffer (std::move (creationParams));
0 commit comments