@@ -3991,7 +3991,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 			core::vector<buffer_mem_barrier_t> finalReleases;
 			finalReleases.reserve(buffersToUpload.size());
 			// do the uploads
-			if (!buffersToUpload.empty())
+			if (!buffersToUpload.empty() && xferCmdBuf)
 			{
 				xferCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Upload Buffers START");
 				xferCmdBuf->cmdbuf->endDebugMarker();
@@ -4039,7 +4039,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 					.range = range
 				});
 			}
-			if (!buffersToUpload.empty())
+			if (!buffersToUpload.empty() && xferCmdBuf)
 			{
 				xferCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Upload Buffers END");
 				xferCmdBuf->cmdbuf->endDebugMarker();
@@ -4653,6 +4653,12 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 				return false;
 			};
 			//
+			using scratch_allocator_t = std::remove_reference_t<decltype(*params.scratchForDeviceASBuild)>;
+			using addr_t = typename scratch_allocator_t::size_type;
+			core::vector<addr_t> scratchOffsets;
+			scratchOffsets.reserve(maxASCount);
+			core::vector<addr_t> scratchSizes;
+			scratchSizes.reserve(maxASCount);
 			auto recordBuildCommandsBase = [&](auto& buildInfos, auto& rangeInfos)->void
 			{
 				if (buildInfos.empty())
@@ -4665,13 +4671,25 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4665
4671
bool success = !uniQueue || !deviceASBuildScratchPtr || pipelineBarrier (computeCmdBuf,{.memBarriers ={&readGeometryOrInstanceInASBuildBarrier,1 }}," Pipeline Barriers of Acceleration Structure backing Buffers failed!" );
4666
4672
//
4667
4673
success = success && computeCmdBuf->cmdbuf ->buildAccelerationStructures ({buildInfos},rangeInfos.data ());
4668
- if (!success)
4669
- for (const auto & info : buildInfos)
4674
+ if (success)
4670
4675
{
4671
- const auto stagingFound = findInStaging.template operator ()<ICPUTopLevelAccelerationStructure>(info.dstAS );
4672
- smart_refctd_ptr<const ICPUTopLevelAccelerationStructure> dummy; // already null at this point
4673
- markFailure (" AS Build Command Recording" ,&dummy,&stagingFound->second );
4676
+ submitsNeeded |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
4677
+ // queue up a deferred allocation
4678
+ params.scratchForDeviceASBuild ->multi_deallocate (scratchOffsets.size (),scratchOffsets.data (),scratchSizes.data (),params.compute ->getFutureScratchSemaphore ());
4679
+ }
4680
+ else
4681
+ {
4682
+ // release right away
4683
+ params.scratchForDeviceASBuild ->multi_deallocate (scratchOffsets.size (),scratchOffsets.data (),scratchSizes.data ());
4684
+ for (const auto & info : buildInfos)
4685
+ {
4686
+ const auto stagingFound = findInStaging.template operator ()<ICPUTopLevelAccelerationStructure>(info.dstAS );
4687
+ smart_refctd_ptr<const ICPUTopLevelAccelerationStructure> dummy; // already null at this point
4688
+ markFailure (" AS Build Command Recording" ,&dummy,&stagingFound->second );
4689
+ }
4674
4690
}
4691
+ scratchOffsets.clear ();
4692
+ scratchSizes.clear ();
4675
4693
buildInfos.clear ();
4676
4694
rangeInfos.clear ();
4677
4695
};
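Note on the hunk above: on a successful `buildAccelerationStructures` recording the batch's scratch sub-allocations are handed to the deferred `multi_deallocate` overload keyed on the compute scratch semaphore (the GPU still has to read them), while on failure they are released immediately and the destination acceleration structures are marked failed. Below is a minimal standalone sketch of that release pattern, assuming nothing about Nabla beyond the two call shapes visible in the diff; `ScratchAllocator`, `FutureSemaphore` and `releaseScratch` are illustrative stand-ins:

```cpp
#include <cstdint>
#include <vector>

// Stand-in for params.compute->getFutureScratchSemaphore()
struct FutureSemaphore {};

// Stand-in for the *params.scratchForDeviceASBuild sub-allocator
class ScratchAllocator
{
	public:
		using size_type = uint64_t;

		// immediate release: the ranges become reusable right away
		void multi_deallocate(size_t count, const size_type* offsets, const size_type* sizes)
		{
			for (size_t i=0; i<count; i++)
				freeRanges.push_back({offsets[i],sizes[i]});
		}
		// deferred release: the ranges are only recycled once `signal` fires,
		// because the recorded GPU commands still read from them
		void multi_deallocate(size_t count, const size_type* offsets, const size_type* sizes, const FutureSemaphore& signal)
		{
			for (size_t i=0; i<count; i++)
				pendingRanges.push_back({offsets[i],sizes[i]});
			// a real implementation would latch `signal` and move pendingRanges to freeRanges when it signals
		}

	private:
		struct Range { size_type offset, size; };
		std::vector<Range> freeRanges, pendingRanges;
};

// Mirrors the tail of recordBuildCommandsBase: offsets/sizes were accumulated per recorded build
void releaseScratch(ScratchAllocator& alloc, bool recordedSuccessfully,
	std::vector<uint64_t>& scratchOffsets, std::vector<uint64_t>& scratchSizes, const FutureSemaphore& computeDone)
{
	if (recordedSuccessfully) // the GPU will consume the scratch, free it only after the semaphore
		alloc.multi_deallocate(scratchOffsets.size(),scratchOffsets.data(),scratchSizes.data(),computeDone);
	else // nothing was recorded, the memory can be recycled immediately
		alloc.multi_deallocate(scratchOffsets.size(),scratchOffsets.data(),scratchSizes.data());
	scratchOffsets.clear();
	scratchSizes.clear();
}
```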
@@ -4813,8 +4831,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 				trackedBLASes.clear();
 			};
 			//
-			using scratch_allocator_t = std::remove_reference_t<decltype(*params.scratchForDeviceASBuild)>;
-			using addr_t = typename scratch_allocator_t::size_type;
 			const auto& limits = physDev->getLimits();
 			for (auto& tlasToBuild : tlasesToBuild)
 			{
@@ -4865,9 +4881,25 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 				constexpr uint32_t MaxAllocCount = 3;
 				addr_t offsets[MaxAllocCount] = {scratch_allocator_t::invalid_value,scratch_allocator_t::invalid_value,scratch_allocator_t::invalid_value};
 				const addr_t sizes[MaxAllocCount] = {tlasToBuild.second.scratchSize,instanceDataSize,sizeof(void*)*instanceCount};
+				const auto AllocCount = as->usesMotion() ? 3:2;
+				// clean up the allocation if we fail to make it to the end of the loop for whatever reason
+				bool abortAllocation = true;
+				auto deallocSrc = core::makeRAIIExiter([&params,&scratchOffsets,&scratchSizes,AllocCount,&offsets,&sizes,&abortAllocation]()->void
+					{
+						// if we got to the end of the loop, queue up the release of the memory, otherwise release right away
+						if (abortAllocation)
+							params.scratchForDeviceASBuild->multi_deallocate(AllocCount,&offsets[0],&sizes[0]);
+						else
+						for (auto i=0; i<AllocCount; i++)
+						{
+							scratchOffsets.push_back(offsets[i]);
+							scratchSizes.push_back(sizes[i]);
+						}
+					}
+				);
+				// allocate out scratch or submit overflow
 				{
 					const addr_t alignments[MaxAllocCount] = {limits.minAccelerationStructureScratchOffsetAlignment,16,alignof(uint64_t)};
-					const auto AllocCount = as->usesMotion() ? 3:2;
 					// if fail then flush and keep trying till space is made
 					for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(AllocCount,&offsets[0],&sizes[0],&alignments[0])!=0u; t++)
 					if (t==1) // don't flush right away cause allocator not defragmented yet
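Note on the hunk above: each TLAS iteration now wraps its scratch allocation in `core::makeRAIIExiter`, so whichever early-exit or failure path the loop body takes, the allocation is either freed on the spot (`abortAllocation` still true) or appended to `scratchOffsets`/`scratchSizes` for the deferred release shown earlier. Assuming `makeRAIIExiter` behaves like an ordinary scope guard, a minimal stand-in and a usage shaped like the loop body could look as follows (illustrative only, not Nabla's implementation):

```cpp
#include <utility>

// Minimal scope-guard equivalent of what core::makeRAIIExiter is assumed to do:
// run the callable when the guard goes out of scope, on every exit path.
template<typename F>
class RAIIExiter
{
	public:
		explicit RAIIExiter(F f) : func(std::move(f)) {}
		~RAIIExiter() { func(); }
		RAIIExiter(const RAIIExiter&) = delete;
		RAIIExiter& operator=(const RAIIExiter&) = delete;

	private:
		F func;
};
template<typename F>
RAIIExiter<F> makeRAIIExiter(F f) { return RAIIExiter<F>(std::move(f)); }

// Usage shaped like the loop body in the diff (the branch bodies are elided here)
void perIterationCleanupExample()
{
	bool abortAllocation = true; // pessimistic default, only cleared once the whole iteration succeeded
	auto guard = makeRAIIExiter([&]()->void
	{
		if (abortAllocation)
		{ /* release the scratch right away, nothing will consume it */ }
		else
		{ /* hand the offsets/sizes over to the deferred-release vectors */ }
	});
	// ... allocate scratch, stream instance data, record the build info ...
	abortAllocation = false; // reached the end, keep the scratch alive until the build has run
}
```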
@@ -4881,8 +4913,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 						}
 						drainCompute();
 					}
-					// queue up a deferred allocation
-					params.scratchForDeviceASBuild->multi_deallocate(AllocCount,&offsets[0],&sizes[0],params.compute->getFutureScratchSemaphore());
 				}
 				// stream the instance/geometry input in
 				const size_t trackedBLASesOffset = trackedBLASes.size();
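Note on the two hunks above: the middle of the retry loop is elided by the diff context, but the visible shape is the usual overflow strategy for a sub-allocated scratch buffer: retry once in case previously deferred frees simply have not been recycled yet, then submit the builds recorded so far and drain the compute queue so scratch actually returns to the pool, then retry the allocation. The old in-scope deferred `multi_deallocate` is removed because the release is now owned by the scope guard and `recordBuildCommandsBase`. A rough sketch of that retry shape, with `allocateFn`, `flushFn` and `drainFn` as hypothetical stand-ins for `multi_allocate`, the overflow submit and `drainCompute`:

```cpp
#include <cstdint>

// allocateFn: attempts the multi_allocate, returns true once every range was allocated
// flushFn:    submits the acceleration-structure builds recorded so far
// drainFn:    blocks until that submission (and its deferred scratch frees) completes
template<typename AllocateFn, typename FlushFn, typename DrainFn>
void allocateScratchOrFlush(AllocateFn&& allocateFn, FlushFn&& flushFn, DrainFn&& drainFn)
{
	for (uint32_t t=0; !allocateFn(); t++)
	if (t==1) // don't flush right away, freed ranges may simply not have been recycled yet
	{
		flushFn(); // make room by submitting the pending builds so their scratch gets released
		drainFn(); // wait for them, then loop around and retry the allocation
	}
}
```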
@@ -4983,6 +5013,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 				buildInfo.trackedBLASes = {reinterpret_cast<const p_p_BLAS_t&>(trackedBLASesOffset),trackedBLASes.size()-trackedBLASesOffset};
 				// no special extra byte offset into the instance buffer
 				rangeInfos.emplace_back(instanceCount,0u);
+				abortAllocation = false;
 				//
 				const bool willCompact = tlasToBuild.second.compact;
 				if (willCompact)