@@ -2455,7 +2455,7 @@ struct conversions_t
const uint64_t uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[copyIx+baseIx];
if constexpr (std::is_same_v<AssetType,ICPUBuffer> || std::is_same_v<AssetType,ICPUImage>)
{
- const auto constrainMask = inputs->constrainMemoryTypeBits(uniqueCopyGroupID,asset,contentHash,gpuObj.get());
+ const auto constrainMask = inputs->constrainMemoryTypeBits(uniqueCopyGroupID,asset,contentHash,output->value.get());
if (!deferredAllocator->request(output,constrainMask))
return;
}
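Note: the constraint mask is now queried from `output->value` instead of the `gpuObj` local, presumably because the freshly created GPU object has already been handed off to the deferred-allocation `output` at this point, making `gpuObj` stale.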
@@ -3766,11 +3766,10 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
// Descriptor Sets need their TLAS descriptors substituted if they've been compacted
core::unordered_map<const IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
// Anything to do?
- auto reqQueueFlags = reservations.m_queueFlags;
- if (reqQueueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
+ if (reservations.m_queueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
{
// whether we actually get around to doing that depends on validity and success of transfers
- const bool shouldDoSomeCompute = reqQueueFlags.hasFlags(IQueue::FAMILY_FLAGS::COMPUTE_BIT);
+ const bool shouldDoSomeCompute = reservations.m_queueFlags.hasFlags(IQueue::FAMILY_FLAGS::COMPUTE_BIT);
auto invalidIntended = [device,logger](const IQueue::FAMILY_FLAGS flag, const SIntendedSubmitInfo* intended)->bool
{
if (!intended || !intended->valid())
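Note: the mutable `reqQueueFlags` copy is gone; the code now reads `reservations.m_queueFlags` directly, both here and in the later dependent-checking hunk, which (together with dropping the `|= TRANSFER_BIT` mutation below) treats the queue-family requirements as fixed once reservation completes.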
@@ -3852,7 +3851,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
const auto transferFamily = params.transfer->queue->getFamilyIndex();
// But don't want to have to do QFOTs between Transfer and Compute Queue Families then
if (transferFamily!=computeFamily)
- if (!scratchParams.canBeUsedByQueueFamily(transferFamily))
+ if (!scratchParams.isConcurrentSharing() || !scratchParams.canBeUsedByQueueFamily(transferFamily))
{
logger.log("Acceleration Structure Scratch Device Memory Allocator not mapped and not concurrently share-able by Transfer Family %d!",system::ILogger::ELL_ERROR,transferFamily);
return retval;
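Note: listing the transfer family is no longer sufficient to skip the error above; the scratch buffer must also use concurrent sharing. Presumably `canBeUsedByQueueFamily` also returns true for exclusively-owned resources, which would still force a queue family ownership transfer.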
@@ -3868,7 +3867,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
logger.log("An Acceleration Structure will be built on Device but Default UpStreaming Buffer from IUtilities doesn't have required usage flags!",system::ILogger::ELL_ERROR);
return retval;
}
- reqQueueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
}
}
// the elusive and exotic host builds
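Note: the upstreaming-buffer validation still hard-fails, but it no longer ORs `TRANSFER_BIT` into the (now removed) `reqQueueFlags` copy; the transfer requirement is presumably already recorded in `reservations.m_queueFlags` during reservation.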
@@ -3885,10 +3883,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
if (reservations.willCompactAS())
{
if (!params.compactedASAllocator)
- {
- logger.log("An Acceleration Structure will be compacted but no Device Memory Allocator provided!",system::ILogger::ELL_ERROR);
- return retval;
- }
+ logger.log("Acceleration Structures will be compacted using the ILogicalDevice as the memory allocator!",system::ILogger::ELL_WARNING);
// note that can't check the compacted AS allocator being large enough against `reservations.m_compactedASMaxMemory`
}
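Note: a missing compaction allocator is downgraded from a hard error to a warning because, as the allocation hunk further down shows, the `ILogicalDevice` itself now serves as the fallback memory allocator.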
@@ -4851,7 +4846,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
break;
}
if (depInfo.wasInStaging)
- dependsOnBLASBuilds;
+ dependsOnBLASBuilds = true;
instanceDataSize += ITopLevelAccelerationStructure::getInstanceSize(instance.getType());
}
// problem with building some Dependent BLASes
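Note: `dependsOnBLASBuilds;` was an expression statement with no effect, so the flag was never raised when a dependency BLAS was still in staging; it now actually assigns `true`.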
@@ -4872,7 +4867,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
const addr_t sizes[MaxAllocCount] = {tlasToBuild.second.scratchSize,instanceDataSize,sizeof(void*)*instanceCount};
{
const addr_t alignments[MaxAllocCount] = {limits.minAccelerationStructureScratchOffsetAlignment,16,alignof(uint64_t)};
- const auto AllocCount = as->usesMotion() ? 2:3;
+ const auto AllocCount = as->usesMotion() ? 3:2;
// if fail then flush and keep trying till space is made
for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(AllocCount,&offsets[0],&sizes[0],&alignments[0])!=0u; t++)
if (t==1) // don't flush right away cause allocator not defragmented yet
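Note: the allocation counts were swapped. Given `sizes = {scratch, instance data, instance pointers}`, a motion TLAS presumably needs the third allocation for its array-of-pointers input, so `usesMotion()` should select all 3 allocations and a plain build only the first 2.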
@@ -4902,14 +4897,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
assert(offsetInRange%16==0);

uint32_t bytesWritten = 0;
- while (true)
+ while (instanceIndex<instances.size())
{
const auto& instance = instances[instanceIndex++];
const auto type = instance.getType();
const auto size = ITopLevelAccelerationStructure::getInstanceSize(type);
const auto newWritten = bytesWritten+size;
- if (newWritten>=blockSize)
- return bytesWritten;
+ if (newWritten>blockSize)
+ break;
auto found = instanceMap->find(instance.getBase().blas.get());
auto blas = found->second.get();
if (auto found=compactedBLASMap->find(blas); found!=compactedBLASMap->end())
@@ -4918,6 +4913,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
dst = IGPUTopLevelAccelerationStructure::writeInstance(dst,instance,blas->getReferenceForDeviceOperations());
bytesWritten = newWritten;
}
+ return bytesWritten;
}

const compacted_blas_map_t* compactedBLASMap;
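Note: the instance-writing lambda had three related defects fixed across the two hunks above: the unbounded `while (true)` could run past the end of `instances`, `>=` rejected blocks that would be filled exactly, and the byte count was only returned on overflow, never when the instances ran out. A minimal standalone sketch of the corrected fill pattern, using hypothetical names (`Record`, `fillBlock`) rather than Nabla's actual types:

#include <cstdint>
#include <cstddef>
#include <vector>

struct Record { uint32_t size; };

// Pack as many whole records as fit into a block of `blockSize` bytes,
// advancing `index` past the consumed records; returns the bytes written.
uint32_t fillBlock(const std::vector<Record>& records, size_t& index, const uint32_t blockSize)
{
    uint32_t bytesWritten = 0;
    // bounded loop, unlike the old `while (true)`
    while (index<records.size())
    {
        const uint32_t newWritten = bytesWritten+records[index].size;
        // strict `>`: a block filled exactly to capacity is still valid
        if (newWritten>blockSize)
            break;
        // ... write the record's payload here ...
        index++;
        bytesWritten = newWritten;
    }
    // reported even when the loop ends by exhausting the input
    return bytesWritten;
}

The sketch advances `index` only after a record is accepted, so a record that does not fit can be retried in the next block.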
@@ -4994,7 +4990,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4994
4990
// enqueue ownership release if necessary
4995
4991
if (finalOwnerQueueFamily!=IQueue::FamilyIgnored)
4996
4992
{
4997
- compactedOwnershipReleaseIndices.push_back (ownershipTransfers.size ());
4993
+ if (willCompact)
4994
+ compactedOwnershipReleaseIndices.push_back (ownershipTransfers.size ());
4998
4995
ownershipTransfers.push_back ({
4999
4996
.barrier = {
5000
4997
.dep = {
@@ -5008,7 +5005,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
.range = backingRange
});
}
- else
+ else if (willCompact)
compactedOwnershipReleaseIndices.push_back(~0u);
}
// finish the last batch
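Note: both branches of the ownership-release bookkeeping are now gated on `willCompact`, so `compactedOwnershipReleaseIndices` stays parallel to the list of acceleration structures that will actually be compacted instead of gaining an entry for every release.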
@@ -5049,7 +5046,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
// create and allocate backing buffers for compacted TLASes
core::vector<asset_cached_t<ICPUBuffer>> backingBuffers(compactions.size());
{
- MetaDeviceMemoryAllocator deferredAllocator(params.compactedASAllocator,logger);
+ MetaDeviceMemoryAllocator deferredAllocator(params.compactedASAllocator ? params.compactedASAllocator:device,logger);
// create
for (size_t i=0; i<compactions.size(); i++)
{
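Note: this is the fallback the earlier warning announced; when `params.compactedASAllocator` is null the `device` is handed to `MetaDeviceMemoryAllocator`, which implies `ILogicalDevice` satisfies the allocator interface it expects.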
@@ -5182,7 +5179,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
// in the future we'll also finish host image copies

// check dependents before inserting into cache
- if (reqQueueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
+ if (reservations.m_queueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
{
auto checkDependents = [&]<Asset AssetType>()->void
{
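Note: same flag-source cleanup as before; with the `reqQueueFlags` local removed, the dependent check reads `reservations.m_queueFlags` directly.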