Skip to content

Commit 310eafd

Browse files
author
devsh
committed
fix various typos and bugs in Asset Converter
1 parent 41ef540 commit 310eafd

File tree

2 files changed

+18
-20
lines changed

2 files changed

+18
-20
lines changed

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,8 @@ class CAssetConverter : public core::IReferenceCounted
959959
uint32_t sampledImageBindingCount = 1<<10;
960960
uint32_t storageImageBindingCount = 11<<10;
961961
// specific to Acceleration Structure Build, they need to be at least as large as the largest amount of scratch required for an AS build
962-
CAsyncSingleBufferSubAllocatorST<core::GeneralpurposeAddressAllocator<uint64_t>>* scratchForDeviceASBuild = nullptr;
962+
using scratch_for_device_AS_build_t = CAsyncSingleBufferSubAllocatorST<core::GeneralpurposeAddressAllocator<uint64_t>>;
963+
scratch_for_device_AS_build_t* scratchForDeviceASBuild = nullptr;
963964
std::pmr::memory_resource* scratchForHostASBuild = nullptr;
964965
// needs to service allocations without limit, unlike the above where failure will just force a flush and performance of already queued up builds
965966
IDeviceMemoryAllocator* compactedASAllocator = nullptr;

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,7 +2455,7 @@ struct conversions_t
24552455
const uint64_t uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[copyIx+baseIx];
24562456
if constexpr (std::is_same_v<AssetType,ICPUBuffer> || std::is_same_v<AssetType,ICPUImage>)
24572457
{
2458-
const auto constrainMask = inputs->constrainMemoryTypeBits(uniqueCopyGroupID,asset,contentHash,gpuObj.get());
2458+
const auto constrainMask = inputs->constrainMemoryTypeBits(uniqueCopyGroupID,asset,contentHash,output->value.get());
24592459
if (!deferredAllocator->request(output,constrainMask))
24602460
return;
24612461
}
@@ -3766,11 +3766,10 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
37663766
// Descriptor Sets need their TLAS descriptors substituted if they've been compacted
37673767
core::unordered_map<const IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
37683768
// Anything to do?
3769-
auto reqQueueFlags = reservations.m_queueFlags;
3770-
if (reqQueueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
3769+
if (reservations.m_queueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
37713770
{
37723771
// whether we actually get around to doing that depends on validity and success of transfers
3773-
const bool shouldDoSomeCompute = reqQueueFlags.hasFlags(IQueue::FAMILY_FLAGS::COMPUTE_BIT);
3772+
const bool shouldDoSomeCompute = reservations.m_queueFlags.hasFlags(IQueue::FAMILY_FLAGS::COMPUTE_BIT);
37743773
auto invalidIntended = [device,logger](const IQueue::FAMILY_FLAGS flag, const SIntendedSubmitInfo* intended)->bool
37753774
{
37763775
if (!intended || !intended->valid())
@@ -3852,7 +3851,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
38523851
const auto transferFamily = params.transfer->queue->getFamilyIndex();
38533852
// But don't want to have to do QFOTs between Transfer and Queue Families then
38543853
if (transferFamily!=computeFamily)
3855-
if (!scratchParams.canBeUsedByQueueFamily(transferFamily))
3854+
if (!scratchParams.isConcurrentSharing() || !scratchParams.canBeUsedByQueueFamily(transferFamily))
38563855
{
38573856
logger.log("Acceleration Structure Scratch Device Memory Allocator not mapped and not concurrently share-able by Transfer Family %d!",system::ILogger::ELL_ERROR,transferFamily);
38583857
return retval;
@@ -3868,7 +3867,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
38683867
logger.log("An Acceleration Structure will be built on Device but Default UpStreaming Buffer from IUtilities doesn't have required usage flags!", system::ILogger::ELL_ERROR);
38693868
return retval;
38703869
}
3871-
reqQueueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
38723870
}
38733871
}
38743872
// the elusive and exotic host builds
@@ -3885,10 +3883,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
38853883
if (reservations.willCompactAS())
38863884
{
38873885
if (!params.compactedASAllocator)
3888-
{
3889-
logger.log("An Acceleration Structure will be compacted but no Device Memory Allocator provided!", system::ILogger::ELL_ERROR);
3890-
return retval;
3891-
}
3886+
logger.log("Acceleration Structures will be compacted using the ILogicalDevice as the memory allocator!", system::ILogger::ELL_WARNING);
38923887
// note that can't check the compacted AS allocator being large enough against `reservations.m_compactedASMaxMemory`
38933888
}
38943889

@@ -4851,7 +4846,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48514846
break;
48524847
}
48534848
if (depInfo.wasInStaging)
4854-
dependsOnBLASBuilds;
4849+
dependsOnBLASBuilds = true;
48554850
instanceDataSize += ITopLevelAccelerationStructure::getInstanceSize(instance.getType());
48564851
}
48574852
// problem with building some Dependent BLASes
@@ -4872,7 +4867,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48724867
const addr_t sizes[MaxAllocCount] = {tlasToBuild.second.scratchSize,instanceDataSize,sizeof(void*)*instanceCount};
48734868
{
48744869
const addr_t alignments[MaxAllocCount] = {limits.minAccelerationStructureScratchOffsetAlignment,16,alignof(uint64_t)};
4875-
const auto AllocCount = as->usesMotion() ? 2:3;
4870+
const auto AllocCount = as->usesMotion() ? 3:2;
48764871
// if fail then flush and keep trying till space is made
48774872
for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(AllocCount,&offsets[0],&sizes[0],&alignments[0])!=0u; t++)
48784873
if (t==1) // don't flush right away cause allocator not defragmented yet
@@ -4902,14 +4897,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
49024897
assert(offsetInRange%16==0);
49034898

49044899
uint32_t bytesWritten = 0;
4905-
while (true)
4900+
while (instanceIndex<instances.size())
49064901
{
49074902
const auto& instance = instances[instanceIndex++];
49084903
const auto type = instance.getType();
49094904
const auto size = ITopLevelAccelerationStructure::getInstanceSize(type);
49104905
const auto newWritten = bytesWritten+size;
4911-
if (newWritten>=blockSize)
4912-
return bytesWritten;
4906+
if (newWritten>blockSize)
4907+
break;
49134908
auto found = instanceMap->find(instance.getBase().blas.get());
49144909
auto blas = found->second.get();
49154910
if (auto found=compactedBLASMap->find(blas); found!=compactedBLASMap->end())
@@ -4918,6 +4913,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
49184913
dst = IGPUTopLevelAccelerationStructure::writeInstance(dst,instance,blas->getReferenceForDeviceOperations());
49194914
bytesWritten = newWritten;
49204915
}
4916+
return bytesWritten;
49214917
}
49224918

49234919
const compacted_blas_map_t* compactedBLASMap;
@@ -4994,7 +4990,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
49944990
// enqueue ownership release if necessary
49954991
if (finalOwnerQueueFamily!=IQueue::FamilyIgnored)
49964992
{
4997-
compactedOwnershipReleaseIndices.push_back(ownershipTransfers.size());
4993+
if (willCompact)
4994+
compactedOwnershipReleaseIndices.push_back(ownershipTransfers.size());
49984995
ownershipTransfers.push_back({
49994996
.barrier = {
50004997
.dep = {
@@ -5008,7 +5005,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50085005
.range = backingRange
50095006
});
50105007
}
5011-
else
5008+
else if (willCompact)
50125009
compactedOwnershipReleaseIndices.push_back(~0u);
50135010
}
50145011
// finish the last batch
@@ -5049,7 +5046,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50495046
// create and allocate backing buffers for compacted TLASes
50505047
core::vector<asset_cached_t<ICPUBuffer>> backingBuffers(compactions.size());
50515048
{
5052-
MetaDeviceMemoryAllocator deferredAllocator(params.compactedASAllocator,logger);
5049+
MetaDeviceMemoryAllocator deferredAllocator(params.compactedASAllocator ? params.compactedASAllocator:device,logger);
50535050
// create
50545051
for (size_t i=0; i<compactions.size(); i++)
50555052
{
@@ -5182,7 +5179,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
51825179
// in the future we'll also finish host image copies
51835180

51845181
// check dependents before inserting into cache
5185-
if (reqQueueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
5182+
if (reservations.m_queueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
51865183
{
51875184
auto checkDependents = [&]<Asset AssetType>()->void
51885185
{

0 commit comments

Comments
 (0)