Skip to content

Commit d1106f9

Browse files
devshkeptsecret
authored and committed
be more clever about how we compute getRequiredQueueFlags
1 parent e473044 commit d1106f9

File tree

2 files changed

+19
-15
lines changed

2 files changed

+19
-15
lines changed

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,12 @@ class CAssetConverter : public core::IReferenceCounted
981981
// What queues you'll need to run the submit
982982
// WARNING: Uploading image region data for depth or stencil formats requires that the transfer queue has GRAPHICS capability!
983983
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
984-
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags() const {return m_queueFlags;}
984+
// Returns the queue family capability flags required for the submit.
// `mappableScratch`: when false and a Device AS build is pending (`willDeviceASBuild()`), TRANSFER_BIT is OR-ed onto the stored flags.
// NOTE(review): presumably AS build inputs have to be copied into a non-mappable scratch buffer with transfer commands - confirm against `convert_impl`.
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags(const bool mappableScratch) const
985+
{
986+
// Device AS build with scratch that is not mappable: require TRANSFER on top of the stored flags
if (willDeviceASBuild() && !mappableScratch)
987+
return m_queueFlags|IQueue::FAMILY_FLAGS::TRANSFER_BIT;
988+
// otherwise the flags stored in `m_queueFlags` are returned unchanged
return m_queueFlags;
989+
}
985990

986991
// This is just enough memory to build the Acceleration Structures one by one, waiting for each Device Build to complete in between. If 0 there are no Device AS Builds or Compactions to perform.
987992
inline uint64_t getMinASBuildScratchSize(const bool forHostOps) const
@@ -995,6 +1000,7 @@ class CAssetConverter : public core::IReferenceCounted
9951000
assert(m_minASBuildScratchSize[forHostOps]<=m_maxASBuildScratchSize[forHostOps]);
9961001
return m_maxASBuildScratchSize[forHostOps];
9971002
}
1003+
// TODO: `getMinCompactedASAllocatorSpace`
9981004
// tells you if you need to provide a valid `SConvertParams::scratchForDeviceASBuild`
9991005
// True when the minimum scratch size queried with `forHostOps==false` is non-zero.
inline bool willDeviceASBuild() const {return getMinASBuildScratchSize(false)>0;}
10001006
// tells you if you need to provide a valid `SConvertParams::scratchForHostASBuild`
@@ -1107,6 +1113,7 @@ class CAssetConverter : public core::IReferenceCounted
11071113
// 0 for device builds, 1 for host builds
11081114
uint64_t m_minASBuildScratchSize[2] = {0,0};
11091115
uint64_t m_maxASBuildScratchSize[2] = {0,0};
1116+
// TODO: make the compaction count the size
11101117
// We do all compactions on the Device for simplicity
11111118
uint8_t m_willCompactSomeAS : 1 = false;
11121119
// This tracks non-root BLASes which are needed for a subsequent TLAS build. Note that even things which are NOT in the staging cache are tracked here to make sure they don't finish their lifetimes early.

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2698,8 +2698,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
26982698
params.queueFamilyIndexCount = queueFamilies.size();
26992699
params.queueFamilyIndices = queueFamilies.data();
27002700
// if creation successful, we will upload
2701-
if (assign(entry.first,entry.second.firstCopyIx,i,device->createBuffer(std::move(params))))
2702-
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
2701+
assign(entry.first,entry.second.firstCopyIx,i,device->createBuffer(std::move(params)));
27032702
}
27042703
}
27052704
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
@@ -2909,10 +2908,12 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
29092908
// if creation successful, we check what queues we need if uploading
29102909
if (assign(entry.first,entry.second.firstCopyIx,i,device->createImage(std::move(params))) && !asset->getRegions().empty())
29112910
{
2911+
// for now until host_image_copy
29122912
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
2913+
// Best effort guess, without actually looking at all regions
29132914
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
29142915
if (isDepthOrStencilFormat(patch.format) && (patch.usageFlags|patch.stencilUsage).hasFlags(IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT))
2915-
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
2916+
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::GRAPHICS_BIT;
29162917
// only if we upload some data can we recompute the mips
29172918
if (patch.recomputeMips)
29182919
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
@@ -3380,6 +3381,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33803381
);
33813382
if (!flushRanges.empty())
33823383
device->flushMappedMemoryRanges(flushRanges);
3384+
if (!retval.m_bufferConversions.empty())
3385+
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
33833386
}
33843387

33853388

@@ -3417,26 +3420,19 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34173420
as = device->createTopLevelAccelerationStructure({baseParams,deferredParams.maxInstanceCount});
34183421
}
34193422
// note that in order to compact an AS you need to allocate a buffer range whose size is known only after the build
3423+
// TODO: compute with alignment
34203424
const auto buildSize = deferredParams.inputSize+deferredParams.scratchSize;
34213425
// sizes for building 1-by-1 vs fully in parallel
34223426
retval.m_minASBuildScratchSize = core::max(buildSize,retval.m_minASBuildScratchSize);
34233427
scratchSizeFullParallelBuild += buildSize;
3424-
if (deferredParams.compactAfterBuild)
3425-
scratchSizeFullParallelCompact += deferredParams.scratchSize;
34263428
// triangles, AABBs or Instance Transforms will need to be supplied from VRAM
3427-
// TODO: also mark somehow that we'll need a BUILD INPUT READ ONLY BUFFER WITH XFER usage
3428-
if (deferredParams.inputSize)
3429-
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT;
34303429
}
34313430
//
3432-
retval.m_maxASBuildScratchSize = core::max(core::max(scratchSizeFullParallelBuild,scratchSizeFullParallelCompact),retval.m_maxASBuildScratchSize);
3431+
retval.m_maxASBuildScratchSize[0] = core::max(scratchSizeFullParallelBuild,retval.m_maxASBuildScratchSize);
34333432
}
34343433
//
3435-
if (retval.m_minASBuildScratchSize)
3436-
{
3434+
if (retval.willDeviceASBuild())
34373435
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
3438-
retval.m_maxASBuildScratchSize = core::max(core::max(scratchSizeFullParallelBLASBuild,scratchSizeFullParallelBLASCompact),core::max(scratchSizeFullParallelTLASBuild,scratchSizeFullParallelTLASCompact));
3439-
}
34403436
}
34413437
#endif
34423438
dedupCreateProp.operator()<ICPUBufferView>();
@@ -3507,7 +3503,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
35073503
}
35083504
assert(reservations.m_converter.get()==this);
35093505
auto device = m_params.device;
3510-
const auto reqQueueFlags = reservations.getRequiredQueueFlags();
3506+
3507+
const auto reqQueueFlags = reservations.getRequiredQueueFlags(false);
35113508

35123509
// compacted TLASes need to be substituted in cache and Descriptor Sets
35133510
core::unordered_map<const IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;

0 commit comments

Comments
 (0)