Skip to content

Commit 1e3f5dd

Browse files
author
devsh
committed
Decide on the patchable parameters for the TLAS and BLAS builds.
Note that the pointer/build-parameter encoding shouldn't live on the CPU side, but this commit deliberately leaves it untouched. Also fix a typo, change the `core::SRange` to a `std::span`, and add a default SPIR-V optimizer to the asset converter if none is provided.
1 parent 38491b8 commit 1e3f5dd

File tree

3 files changed

+99
-11
lines changed

3 files changed

+99
-11
lines changed

include/nbl/asset/IAccelerationStructure.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure
155155
PREFER_FAST_BUILD_BIT = 0x1u<<3u,
156156
LOW_MEMORY_BIT = 0x1u<<4u,
157157
// Synthetic flag we use to indicate `VkAccelerationStructureGeometryInstancesDataKHR::arrayOfPointers`
158-
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u,
158+
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u, // this flag really shouldn't be settable outside of `video::IGPU`
159159
// Provided by VK_NV_ray_tracing_motion_blur, but we always override and deduce from creation flag because of
160160
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-dstAccelerationStructure-04927
161161
//MOTION_BIT = 0x1u<<5u,

include/nbl/asset/ICPUAccelerationStructure.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -337,17 +337,17 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
337337
std::variant<StaticInstance,MatrixMotionInstance,SRTMotionInstance> instance = StaticInstance{};
338338
};
339339

340-
core::SRange<PolymorphicInstance> getInstances()
340+
std::span<PolymorphicInstance> getInstances()
341341
{
342342
if (!isMutable() || !m_instances)
343-
return {nullptr,nullptr};
344-
return {m_instances->begin(),m_instances->end()};
343+
return {};
344+
return {m_instances->data(),m_instances->size()};
345345
}
346-
core::SRange<const PolymorphicInstance> getInstances() const
346+
std::span<const PolymorphicInstance> getInstances() const
347347
{
348348
if (!m_instances)
349-
return {nullptr,nullptr};
350-
return {m_instances->begin(),m_instances->end()};
349+
return {};
350+
return {m_instances->data(),m_instances->size()};
351351
}
352352
bool setInstances(core::smart_refctd_dynamic_array<PolymorphicInstance>&& _instances)
353353
{
@@ -367,7 +367,7 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
367367
}
368368

369369
//!
370-
constexpr static inline auto AssetType = ET_BOTOM_LEVEL_ACCELERATION_STRUCTURE;
370+
constexpr static inline auto AssetType = ET_TOP_LEVEL_ACCELERATION_STRUCTURE;
371371
inline IAsset::E_TYPE getAssetType() const override { return AssetType; }
372372

373373
inline core::smart_refctd_ptr<IAsset> clone(uint32_t _depth = ~0u) const override

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ class CAssetConverter : public core::IReferenceCounted
3838
asset::ICPUSampler,
3939
asset::ICPUShader,
4040
asset::ICPUBuffer,
41-
// acceleration structures,
41+
asset::ICPUBottomLevelAccelerationStructure,
42+
asset::ICPUTopLevelAccelerationStructure,
4243
asset::ICPUImage,
4344
asset::ICPUBufferView,
4445
asset::ICPUImageView,
@@ -71,6 +72,14 @@ class CAssetConverter : public core::IReferenceCounted
7172
{
7273
if (!params.valid())
7374
return nullptr;
75+
#ifndef _NBL_DEBUG
76+
if (!params.optimizer) // only non-debug builds install a default, and only when the caller supplied no optimizer
77+
{
78+
using pass_e = asset::ISPIRVOptimizer::E_OPTIMIZER_PASS;
79+
// default to stripping debug info only (shall we do others?); built directly because a braced list cannot be deduced by a forwarding factory
80+
params.optimizer = core::smart_refctd_ptr<asset::ISPIRVOptimizer>(new asset::ISPIRVOptimizer({pass_e::EOP_STRIP_DEBUG_INFO}),core::dont_grab);
81+
}
82+
#endif
7483
return core::smart_refctd_ptr<CAssetConverter>(new CAssetConverter(std::move(params)),core::dont_grab);
7584
}
7685
// When getting dependents, the creation parameters of GPU objects will be produced and patched appropriately.
@@ -149,6 +158,71 @@ class CAssetConverter : public core::IReferenceCounted
149158
return {true,retval};
150159
}
151160
};
161+
struct NBL_API2 acceleration_structure_patch_base // build parameters shared by the BLAS and TLAS patch specializations, which both derive from this
162+
{
163+
public:
164+
enum class BuildPreference : uint8_t // every value must fit the 2-bit `preference` bit-field declared below
165+
{
166+
None = 0,
167+
FastTrace = 1,
168+
FastBuild = 2,
169+
Invalid = 3 // default of `preference`; `combine_impl` refuses to merge while it is still set
170+
};
171+
172+
//! select build flags
173+
uint8_t allowUpdate : 1 = false;
174+
uint8_t allowCompaction : 1 = false;
175+
uint8_t allowDataAccess : 1 = false;
176+
BuildPreference preference : 2 = BuildPreference::Invalid; // starts out Invalid, two patches only combine when they agree on a non-Invalid value
177+
uint8_t lowMemory : 1 = false;
178+
//! things that control the build
179+
uint8_t hostBuild : 1 = false;
180+
uint8_t compactAfterBuild : 1 = false;
181+
182+
protected:
183+
bool valid(const ILogicalDevice* device); // only declared here, the definition is not part of this struct body
184+
185+
template<typename CRTP> // CRTP is the derived patch type, so the merged result keeps the full derived type
186+
std::pair<bool,CRTP> combine_impl(const CRTP& _this, const CRTP& other) const // returns {success, merged patch}; on failure the second member is an unmodified copy of `_this`
187+
{
188+
if (_this.preference!=other.preference || _this.preference==BuildPreference::Invalid) // differing or Invalid preferences cannot be merged
189+
return {false,_this};
190+
CRTP retval = _this; // start from `_this`, then OR in every single-bit flag of `other`
191+
retval.allowUpdate |= other.allowUpdate;
192+
retval.allowCompaction |= other.allowCompaction;
193+
retval.allowDataAccess |= other.allowDataAccess;
194+
retval.lowMemory |= other.lowMemory;
195+
retval.hostBuild |= other.hostBuild;
196+
retval.compactAfterBuild |= other.compactAfterBuild;
197+
return {true,retval};
198+
}
199+
};
200+
template<>
201+
struct NBL_API2 patch_impl_t<asset::ICPUBottomLevelAccelerationStructure> : acceleration_structure_patch_base // BLAS patch, inherits all its build parameters from the shared base
202+
{
203+
public:
204+
PATCH_IMPL_BOILERPLATE(asset::ICPUBottomLevelAccelerationStructure); // NOTE(review): macro defined elsewhere, presumably provides `this_t` used below - confirm
205+
206+
protected:
207+
using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS; // shorthand for the asset's BUILD_FLAGS type
208+
inline std::pair<bool,this_t> combine(const this_t& other) const // merges `other` into a copy of this patch, returns {success, merged patch}
209+
{
210+
return combine_impl<this_t>(*this,other); // all the merging rules live in the base class helper
211+
}
212+
};
213+
template<>
214+
struct NBL_API2 patch_impl_t<asset::ICPUTopLevelAccelerationStructure> : acceleration_structure_patch_base // TLAS patch, inherits all its build parameters from the shared base
215+
{
216+
public:
217+
PATCH_IMPL_BOILERPLATE(asset::ICPUTopLevelAccelerationStructure); // NOTE(review): macro defined elsewhere, presumably provides `this_t` used below - confirm
218+
219+
protected:
220+
using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS; // shorthand for the asset's BUILD_FLAGS type
221+
inline std::pair<bool,this_t> combine(const this_t& other) const // merges `other` into a copy of this patch, returns {success, merged patch}
222+
{
223+
return combine_impl<this_t>(*this,other); // all the merging rules live in the base class helper
224+
}
225+
};
152226
template<>
153227
struct NBL_API2 patch_impl_t<asset::ICPUImage>
154228
{
@@ -458,6 +532,8 @@ class CAssetConverter : public core::IReferenceCounted
458532
virtual const patch_t<asset::ICPUSampler>* operator()(const lookup_t<asset::ICPUSampler>&) const = 0;
459533
virtual const patch_t<asset::ICPUShader>* operator()(const lookup_t<asset::ICPUShader>&) const = 0;
460534
virtual const patch_t<asset::ICPUBuffer>* operator()(const lookup_t<asset::ICPUBuffer>&) const = 0;
535+
virtual const patch_t<asset::ICPUBottomLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
536+
virtual const patch_t<asset::ICPUTopLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUTopLevelAccelerationStructure>&) const = 0;
461537
virtual const patch_t<asset::ICPUImage>* operator()(const lookup_t<asset::ICPUImage>&) const = 0;
462538
virtual const patch_t<asset::ICPUBufferView>* operator()(const lookup_t<asset::ICPUBufferView>&) const = 0;
463539
virtual const patch_t<asset::ICPUImageView>* operator()(const lookup_t<asset::ICPUImageView>&) const = 0;
@@ -577,6 +653,8 @@ class CAssetConverter : public core::IReferenceCounted
577653
bool operator()(lookup_t<asset::ICPUSampler>);
578654
bool operator()(lookup_t<asset::ICPUShader>);
579655
bool operator()(lookup_t<asset::ICPUBuffer>);
656+
bool operator()(lookup_t<asset::ICPUBottomLevelAccelerationStructure>);
657+
bool operator()(lookup_t<asset::ICPUTopLevelAccelerationStructure>);
580658
bool operator()(lookup_t<asset::ICPUImage>);
581659
bool operator()(lookup_t<asset::ICPUBufferView>);
582660
bool operator()(lookup_t<asset::ICPUImageView>);
@@ -829,6 +907,8 @@ class CAssetConverter : public core::IReferenceCounted
829907
IUtilities* utilities = nullptr;
830908
// optional, last submit (compute, transfer if no compute needed) signals these in addition to the scratch semaphore
831909
std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignalSemaphores = {};
910+
// specific to Acceleration Structure Build, it needs to be at least as large as the largest amount of scratch required for an AS build
911+
CAsyncSingleBufferSubAllocatorST<>* scratchForASBuild = nullptr;
832912
// specific to mip-map recomputation, these are okay defaults for the size of our Descriptor Indexed temporary descriptor set
833913
uint32_t sampledImageBindingCount = 1<<10;
834914
uint32_t storageImageBindingCount = 11<<10;
@@ -853,6 +933,11 @@ class CAssetConverter : public core::IReferenceCounted
853933
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
854934
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags() const {return m_queueFlags;}
855935

936+
// just enough memory to build the Acceleration Structures one by one, waiting for each build to complete in between
937+
inline uint64_t getMinASBuildScratchSize() const {return m_minASBuildScratchSize;}
938+
// enough memory to build and compact all the Acceleration Structures at once, obviously respecting the order of BLAS (build->compact) -> TLAS (build->compact)
939+
inline uint64_t getMaxASBuildScratchSize() const {return m_maxASBuildScratchSize;}
940+
856941
//
857942
inline operator bool() const {return bool(m_converter);}
858943

@@ -924,12 +1009,15 @@ class CAssetConverter : public core::IReferenceCounted
9241009
using conversion_requests_t = core::vector<ConversionRequest<AssetType>>;
9251010
using convertible_asset_types = core::type_list<
9261011
asset::ICPUBuffer,
927-
asset::ICPUImage/*,
1012+
asset::ICPUImage,
9281013
asset::ICPUBottomLevelAccelerationStructure,
929-
asset::ICPUTopLevelAccelerationStructure*/
1014+
asset::ICPUTopLevelAccelerationStructure
9301015
>;
9311016
core::tuple_transform_t<conversion_requests_t,convertible_asset_types> m_conversionRequests;
9321017

1018+
//
1019+
uint64_t m_minASBuildScratchSize = 0;
1020+
uint64_t m_maxASBuildScratchSize = 0;
9331021
//
9341022
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;
9351023
};

0 commit comments

Comments
 (0)