Skip to content

Commit b9be039

Browse files
Merge branch 'AS_conv' into master
2 parents dd015a9 + a4307f4 commit b9be039

File tree

6 files changed

+779
-69
lines changed

6 files changed

+779
-69
lines changed

include/nbl/asset/IAccelerationStructure.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure
155155
PREFER_FAST_BUILD_BIT = 0x1u<<3u,
156156
LOW_MEMORY_BIT = 0x1u<<4u,
157157
// Synthetic flag we use to indicate `VkAccelerationStructureGeometryInstancesDataKHR::arrayOfPointers`
158-
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u,
158+
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u, // this flag really shouldn't be settable outside of `video::IGPU`
159159
// Provided by VK_NV_ray_tracing_motion_blur, but we always override and deduce from creation flag because of
160160
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-dstAccelerationStructure-04927
161161
//MOTION_BIT = 0x1u<<5u,

include/nbl/asset/ICPUAccelerationStructure.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class ICPUBottomLevelAccelerationStructure final : public IBottomLevelAccelerati
4747
return {m_geometryPrimitiveCount->begin(),m_geometryPrimitiveCount->end()};
4848
return {};
4949
}
50-
inline std::span<const uint32_t> getGeometryPrimitiveCounts(const size_t geomIx) const
50+
inline std::span<const uint32_t> getGeometryPrimitiveCounts() const
5151
{
5252
if (m_geometryPrimitiveCount)
5353
return {m_geometryPrimitiveCount->begin(),m_geometryPrimitiveCount->end()};
@@ -79,25 +79,25 @@ class ICPUBottomLevelAccelerationStructure final : public IBottomLevelAccelerati
7979
{
8080
if (!isMutable())
8181
return false;
82-
m_buildFlags &= BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
82+
m_buildFlags &= ~BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
8383
m_geometryPrimitiveCount = std::move(ranges);
8484
m_triangleGeoms = std::move(geometries);
8585
m_AABBGeoms = nullptr;
8686
return true;
8787
}
8888

8989
//
90-
inline core::SRange<AABBs<asset::ICPUBuffer>> getAABBGeometries()
90+
inline std::span<AABBs<asset::ICPUBuffer>> getAABBGeometries()
9191
{
9292
if (!isMutable() || !m_AABBGeoms)
93-
return {nullptr,nullptr};
94-
return {m_AABBGeoms->begin(),m_AABBGeoms->end()};
93+
return {};
94+
return {m_AABBGeoms->data(),m_AABBGeoms->size()};
9595
}
96-
inline core::SRange<const AABBs<asset::ICPUBuffer>> getAABBGeometries() const
96+
inline std::span<const AABBs<asset::ICPUBuffer>> getAABBGeometries() const
9797
{
9898
if (!m_AABBGeoms)
99-
return {nullptr,nullptr};
100-
return {m_AABBGeoms->begin(),m_AABBGeoms->end()};
99+
return {};
100+
return {m_AABBGeoms->data(),m_AABBGeoms->size()};
101101
}
102102
inline bool setGeometries(core::smart_refctd_dynamic_array<AABBs<ICPUBuffer>>&& geometries, core::smart_refctd_dynamic_array<uint32_t>&& ranges)
103103
{
@@ -337,17 +337,17 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
337337
std::variant<StaticInstance,MatrixMotionInstance,SRTMotionInstance> instance = StaticInstance{};
338338
};
339339

340-
core::SRange<PolymorphicInstance> getInstances()
340+
std::span<PolymorphicInstance> getInstances()
341341
{
342342
if (!isMutable() || !m_instances)
343-
return {nullptr,nullptr};
344-
return {m_instances->begin(),m_instances->end()};
343+
return {};
344+
return {m_instances->data(),m_instances->size()};
345345
}
346-
core::SRange<const PolymorphicInstance> getInstances() const
346+
std::span<const PolymorphicInstance> getInstances() const
347347
{
348348
if (!m_instances)
349-
return {nullptr,nullptr};
350-
return {m_instances->begin(),m_instances->end()};
349+
return {};
350+
return {m_instances->data(),m_instances->size()};
351351
}
352352
bool setInstances(core::smart_refctd_dynamic_array<PolymorphicInstance>&& _instances)
353353
{
@@ -367,7 +367,7 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
367367
}
368368

369369
//!
370-
constexpr static inline auto AssetType = ET_BOTOM_LEVEL_ACCELERATION_STRUCTURE;
370+
constexpr static inline auto AssetType = ET_TOP_LEVEL_ACCELERATION_STRUCTURE;
371371
inline IAsset::E_TYPE getAssetType() const override { return AssetType; }
372372

373373
inline core::smart_refctd_ptr<IAsset> clone(uint32_t _depth = ~0u) const override

include/nbl/asset/IDescriptorSetLayout.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,16 @@ class IDescriptorSetLayoutBase : public virtual core::IReferenceCounted // TODO:
147147
return getStorageOffset(index);
148148
}
149149

150+
// Weird functions for exceptional situations
151+
// Finds the storage range whose stored offset is the first one strictly greater than `offset`.
// NOTE(review): `getTotalCount()` reads the last entry as the total, so the entries look like
// cumulative (end) offsets and the result would be the binding owning `offset` - confirm.
// Returns a default-constructed `storage_range_index_t` when no stored offset exceeds `offset`.
inline storage_range_index_t findBindingStorageIndex(const storage_offset_t offset) const
{
	const auto end = m_storageOffsets+m_count;
	const auto found = std::upper_bound(m_storageOffsets,end,offset,[](storage_offset_t a, storage_offset_t b)->bool{return a.data<b.data;});
	// nothing greater than `offset`, so it lies past every stored range
	if (found==end)
		return {};
	// the index is the distance from the array start to the hit (not the other way round)
	return storage_range_index_t(found-m_storageOffsets);
}
159+
150160
// Total is read from the last stored offset; with no entries there is nothing to read, so report zero.
inline uint32_t getTotalCount() const
{
	if (m_count==0ull)
		return 0u;
	return m_storageOffsets[m_count-1].data;
}
151161

152162
private:

include/nbl/video/ILogicalDevice.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
4646

4747

4848
//! Basic getters
49+
inline system::ILogger* getLogger() const {return m_logger.get();}
50+
4951
inline const IPhysicalDevice* getPhysicalDevice() const { return m_physicalDevice; }
5052

5153
inline const SPhysicalDeviceFeatures& getEnabledFeatures() const { return m_enabledFeatures; }
@@ -358,7 +360,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
358360
// Create a sampler object to use with ImageViews
359361
virtual core::smart_refctd_ptr<IGPUSampler> createSampler(const IGPUSampler::SParams& _params) = 0;
360362
// acceleration structures
361-
inline core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure> createBottomLevelAccelerationStructure(IGPUAccelerationStructure::SCreationParams&& params)
363+
inline core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure> createBottomLevelAccelerationStructure(IGPUBottomLevelAccelerationStructure::SCreationParams&& params)
362364
{
363365
if (invalidCreationParams(params))
364366
{
@@ -402,7 +404,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
402404
inline AccelerationStructureBuildSizes getAccelerationStructureBuildSizes(
403405
const core::bitflag<IGPUBottomLevelAccelerationStructure::BUILD_FLAGS> flags,
404406
const bool motionBlur,
405-
const std::span<Geometry> geometries,
407+
const std::span<const Geometry> geometries,
406408
const uint32_t* const pMaxPrimitiveCounts
407409
) const
408410
{
@@ -412,7 +414,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
412414
return {};
413415
}
414416

415-
if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags, m_enabledFeatures))
417+
if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags,m_enabledFeatures))
416418
{
417419
NBL_LOG_ERROR("Invalid build flags");
418420
return {};

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 143 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ class CAssetConverter : public core::IReferenceCounted
3838
asset::ICPUSampler,
3939
asset::ICPUShader,
4040
asset::ICPUBuffer,
41-
// acceleration structures,
41+
asset::ICPUBottomLevelAccelerationStructure,
42+
asset::ICPUTopLevelAccelerationStructure,
4243
asset::ICPUImage,
4344
asset::ICPUBufferView,
4445
asset::ICPUImageView,
@@ -71,6 +72,14 @@ class CAssetConverter : public core::IReferenceCounted
7172
{
7273
if (!params.valid())
7374
return nullptr;
75+
#ifndef _NBL_DEBUG
76+
if (!params.optimizer)
77+
{
78+
using pass_e = asset::ISPIRVOptimizer::E_OPTIMIZER_PASS;
79+
// shall we do others?
80+
// explicit initializer_list: a bare `{...}` cannot be deduced by the forwarding factory
params.optimizer = core::make_smart_refctd_ptr<asset::ISPIRVOptimizer>(std::initializer_list<pass_e>{pass_e::EOP_STRIP_DEBUG_INFO});
81+
}
82+
#endif
7483
return core::smart_refctd_ptr<CAssetConverter>(new CAssetConverter(std::move(params)),core::dont_grab);
7584
}
7685
// When getting dependents, the creation parameters of GPU objects will be produced and patched appropriately.
@@ -149,6 +158,75 @@ class CAssetConverter : public core::IReferenceCounted
149158
return {true,retval};
150159
}
151160
};
161+
struct NBL_API2 acceleration_structure_patch_base
162+
{
163+
public:
164+
enum class BuildPreference : uint8_t
165+
{
166+
None = 0,
167+
FastTrace = 1,
168+
FastBuild = 2,
169+
Invalid = 3
170+
};
171+
172+
//! select build flags
173+
uint8_t allowUpdate : 1 = false;
174+
uint8_t allowCompaction : 1 = false;
175+
uint8_t allowDataAccess : 1 = false;
176+
BuildPreference preference : 2 = BuildPreference::Invalid;
177+
uint8_t lowMemory : 1 = false;
178+
//! things that control the build
179+
uint8_t hostBuild : 1 = false;
180+
uint8_t compactAfterBuild : 1 = false;
181+
182+
protected:
183+
bool valid(const ILogicalDevice* device);
184+
185+
template<typename CRTP>
186+
std::pair<bool,CRTP> combine_impl(const CRTP& _this, const CRTP& other) const
187+
{
188+
if (_this.preference!=other.preference || _this.preference==BuildPreference::Invalid)
189+
return {false,_this};
190+
CRTP retval = _this;
191+
retval.allowUpdate |= other.allowUpdate;
192+
retval.allowCompaction |= other.allowCompaction;
193+
retval.allowDataAccess |= other.allowDataAccess;
194+
retval.lowMemory |= other.lowMemory;
195+
retval.hostBuild |= other.hostBuild;
196+
retval.compactAfterBuild |= other.compactAfterBuild;
197+
return {true,retval};
198+
}
199+
};
200+
template<>
201+
struct NBL_API2 patch_impl_t<asset::ICPUBottomLevelAccelerationStructure> : acceleration_structure_patch_base
202+
{
203+
public:
204+
PATCH_IMPL_BOILERPLATE(asset::ICPUBottomLevelAccelerationStructure);
205+
206+
using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
207+
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUBottomLevelAccelerationStructure* blas) const;
208+
209+
protected:
210+
inline std::pair<bool,this_t> combine(const this_t& other) const
211+
{
212+
return combine_impl<this_t>(*this,other);
213+
}
214+
};
215+
template<>
216+
struct NBL_API2 patch_impl_t<asset::ICPUTopLevelAccelerationStructure> : acceleration_structure_patch_base
217+
{
218+
public:
219+
PATCH_IMPL_BOILERPLATE(asset::ICPUTopLevelAccelerationStructure);
220+
221+
using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS;
222+
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUTopLevelAccelerationStructure* tlas) const;
223+
224+
protected:
225+
inline std::pair<bool,this_t> combine(const this_t& other) const
226+
{
227+
return combine_impl<this_t>(*this,other);
228+
}
229+
};
152230
template<>
153231
struct NBL_API2 patch_impl_t<asset::ICPUImage>
154232
{
@@ -458,6 +536,8 @@ class CAssetConverter : public core::IReferenceCounted
458536
virtual const patch_t<asset::ICPUSampler>* operator()(const lookup_t<asset::ICPUSampler>&) const = 0;
459537
virtual const patch_t<asset::ICPUShader>* operator()(const lookup_t<asset::ICPUShader>&) const = 0;
460538
virtual const patch_t<asset::ICPUBuffer>* operator()(const lookup_t<asset::ICPUBuffer>&) const = 0;
539+
virtual const patch_t<asset::ICPUBottomLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
540+
virtual const patch_t<asset::ICPUTopLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUTopLevelAccelerationStructure>&) const = 0;
461541
virtual const patch_t<asset::ICPUImage>* operator()(const lookup_t<asset::ICPUImage>&) const = 0;
462542
virtual const patch_t<asset::ICPUBufferView>* operator()(const lookup_t<asset::ICPUBufferView>&) const = 0;
463543
virtual const patch_t<asset::ICPUImageView>* operator()(const lookup_t<asset::ICPUImageView>&) const = 0;
@@ -577,6 +657,8 @@ class CAssetConverter : public core::IReferenceCounted
577657
bool operator()(lookup_t<asset::ICPUSampler>);
578658
bool operator()(lookup_t<asset::ICPUShader>);
579659
bool operator()(lookup_t<asset::ICPUBuffer>);
660+
bool operator()(lookup_t<asset::ICPUBottomLevelAccelerationStructure>);
661+
bool operator()(lookup_t<asset::ICPUTopLevelAccelerationStructure>);
580662
bool operator()(lookup_t<asset::ICPUImage>);
581663
bool operator()(lookup_t<asset::ICPUBufferView>);
582664
bool operator()(lookup_t<asset::ICPUImageView>);
@@ -717,6 +799,16 @@ class CAssetConverter : public core::IReferenceCounted
717799
return {};
718800
}
719801

802+
// this is a weird signature, but it's for an acceleration structure's backing IGPUBuffer
803+
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBottomLevelAccelerationStructure* blas, const patch_t<asset::ICPUBottomLevelAccelerationStructure>& patch) const
804+
{
805+
return {};
806+
}
807+
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUTopLevelAccelerationStructure* tlas, const patch_t<asset::ICPUTopLevelAccelerationStructure>& patch) const
808+
{
809+
return {};
810+
}
811+
720812
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const patch_t<asset::ICPUImage>& patch) const
721813
{
722814
return {};
@@ -793,6 +885,7 @@ class CAssetConverter : public core::IReferenceCounted
793885
{
794886
// By default the last queue to touch a GPU object will own it after any transfer or compute operations are complete.
795887
// If you want to record a pipeline barrier that will release ownership to another family, override this.
888+
// The overload for the IGPUBuffer may be called with a hash belonging to an Acceleration Structure; this means that it's the storage buffer backing the AS
796889
virtual inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom)
797890
{
798891
return IQueue::FamilyIgnored;
@@ -829,6 +922,11 @@ class CAssetConverter : public core::IReferenceCounted
829922
IUtilities* utilities = nullptr;
830923
// optional, last submit (compute, transfer if no compute needed) signals these in addition to the scratch semaphore
831924
std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignalSemaphores = {};
925+
// specific to Acceleration Structure Build, they need to be at least as large as the largest amount of scratch required for an AS build
926+
CAsyncSingleBufferSubAllocatorST</*TODO: try uint64_t GP Address Allocator*/>* scratchForDeviceASBuild = nullptr;
927+
std::pmr::memory_resource* scratchForHostASBuild = nullptr;
928+
// needs to service allocations without limit, unlike the above where failure will just force a flush and performance of already queued up builds
929+
IDeviceMemoryAllocator* compactedASAllocator = nullptr;
832930
// specific to mip-map recomputation, these are okay defaults for the size of our Descriptor Indexed temporary descriptor set
833931
uint32_t sampledImageBindingCount = 1<<10;
834932
uint32_t storageImageBindingCount = 11<<10;
@@ -853,10 +951,22 @@ class CAssetConverter : public core::IReferenceCounted
853951
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
854952
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags() const {return m_queueFlags;}
855953

954+
// This is just enough memory to build the Acceleration Structures one by one, waiting for each Device Build to complete in between. If 0, there are no Device AS Builds or Compactions to perform.
955+
inline uint64_t getMinASBuildScratchSize(const bool forHostOps) const {return m_minASBuildScratchSize[forHostOps];}
956+
// Enough memory to build and compact all the Acceleration Structures at once, obviously respecting order of BLAS (build->compact) -> TLAS (build->compact)
957+
inline uint64_t getMaxASBuildScratchSize(const bool forHostOps) const {return m_maxASBuildScratchSize[forHostOps];}
958+
// What usage flags your scratch buffer must have; if this returns NONE, there are no Device AS Builds to perform.
959+
inline auto getASBuildScratchUsages() const {return m_ASBuildScratchUsages;}
960+
// tells you if you need to provide a valid `SConvertParams::scratchForHostASBuild`
961+
inline bool willHostASBuild() const {return m_willHostBuildSomeAS;}
962+
// tells you if you need to provide a valid `SConvertParams::compactedASAllocator`
963+
inline bool willCompactAS() const {return m_willHostBuildSomeAS;}
964+
856965
//
857966
inline operator bool() const {return bool(m_converter);}
858967

859-
// until `convert` is called, this will only contain valid entries for items already found in `SInput::readCache`
968+
// Until `convert` is called, the Buffers and Images are not filled with content and Acceleration Structures are not built, unless found in the `SInput::readCache`
969+
// WARNING: The Acceleration Structure Pointer WILL CHANGE after calling `convert` if its patch dictates that it will be compacted! (since AS can't resize)
860970
// TODO: we could also return per-object semaphore values when object is ready for use (would have to propagate two semaphores up through dependants)
861971
template<asset::Asset AssetType>
862972
std::span<const asset_cached_t<AssetType>> getGPUObjects() const {return std::get<vector_t<AssetType>>(m_gpuObjects);}
@@ -911,24 +1021,43 @@ class CAssetConverter : public core::IReferenceCounted
9111021
core::tuple_transform_t<staging_cache_t,supported_asset_types> m_stagingCaches;
9121022
// need a more explicit list of GPU objects that need device-assisted conversion
9131023
template<asset::Asset AssetType>
914-
struct ConversionRequest
1024+
struct SConversionRequestBase
9151025
{
9161026
// canonical asset (the one that provides content)
9171027
core::smart_refctd_ptr<const AssetType> canonical;
9181028
// gpu object to transfer canonical's data to or build it from
9191029
asset_traits<AssetType>::video_t* gpuObj;
920-
// only relevant for images
921-
uint16_t recomputeMips = 0;
9221030
};
923-
template<asset::Asset AssetType>
924-
using conversion_requests_t = core::vector<ConversionRequest<AssetType>>;
925-
using convertible_asset_types = core::type_list<
926-
asset::ICPUBuffer,
927-
asset::ICPUImage/*,
928-
asset::ICPUBottomLevelAccelerationStructure,
929-
asset::ICPUTopLevelAccelerationStructure*/
930-
>;
931-
core::tuple_transform_t<conversion_requests_t,convertible_asset_types> m_conversionRequests;
1031+
using SConvReqBuffer = SConversionRequestBase<asset::ICPUBuffer>;
1032+
core::vector<SConvReqBuffer> m_bufferConversions;
1033+
struct SConvReqImage : SConversionRequestBase<asset::ICPUImage>
1034+
{
1035+
bool recomputeMips = 0;
1036+
};
1037+
core::vector<SConvReqImage> m_imageConversions;
1038+
template<typename CPUAccelerationStructure>// requires std::is_base_of_v<asset::ICPUAccelerationStructure,CPUAccelerationStructure>
1039+
struct SConvReqAccelerationStructure : SConversionRequestBase<CPUAccelerationStructure>
1040+
{
1041+
constexpr static inline uint64_t WontCompact = (0x1ull<<48)-1;
1042+
inline bool compact() const {return compactedASWriteOffset!=WontCompact;}
1043+
1044+
using build_f = typename CPUAccelerationStructure::BUILD_FLAGS;
1045+
inline void setBuildFlags(const build_f _flags) {buildFlags = static_cast<uint16_t>(_flags);}
1046+
inline build_f getBuildFlags() const {return static_cast<build_f>(buildFlags);}
1047+
1048+
1049+
uint64_t compactedASWriteOffset : 48 = WontCompact;
1050+
uint64_t buildFlags : 16 = static_cast<uint16_t>(build_f::NONE);
1051+
};
1052+
core::vector<SConvReqAccelerationStructure<asset::ICPUBottomLevelAccelerationStructure>> m_blasConversions[2];
1053+
core::vector<SConvReqAccelerationStructure<asset::ICPUTopLevelAccelerationStructure>> m_tlasConversions[2];
1054+
1055+
//
1056+
uint64_t m_minASBuildScratchSize[2] = {0,0};
1057+
uint64_t m_maxASBuildScratchSize[2] = {0,0};
1058+
core::bitflag<IGPUBuffer::E_USAGE_FLAGS> m_ASBuildScratchUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_NONE;
1059+
uint8_t m_willHostBuildSomeAS : 1 = false;
1060+
uint8_t m_willCompactSomeAS : 1 = false;
9321061

9331062
//
9341063
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;

0 commit comments

Comments
 (0)