@@ -38,7 +38,8 @@ class CAssetConverter : public core::IReferenceCounted
38
38
asset::ICPUSampler,
39
39
asset::ICPUShader,
40
40
asset::ICPUBuffer,
41
- // acceleration structures,
41
+ asset::ICPUBottomLevelAccelerationStructure,
42
+ asset::ICPUTopLevelAccelerationStructure,
42
43
asset::ICPUImage,
43
44
asset::ICPUBufferView,
44
45
asset::ICPUImageView,
@@ -71,6 +72,14 @@ class CAssetConverter : public core::IReferenceCounted
71
72
{
	// reject malformed creation parameters outright
	if (!params.valid())
		return nullptr;
	// In release builds, if the caller didn't supply their own SPIR-V optimizer, default to one that strips debug info.
	#ifndef _NBL_DEBUG
	if (!params.optimizer)
	{
		using pass_e = asset::ISPIRVOptimizer::E_OPTIMIZER_PASS;
		// shall we do others?
		// NOTE: was `make_smart_rectd_ptr` (typo) and an unqualified enumerator — `pass_e` is an enum class so it must be qualified,
		// and a bare braced list cannot deduce through the forwarding of `make_smart_refctd_ptr`, hence the explicit span.
		// assumes ISPIRVOptimizer copies the pass list in its constructor — TODO confirm
		params.optimizer = core::make_smart_refctd_ptr<asset::ISPIRVOptimizer>(std::span<const pass_e>({pass_e::EOP_STRIP_DEBUG_INFO}));
	}
	#endif
	// `dont_grab` because the raw `new` already starts the object at refcount 1
	return core::smart_refctd_ptr<CAssetConverter>(new CAssetConverter(std::move(params)),core::dont_grab);
}
76
85
// When getting dependents, the creation parameters of GPU objects will be produced and patched appropriately.
@@ -149,6 +158,75 @@ class CAssetConverter : public core::IReferenceCounted
149
158
return {true ,retval};
150
159
}
151
160
};
161
+ struct NBL_API2 acceleration_structure_patch_base
162
+ {
163
+ public:
164
+ enum class BuildPreference : uint8_t
165
+ {
166
+ None = 0 ,
167
+ FastTrace = 1 ,
168
+ FastBuild = 2 ,
169
+ Invalid = 3
170
+ };
171
+
172
+ // ! select build flags
173
+ uint8_t allowUpdate : 1 = false ;
174
+ uint8_t allowCompaction : 1 = false ;
175
+ uint8_t allowDataAccess : 1 = false ;
176
+ BuildPreference preference : 2 = BuildPreference::Invalid;
177
+ uint8_t lowMemory : 1 = false ;
178
+ // ! things that control the build
179
+ uint8_t hostBuild : 1 = false ;
180
+ uint8_t compactAfterBuild : 1 = false ;
181
+
182
+ protected:
183
+ bool valid (const ILogicalDevice* device);
184
+
185
+ template <typename CRTP>
186
+ std::pair<bool ,CRTP> combine_impl (const CRTP& _this, const CRTP& other) const
187
+ {
188
+ if (_this.preference !=other.preference || _this.preference ==BuildPreference::Invalid)
189
+ return {false ,_this};
190
+ CRTP retval = _this;
191
+ retval.allowUpdate |= other.allowUpdate ;
192
+ retval.allowCompaction |= other.allowCompaction ;
193
+ retval.allowDataAccess |= other.allowDataAccess ;
194
+ retval.lowMemory |= other.lowMemory ;
195
+ retval.hostBuild |= other.hostBuild ;
196
+ retval.compactAfterBuild |= other.compactAfterBuild ;
197
+ return {true ,retval};
198
+ }
199
+ };
200
+ template <>
201
+ struct NBL_API2 patch_impl_t <asset::ICPUBottomLevelAccelerationStructure> : acceleration_structure_patch_base
202
+ {
203
+ public:
204
+ PATCH_IMPL_BOILERPLATE (asset::ICPUBottomLevelAccelerationStructure);
205
+
206
+ using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
207
+ core::bitflag<build_flags_t > getBuildFlags (const asset::ICPUBottomLevelAccelerationStructure* blas) const ;
208
+
209
+ protected:
210
+ inline std::pair<bool ,this_t > combine (const this_t & other) const
211
+ {
212
+ return combine_impl<this_t >(*this ,other);
213
+ }
214
+ };
215
+ template <>
216
+ struct NBL_API2 patch_impl_t <asset::ICPUTopLevelAccelerationStructure> : acceleration_structure_patch_base
217
+ {
218
+ public:
219
+ PATCH_IMPL_BOILERPLATE (asset::ICPUTopLevelAccelerationStructure);
220
+
221
+ using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS;
222
+ core::bitflag<build_flags_t > getBuildFlags (const asset::ICPUTopLevelAccelerationStructure* tlas) const ;
223
+
224
+ protected:
225
+ inline std::pair<bool ,this_t > combine (const this_t & other) const
226
+ {
227
+ return combine_impl<this_t >(*this ,other);
228
+ }
229
+ };
152
230
template <>
153
231
struct NBL_API2 patch_impl_t <asset::ICPUImage>
154
232
{
@@ -458,6 +536,8 @@ class CAssetConverter : public core::IReferenceCounted
458
536
virtual const patch_t <asset::ICPUSampler>* operator ()(const lookup_t <asset::ICPUSampler>&) const = 0;
459
537
virtual const patch_t <asset::ICPUShader>* operator ()(const lookup_t <asset::ICPUShader>&) const = 0;
460
538
virtual const patch_t <asset::ICPUBuffer>* operator ()(const lookup_t <asset::ICPUBuffer>&) const = 0;
539
+ virtual const patch_t <asset::ICPUBottomLevelAccelerationStructure>* operator ()(const lookup_t <asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
540
+ virtual const patch_t <asset::ICPUTopLevelAccelerationStructure>* operator ()(const lookup_t <asset::ICPUTopLevelAccelerationStructure>&) const = 0;
461
541
virtual const patch_t <asset::ICPUImage>* operator ()(const lookup_t <asset::ICPUImage>&) const = 0;
462
542
virtual const patch_t <asset::ICPUBufferView>* operator ()(const lookup_t <asset::ICPUBufferView>&) const = 0;
463
543
virtual const patch_t <asset::ICPUImageView>* operator ()(const lookup_t <asset::ICPUImageView>&) const = 0;
@@ -577,6 +657,8 @@ class CAssetConverter : public core::IReferenceCounted
577
657
bool operator ()(lookup_t <asset::ICPUSampler>);
578
658
bool operator ()(lookup_t <asset::ICPUShader>);
579
659
bool operator ()(lookup_t <asset::ICPUBuffer>);
660
+ bool operator ()(lookup_t <asset::ICPUBottomLevelAccelerationStructure>);
661
+ bool operator ()(lookup_t <asset::ICPUTopLevelAccelerationStructure>);
580
662
bool operator ()(lookup_t <asset::ICPUImage>);
581
663
bool operator ()(lookup_t <asset::ICPUBufferView>);
582
664
bool operator ()(lookup_t <asset::ICPUImageView>);
@@ -717,6 +799,16 @@ class CAssetConverter : public core::IReferenceCounted
717
799
return {};
718
800
}
719
801
802
+ // this a weird signature, but its for an acceleration structure backing IGPUBuffer
803
+ virtual inline std::span<const uint32_t > getSharedOwnershipQueueFamilies (const size_t groupCopyID, const asset::ICPUBottomLevelAccelerationStructure* blas, const patch_t <asset::ICPUBottomLevelAccelerationStructure>& patch) const
804
+ {
805
+ return {};
806
+ }
807
+ virtual inline std::span<const uint32_t > getSharedOwnershipQueueFamilies (const size_t groupCopyID, const asset::ICPUTopLevelAccelerationStructure* tlas, const patch_t <asset::ICPUTopLevelAccelerationStructure>& patch) const
808
+ {
809
+ return {};
810
+ }
811
+
720
812
virtual inline std::span<const uint32_t > getSharedOwnershipQueueFamilies (const size_t groupCopyID, const asset::ICPUImage* buffer, const patch_t <asset::ICPUImage>& patch) const
721
813
{
722
814
return {};
@@ -793,6 +885,7 @@ class CAssetConverter : public core::IReferenceCounted
793
885
{
794
886
// By default the last to queue to touch a GPU object will own it after any transfer or compute operations are complete.
795
887
// If you want to record a pipeline barrier that will release ownership to another family, override this.
888
+ // The overload for the IGPUBuffer may be called with a hash belonging to an Acceleration Structure, which means it's the storage buffer backing the AS
796
889
virtual inline uint32_t getFinalOwnerQueueFamily (const IGPUBuffer* buffer, const core::blake3_hash_t & createdFrom)
797
890
{
798
891
return IQueue::FamilyIgnored;
@@ -829,6 +922,11 @@ class CAssetConverter : public core::IReferenceCounted
829
922
IUtilities* utilities = nullptr ;
830
923
// optional, last submit (compute, transfer if no compute needed) signals these in addition to the scratch semaphore
831
924
std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignalSemaphores = {};
925
+ // specific to Acceleration Structure Build, it needs to be at least as large as the largest amount of scratch required for an AS build
926
+ CAsyncSingleBufferSubAllocatorST</* TODO: try uint64_t GP Address Allocator*/ >* scratchForDeviceASBuild = nullptr ;
927
+ std::pmr::memory_resource* scratchForHostASBuild = nullptr ;
928
+ // needs to service allocations without limit, unlike the above where failure will just force a flush and a wait on the performance of already queued-up builds
929
+ IDeviceMemoryAllocator* compactedASAllocator = nullptr ;
832
930
// specific to mip-map recomputation, these are okay defaults for the size of our Descriptor Indexed temporary descriptor set
833
931
uint32_t sampledImageBindingCount = 1 <<10 ;
834
932
uint32_t storageImageBindingCount = 11 <<10 ;
@@ -853,10 +951,22 @@ class CAssetConverter : public core::IReferenceCounted
853
951
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
854
952
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags () const {return m_queueFlags;}
855
953
954
+ // This is just enough memory to build the Acceleration Structures one by one waiting for each Device Build to complete inbetween. If 0 there are no Device AS Builds or Compactions to perform.
955
+ inline uint64_t getMinASBuildScratchSize (const bool forHostOps) const {return m_minASBuildScratchSize[forHostOps];}
956
+ // Enough memory to build and compact all the Acceleration Structures at once, obviously respecting order of BLAS (build->compact) -> TLAS (build->compact)
957
+ inline uint64_t getMaxASBuildScratchSize (const bool forHostOps) const {return m_maxASBuildScratchSize[forHostOps];}
958
+ // What usage flags your scratch buffer must have, if returns NONE means are no Device AS Builds to perform.
959
+ inline auto getASBuildScratchUsages () const {return m_ASBuildScratchUsages;}
960
+ // tells you if you need to provide a valid `SConvertParams::scratchForHostASBuild`
961
+ inline bool willHostASBuild () const {return m_willHostBuildSomeAS;}
962
+ // tells you if you need to provide a valid `SConvertParams::compactedASAllocator`
963
+ inline bool willCompactAS () const {return m_willHostBuildSomeAS;}
964
+
856
965
//
857
966
inline operator bool () const {return bool (m_converter);}
858
967
859
- // until `convert` is called, this will only contain valid entries for items already found in `SInput::readCache`
968
+ // Until `convert` is called, the Buffers and Images are not filled with content and Acceleration Structures are not built, unless found in the `SInput::readCache`
969
+ // WARNING: The Acceleration Structure Pointer WILL CHANGE after calling `convert` if its patch dictates that it will be compacted! (since AS can't resize)
860
970
// TODO: we could also return per-object semaphore values when object is ready for use (would have to propagate two semaphores up through dependants)
861
971
template <asset::Asset AssetType>
862
972
std::span<const asset_cached_t <AssetType>> getGPUObjects () const {return std::get<vector_t <AssetType>>(m_gpuObjects);}
@@ -911,24 +1021,43 @@ class CAssetConverter : public core::IReferenceCounted
911
1021
core::tuple_transform_t <staging_cache_t ,supported_asset_types> m_stagingCaches;
912
1022
// need a more explicit list of GPU objects that need device-assisted conversion
913
1023
template <asset::Asset AssetType>
914
- struct ConversionRequest
1024
+ struct SConversionRequestBase
915
1025
{
916
1026
// canonical asset (the one that provides content)
917
1027
core::smart_refctd_ptr<const AssetType> canonical;
918
1028
// gpu object to transfer canonical's data to or build it from
919
1029
asset_traits<AssetType>::video_t * gpuObj;
920
- // only relevant for images
921
- uint16_t recomputeMips = 0 ;
922
1030
};
923
- template <asset::Asset AssetType>
924
- using conversion_requests_t = core::vector<ConversionRequest<AssetType>>;
925
- using convertible_asset_types = core::type_list<
926
- asset::ICPUBuffer,
927
- asset::ICPUImage/* ,
928
- asset::ICPUBottomLevelAccelerationStructure,
929
- asset::ICPUTopLevelAccelerationStructure*/
930
- >;
931
- core::tuple_transform_t <conversion_requests_t ,convertible_asset_types> m_conversionRequests;
1031
+ using SConvReqBuffer = SConversionRequestBase<asset::ICPUBuffer>;
1032
+ core::vector<SConvReqBuffer> m_bufferConversions;
1033
+ struct SConvReqImage : SConversionRequestBase<asset::ICPUImage>
1034
+ {
1035
+ bool recomputeMips = 0 ;
1036
+ };
1037
+ core::vector<SConvReqImage> m_imageConversions;
1038
+ template <typename CPUAccelerationStructure>// requires std::is_base_of_v<asset::ICPUAccelerationStructure,CPUAccelerationStructure>
1039
+ struct SConvReqAccelerationStructure : SConversionRequestBase<CPUAccelerationStructure>
1040
+ {
1041
+ constexpr static inline uint64_t WontCompact = (0x1ull <<48 )-1 ;
1042
+ inline bool compact () const {return compactedASWriteOffset!=WontCompact;}
1043
+
1044
+ using build_f = typename CPUAccelerationStructure::BUILD_FLAGS;
1045
+ inline void setBuildFlags (const build_f _flags) {buildFlags = static_cast <uint16_t >(_flags);}
1046
+ inline build_f getBuildFlags () const {return static_cast <build_f>(buildFlags);}
1047
+
1048
+
1049
+ uint64_t compactedASWriteOffset : 48 = WontCompact;
1050
+ uint64_t buildFlags : 16 = static_cast <uint16_t >(build_f::NONE);
1051
+ };
1052
+ core::vector<SConvReqAccelerationStructure<asset::ICPUBottomLevelAccelerationStructure>> m_blasConversions[2 ];
1053
+ core::vector<SConvReqAccelerationStructure<asset::ICPUTopLevelAccelerationStructure>> m_tlasConversions[2 ];
1054
+
1055
+ //
1056
+ uint64_t m_minASBuildScratchSize[2 ] = {0 ,0 };
1057
+ uint64_t m_maxASBuildScratchSize[2 ] = {0 ,0 };
1058
+ core::bitflag<IGPUBuffer::E_USAGE_FLAGS> m_ASBuildScratchUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_NONE;
1059
+ uint8_t m_willHostBuildSomeAS : 1 = false ;
1060
+ uint8_t m_willCompactSomeAS : 1 = false ;
932
1061
933
1062
//
934
1063
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;
0 commit comments