@@ -3016,14 +3016,13 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3016
3016
return ;
3017
3017
}
3018
3018
}
3019
- // this is super annoying, was hoping metaprogramming with `has_type` would actually work
3020
- auto getConversionRequests = [&]<typename AssetU>()->auto &{return std::get<SReserveResult::conversion_requests_t <AssetU>>(retval.m_conversionRequests );};
3019
+ //
3021
3020
if constexpr (std::is_same_v<AssetType,ICPUBuffer>)
3022
- getConversionRequests. operator ()<ICPUBuffer>() .emplace_back (core::smart_refctd_ptr<const AssetType>(instance.asset ),created.gpuObj .get ()); ;
3021
+ retval. m_bufferConversions .emplace_back ({ core::smart_refctd_ptr<const AssetType>(instance.asset ),created.gpuObj .get ()}) ;
3023
3022
if constexpr (std::is_same_v<AssetType,ICPUImage>)
3024
3023
{
3025
3024
const uint16_t recomputeMips = created.patch .recomputeMips ;
3026
- getConversionRequests. operator ()<ICPUImage>() .emplace_back (core::smart_refctd_ptr<const AssetType>(instance.asset ),created.gpuObj .get (),recomputeMips);
3025
+ retval. m_imageConversions .emplace_back ({ core::smart_refctd_ptr<const AssetType>(instance.asset ),created.gpuObj .get ()} ,recomputeMips);
3027
3026
}
3028
3027
// TODO: BLAS and TLAS requests
3029
3028
}
@@ -3337,7 +3336,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3337
3336
}
3338
3337
const auto & found = dfsCache.nodes [metadata[i].patchIndex .value ];
3339
3338
// write it out to the results
3340
- if (const auto & gpuObj=found.gpuObj ; gpuObj) // found from the `input.readCache`
3339
+ if (const auto & gpuObj=found.gpuObj ; gpuObj)
3341
3340
{
3342
3341
results[i] = gpuObj;
3343
3342
// if something with this content hash is in the stagingCache, then it must match the `found->gpuObj`
@@ -3372,6 +3371,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3372
3371
auto device = m_params.device ;
3373
3372
const auto reqQueueFlags = reservations.getRequiredQueueFlags ();
3374
3373
3374
+ // compacted TLASes need to be substituted in cache and Descriptor Sets
3375
+ core::unordered_map<IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
3375
3376
// Anything to do?
3376
3377
if (reqQueueFlags.value !=IQueue::FAMILY_FLAGS::NONE)
3377
3378
{
@@ -3536,7 +3537,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3536
3537
};
3537
3538
3538
3539
// upload Buffers
3539
- auto & buffersToUpload = std::get<SReserveResult:: conversion_requests_t <ICPUBuffer>>( reservations.m_conversionRequests ) ;
3540
+ auto & buffersToUpload = reservations.m_bufferConversions ;
3540
3541
{
3541
3542
core::vector<IGPUCommandBuffer::SBufferMemoryBarrier<IGPUCommandBuffer::SOwnershipTransferBarrier>> ownershipTransfers;
3542
3543
ownershipTransfers.reserve (buffersToUpload.size ());
@@ -3630,7 +3631,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3630
3631
origXferStallCallback (tillScratchResettable);
3631
3632
};
3632
3633
3633
- auto & imagesToUpload = std::get<SReserveResult:: conversion_requests_t <ICPUImage>>( reservations.m_conversionRequests ) ;
3634
+ auto & imagesToUpload = reservations.m_imageConversions ;
3634
3635
if (!imagesToUpload.empty ())
3635
3636
{
3636
3637
//
@@ -4088,7 +4089,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4088
4089
}
4089
4090
4090
4091
// BLAS builds
4091
- auto & blasToBuild = std::get<SReserveResult::conversion_requests_t <ICPUBottomLevelAccelerationStructure>>(reservations.m_conversionRequests );
4092
+ core::unordered_map<IGPUBottomLevelAccelerationStructure*,smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> compactedBLASMap;
4093
+ auto & blasToBuild = reservations.m_blasConversions [0 ];
4092
4094
if (const auto blasCount = blasToBuild.size (); blasCount)
4093
4095
{
4094
4096
constexpr auto GeometryIsAABBFlag = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
@@ -4111,6 +4113,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4111
4113
triangles.reserve (totalTriGeoCount);
4112
4114
triangles.reserve (totalAABBGeoCount);
4113
4115
}
4116
+ #if 0
4114
4117
for (auto& item : blasToBuild)
4115
4118
{
4116
4119
auto* as = item.gpuObj;
@@ -4141,13 +4144,15 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4141
4144
computeCmdBuf->cmdbuf->buildAccelerationStructures(buildInfo,rangeInfo);
4142
4145
}
4143
4146
}
4147
+ #endif
4144
4148
}
4145
4149
4146
4150
// TLAS builds
4147
- auto & tlasToBuild = std::get<SReserveResult:: conversion_requests_t <ICPUTopLevelAccelerationStructure>>( reservations.m_conversionRequests ) ;
4151
+ auto & tlasToBuild = reservations.m_tlasConversions [ 0 ] ;
4148
4152
if (!tlasToBuild.empty ())
4149
4153
{
4150
4154
}
4155
+ compactedBLASMap.clear ();
4151
4156
4152
4157
const bool computeSubmitIsNeeded = submitsNeeded.hasFlags (IQueue::FAMILY_FLAGS::COMPUTE_BIT);
4153
4158
// first submit transfer
@@ -4183,6 +4188,9 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4183
4188
}
4184
4189
4185
4190
4191
+ // Descriptor Sets need their TLAS descriptors substituted if the TLASes they reference have been compacted
4192
+ core::vector<IGPUDescriptorSet::SWriteDescriptorSet> tlasRewrites; tlasRewrites.reserve (compactedTLASMap.size ());
4193
+ core::vector<IGPUDescriptorSet::SDescriptorInfo> tlasInfos; tlasInfos.reserve (compactedTLASMap.size ());
4186
4194
// want to check if deps successfully exist
4187
4195
auto missingDependent = [&reservations]<Asset AssetType>(const typename asset_traits<AssetType>::video_t * dep)->bool
4188
4196
{
@@ -4200,13 +4208,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4200
4208
auto & cache = std::get<CCache<AssetType>>(m_caches);
4201
4209
cache.m_forwardMap .reserve (cache.m_forwardMap .size ()+stagingCache.size ());
4202
4210
cache.m_reverseMap .reserve (cache.m_reverseMap .size ()+stagingCache.size ());
4211
+ constexpr bool IsTLAS = std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure>;
4203
4212
for (auto & item : stagingCache)
4204
4213
if (item.second .value !=CHashCache::NoContentHash) // didn't get wiped
4205
4214
{
4206
4215
// rescan all the GPU objects and find out if they depend on anything that failed, if so add to failure set
4207
4216
bool depsMissing = false ;
4208
4217
// only go over types we could actually break via missing upload/build (i.e. pipelines are unbreakable)
4209
- if constexpr (std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure> )
4218
+ if constexpr (IsTLAS )
4210
4219
{
4211
4220
// there's no lifetime tracking (refcounting) from TLAS to BLAS, so one just must trust the pre-TLAS-build input validation to do its job
4212
4221
}
@@ -4225,6 +4234,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4225
4234
if (samplers[i])
4226
4235
depsMissing = missingDependent.operator ()<ICPUSampler>(samplers[i].get ());
4227
4236
}
4237
+ const auto tlasRewriteOldSize = tlasRewrites.size ();
4228
4238
for (auto i=0u ; !depsMissing && i<static_cast <uint32_t >(asset::IDescriptor::E_TYPE::ET_COUNT); i++)
4229
4239
{
4230
4240
const auto type = static_cast <asset::IDescriptor::E_TYPE>(i);
@@ -4251,30 +4261,65 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4251
4261
depsMissing = missingDependent.operator ()<ICPUBufferView>(static_cast <const IGPUBufferView*>(untypedDesc));
4252
4262
break ;
4253
4263
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
4254
- depsMissing = missingDependent.operator ()<ICPUTopLevelAccelerationStructure>(static_cast <const ICPUTopLevelAccelerationStructure*>(untypedDesc));
4264
+ {
4265
+ const auto * tlas = static_cast <const IGPUTopLevelAccelerationStructure*>(untypedDesc);
4266
+ depsMissing = missingDependent.operator ()<ICPUTopLevelAccelerationStructure>(tlas);
4267
+ if (!depsMissing)
4268
+ {
4269
+ auto found = compactedTLASMap.find (tlas);
4270
+ if (found==compactedTLASMap.end ())
4271
+ break ;
4272
+ // written TLAS got compacted, so queue the descriptor for update
4273
+ using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
4274
+ const redirect_t & redirect = layout->getDescriptorRedirect (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
4275
+ const auto bindingRange = redirect.findBindingStorageIndex (redirect_t::storage_offset_t (i));
4276
+ const auto firstElementOffset = redirect.getStorageOffset (bindingRange).data ;
4277
+ tlasRewrites.push_back ({
4278
+ .set = item.first ,
4279
+ .binding = redirect.getBinding (bindingRange).data ,
4280
+ .arrayElement = i-firstElementOffset,
4281
+ .count = 1 , // write them one by one, no point optimizing
4282
+ .info = nullptr // for now
4283
+ });
4284
+ tlasInfos.emplace_back ().desc = smart_refctd_ptr<IGPUTopLevelAccelerationStructure>(found->second );
4285
+ }
4255
4286
break ;
4287
+ }
4256
4288
default :
4257
4289
assert (false );
4258
4290
depsMissing = true ;
4259
4291
break ;
4260
4292
}
4261
4293
}
4262
4294
}
4295
+ // don't bother overwriting a Descriptor Set that won't be marked as successfully converted (inserted into write cache)
4296
+ if (depsMissing)
4297
+ {
4298
+ tlasRewrites.resize (tlasRewriteOldSize);
4299
+ tlasInfos.resize (tlasRewriteOldSize);
4300
+ }
4263
4301
}
4302
+ auto * pGpuObj = item.first ;
4264
4303
if (depsMissing)
4265
4304
{
4266
4305
const auto * hashAsU64 = reinterpret_cast <const uint64_t *>(item.second .value .data );
4267
- logger.log (" GPU Obj %s not writing to final cache because conversion of a dependant failed!" , system::ILogger::ELL_ERROR, item. first ->getObjectDebugName ());
4306
+ logger.log (" GPU Obj %s not writing to final cache because conversion of a dependant failed!" , system::ILogger::ELL_ERROR, pGpuObj ->getObjectDebugName ());
4268
4307
// wipe self, to let users know
4269
4308
item.second .value = {};
4270
4309
continue ;
4271
4310
}
4272
- if (!params.writeCache (item.second ))
4311
+ if (!params.writeCache (item.second )) // TODO: let the user know the pointer too?
4273
4312
continue ;
4313
+ if constexpr (IsTLAS)
4314
+ {
4315
+ auto found = compactedTLASMap.find (pGpuObj);
4316
+ if (found!=compactedTLASMap.end ())
4317
+ pGpuObj = found->second .get ();
4318
+ }
4274
4319
asset_cached_t <AssetType> cached;
4275
- cached.value = core::smart_refctd_ptr<typename asset_traits<AssetType>::video_t >(item.first );
4320
+ cached.value = core::smart_refctd_ptr<typename asset_traits<AssetType>::video_t >(pGpuObj);
4321
+ cache.m_reverseMap .emplace (pGpuObj,item.second );
4276
4322
cache.m_forwardMap .emplace (item.second ,std::move (cached));
4277
- cache.m_reverseMap .emplace (item.first ,item.second );
4278
4323
}
4279
4324
};
4280
4325
// again, need to go bottom up so we can check dependencies being successes
@@ -4293,6 +4338,15 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4293
4338
mergeCache.operator ()<ICPURenderpass>();
4294
4339
mergeCache.operator ()<ICPUGraphicsPipeline>();
4295
4340
mergeCache.operator ()<ICPUDescriptorSet>();
4341
+ // rewrite the queued Descriptor Set TLAS descriptors so they point at the compacted TLASes
4342
+ {
4343
+ compactedTLASMap.clear ();
4344
+ auto * infoIt = tlasInfos.data ();
4345
+ for (auto & write : tlasRewrites)
4346
+ write.info = infoIt++;
4347
+ if (!tlasRewrites.empty ())
4348
+ device->updateDescriptorSets (tlasRewrites,{});
4349
+ }
4296
4350
// mergeCache.operator()<ICPUFramebuffer>();
4297
4351
4298
4352
// no submit was necessary, so should signal the extra semaphores from the host
0 commit comments