@@ -3401,10 +3401,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3401
3401
auto device = m_params.device ;
3402
3402
const auto reqQueueFlags = reservations.getRequiredQueueFlags ();
3403
3403
3404
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
3405
3404
// compacted TLASes need to be substituted in cache and Descriptor Sets
3406
- core::unordered_map<IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
3407
- #endif
3405
+ core::unordered_map<const IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
3408
3406
// Anything to do?
3409
3407
if (reqQueueFlags.value !=IQueue::FAMILY_FLAGS::NONE)
3410
3408
{
@@ -4221,19 +4219,18 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4221
4219
}
4222
4220
4223
4221
4224
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4225
4222
// Descriptor Sets need their TLAS descriptors substituted if they've been compacted
4226
4223
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> tlasRewrites; tlasRewrites.reserve (compactedTLASMap.size ());
4227
4224
core::vector<IGPUDescriptorSet::SDescriptorInfo> tlasInfos; tlasInfos.reserve (compactedTLASMap.size ());
4228
- #endif
4229
4225
// want to check if deps successfully exist
4230
4226
auto missingDependent = [&reservations]<Asset AssetType>(const typename asset_traits<AssetType>::video_t * dep)->bool
4231
4227
{
4232
4228
auto & stagingCache = std::get<SReserveResult::staging_cache_t <AssetType>>(reservations.m_stagingCaches );
4233
4229
auto found = stagingCache.find (const_cast <asset_traits<AssetType>::video_t *>(dep));
4230
+ // this only checks whether we had to convert and failed
4234
4231
if (found!=stagingCache.end () && found->second .value ==CHashCache::NoContentHash)
4235
4232
return true ;
4236
- // dependent might be in readCache of one or more converters, so if in doubt assume its okay
4233
+ // but the dependent might be in readCache of one or more converters, so if in doubt assume it's okay
4237
4234
return false ;
4238
4235
};
4239
4236
// insert items into cache if overflows handled fine and commandbuffers ready to be recorded
@@ -4250,12 +4247,11 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4250
4247
// rescan all the GPU objects and find out if they depend on anything that failed, if so add to failure set
4251
4248
bool depsMissing = false ;
4252
4249
// only go over types we could actually break via missing upload/build (i.e. pipelines are unbreakable)
4253
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4254
4250
if constexpr (IsTLAS)
4255
4251
{
4256
- // there's no lifetime tracking (refcounting) from TLAS to BLAS , so one just must trust the pre-TLAS-build input validation to do its job
4252
+ // A built TLAS cannot be queried about the BLASes it contains , so just trust the pre-TLAS-build input validation did its job
4257
4253
}
4258
- # endif
4254
+
4259
4255
if constexpr (std::is_same_v<AssetType,ICPUBufferView>)
4260
4256
depsMissing = missingDependent.operator ()<ICPUBuffer>(item.first ->getUnderlyingBuffer ());
4261
4257
if constexpr (std::is_same_v<AssetType,ICPUImageView>)
@@ -4271,9 +4267,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4271
4267
if (samplers[i])
4272
4268
depsMissing = missingDependent.operator ()<ICPUSampler>(samplers[i].get ());
4273
4269
}
4274
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4275
4270
const auto tlasRewriteOldSize = tlasRewrites.size ();
4276
- #endif
4277
4271
for (auto i=0u ; !depsMissing && i<static_cast <uint32_t >(asset::IDescriptor::E_TYPE::ET_COUNT); i++)
4278
4272
{
4279
4273
const auto type = static_cast <asset::IDescriptor::E_TYPE>(i);
@@ -4299,7 +4293,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4299
4293
case asset::IDescriptor::EC_BUFFER_VIEW:
4300
4294
depsMissing = missingDependent.operator ()<ICPUBufferView>(static_cast <const IGPUBufferView*>(untypedDesc));
4301
4295
break ;
4302
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4303
4296
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
4304
4297
{
4305
4298
const auto * tlas = static_cast <const IGPUTopLevelAccelerationStructure*>(untypedDesc);
@@ -4314,33 +4307,30 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4314
4307
const redirect_t & redirect = layout->getDescriptorRedirect (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
4315
4308
const auto bindingRange = redirect.findBindingStorageIndex (redirect_t::storage_offset_t (i));
4316
4309
const auto firstElementOffset = redirect.getStorageOffset (bindingRange).data ;
4317
- tlasRewrites.push_back ({
4318
- .set = item.first ,
4310
+ tlasRewrites.push_back (IGPUDescriptorSet::SWriteDescriptorSet {
4311
+ .dstSet = item.first ,
4319
4312
.binding = redirect.getBinding (bindingRange).data ,
4320
4313
.arrayElement = i-firstElementOffset,
4321
4314
.count = 1 , // write them one by one, no point optimizing
4322
- .info = nullptr // for now
4315
+ .info = nullptr // for now, will set once the vector of infos stops growing
4323
4316
});
4324
4317
tlasInfos.emplace_back ().desc = smart_refctd_ptr<IGPUTopLevelAccelerationStructure>(found->second );
4325
4318
}
4326
4319
break ;
4327
4320
}
4328
- #endif
4329
4321
default :
4330
4322
assert (false );
4331
4323
depsMissing = true ;
4332
4324
break ;
4333
4325
}
4334
4326
}
4335
4327
}
4336
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4337
4328
// don't bother overwriting a Descriptor Set that won't be marked as successfully converted (inserted into write cache)
4338
4329
if (depsMissing)
4339
4330
{
4340
4331
tlasRewrites.resize (tlasRewriteOldSize);
4341
4332
tlasInfos.resize (tlasRewriteOldSize);
4342
4333
}
4343
- #endif
4344
4334
}
4345
4335
auto * pGpuObj = item.first ;
4346
4336
if (depsMissing)
@@ -4351,16 +4341,16 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4351
4341
item.second .value = {};
4352
4342
continue ;
4353
4343
}
4354
- if (!params.writeCache (item.second )) // TODO: let the user know the pointer too?
4355
- continue ;
4356
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4344
+ // For TLASes we need to write the compacted TLAS and not the intermediate build to the Cache
4357
4345
if constexpr (IsTLAS)
4358
4346
{
4359
4347
auto found = compactedTLASMap.find (pGpuObj);
4360
4348
if (found!=compactedTLASMap.end ())
4361
4349
pGpuObj = found->second .get ();
4362
4350
}
4363
- #endif
4351
+ // We have success now, but ask callback if we write to the new cache.
4352
+ if (!params.writeCache (item.second )) // TODO: let the user know the pointer to the GPU Object too?
4353
+ continue ;
4364
4354
asset_cached_t <AssetType> cached;
4365
4355
cached.value = core::smart_refctd_ptr<typename asset_traits<AssetType>::video_t >(pGpuObj);
4366
4356
cache.m_reverseMap .emplace (pGpuObj,item.second );
@@ -4385,17 +4375,21 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4385
4375
mergeCache.operator ()<ICPURenderpass>();
4386
4376
mergeCache.operator ()<ICPUGraphicsPipeline>();
4387
4377
mergeCache.operator ()<ICPUDescriptorSet>();
4388
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4389
4378
// deal with rewriting the TLASes with compacted ones
4390
4379
{
4380
+ // not strictly necessary, just provoking refcounting bugs right away if they exist
4391
4381
compactedTLASMap.clear ();
4392
4382
auto * infoIt = tlasInfos.data ();
4383
+ // writes map 1:1 with infos, the lazy way, can finally write the pointer as vector stops growing
4393
4384
for (auto & write : tlasRewrites)
4394
4385
write.info = infoIt++;
4395
4386
if (!tlasRewrites.empty ())
4396
- device->updateDescriptorSets (tlasRewrites,{});
4387
+ {
4388
+ const bool success = device->updateDescriptorSets (tlasRewrites,{});
4389
+ // There's no point in any fault handling, everything we have done should have been valid
4390
+ assert (success);
4391
+ }
4397
4392
}
4398
- #endif
4399
4393
// mergeCache.operator()<ICPUFramebuffer>();
4400
4394
4401
4395
// no submit was necessary, so should signal the extra semaphores from the host
0 commit comments