Skip to content

Commit 41ef540

Browse files
author
devsh
committed
I forgot to make the deferredly created BLASes and TLASes write to the post GPU object creation output array.
Also found that I stored a lot of stuff redundantly in the `DeferredASCreationParams`
1 parent 5290d65 commit 41ef540

File tree

1 file changed

+62
-59
lines changed

1 file changed

+62
-59
lines changed

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 62 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2759,10 +2759,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
27592759
// BLAS and TLAS creation is somewhat delayed by buffer creation and allocation
27602760
struct DeferredASCreationParams
27612761
{
2762-
const IAccelerationStructure* canonical;
27632762
asset_cached_t<ICPUBuffer> storage = {};
2764-
uint64_t patchIx = 0;
2765-
uint64_t uniqueCopyGroupID = 0;
27662763
uint64_t scratchSize = 0;
27672764
uint64_t buildSize = 0;
27682765
};
@@ -2931,7 +2928,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
29312928

29322929
// we need to save the buffer in a side-channel for later
29332930
auto& out = accelerationStructureParams[IsTLAS][entry.second.firstCopyIx+i];
2934-
out.canonical = as;
29352931
// this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
29362932
{
29372933
IGPUBuffer::SCreationParams params = {};
@@ -2950,8 +2946,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
29502946
continue;
29512947
}
29522948
}
2953-
out.patchIx = patchIx;
2954-
out.uniqueCopyGroupID = uniqueCopyGroupID;
29552949
out.scratchSize = sizes.buildScratchSize;
29562950
out.buildSize = buildSize;
29572951
}
@@ -3386,7 +3380,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33863380
}
33873381

33883382
// clear what we don't need
3389-
conversionRequests.gpuObjUniqueCopyGroupIDs.clear();
3383+
if constexpr (!std::is_base_of_v<IAccelerationStructure,AssetType>)
3384+
conversionRequests.gpuObjUniqueCopyGroupIDs.clear();
33903385
// This gets deferred till AFTER the Buffer Memory Allocations and Binding
33913386
if constexpr (!std::is_base_of_v<IAccelerationStructure,AssetType> && !std::is_base_of_v<IDeviceMemoryBacked,typename asset_traits<AssetType>::video_t>)
33923387
{
@@ -3418,7 +3413,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34183413
bufferConversions.propagateToCaches(std::get<dfs_cache<ICPUBuffer>>(dfsCaches),std::get<SReserveResult::staging_cache_t<ICPUBuffer>>(retval.m_stagingCaches));
34193414
// Deal with Deferred Creation of Acceleration structures
34203415
{
3421-
auto createAccelerationStructures = [&]<typename AccelerationStructure>()->void
3416+
auto createAccelerationStructures = [&]<typename AccelerationStructure>(conversions_t<AccelerationStructure>& requests)->void
34223417
{
34233418
constexpr bool IsTLAS = std::is_same_v<AccelerationStructure,ICPUTopLevelAccelerationStructure>;
34243419
//
@@ -3428,63 +3423,70 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34283423
else
34293424
pConversions = retval.m_blasConversions;
34303425
// we enqueue the conversions AFTER making sure that the BLAS / TLAS can actually be created
3431-
for (size_t i=0; i<accelerationStructureParams[IsTLAS].size(); i++)
3432-
if (const auto& deferredParams=accelerationStructureParams[IsTLAS][i]; deferredParams.storage)
3426+
for (auto& entry : requests.contentHashToCanonical)
3427+
for (auto i=0ull; i<entry.second.copyCount; i++)
34333428
{
3434-
const auto canonical = static_cast<const AccelerationStructure*>(deferredParams.canonical);
3435-
const auto& dfsNode = std::get<dfs_cache<AccelerationStructure>>(dfsCaches).nodes[deferredParams.patchIx];
3436-
const auto& patch = dfsNode.patch;
3437-
// create the AS
3438-
const auto bufSz = deferredParams.storage.get()->getSize();
3439-
IGPUAccelerationStructure::SCreationParams baseParams;
3440-
{
3441-
using create_f = IGPUAccelerationStructure::SCreationParams::FLAGS;
3442-
baseParams = {
3443-
.bufferRange = {.offset=0,.size=bufSz,.buffer=deferredParams.storage.value},
3444-
.flags = patch.isMotion ? create_f::MOTION_BIT:create_f::NONE
3445-
};
3446-
}
3447-
smart_refctd_ptr<typename asset_traits<AccelerationStructure>::video_t> as;
3448-
CAssetConverter::SReserveResult::SConvReqTLAS::cpu_to_gpu_blas_map_t blasInstanceMap;
3449-
if constexpr (IsTLAS)
3450-
{
3451-
// check if the BLASes we want to use for the instances were successfully allocated and created
3452-
AssetVisitor<GetDependantVisit<ICPUTopLevelAccelerationStructure>> visitor = {
3453-
{inputs,dfsCaches,&blasInstanceMap},
3454-
{canonical,deferredParams.uniqueCopyGroupID},
3455-
patch
3456-
};
3457-
if (!visitor())
3458-
{
3459-
inputs.logger.log(
3460-
"Failed to find all GPU Bottom Level Acceleration Structures needed to build TLAS %8llx%8llx%8llx%8llx",
3461-
system::ILogger::ELL_ERROR//,hashAsU64[0],hashAsU64[1],hashAsU64[2],hashAsU64[3]
3462-
);
3463-
continue;
3464-
}
3465-
as = device->createTopLevelAccelerationStructure({std::move(baseParams),patch.maxInstances});
3466-
}
3467-
else
3468-
as = device->createBottomLevelAccelerationStructure(std::move(baseParams));
3469-
if (!as)
3429+
const auto reqIx = entry.second.firstCopyIx+i;
3430+
if (const auto& deferredParams=accelerationStructureParams[IsTLAS][reqIx]; deferredParams.storage)
34703431
{
3471-
inputs.logger.log("Failed to Create Acceleration Structure.",system::ILogger::ELL_ERROR);
3472-
continue;
3432+
const auto* canonical = entry.second.canonicalAsset;
3433+
const auto& dfsNode = std::get<dfs_cache<AccelerationStructure>>(dfsCaches).nodes[entry.second.patchIndex.value];
3434+
const auto& patch = dfsNode.patch;
3435+
// create the AS
3436+
const auto bufSz = deferredParams.storage.get()->getSize();
3437+
IGPUAccelerationStructure::SCreationParams baseParams;
3438+
{
3439+
using create_f = IGPUAccelerationStructure::SCreationParams::FLAGS;
3440+
baseParams = {
3441+
.bufferRange = {.offset=0,.size=bufSz,.buffer=deferredParams.storage.value},
3442+
.flags = patch.isMotion ? create_f::MOTION_BIT:create_f::NONE
3443+
};
3444+
}
3445+
smart_refctd_ptr<typename asset_traits<AccelerationStructure>::video_t> as;
3446+
CAssetConverter::SReserveResult::SConvReqTLAS::cpu_to_gpu_blas_map_t blasInstanceMap;
3447+
if constexpr (IsTLAS)
3448+
{
3449+
// check if the BLASes we want to use for the instances were successfully allocated and created
3450+
AssetVisitor<GetDependantVisit<ICPUTopLevelAccelerationStructure>> visitor = {
3451+
{inputs,dfsCaches,&blasInstanceMap},
3452+
{canonical,requests.gpuObjUniqueCopyGroupIDs[reqIx]},
3453+
patch
3454+
};
3455+
if (!visitor())
3456+
{
3457+
const auto hashAsU64 = reinterpret_cast<const uint64_t*>(entry.first.data);
3458+
inputs.logger.log(
3459+
"Failed to find all GPU Bottom Level Acceleration Structures needed to build TLAS %8llx%8llx%8llx%8llx",
3460+
system::ILogger::ELL_ERROR,hashAsU64[0],hashAsU64[1],hashAsU64[2],hashAsU64[3]
3461+
);
3462+
continue;
3463+
}
3464+
as = device->createTopLevelAccelerationStructure({std::move(baseParams),patch.maxInstances});
3465+
}
3466+
else
3467+
as = device->createBottomLevelAccelerationStructure(std::move(baseParams));
3468+
if (!as)
3469+
{
3470+
inputs.logger.log("Failed to Create Acceleration Structure.",system::ILogger::ELL_ERROR);
3471+
continue;
3472+
}
3473+
// file the request for conversion
3474+
auto& request = pConversions[patch.hostBuild][as.get()];
3475+
request.canonical = smart_refctd_ptr<const AccelerationStructure>(canonical);
3476+
request.scratchSize = deferredParams.scratchSize;
3477+
request.compact = patch.compactAfterBuild;
3478+
request.buildFlags = static_cast<uint16_t>(patch.getBuildFlags(canonical).value);
3479+
request.buildSize = deferredParams.buildSize;
3480+
if constexpr (IsTLAS)
3481+
request.instanceMap = std::move(blasInstanceMap);
3482+
requests.assign(entry.first,entry.second.firstCopyIx,i,std::move(as));
34733483
}
3474-
// file the request for conversion
3475-
auto& request = pConversions[patch.hostBuild][as.get()];
3476-
request.canonical = smart_refctd_ptr<const AccelerationStructure>(canonical);
3477-
request.scratchSize = deferredParams.scratchSize;
3478-
request.compact = patch.compactAfterBuild;
3479-
request.buildFlags = static_cast<uint16_t>(patch.getBuildFlags(canonical).value);
3480-
request.buildSize = deferredParams.buildSize;
3481-
if constexpr (IsTLAS)
3482-
request.instanceMap = std::move(blasInstanceMap);
34833484
}
3485+
requests.gpuObjUniqueCopyGroupIDs.clear();
34843486
};
3485-
createAccelerationStructures.template operator()<ICPUBottomLevelAccelerationStructure>();
3487+
createAccelerationStructures.template operator()<ICPUBottomLevelAccelerationStructure>(blasConversions);
34863488
blasConversions.propagateToCaches(std::get<dfs_cache<ICPUBottomLevelAccelerationStructure>>(dfsCaches),std::get<SReserveResult::staging_cache_t<ICPUBottomLevelAccelerationStructure>>(retval.m_stagingCaches));
3487-
createAccelerationStructures.template operator()<ICPUTopLevelAccelerationStructure>();
3489+
createAccelerationStructures.template operator()<ICPUTopLevelAccelerationStructure>(tlasConversions);
34883490
tlasConversions.propagateToCaches(std::get<dfs_cache<ICPUTopLevelAccelerationStructure>>(dfsCaches),std::get<SReserveResult::staging_cache_t<ICPUTopLevelAccelerationStructure>>(retval.m_stagingCaches));
34893491
}
34903492
// enqueue successfully created images with data to upload for conversion
@@ -3577,6 +3579,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
35773579
retval.m_tlasConversions[i].erase(gpuObj);
35783580
if constexpr (std::is_same_v<AssetType,ICPUImage>)
35793581
retval.m_imageConversions.erase(gpuObj);
3582+
// TODO: erase from `retval.m_gpuObjects` as well
35803583
return true;
35813584
}
35823585
// still referenced, keep it around

0 commit comments

Comments
 (0)