Skip to content

Commit c32846f

Browse files
author
devsh
committed
Stream the BLAS build inputs, fix a bug and note another one that has to get fixed
1 parent 0ebdda6 commit c32846f

File tree

1 file changed

+28
-13
lines changed

1 file changed

+28
-13
lines changed

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2854,6 +2854,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
28542854
{
28552855
const uint32_t* pPrimitiveCounts = as->getGeometryPrimitiveCounts().data();
28562856
// the code here is not pretty, but DRY-ing of this is for later
2857+
// TODO: ILogicalDevice needs code to query build sizes of ICPUBottomLevelAccelerationStructure geometries!
28572858
if (buildFlags.hasFlags(ICPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT))
28582859
{
28592860
const auto geoms = as->getAABBGeometries();
@@ -4890,9 +4891,9 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48904891
}
48914892
allocOffsets.resize(allocSizes.size(),scratch_allocator_t::invalid_value);
48924893
// allocate out scratch or submit overflow, if fail then flush and keep trying till space is made
4893-
auto* const offsets = allocOffsets.data()+allocOffsets.size()-allocCount;
4894-
const auto* const sizes = allocSizes.data()+allocSizes.size()-allocCount;
4895-
for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(allocCount,offsets,sizes,alignments.data())!=0; t++)
4894+
auto* const offsets = allocOffsets.data()+allocOffsets.size()-alignments.size();
4895+
const auto* const sizes = allocSizes.data()+allocSizes.size()-alignments.size();
4896+
for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(alignments.size(),offsets,sizes,alignments.data())!=0; t++)
48964897
if (t==1) // don't flush right away cause allocator not defragmented yet
48974898
{
48984899
recordBuildCommands();
@@ -5007,6 +5008,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50075008
{
50085009
buildInfo.geometryCount = canonical->getGeometryCount();
50095010
const auto* offsetIt = offsets+1;
5011+
const auto* sizeIt = sizes+1;
50105012
const auto primitiveCounts = canonical->getGeometryPrimitiveCounts();
50115013
for (const auto count : primitiveCounts)
50125014
geometryRangeInfo.push_back({
@@ -5015,14 +5017,17 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50155017
.firstVertex = 0,
50165018
.transformByteOffset = 0
50175019
});
5018-
const uint32_t* pPrimitiveCounts = canonical->getGeometryPrimitiveCounts().data();
5020+
const uint32_t* pPrimitiveCounts = primitiveCounts.data();
5021+
IUtilities::CMemcpyUpstreamingDataProducer memcpyCallback;
50195022
if (buildFlags.hasFlags(GeometryIsAABBFlag))
50205023
{
50215024
for (const auto& geom : canonical->getAABBGeometries())
50225025
if (const auto aabbCount=*(pPrimitiveCounts++); aabbCount)
50235026
{
50245027
auto offset = *(offsetIt++);
5025-
// TODO: stream in the data
5028+
memcpyCallback.data = reinterpret_cast<const uint8_t*>(geom.data.buffer->getPointer())+geom.data.offset;
5029+
if (!streamDataToScratch(offset,*(sizeIt++),memcpyCallback))
5030+
break;
50265031
aabbs.push_back({
50275032
.data = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)},
50285033
.stride = geom.stride,
@@ -5038,19 +5043,24 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50385043
{
50395044
auto& outGeom = triangles.emplace_back();
50405045
auto offset = *(offsetIt++);
5041-
// TODO: stream in the data
5042-
outGeom.vertexData[0] = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
5043-
offset += geom.vertexStride*geom.maxVertex;
5044-
if (geom.vertexData[1])
5046+
auto size = geom.vertexStride*geom.maxVertex;
5047+
for (auto i=0; i<2; i++)
5048+
if (geom.vertexData[i]) // could assert that it must be true for i==0
50455049
{
5046-
outGeom.vertexData[1] = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
5047-
offset += geom.vertexStride*geom.maxVertex;
5050+
outGeom.vertexData[i] = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
5051+
memcpyCallback.data = reinterpret_cast<const uint8_t*>(geom.vertexData[i].buffer->getPointer())+geom.vertexData[i].offset;
5052+
if (!streamDataToScratch(offset,size,memcpyCallback))
5053+
break;
5054+
offset += size;
50485055
}
50495056
if (geom.hasTransform())
50505057
{
50515058
offset = core::alignUp(offset,alignof(float));
50525059
outGeom.transform = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
5053-
offset += sizeof(hlsl::float32_t3x4);
5060+
memcpyCallback.data = &geom.transform;
5061+
if (!streamDataToScratch(offset,sizeof(geom.transform),memcpyCallback))
5062+
break;
5063+
offset += sizeof(geom.transform);
50545064
}
50555065
switch (geom.indexType)
50565066
{
@@ -5060,11 +5070,16 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50605070
const auto alignment = geom.indexType==E_INDEX_TYPE::EIT_16BIT ? alignof(uint16_t):alignof(uint32_t);
50615071
offset = core::alignUp(offset,alignment);
50625072
outGeom.indexData = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
5073+
size = triCount*3*alignment;
5074+
memcpyCallback.data = reinterpret_cast<const uint8_t*>(geom.indexData.buffer->getPointer())+geom.indexData.offset;
5075+
success = streamDataToScratch(offset,size,memcpyCallback);
50635076
break;
50645077
}
50655078
default:
50665079
break;
50675080
}
5081+
if (!success)
5082+
break;
50685083
outGeom.maxVertex = geom.maxVertex;
50695084
outGeom.vertexStride = geom.vertexStride;
50705085
outGeom.vertexFormat = geom.vertexFormat;
@@ -5073,8 +5088,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50735088
}
50745089
buildInfo.triangles = reinterpret_cast<const IGPUBottomLevelAccelerationStructure::Triangles<const IGPUBuffer>* const&>(trianglesOffset);
50755090
}
5091+
success = pPrimitiveCounts==primitiveCounts.data()+primitiveCounts.size();
50765092
rangeInfos.push_back(reinterpret_cast<const IGPUBottomLevelAccelerationStructure::BuildRangeInfo* const&>(geometryRangeInfoOffset));
5077-
success = false;
50785093
}
50795094
// current recording buffer may have changed
50805095
xferCmdBuf = params.transfer->getCommandBufferForRecording();

0 commit comments

Comments
 (0)