Skip to content

Commit 731f077

Browse files
author
devsh
committed
fix various sync bugs in AS building
1 parent d69cd60 commit 731f077

File tree

1 file changed

+22
-16
lines changed

1 file changed

+22
-16
lines changed

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2868,7 +2868,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
28682868
for (const auto& geom : geoms)
28692869
if (const auto triCount=*(pPrimitiveCounts++); triCount)
28702870
{
2871-
auto size = geom.vertexStride*(geom.vertexData[1] ? 2:1)*geom.maxVertex;
2871+
auto size = geom.vertexStride*(geom.vertexData[1] ? 2:1)*(geom.maxVertex+1);
28722872
uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
28732873
if (geom.hasTransform())
28742874
{
@@ -2892,7 +2892,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
28922892
size = core::alignUp(size,indexSize)+triCount*3*indexSize;
28932893
alignment = hlsl::max<uint16_t>(indexSize,alignment);
28942894
}
2895-
inputs.logger.log("%p Triangle Data Size %d Align %d",system::ILogger::ELL_DEBUG,as,size,alignment);
2895+
//inputs.logger.log("%p Triangle Data Size %d Align %d",system::ILogger::ELL_DEBUG,as,size,alignment);
28962896
incrementBuildSize(size,alignment);
28972897
}
28982898
}
@@ -2908,7 +2908,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
29082908
}
29092909
//
29102910
incrementBuildSize(sizes.buildScratchSize,device->getPhysicalDevice()->getLimits().minAccelerationStructureScratchOffsetAlignment);
2911-
inputs.logger.log("%p Scratch Size %d Combined %d",system::ILogger::ELL_DEBUG,as,sizes.buildScratchSize,buildSize);
2911+
//inputs.logger.log("%p Scratch Size %d Combined %d",system::ILogger::ELL_DEBUG,as,sizes.buildScratchSize,buildSize);
29122912

29132913
// we need to save the buffer in a side-channel for later
29142914
auto& out = accelerationStructureParams[IsTLAS][entry.second.firstCopyIx+i];
@@ -4632,7 +4632,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
46324632
auto* scratchBuffer = params.scratchForDeviceASBuild->getBuffer();
46334633
core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
46344634
const bool manualFlush = scratchBuffer->getBoundMemory().memory->haveToMakeVisible();
4635-
if (manualFlush) // TLAS builds do max 2 writes each and BLAS do much more anyway
4635+
if (deviceASBuildScratchPtr && manualFlush) // TLAS builds do max 2 writes each and BLAS do much more anyway
46364636
flushRanges.reserve(asCount*2);
46374637
// lambdas!
46384638
auto streamDataToScratch = [&](const size_t offset, const size_t size,IUtilities::IUpstreamingDataProducer& callback) -> bool
@@ -4644,10 +4644,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
46444644
flushRanges.emplace_back(scratchBuffer->getBoundMemory().memory,offset,size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
46454645
return true;
46464646
}
4647-
else if (const SBufferRange<IGPUBuffer> range={.offset=offset,.size=size,.buffer=smart_refctd_ptr<IGPUBuffer>(scratchBuffer)}; params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,callback))
4648-
return true;
46494647
else
4650-
return false;
4648+
{
4649+
const SBufferRange<IGPUBuffer> range={.offset=offset,.size=size,.buffer=smart_refctd_ptr<IGPUBuffer>(scratchBuffer)};
4650+
const bool retval = params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,callback);
4651+
// current recording buffer may have changed
4652+
xferCmdBuf = params.transfer->getCommandBufferForRecording();
4653+
return retval;
4654+
}
46514655
};
46524656
//
46534657
core::vector<typename AccelerationStructure::DeviceBuildInfo> buildInfos;
@@ -4849,7 +4853,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48494853
for (const auto& geom : canonical->getTriangleGeometries())
48504854
if (const auto triCount=*(pPrimitiveCounts++); triCount)
48514855
{
4852-
auto size = geom.vertexStride*(geom.vertexData[1] ? 2:1)*geom.maxVertex;
4856+
auto size = geom.vertexStride*(geom.vertexData[1] ? 2:1)*(geom.maxVertex+1);
48534857
uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
48544858
if (geom.hasTransform())
48554859
{
@@ -4876,15 +4880,15 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48764880
allocSizes.push_back(size);
48774881
alignments.push_back(alignment);
48784882
const auto tmp = asToBuild.second.scratchSize;
4879-
logger.log("%p Triangle Data Size %d Align %d Scratch Size %d",system::ILogger::ELL_DEBUG,canonical.get(),size,alignment,tmp);
4883+
//logger.log("%p Triangle Data Size %d Align %d Scratch Size %d",system::ILogger::ELL_DEBUG,canonical.get(),size,alignment,tmp);
48804884
}
48814885
}
48824886
}
48834887
allocOffsets.resize(allocSizes.size(),scratch_allocator_t::invalid_value);
48844888
// allocate out scratch or submit overflow, if fail then flush and keep trying till space is made
48854889
auto* offsets = allocOffsets.data()+allocOffsets.size()-alignments.size();
48864890
const auto* sizes = allocSizes.data()+allocSizes.size()-alignments.size();
4887-
logger.log("%p Combined Size %d",system::ILogger::ELL_DEBUG,canonical.get(),std::accumulate(sizes,sizes+alignments.size(),0));
4891+
//logger.log("%p Combined Size %d",system::ILogger::ELL_DEBUG,canonical.get(),std::accumulate(sizes,sizes+alignments.size(),0));
48884892
for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(alignments.size(),offsets,sizes,alignments.data())!=0; t++)
48894893
{
48904894
if (t==1) // don't flush right away cause allocator not defragmented yet
@@ -5042,8 +5046,10 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50425046
if (const auto triCount=*(pPrimitiveCounts++); triCount)
50435047
{
50445048
auto& outGeom = triangles.emplace_back();
5045-
auto offset = *(offsetIt++);
5046-
auto size = geom.vertexStride*geom.maxVertex;
5049+
const auto origSize = *(sizeIt++);
5050+
const auto origOffset = *(offsetIt++);
5051+
auto offset = origOffset;
5052+
auto size = geom.vertexStride*(geom.maxVertex+1);
50475053
for (auto i=0; i<2; i++)
50485054
if (geom.vertexData[i]) // could assert that it must be true for i==0
50495055
{
@@ -5073,11 +5079,13 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50735079
size = triCount*3*alignment;
50745080
memcpyCallback.data = reinterpret_cast<const uint8_t*>(geom.indexData.buffer->getPointer())+geom.indexData.offset;
50755081
success = streamDataToScratch(offset,size,memcpyCallback);
5082+
offset += size;
50765083
break;
50775084
}
50785085
default:
50795086
break;
50805087
}
5088+
assert(offset-origOffset<=origSize);
50815089
if (!success)
50825090
break;
50835091
outGeom.maxVertex = geom.maxVertex;
@@ -5091,8 +5099,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
50915099
success = pPrimitiveCounts==primitiveCounts.data()+primitiveCounts.size();
50925100
rangeInfos.push_back(reinterpret_cast<const IGPUBottomLevelAccelerationStructure::BuildRangeInfo* const&>(geometryRangeInfoOffset));
50935101
}
5094-
// current recording buffer may have changed
5095-
xferCmdBuf = params.transfer->getCommandBufferForRecording();
50965102
if (!success)
50975103
{
50985104
rangeInfos.resize(buildInfos.size());
@@ -5161,7 +5167,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
51615167
)
51625168
{
51635169
// clean AS builds, pipeline barrier, query reset and writes need to get executed before we start waiting on the results
5164-
drainCompute();
5170+
drainBoth();
51655171
// get queries
51665172
core::vector<size_t> sizes(compactions.size());
51675173
if (!device->getQueryPoolResults(queryPool.get(),0,compactions.size(),sizes.data(),sizeof(size_t),bitflag(IQueryPool::RESULTS_FLAGS::WAIT_BIT)|IQueryPool::RESULTS_FLAGS::_64_BIT))
@@ -5301,7 +5307,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
53015307
};
53025308
// submit because we want to launch BLAS builds in a separate submit, so the scratch semaphore can signal and free the scratch and more is available for TLAS builds
53035309
if (pipelineBarrier(computeCmdBuf,{.memBarriers={&readBLASInTLASBuildBarrier,1}},"Failed to sync BLAS with TLAS build!"))
5304-
drainCompute();
5310+
drainBoth();
53055311
else
53065312
failedBLASBarrier = true;
53075313
}

0 commit comments

Comments
 (0)