@@ -2868,7 +2868,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2868
2868
for (const auto & geom : geoms)
2869
2869
if (const auto triCount=*(pPrimitiveCounts++); triCount)
2870
2870
{
2871
- auto size = geom.vertexStride *(geom.vertexData [1 ] ? 2 :1 )*geom.maxVertex ;
2871
+ auto size = geom.vertexStride *(geom.vertexData [1 ] ? 2 :1 )*( geom.maxVertex + 1 ) ;
2872
2872
uint16_t alignment = hlsl::max (0x1u <<hlsl::findLSB (geom.vertexStride ),32u );
2873
2873
if (geom.hasTransform ())
2874
2874
{
@@ -2892,7 +2892,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2892
2892
size = core::alignUp (size,indexSize)+triCount*3 *indexSize;
2893
2893
alignment = hlsl::max<uint16_t >(indexSize,alignment);
2894
2894
}
2895
- inputs.logger .log (" %p Triangle Data Size %d Align %d" ,system::ILogger::ELL_DEBUG,as,size,alignment);
2895
+ // inputs.logger.log("%p Triangle Data Size %d Align %d",system::ILogger::ELL_DEBUG,as,size,alignment);
2896
2896
incrementBuildSize (size,alignment);
2897
2897
}
2898
2898
}
@@ -2908,7 +2908,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2908
2908
}
2909
2909
//
2910
2910
incrementBuildSize (sizes.buildScratchSize ,device->getPhysicalDevice ()->getLimits ().minAccelerationStructureScratchOffsetAlignment );
2911
- inputs.logger .log (" %p Scratch Size %d Combined %d" ,system::ILogger::ELL_DEBUG,as,sizes.buildScratchSize ,buildSize);
2911
+ // inputs.logger.log("%p Scratch Size %d Combined %d",system::ILogger::ELL_DEBUG,as,sizes.buildScratchSize,buildSize);
2912
2912
2913
2913
// we need to save the buffer in a side-channel for later
2914
2914
auto & out = accelerationStructureParams[IsTLAS][entry.second .firstCopyIx +i];
@@ -4632,7 +4632,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4632
4632
auto * scratchBuffer = params.scratchForDeviceASBuild ->getBuffer ();
4633
4633
core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
4634
4634
const bool manualFlush = scratchBuffer->getBoundMemory ().memory ->haveToMakeVisible ();
4635
- if (manualFlush) // TLAS builds do max 2 writes each and BLAS do much more anyway
4635
+ if (deviceASBuildScratchPtr && manualFlush) // TLAS builds do max 2 writes each and BLAS do much more anyway
4636
4636
flushRanges.reserve (asCount*2 );
4637
4637
// lambdas!
4638
4638
auto streamDataToScratch = [&](const size_t offset, const size_t size,IUtilities::IUpstreamingDataProducer& callback) -> bool
@@ -4644,10 +4644,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4644
4644
flushRanges.emplace_back (scratchBuffer->getBoundMemory ().memory ,offset,size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
4645
4645
return true ;
4646
4646
}
4647
- else if (const SBufferRange<IGPUBuffer> range={.offset =offset,.size =size,.buffer =smart_refctd_ptr<IGPUBuffer>(scratchBuffer)}; params.utilities ->updateBufferRangeViaStagingBuffer (*params.transfer ,range,callback))
4648
- return true ;
4649
4647
else
4650
- return false ;
4648
+ {
4649
+ const SBufferRange<IGPUBuffer> range={.offset =offset,.size =size,.buffer =smart_refctd_ptr<IGPUBuffer>(scratchBuffer)};
4650
+ const bool retval = params.utilities ->updateBufferRangeViaStagingBuffer (*params.transfer ,range,callback);
4651
+ // current recording buffer may have changed
4652
+ xferCmdBuf = params.transfer ->getCommandBufferForRecording ();
4653
+ return retval;
4654
+ }
4651
4655
};
4652
4656
//
4653
4657
core::vector<typename AccelerationStructure::DeviceBuildInfo> buildInfos;
@@ -4849,7 +4853,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4849
4853
for (const auto & geom : canonical->getTriangleGeometries ())
4850
4854
if (const auto triCount=*(pPrimitiveCounts++); triCount)
4851
4855
{
4852
- auto size = geom.vertexStride *(geom.vertexData [1 ] ? 2 :1 )*geom.maxVertex ;
4856
+ auto size = geom.vertexStride *(geom.vertexData [1 ] ? 2 :1 )*( geom.maxVertex + 1 ) ;
4853
4857
uint16_t alignment = hlsl::max (0x1u <<hlsl::findLSB (geom.vertexStride ),32u );
4854
4858
if (geom.hasTransform ())
4855
4859
{
@@ -4876,15 +4880,15 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4876
4880
allocSizes.push_back (size);
4877
4881
alignments.push_back (alignment);
4878
4882
const auto tmp = asToBuild.second .scratchSize ;
4879
- logger.log (" %p Triangle Data Size %d Align %d Scratch Size %d" ,system::ILogger::ELL_DEBUG,canonical.get (),size,alignment,tmp);
4883
+ // logger.log("%p Triangle Data Size %d Align %d Scratch Size %d",system::ILogger::ELL_DEBUG,canonical.get(),size,alignment,tmp);
4880
4884
}
4881
4885
}
4882
4886
}
4883
4887
allocOffsets.resize (allocSizes.size (),scratch_allocator_t ::invalid_value);
4884
4888
// allocate out scratch or submit overflow, if fail then flush and keep trying till space is made
4885
4889
auto * offsets = allocOffsets.data ()+allocOffsets.size ()-alignments.size ();
4886
4890
const auto * sizes = allocSizes.data ()+allocSizes.size ()-alignments.size ();
4887
- logger.log (" %p Combined Size %d" ,system::ILogger::ELL_DEBUG,canonical.get (),std::accumulate (sizes,sizes+alignments.size (),0 ));
4891
+ // logger.log("%p Combined Size %d",system::ILogger::ELL_DEBUG,canonical.get(),std::accumulate(sizes,sizes+alignments.size(),0));
4888
4892
for (uint32_t t=0 ; params.scratchForDeviceASBuild ->multi_allocate (alignments.size (),offsets,sizes,alignments.data ())!=0 ; t++)
4889
4893
{
4890
4894
if (t==1 ) // don't flush right away cause allocator not defragmented yet
@@ -5042,8 +5046,10 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5042
5046
if (const auto triCount=*(pPrimitiveCounts++); triCount)
5043
5047
{
5044
5048
auto & outGeom = triangles.emplace_back ();
5045
- auto offset = *(offsetIt++);
5046
- auto size = geom.vertexStride *geom.maxVertex ;
5049
+ const auto origSize = *(sizeIt++);
5050
+ const auto origOffset = *(offsetIt++);
5051
+ auto offset = origOffset;
5052
+ auto size = geom.vertexStride *(geom.maxVertex +1 );
5047
5053
for (auto i=0 ; i<2 ; i++)
5048
5054
if (geom.vertexData [i]) // could assert that it must be true for i==0
5049
5055
{
@@ -5073,11 +5079,13 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5073
5079
size = triCount*3 *alignment;
5074
5080
memcpyCallback.data = reinterpret_cast <const uint8_t *>(geom.indexData .buffer ->getPointer ())+geom.indexData .offset ;
5075
5081
success = streamDataToScratch (offset,size,memcpyCallback);
5082
+ offset += size;
5076
5083
break ;
5077
5084
}
5078
5085
default :
5079
5086
break ;
5080
5087
}
5088
+ assert (offset-origOffset<=origSize);
5081
5089
if (!success)
5082
5090
break ;
5083
5091
outGeom.maxVertex = geom.maxVertex ;
@@ -5091,8 +5099,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5091
5099
success = pPrimitiveCounts==primitiveCounts.data ()+primitiveCounts.size ();
5092
5100
rangeInfos.push_back (reinterpret_cast <const IGPUBottomLevelAccelerationStructure::BuildRangeInfo* const &>(geometryRangeInfoOffset));
5093
5101
}
5094
- // current recording buffer may have changed
5095
- xferCmdBuf = params.transfer ->getCommandBufferForRecording ();
5096
5102
if (!success)
5097
5103
{
5098
5104
rangeInfos.resize (buildInfos.size ());
@@ -5161,7 +5167,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5161
5167
)
5162
5168
{
5163
5169
// clean AS builds, pipeline barrier, query reset and writes need to get executed before we start waiting on the results
5164
- drainCompute ();
5170
+ drainBoth ();
5165
5171
// get queries
5166
5172
core::vector<size_t > sizes (compactions.size ());
5167
5173
if (!device->getQueryPoolResults (queryPool.get (),0 ,compactions.size (),sizes.data (),sizeof (size_t ),bitflag (IQueryPool::RESULTS_FLAGS::WAIT_BIT)|IQueryPool::RESULTS_FLAGS::_64_BIT))
@@ -5301,7 +5307,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5301
5307
};
5302
5308
// submit because we want to launch BLAS builds in a separate submit, so the scratch semaphore can signal and free the scratch and more is available for TLAS builds
5303
5309
if (pipelineBarrier (computeCmdBuf,{.memBarriers ={&readBLASInTLASBuildBarrier,1 }}," Failed to sync BLAS with TLAS build!" ))
5304
- drainCompute ();
5310
+ drainBoth ();
5305
5311
else
5306
5312
failedBLASBarrier = true ;
5307
5313
}
0 commit comments