Skip to content

Commit 32a7916

Browse files
Make sure to use the steady clock; the high-resolution clock was responsible for time going backwards and for deadlocks on GLsync waits.
The GLES backend has a bug where the tests time out!? The Vulkan backend has a bug because the new Buffer API is not ready yet.
1 parent 0f65fba commit 32a7916

File tree

11 files changed

+23
-16
lines changed

11 files changed

+23
-16
lines changed

examples_tests/48.ArithmeticUnitTest/main.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ bool validateResults(ILogicalDevice* device, const uint32_t* inputData, const ui
201201
auto dataFromBuffer = reinterpret_cast<uint32_t*>(mem->getMappedPointer());
202202
const uint32_t subgroupSize = (*dataFromBuffer++);
203203

204+
// TODO: parallel for
204205
// now check if the data obtained has valid values
205206
uint32_t* tmp = new uint32_t[workgroupSize];
206207
uint32_t* ballotInput = new uint32_t[workgroupSize];
@@ -281,7 +282,7 @@ bool runTest(
281282
passed = validateResults<Arithmetic,max_op>(device, inputData, workgroupSize, workgroupCount, buffers[6].get(),logger)&&passed;
282283
if(is_workgroup_test)
283284
{
284-
passed = validateResults<Arithmetic,ballot>(device, inputData, workgroupSize, workgroupCount, buffers[7].get(),logger) && passed;
285+
passed = validateResults<Arithmetic,ballot>(device, inputData, workgroupSize, workgroupCount, buffers[7].get(),logger)&&passed;
285286
}
286287

287288
return passed;
@@ -300,7 +301,7 @@ class ArythmeticUnitTestApp : public NonGraphicalApplicationBase
300301
void onAppInitialized_impl() override
301302
{
302303
CommonAPI::InitOutput initOutput;
303-
CommonAPI::InitWithNoExt(initOutput, nbl::video::EAT_OPENGL, "Subgroup Arithmetic Test");
304+
CommonAPI::InitWithNoExt(initOutput, video::EAT_OPENGL, "Subgroup Arithmetic Test");
304305
gl = std::move(initOutput.apiConnection);
305306
gpuPhysicalDevice = std::move(initOutput.physicalDevice);
306307
logicalDevice = std::move(initOutput.logicalDevice);
@@ -335,7 +336,7 @@ class ArythmeticUnitTestApp : public NonGraphicalApplicationBase
335336
params.queueFamilyIndexCount = 0;
336337
params.queueFamilyIndices = nullptr;
337338
params.sharingMode = ESM_CONCURRENT;
338-
params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
339+
params.usage = core::bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT)|IGPUBuffer::EUF_TRANSFER_SRC_BIT;
339340
IDriverMemoryBacked::SDriverMemoryRequirements reqs;
340341
reqs.vulkanReqs.memoryTypeBits = ~0u;
341342
reqs.vulkanReqs.alignment = 256u;
@@ -424,7 +425,7 @@ class ArythmeticUnitTestApp : public NonGraphicalApplicationBase
424425
core::smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
425426
logicalDevice->createCommandBuffers(cmdPool.get(), IGPUCommandBuffer::EL_PRIMARY, 1u, &cmdbuf);
426427
computeQueue->startCapture();
427-
for (uint32_t workgroupSize = 1u; workgroupSize <= 1024u; workgroupSize++)
428+
for (uint32_t workgroupSize=45u; workgroupSize<=1024u; workgroupSize++)
428429
{
429430
core::smart_refctd_ptr<IGPUComputePipeline> pipelines[kTestTypeCount];
430431
for (uint32_t i = 0u; i < kTestTypeCount; i++)

examples_tests/Readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ N = No support
7979
| 45.BRDFEvalTest | S | S | S | S | S | S | N | S | |
8080
| 46.SamplingValidation | S | S | S | S | S | S | N | S | |
8181
| 47.DerivMapTest | Y | Y | B | S | N | S | N | N | |
82-
| 48.ArithmeticUnitTest | Y | Y | Y | S | N | S | N | S | |
82+
| 48.ArithmeticUnitTest | Y | B | B | S | S | S | N | S | |
8383
| 49.ComputeFFT | S | N | S | S | N | S | N | N | |
8484
| 50.NewAPITest | W | W | W | W | W | W | W | W | |
8585
| 51.RadixSort | W | N | W | W | N | W | N | W | |

include/nbl/core/EventDeferredHandler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class DeferredEventHandlerST
105105
auto prev = mEvents.before_begin();
106106
for (auto it = mEvents.begin(); it!=mEvents.end(); )
107107
{
108-
if (it->m_event.wait_until(std::chrono::high_resolution_clock::now()+std::chrono::microseconds(250ull)))
108+
if (it->m_event.wait_until(std::chrono::steady_clock::now()+std::chrono::microseconds(250ull)))
109109
{
110110
it->m_function();
111111
it = mEvents.erase_after(prev);

include/nbl/video/IGPUFence.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,11 @@ class GPUEventWrapper : public core::Uncopyable
6565
return *this;
6666
}
6767

68-
template<class Clock=std::chrono::high_resolution_clock, class Duration=typename Clock::duration>
68+
template<class Clock=std::chrono::steady_clock, class Duration=typename Clock::duration>
6969
inline static std::chrono::time_point<Clock,Duration> default_wait()
7070
{
71-
return std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(50000ull); // 50 us
71+
//return typename Clock::now()+std::chrono::nanoseconds(50000ull); // 50 us
72+
return Clock::now()+std::chrono::nanoseconds(50000ull); // 50 us
7273
}
7374

7475
IGPUFence::E_STATUS waitFenceWrapper(IGPUFence* fence, uint64_t timeout);

include/nbl/video/alloc/SubAllocatedDataBuffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ class SubAllocatedDataBuffer : protected core::impl::FriendOfHeterogenousMemoryA
227227
return multi_alloc(GPUEventWrapper::default_wait(),count,std::forward<Args>(args)...);
228228
}
229229
//!
230-
template<class Clock=typename std::chrono::high_resolution_clock, typename... Args>
230+
template<class Clock=typename std::chrono::steady_clock, typename... Args>
231231
inline size_type multi_alloc(const std::chrono::time_point<Clock>& maxWaitPoint, const Args&... args) noexcept
232232
{
233233
#ifdef _NBL_DEBUG

include/nbl/video/utilities/CPropertyPoolHandler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ class CPropertyPoolHandler final : public core::IReferenceCounted, public core::
187187
StreamingTransientDataBufferMT<>* const upBuff, IGPUCommandBuffer* const cmdbuf, IGPUFence* const fence, IGPUQueue* const queue,
188188
const asset::SBufferBinding<video::IGPUBuffer>& scratch, UpStreamingRequest*& requests, const uint32_t requestCount,
189189
uint32_t& waitSemaphoreCount, IGPUSemaphore* const*& semaphoresToWaitBeforeOverwrite, const asset::E_PIPELINE_STAGE_FLAGS*& stagesToWaitForPerSemaphore,
190-
system::logger_opt_ptr logger, const std::chrono::high_resolution_clock::time_point& maxWaitPoint=std::chrono::high_resolution_clock::now()+std::chrono::microseconds(500u)
190+
system::logger_opt_ptr logger, const std::chrono::steady_clock::time_point& maxWaitPoint=std::chrono::steady_clock::now()+std::chrono::microseconds(500u)
191191
);
192192

193193
// utility to help you fill out the tail move scatter request after the free, properly, returns if you actually need to transfer anything

include/nbl/video/utilities/IUtilities.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ namespace nbl::video
317317
const uint32_t subSize = core::min(alllocationSize,size);
318318
// cannot use `multi_place` because of the extra padding size we could have added
319319
uint32_t localOffset = video::StreamingTransientDataBufferMT<>::invalid_address;
320-
m_defaultUploadBuffer.get()->multi_alloc(std::chrono::high_resolution_clock::now()+std::chrono::microseconds(500u),1u,&localOffset,&alllocationSize,&alignment);
320+
m_defaultUploadBuffer.get()->multi_alloc(std::chrono::steady_clock::now()+std::chrono::microseconds(500u),1u,&localOffset,&alllocationSize,&alignment);
321321
// copy only the unpadded part
322322
if (localOffset != video::StreamingTransientDataBufferMT<>::invalid_address)
323323
{

src/nbl/video/COpenGLSync.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ uint64_t COpenGLSync::prewait() const
3232
if (sync)
3333
return 0ull;
3434

35-
using clock_t = std::chrono::high_resolution_clock;
35+
using clock_t = std::chrono::steady_clock;
3636
auto start = clock_t::now();
3737
while (!sync)
3838
{
@@ -53,7 +53,7 @@ COpenGLSync::E_STATUS COpenGLSync::waitCPU(IOpenGL_FunctionTable* _gl, uint64_t
5353
return (cachedRetval = ES_TIMEOUT_EXPIRED);
5454
timeout -= spintime;
5555

56-
GLenum status = _gl->glSync.pglClientWaitSync(sync, lockedTable?GL_SYNC_FLUSH_COMMANDS_BIT:0, timeout); // GL_SYNC_FLUSH_COMMANDS_BIT to flags?
56+
GLenum status = _gl->glSync.pglClientWaitSync(sync, lockedTable ? GL_SYNC_FLUSH_COMMANDS_BIT:0, timeout);
5757
switch (status)
5858
{
5959
case GL_ALREADY_SIGNALED:

src/nbl/video/IOpenGL_LogicalDevice.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,12 @@ class IOpenGL_LogicalDevice : public ILogicalDevice, protected impl::IOpenGL_Log
852852
if(notFirstRun && now>=timeoutPoint)
853853
return IGPUFence::ES_TIMEOUT;
854854
else if (_waitAll) // all fences have to get signalled anyway so no use round robining
855-
timeout = std::chrono::duration_cast<std::chrono::nanoseconds>(timeoutPoint-now).count();
855+
{
856+
if (timeoutPoint>now)
857+
timeout = std::chrono::duration_cast<std::chrono::nanoseconds>(timeoutPoint-now).count();
858+
else
859+
timeout = 0ull;
860+
}
856861
else if (i==0u) // if we're only looking for one to succeed then poll with increasing timeouts until deadline
857862
timeout <<= 1u;
858863
}

src/nbl/video/utilities/CPropertyPoolHandler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ uint32_t CPropertyPoolHandler::transferProperties(
174174
StreamingTransientDataBufferMT<>* const upBuff, IGPUCommandBuffer* const cmdbuf, IGPUFence* const fence, IGPUQueue* const queue,
175175
const asset::SBufferBinding<video::IGPUBuffer>& scratch, UpStreamingRequest* &requests, const uint32_t requestCount,
176176
uint32_t& waitSemaphoreCount, IGPUSemaphore* const*& semaphoresToWaitBeforeOverwrite, const asset::E_PIPELINE_STAGE_FLAGS*& stagesToWaitForPerSemaphore,
177-
system::logger_opt_ptr logger, const std::chrono::high_resolution_clock::time_point& maxWaitPoint
177+
system::logger_opt_ptr logger, const std::chrono::steady_clock::time_point& maxWaitPoint
178178
)
179179
{
180180
if (!requestCount)

0 commit comments

Comments
 (0)