Skip to content

Commit 893fe7e

Browse files
committed
OpenGL QueryPools Fixed and Tested on GPUBuffer and void*
1 parent dac26dd commit 893fe7e

File tree

8 files changed

+477
-287
lines changed

8 files changed

+477
-287
lines changed

examples_tests/06.MeshLoaders/main.cpp

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -150,21 +150,34 @@ class MeshLoadersApp : public ApplicationBase
150150

151151
void getAndLogQueryPoolResults()
152152
{
153+
#ifdef QUERY_POOL_LOGS
153154
{
154-
uint64_t samples_passed = 0u;
155-
auto queryResultFlags = core::bitflag<video::IQueryPool::E_QUERY_RESULTS_FLAGS>(video::IQueryPool::EQRF_WAIT_BIT) | video::IQueryPool::EQRF_64_BIT;
156-
logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 1u, sizeof(uint64_t), &samples_passed, sizeof(uint64_t), queryResultFlags);
157-
logger->log("Samples Passed = %d", system::ILogger::ELL_INFO, samples_passed);
155+
uint64_t samples_passed[4] = {};
156+
auto queryResultFlags = core::bitflag<video::IQueryPool::E_QUERY_RESULTS_FLAGS>(video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT) | video::IQueryPool::EQRF_64_BIT;
157+
logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 2u, sizeof(samples_passed), &samples_passed, sizeof(uint64_t) * 2, queryResultFlags);
158+
logger->log("[AVAIL+64] Samples Passed [1] = %d, Samples Passed [2] = %d, Result Available = %d, %d", system::ILogger::ELL_INFO, samples_passed[0], samples_passed[2], samples_passed[1], samples_passed[3]);
159+
}
160+
{
161+
uint64_t samples_passed[4] = {};
162+
auto queryResultFlags = core::bitflag<video::IQueryPool::E_QUERY_RESULTS_FLAGS>(video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT) | video::IQueryPool::EQRF_64_BIT | video::IQueryPool::EQRF_WAIT_BIT;
163+
logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 2u, sizeof(samples_passed), &samples_passed, sizeof(uint64_t) * 2, queryResultFlags);
164+
logger->log("[WAIT+AVAIL+64] Samples Passed [1] = %d, Samples Passed [2] = %d, Result Available = %d, %d", system::ILogger::ELL_INFO, samples_passed[0], samples_passed[2], samples_passed[1], samples_passed[3]);
158165
}
159-
160166
{
161-
uint64_t timestamps[2] = {};
162-
auto queryResultFlags = core::bitflag<video::IQueryPool::E_QUERY_RESULTS_FLAGS>(video::IQueryPool::EQRF_WAIT_BIT) | video::IQueryPool::EQRF_64_BIT;
163-
logicalDevice->getQueryPoolResults(timestampQueryPool.get(), 0u, 2u, sizeof(timestamps), timestamps, sizeof(uint64_t), queryResultFlags);
164-
float timePassed = (timestamps[1] - timestamps[0]) * physicalDevice->getLimits().timestampPeriodInNanoSeconds;
165-
// logger->log("Time Passed (NanoSeconds) = %f", system::ILogger::ELL_INFO, timePassed);
167+
uint32_t samples_passed[2] = {};
168+
auto queryResultFlags = core::bitflag<video::IQueryPool::E_QUERY_RESULTS_FLAGS>(video::IQueryPool::EQRF_64_BIT) | video::IQueryPool::EQRF_WAIT_BIT;
169+
logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 2u, sizeof(samples_passed), &samples_passed, sizeof(uint32_t), queryResultFlags);
170+
logger->log("[WAIT] Samples Passed [0] = %d, Samples Passed [1] = %d", system::ILogger::ELL_INFO, samples_passed[0], samples_passed[1]);
171+
}
172+
{
173+
uint64_t timestamps[4] = {};
174+
auto queryResultFlags = core::bitflag<video::IQueryPool::E_QUERY_RESULTS_FLAGS>(video::IQueryPool::EQRF_WAIT_BIT) | video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT | video::IQueryPool::EQRF_64_BIT;
175+
logicalDevice->getQueryPoolResults(timestampQueryPool.get(), 0u, 2u, sizeof(timestamps), timestamps, sizeof(uint64_t) * 2ull, queryResultFlags);
176+
float timePassed = (timestamps[2] - timestamps[0]) * physicalDevice->getLimits().timestampPeriodInNanoSeconds;
166177
logger->log("Time Passed (Seconds) = %f", system::ILogger::ELL_INFO, (timePassed * 1e-9));
178+
logger->log("Timestamps availablity: %d, %d", system::ILogger::ELL_INFO, timestamps[1], timestamps[3]);
167179
}
180+
#endif
168181
}
169182

170183
APP_CONSTRUCTOR(MeshLoadersApp)
@@ -177,7 +190,7 @@ class MeshLoadersApp : public ApplicationBase
177190
const auto swapchainImageUsage = static_cast<asset::IImage::E_USAGE_FLAGS>(asset::IImage::EUF_COLOR_ATTACHMENT_BIT);
178191
const video::ISurface::SFormat surfaceFormat(asset::EF_R8G8B8A8_SRGB, asset::ECP_SRGB, asset::EOTF_sRGB);
179192

180-
CommonAPI::InitWithDefaultExt(initOutput, video::EAT_VULKAN, "MeshLoaders", WIN_W, WIN_H, SC_IMG_COUNT, swapchainImageUsage, surfaceFormat, nbl::asset::EF_D32_SFLOAT);
193+
CommonAPI::InitWithDefaultExt(initOutput, video::EAT_OPENGL, "MeshLoaders", WIN_W, WIN_H, SC_IMG_COUNT, swapchainImageUsage, surfaceFormat, nbl::asset::EF_D32_SFLOAT);
181194
window = std::move(initOutput.window);
182195
windowCb = std::move(initOutput.windowCb);
183196
apiConnection = std::move(initOutput.apiConnection);
@@ -200,7 +213,7 @@ class MeshLoadersApp : public ApplicationBase
200213
{
201214
video::IQueryPool::SCreationParams queryPoolCreationParams = {};
202215
queryPoolCreationParams.queryType = video::IQueryPool::EQT_OCCLUSION;
203-
queryPoolCreationParams.queryCount = 1u;
216+
queryPoolCreationParams.queryCount = 2u;
204217
occlusionQueryPool = logicalDevice->createQueryPool(std::move(queryPoolCreationParams));
205218
}
206219

@@ -462,14 +475,15 @@ class MeshLoadersApp : public ApplicationBase
462475
beginInfo.clearValues = clear;
463476
}
464477

465-
commandBuffer->resetQueryPool(occlusionQueryPool.get(), 0u, 1u);
478+
commandBuffer->resetQueryPool(occlusionQueryPool.get(), 0u, 2u);
466479
commandBuffer->resetQueryPool(timestampQueryPool.get(), 0u, 2u);
467480
commandBuffer->beginRenderPass(&beginInfo, nbl::asset::ESC_INLINE);
468481

469-
commandBuffer->beginQuery(occlusionQueryPool.get(), 0u);
470482
commandBuffer->writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS::EPSF_TOP_OF_PIPE_BIT, timestampQueryPool.get(), 0u);
471483
for (size_t i = 0; i < gpumesh->getMeshBuffers().size(); ++i)
472484
{
485+
if(i < 2)
486+
commandBuffer->beginQuery(occlusionQueryPool.get(), i);
473487
auto gpuMeshBuffer = gpumesh->getMeshBuffers().begin()[i];
474488
auto gpuGraphicsPipeline = gpuPipelines[reinterpret_cast<RENDERPASS_INDEPENDENT_PIPELINE_ADRESS>(gpuMeshBuffer->getPipeline())];
475489

@@ -486,9 +500,11 @@ class MeshLoadersApp : public ApplicationBase
486500
commandBuffer->pushConstants(gpuRenderpassIndependentPipeline->getLayout(), asset::IShader::ESS_FRAGMENT, 0u, gpuMeshBuffer->MAX_PUSH_CONSTANT_BYTESIZE, gpuMeshBuffer->getPushConstantsDataPtr());
487501

488502
commandBuffer->drawMeshBuffer(gpuMeshBuffer);
503+
504+
if(i < 2)
505+
commandBuffer->endQuery(occlusionQueryPool.get(), i);
489506
}
490507
commandBuffer->writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS::EPSF_BOTTOM_OF_PIPE_BIT, timestampQueryPool.get(), 1u);
491-
commandBuffer->endQuery(occlusionQueryPool.get(), 0u);
492508

493509
commandBuffer->endRenderPass();
494510
commandBuffer->end();

src/nbl/video/COpenGLCommandBuffer.cpp

Lines changed: 99 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -919,102 +919,146 @@ COpenGLCommandBuffer::~COpenGLCommandBuffer()
919919
{
920920
auto& c = cmd.get<impl::ECT_RESET_QUERY_POOL>();
921921
COpenGLQueryPool* qp = static_cast<COpenGLQueryPool*>(c.queryPool.get());
922-
bool success = qp->resetQueries(gl, c.query, c.queryCount);
922+
bool success = qp->resetQueries(gl, ctxid, c.query, c.queryCount);
923923
assert(success);
924924
}
925925
break;
926926
case impl::ECT_BEGIN_QUERY:
927927
{
928928
auto& c = cmd.get<impl::ECT_BEGIN_QUERY>();
929929
const COpenGLQueryPool* qp = static_cast<const COpenGLQueryPool*>(c.queryPool.get());
930-
qp->beginQuery(gl, c.query, c.flags.value);
930+
qp->beginQuery(gl, ctxid, c.query, c.flags.value);
931931
}
932932
break;
933933
case impl::ECT_END_QUERY:
934934
{
935+
// TODO: set last queue to use
935936
auto& c = cmd.get<impl::ECT_END_QUERY>();
936937
const COpenGLQueryPool* qp = static_cast<const COpenGLQueryPool*>(c.queryPool.get());
937-
qp->endQuery(gl, c.query);
938+
qp->endQuery(gl, ctxid, c.query);
938939
}
939940
break;
940941
case impl::ECT_COPY_QUERY_POOL_RESULTS:
941942
{
942943
auto& c = cmd.get<impl::ECT_COPY_QUERY_POOL_RESULTS>();
943944

944945
const COpenGLBuffer* buffer = static_cast<const COpenGLBuffer*>(c.dstBuffer.get());
945-
GLuint bufferId = buffer->getOpenGLName();
946-
947-
const COpenGLQueryPool* qp = static_cast<const COpenGLQueryPool*>(c.queryPool.get());
948-
946+
const COpenGLQueryPool* qp = IBackendObject::compatibility_cast<const COpenGLQueryPool*>(c.queryPool.get(), this);
949947
auto queryPoolQueriesCount = qp->getCreationParameters().queryCount;
950-
auto queriesRange = qp->getQueries(); // queriesRange.size() is a multiple of queryPoolQueriesCount
951-
auto queries = queriesRange.begin();
952-
953-
IQueryPool::E_QUERY_TYPE queryType = qp->getCreationParameters().queryType;
954-
bool use64Version = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_64_BIT);
955-
bool availabilityFlag = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_WITH_AVAILABILITY_BIT);
956-
bool waitForAllResults = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_WAIT_BIT);
957-
bool partialResults = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_PARTIAL_BIT);
958948

959-
if(c.firstQuery + c.queryCount > queryPoolQueriesCount)
949+
if(buffer != nullptr && qp != nullptr)
960950
{
961-
assert(false && "The sum of firstQuery and queryCount must be less than or equal to the number of queries in queryPool");
962-
break;
963-
}
964-
if(partialResults && queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP) {
965-
assert(false && "QUERY_RESULT_PARTIAL_BIT must not be used if the pool’s queryType is QUERY_TYPE_TIMESTAMP.");
966-
break;
967-
}
951+
GLuint bufferId = buffer->getOpenGLName();
968952

969-
size_t currentDataPtrOffset = c.dstOffset;
970-
size_t queryElementDataSize = (use64Version) ? sizeof(GLuint64) : sizeof(GLuint); // each query might write to multiple values/elements
953+
IQueryPool::E_QUERY_TYPE queryType = qp->getCreationParameters().queryType;
954+
bool use64Version = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_64_BIT);
955+
bool availabilityFlag = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_WITH_AVAILABILITY_BIT);
956+
bool waitForAllResults = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_WAIT_BIT);
957+
bool partialResults = c.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_PARTIAL_BIT);
971958

972-
GLenum pname;
973-
if(availabilityFlag)
974-
pname = GL_QUERY_RESULT_AVAILABLE;
975-
else if(waitForAllResults)
976-
pname = GL_QUERY_RESULT;
977-
else if(partialResults)
978-
pname = GL_QUERY_NO_WAIT;
959+
assert(queryType == IQueryPool::E_QUERY_TYPE::EQT_OCCLUSION || queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP);
979960

980-
auto getQueryBufferObject = [&](GLuint queryId, GLuint buffer, GLenum pname, GLintptr offset) -> void
981-
{
982-
if(use64Version)
961+
if(c.firstQuery + c.queryCount > queryPoolQueriesCount)
983962
{
984-
gl->extGlGetQueryBufferObjectui64v(queryId, buffer, pname, offset);
963+
assert(false && "The sum of firstQuery and queryCount must be less than or equal to the number of queries in queryPool");
964+
break;
985965
}
986-
else
987-
{
988-
gl->extGlGetQueryBufferObjectuiv(queryId, buffer, pname, offset);
966+
if(partialResults && queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP) {
967+
assert(false && "QUERY_RESULT_PARTIAL_BIT must not be used if the pool’s queryType is QUERY_TYPE_TIMESTAMP.");
968+
break;
989969
}
990-
};
991970

992-
for(uint32_t i = 0; i < c.queryCount; ++i)
993-
{
994-
if(queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP || queryType == IQueryPool::E_QUERY_TYPE::EQT_OCCLUSION)
971+
size_t currentDataPtrOffset = c.dstOffset;
972+
const uint32_t glQueriesPerQuery = qp->getGLQueriesPerQuery();
973+
const size_t queryElementDataSize = (use64Version) ? sizeof(GLuint64) : sizeof(GLuint); // each query might write to multiple values/elements
974+
const size_t eachQueryDataSize = queryElementDataSize * glQueriesPerQuery;
975+
const size_t eachQueryWithAvailabilityDataSize = (availabilityFlag) ? queryElementDataSize + eachQueryDataSize : eachQueryDataSize;
976+
977+
const size_t bufferDataSize = buffer->getSize();
978+
979+
assert(core::is_aligned_to(c.dstOffset, queryElementDataSize));
980+
assert(c.stride >= eachQueryWithAvailabilityDataSize);
981+
assert(c.stride && core::is_aligned_to(c.stride, eachQueryWithAvailabilityDataSize)); // stride must be aligned to each query data size considering the specified flags
982+
assert((bufferDataSize - currentDataPtrOffset) >= (c.queryCount * c.stride)); // bufferDataSize is not enough for "queryCount" queries and specified stride
983+
assert((bufferDataSize - currentDataPtrOffset) >= (c.queryCount * eachQueryWithAvailabilityDataSize)); // bufferDataSize is not enough for "queryCount" queries with considering the specified flags
984+
985+
auto getQueryObject = [&](GLuint queryId, GLuint buffer, GLenum pname, GLintptr offset, uint32_t queueIdx) -> void
995986
{
996-
assert(queryPoolQueriesCount == queriesRange.size());
997-
assert(c.stride >= queryElementDataSize);
998-
GLuint query = queries[i+c.firstQuery];
987+
assert(ctxid == queueIdx);
988+
if(ctxid == queueIdx)
989+
{
990+
if(use64Version)
991+
gl->extGlGetQueryBufferObjectui64v(queryId, buffer, pname, offset);
992+
else
993+
gl->extGlGetQueryBufferObjectuiv(queryId, buffer, pname, offset);
994+
}
995+
};
999996

1000-
getQueryBufferObject(query, bufferId, pname, currentDataPtrOffset);
1001-
}
1002-
else
997+
// iterate on each query
998+
for(uint32_t i = 0; i < c.queryCount; ++i)
1003999
{
1004-
assert(false && "QueryType is not supported.");
1005-
}
1000+
if(currentDataPtrOffset >= bufferDataSize)
1001+
{
1002+
assert(false);
1003+
break;
1004+
}
1005+
1006+
const size_t queryDataOffset = currentDataPtrOffset;
1007+
const size_t availabilityDataOffset = queryDataOffset + eachQueryDataSize; // Write Availability to this offset if flag specified
1008+
1009+
// iterate on each gl query (we may have multiple gl queries per query like pipelinestatistics query type)
1010+
const uint32_t queryIndex = i + c.firstQuery;
1011+
const uint32_t glQueryBegin = queryIndex * glQueriesPerQuery;
1012+
bool allGlQueriesAvailable = true;
1013+
for(uint32_t q = 0; q < glQueriesPerQuery; ++q)
1014+
{
1015+
const size_t subQueryDataOffset = queryDataOffset + q * queryElementDataSize;
1016+
const uint32_t queryIdx = glQueryBegin + q;
1017+
const uint32_t lastQueueToUse = qp->getLastQueueToUseForQuery(queryIdx);
1018+
GLuint query = qp->getQueryAt(lastQueueToUse, queryIdx);
1019+
1020+
GLenum pname;
1021+
if(waitForAllResults)
1022+
{
1023+
// Has WAIT_BIT -> Get Result with Wait (GL_QUERY_RESULT); availability (if the flag is set) is read only after the blocking result, so it will report true
1024+
pname = GL_QUERY_RESULT;
1025+
}
1026+
else if(partialResults)
1027+
{
1028+
// Has PARTIAL_BIT but no WAIT_BIT -> (read vk spec) -> result value between zero and the final result value
1029+
// No PARTIAL queries for GL -> GL_QUERY_RESULT_NO_WAIT best match
1030+
pname = GL_QUERY_RESULT_NO_WAIT;
1031+
}
1032+
else if(availabilityFlag)
1033+
{
1034+
// Only Availability -> Get Results with NoWait + get Query Availability
1035+
pname = GL_QUERY_RESULT_NO_WAIT;
1036+
}
1037+
else
1038+
{
1039+
// No Flags -> GL_QUERY_RESULT_NO_WAIT
1040+
pname = GL_QUERY_RESULT_NO_WAIT;
1041+
}
1042+
1043+
if(availabilityFlag && !waitForAllResults && (q == glQueriesPerQuery - 1))
1044+
getQueryObject(query, bufferId, GL_QUERY_RESULT_AVAILABLE, availabilityDataOffset, lastQueueToUse);
10061045

1007-
currentDataPtrOffset += c.stride;
1046+
getQueryObject(query, bufferId, pname, subQueryDataOffset, lastQueueToUse);
1047+
1048+
if(availabilityFlag && waitForAllResults && (q == glQueriesPerQuery - 1))
1049+
getQueryObject(query, bufferId, GL_QUERY_RESULT_AVAILABLE, availabilityDataOffset, lastQueueToUse);
1050+
}
1051+
1052+
currentDataPtrOffset += c.stride;
1053+
}
10081054
}
10091055
}
10101056
break;
10111057
case impl::ECT_WRITE_TIMESTAMP:
10121058
{
10131059
auto& c = cmd.get<impl::ECT_WRITE_TIMESTAMP>();
10141060
const COpenGLQueryPool* qp = static_cast<const COpenGLQueryPool*>(c.queryPool.get());
1015-
const GLuint query = qp->getQueryAt(c.query);
1016-
assert(qp->getCreationParameters().queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP);
1017-
gl->glQuery.pglQueryCounter(query, GL_TIMESTAMP);
1061+
qp->writeTimestamp(gl, ctxid, c.query);
10181062
}
10191063
break;
10201064
case impl::ECT_BIND_DESCRIPTOR_SETS:

src/nbl/video/COpenGLQueryPool.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ COpenGLQueryPool::~COpenGLQueryPool()
88
{
99
auto* device = static_cast<IOpenGL_LogicalDevice*>(const_cast<ILogicalDevice*>(getOriginDevice()));
1010
device->destroyQueryPool(this);
11+
_NBL_ALIGNED_FREE(lastQueueToUseArray);
1112
}
1213

1314
}

0 commit comments

Comments
 (0)