OpenGL queryPool still broken but GET_QUERY_POOL_RESULTS Logic updated

Erfan-Ahmadi · Erfan-Ahmadi · commit 6c2cdea09948 · 2022-02-11T18:55:47.000+03:30
diff --git a/src/nbl/video/COpenGLQueryPool.h b/src/nbl/video/COpenGLQueryPool.h
@@ -19,28 +19,29 @@ class COpenGLQueryPool final : public IQueryPool
 	protected:
 		virtual ~COpenGLQueryPool();
 
-		// queries.size() is a multiple of params.queryCount
 		core::vector<GLuint> queries;
+		uint32_t glQueriesPerQuery = 0u;
 
 	public:
 		COpenGLQueryPool(core::smart_refctd_ptr<const ILogicalDevice>&& dev, IOpenGL_FunctionTable* gl, IQueryPool::SCreationParams&& _params) 
 			: IQueryPool(std::move(dev), std::move(_params))
 		{
 			if(_params.queryType == EQT_OCCLUSION)
 			{
-				queries.resize(_params.queryCount);
+				glQueriesPerQuery = 1u;
 				gl->extGlCreateQueries(GL_SAMPLES_PASSED, _params.queryCount, queries.data());
 			}
 			else if(_params.queryType == EQT_TIMESTAMP)
 			{
-				queries.resize(_params.queryCount);
+				glQueriesPerQuery = 1u;
 				gl->extGlCreateQueries(GL_TIMESTAMP, _params.queryCount, queries.data());
 			}
 			else
 			{
 				// TODO: Add ARB_pipeline_statistics support: https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_pipeline_statistics_query.txt
 				assert(false && "QueryType is not supported.");
 			}
+			queries.resize(_params.queryCount * glQueriesPerQuery);
 		}
 
 		inline core::SRange<const GLuint> getQueries() const
@@ -61,6 +62,8 @@ class COpenGLQueryPool final : public IQueryPool
 			}
 		}
 
+		inline uint32_t getGLQueriesPerQuery() const { return glQueriesPerQuery; }
+
 		inline void beginQuery(IOpenGL_FunctionTable* gl, uint32_t queryIndex, E_QUERY_CONTROL_FLAGS flags) const
 		{
 			if(gl != nullptr)
@@ -103,27 +106,8 @@ class COpenGLQueryPool final : public IQueryPool
 
 		inline bool resetQueries(IOpenGL_FunctionTable* gl, uint32_t query, uint32_t queryCount)
 		{
-			// NOTE: There is no Reset Queries on OpenGL but to make the queries invalid/unavailable and not return the previous ones we just delete the queries and recreate them.
-			// TODO: Needs test
-			size_t querySize = queries.size();
-
-			if(query + queryCount > querySize)
-			{
-				assert(false);
-				return false;
-			}
-
-			if(params.queryType == EQT_OCCLUSION)
-			{
-				gl->glQuery.pglDeleteQueries(queryCount, queries.data() + query);
-				gl->extGlCreateQueries(GL_SAMPLES_PASSED, queryCount, queries.data() + query);
-			}
-			else if(params.queryType == EQT_TIMESTAMP)
-			{
-				gl->glQuery.pglDeleteQueries(queryCount, queries.data() + query);
-				gl->extGlCreateQueries(GL_TIMESTAMP, queryCount, queries.data() + query);
-			}
-
+			// NOTE: There is no Reset Queries on OpenGL
+			// NOOP
 			return true;
 		}
 
diff --git a/src/nbl/video/IOpenGL_LogicalDevice.h b/src/nbl/video/IOpenGL_LogicalDevice.h
@@ -670,9 +670,8 @@ class IOpenGL_LogicalDevice : public ILogicalDevice, protected impl::IOpenGL_Log
                 auto& p = std::get<SRequestGetQueryPoolResults>(req.params_variant);
                 const COpenGLQueryPool* qp = IBackendObject::device_compatibility_cast<const COpenGLQueryPool*>(p.queryPool.get(), device);
                 auto queryPoolQueriesCount = qp->getCreationParameters().queryCount;
-                auto queriesRange = qp->getQueries(); // queriesRange.size() is a multiple of queryPoolQueriesCount
-                auto queries = queriesRange.begin();
-
+                auto queries = qp->getQueries();
+                
                 if(p.pData != nullptr)
                 {
                     IQueryPool::E_QUERY_TYPE queryType = qp->getCreationParameters().queryType;
@@ -681,6 +680,8 @@ class IOpenGL_LogicalDevice : public ILogicalDevice, protected impl::IOpenGL_Log
                     bool waitForAllResults = p.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_WAIT_BIT);
                     bool partialResults = p.flags.hasValue(IQueryPool::E_QUERY_RESULTS_FLAGS::EQRF_PARTIAL_BIT);
 
+                    assert(queryType == IQueryPool::E_QUERY_TYPE::EQT_OCCLUSION || queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP);
+
                     if(p.firstQuery + p.queryCount > queryPoolQueriesCount)
                     {
                         assert(false && "The sum of firstQuery and queryCount must be less than or equal to the number of queries in queryPool");
@@ -692,50 +693,103 @@ class IOpenGL_LogicalDevice : public ILogicalDevice, protected impl::IOpenGL_Log
                     }
 
                     size_t currentDataPtrOffset = 0;
-                    size_t queryElementDataSize = (use64Version) ? sizeof(GLuint64) : sizeof(GLuint); // each query might write to multiple values/elements
-
-                    GLenum pname;
-                    if(availabilityFlag)
-                        pname = GL_QUERY_RESULT_AVAILABLE;
-                    else if(waitForAllResults)
-                        pname = GL_QUERY_RESULT;
-                    else if(partialResults)
-                        pname = GL_QUERY_NO_WAIT;
-
-                    auto getQueryObject = [&](GLuint queryId, GLenum pname, void * pData) -> void 
+                    const uint32_t glQueriesPerQuery = qp->getGLQueriesPerQuery();
+                    const size_t queryElementDataSize = (use64Version) ? sizeof(GLuint64) : sizeof(GLuint); // each query might write to multiple values/elements
+                    const size_t eachQueryDataSize = queryElementDataSize * glQueriesPerQuery;
+                    const size_t eachQueryWithAvailabilityDataSize = (availabilityFlag) ? queryElementDataSize + eachQueryDataSize : eachQueryDataSize;
+                    
+                    assert(p.stride >= eachQueryWithAvailabilityDataSize);
+                    assert(p.stride && core::is_aligned_to(p.stride, eachQueryWithAvailabilityDataSize)); // p.stride must be aligned to each query data size considering the specified flags
+                    assert(p.dataSize >= (p.queryCount * p.stride)); // dataSize is not enough for "queryCount" queries and specified stride
+                    assert(p.dataSize >= (p.queryCount * eachQueryWithAvailabilityDataSize)); // dataSize is not enough for "queryCount" queries with considering the specified flags
+
+                    auto getQueryObject = [&](GLuint queryId, GLenum pname, void* pData) -> void 
                     {
                         if(use64Version)
-                        {
                             gl.extGlGetQueryObjectui64v(queryId, pname, reinterpret_cast<GLuint64*>(pData));
+                        else
+                            gl.extGlGetQueryObjectuiv(queryId, pname, reinterpret_cast<GLuint*>(pData));
+                    }; 
+                    auto getQueryAvailablity = [&](GLuint queryId) -> bool 
+                    {
+                        GLuint ret = 0;
+                        gl.extGlGetQueryObjectuiv(queryId, GL_QUERY_RESULT_AVAILABLE, &ret);
+                        return (ret == GL_TRUE);
+                    };
+                    auto writeValueToData = [&](void* pData, const uint64_t value)
+                    {
+                        if(use64Version)
+                        {
+                            GLuint64* dataPtr = reinterpret_cast<GLuint64*>(pData);
+                            *dataPtr = value;
                         }
                         else
                         {
-                            gl.extGlGetQueryObjectuiv(queryId, pname, reinterpret_cast<GLuint*>(pData));
+                            GLuint* dataPtr = reinterpret_cast<GLuint*>(pData);
+                            *dataPtr = static_cast<uint32_t>(value);
                         }
                     };
 
+                    // iterate on each query
                     for(uint32_t i = 0; i < p.queryCount; ++i)
                     {
-                        // Don't write queries that exceed the dataSize
                         if(currentDataPtrOffset >= p.dataSize)
+                        {
+                            assert(false);
                             break;
+                        }
                         
-                        if(queryType == IQueryPool::E_QUERY_TYPE::EQT_TIMESTAMP || queryType == IQueryPool::E_QUERY_TYPE::EQT_OCCLUSION)
+                        uint8_t* pQueryData = reinterpret_cast<uint8_t*>(p.pData) + currentDataPtrOffset;
+                        uint8_t* pAvailabilityData = pQueryData + eachQueryDataSize; // Write Availability to this value if flag specified
+
+                        // iterate over each GL query (one query may map to multiple GL queries, e.g. for the pipeline-statistics query type)
+                        const uint32_t queryIndex = i + p.firstQuery;
+                        const uint32_t glQueryBegin = queryIndex * glQueriesPerQuery;
+                        bool allGlQueriesAvailable = true;
+                        for(uint32_t q = 0; q < glQueriesPerQuery; ++q)
                         {
-                            assert(queryPoolQueriesCount == queriesRange.size());
-                            assert(p.stride >= queryElementDataSize);
+                            uint8_t* pSubQueryData = pQueryData + q * queryElementDataSize;
+                            GLuint query = queries[glQueryBegin + q];
+
+                            GLenum pname;
 
-                            GLuint query = queries[i+p.firstQuery];
-                            uint8_t* pData = reinterpret_cast<uint8_t*>(p.pData) + currentDataPtrOffset;
-                            getQueryObject(query, pname, pData);
+                            if(waitForAllResults)
+                            {
+                                // Has WAIT_BIT -> Get Result with Wait (GL_QUERY_RESULT) + don't getQueryAvailability (if availability flag is set it will report true)
+                                pname = GL_QUERY_RESULT;
+                            }
+                            else if(partialResults)
+                            {
+                                // Has PARTIAL_BIT but no WAIT_BIT -> (read vk spec) -> result value between zero and the final result value
+                                // No PARTIAL queries for GL -> GL_QUERY_RESULT_NO_WAIT best match
+                                // TODO(Erfan): Maybe set the values to 0 before query so it's consistent with vulkan spec? (what to do about the cmd version where we have to upload 0's to buffer)
+                                pname = GL_QUERY_RESULT_NO_WAIT;
+                            }
+                            else if(availabilityFlag)
+                            {
+                                // Only Availability -> Get Results with NoWait + get Query Availability
+                                pname = GL_QUERY_RESULT_NO_WAIT;
+                            }
+                            else
+                            {
+                                // No Flags -> GL_QUERY_RESULT_NO_WAIT
+                                pname = GL_QUERY_RESULT_NO_WAIT;
+                            }
+                            
+                            if(availabilityFlag)
+                                allGlQueriesAvailable &= getQueryAvailablity(query);
+                            getQueryObject(query, pname, pSubQueryData);
                         }
-                        else
+
+                        if(availabilityFlag)
                         {
-                            assert(false && "QueryType is not supported.");
+                            if(waitForAllResults)
+                                writeValueToData(pAvailabilityData, (allGlQueriesAvailable) ? 1ull : 0ull);
+                            else
+                                writeValueToData(pAvailabilityData, 1ull);
                         }
 
                         currentDataPtrOffset += p.stride;
-                        
                     }
                 }
             }

Original file line number	Diff line number	Diff line change
`@@ -19,28 +19,29 @@ class COpenGLQueryPool final : public IQueryPool`
`19`	`19`	`protected:`
`20`	`20`	`virtual ~COpenGLQueryPool();`
`21`	`21`
`22`		`- // queries.size() is a multiple of params.queryCount`
`23`	`22`	`core::vector<GLuint> queries;`
	`23`	`+ uint32_t glQueriesPerQuery = 0u;`
`24`	`24`
`25`	`25`	`public:`
`26`	`26`	`COpenGLQueryPool(core::smart_refctd_ptr<const ILogicalDevice>&& dev, IOpenGL_FunctionTable* gl, IQueryPool::SCreationParams&& _params)`
`27`	`27`	`: IQueryPool(std::move(dev), std::move(_params))`
`28`	`28`	`{`
`29`	`29`	`if(_params.queryType == EQT_OCCLUSION)`
`30`	`30`	`{`
`31`		`- queries.resize(_params.queryCount);`
	`31`	`+ glQueriesPerQuery = 1u;`
`32`	`32`	`gl->extGlCreateQueries(GL_SAMPLES_PASSED, _params.queryCount, queries.data());`
`33`	`33`	`}`
`34`	`34`	`else if(_params.queryType == EQT_TIMESTAMP)`
`35`	`35`	`{`
`36`		`- queries.resize(_params.queryCount);`
	`36`	`+ glQueriesPerQuery = 1u;`
`37`	`37`	`gl->extGlCreateQueries(GL_TIMESTAMP, _params.queryCount, queries.data());`
`38`	`38`	`}`
`39`	`39`	`else`
`40`	`40`	`{`
`41`	`41`	`// TODO: Add ARB_pipeline_statistics support: https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_pipeline_statistics_query.txt`
`42`	`42`	`assert(false && "QueryType is not supported.");`
`43`	`43`	`}`
	`44`	`+ queries.resize(_params.queryCount * glQueriesPerQuery);`
`44`	`45`	`}`
`45`	`46`
`46`	`47`	`inline core::SRange<const GLuint> getQueries() const`
`@@ -61,6 +62,8 @@ class COpenGLQueryPool final : public IQueryPool`
`61`	`62`	`}`
`62`	`63`	`}`
`63`	`64`
	`65`	`+ inline uint32_t getGLQueriesPerQuery() const { return glQueriesPerQuery; }`
	`66`	`+`
`64`	`67`	`inline void beginQuery(IOpenGL_FunctionTable* gl, uint32_t queryIndex, E_QUERY_CONTROL_FLAGS flags) const`
`65`	`68`	`{`
`66`	`69`	`if(gl != nullptr)`
`@@ -103,27 +106,8 @@ class COpenGLQueryPool final : public IQueryPool`
`103`	`106`
`104`	`107`	`inline bool resetQueries(IOpenGL_FunctionTable* gl, uint32_t query, uint32_t queryCount)`
`105`	`108`	`{`
`106`		`- // NOTE: There is no Reset Queries on OpenGL but to make the queries invalid/unavailable and not return the previous ones we just delete the queries and recreate them.`
`107`		`- // TODO: Needs test`
`108`		`- size_t querySize = queries.size();`
`109`		`-`
`110`		`- if(query + queryCount > querySize)`
`111`		`- {`
`112`		`- assert(false);`
`113`		`- return false;`
`114`		`- }`
`115`		`-`
`116`		`- if(params.queryType == EQT_OCCLUSION)`
`117`		`- {`
`118`		`- gl->glQuery.pglDeleteQueries(queryCount, queries.data() + query);`
`119`		`- gl->extGlCreateQueries(GL_SAMPLES_PASSED, queryCount, queries.data() + query);`
`120`		`- }`
`121`		`- else if(params.queryType == EQT_TIMESTAMP)`
`122`		`- {`
`123`		`- gl->glQuery.pglDeleteQueries(queryCount, queries.data() + query);`
`124`		`- gl->extGlCreateQueries(GL_TIMESTAMP, queryCount, queries.data() + query);`
`125`		`- }`
`126`		`-`
	`109`	`+ // NOTE: There is no Reset Queries on OpenGL`
	`110`	`+ // NOOP`
`127`	`111`	`return true;`
`128`	`112`	`}`
`129`	`113`