Skip to content

Commit 70ee483

Browse files
committed
Got rid of separate cache for GL base instance values
1 parent 9885c3b commit 70ee483

File tree

2 files changed

+37
-16
lines changed

2 files changed

+37
-16
lines changed

src/nbl/video/COpenGLRenderpassIndependentPipeline.h

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ class COpenGLRenderpassIndependentPipeline final : public IGPURenderpassIndepend
3636
_vertexInputParams, _blendParams, _primAsmParams, _rasterParams
3737
),
3838
IOpenGLPipeline(_ctxCount, _ctxID, _GLnames, _binaries),
39-
m_stagePresenceMask(0u),
40-
m_baseInstanceUniformIDs(core::make_refctd_dynamic_array<decltype(m_baseInstanceUniformIDs)>(_ctxCount))
39+
m_stagePresenceMask(0u)
4140
{
4241
static_assert(asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT == asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT, "This code below has to be divided into 2 loops");
4342
static_assert(asset::EF_UNKNOWN <= 0xffu, "All E_FORMAT values must fit in 1 byte or hash falls apart");
@@ -71,12 +70,12 @@ class COpenGLRenderpassIndependentPipeline final : public IGPURenderpassIndepend
7170
// only this function touches this uniform
7271
constexpr const char* SPIRV_CROSS_BaseInstanceUniformName = "SPIRV_Cross_BaseInstance";
7372

74-
GLint& value = (*m_baseInstanceUniformIDs)[_ctxID].cache;
73+
GLint& value = getBaseInstanceState(_ctxID)->cache;
7574
if (value == _baseInstance)
7675
return;
7776

7877
const GLuint programID = getShaderGLnameForCtx(ESSI_VERTEX_SHADER_IX, _ctxID);
79-
GLint& uid = (*m_baseInstanceUniformIDs)[_ctxID].id;
78+
GLint& uid = getBaseInstanceState(_ctxID)->id;
8079
if (uid == -1)
8180
{
8281
uid = COpenGLExtensionHandler::extGlGetUniformLocation(programID, SPIRV_CROSS_BaseInstanceUniformName);
@@ -315,16 +314,6 @@ class COpenGLRenderpassIndependentPipeline final : public IGPURenderpassIndepend
315314
SVAOHash m_vaoHashval;
316315
uint32_t m_stagePresenceMask;
317316
mutable uint32_t m_lastUpdateStamp[SHADER_STAGE_COUNT];
318-
319-
// needed for spirv-cross-based workaround of GL's behaviour of gl_InstanceID
320-
struct SBaseInstance
321-
{
322-
GLint cache = 0;
323-
GLint id = -1;
324-
};
325-
// per-context ID of SPIRV_Cross_BaseInstance uniform
326-
// (only present in case of absence of GL_ARB_shader_draw_parameters)
327-
mutable core::smart_refctd_dynamic_array<SBaseInstance> m_baseInstanceUniformIDs;
328317
};
329318

330319
}

src/nbl/video/IOpenGLPipeline.h

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,34 @@ namespace video
1717
template<size_t _STAGE_COUNT>
1818
class IOpenGLPipeline
1919
{
20+
protected:
21+
// needed for spirv-cross-based workaround of GL's behaviour of gl_InstanceID
22+
// Per-context state for the SPIRV_Cross_BaseInstance uniform.
struct SBaseInstance
23+
{
24+
// base instance value compared against the requested one; when they are equal the caller returns without touching GL
GLint cache = 0;
25+
// location of the SPIRV_Cross_BaseInstance uniform; -1 means it has not been queried with extGlGetUniformLocation yet
GLint id = -1;
26+
};
27+
28+
private:
29+
using base_instance_cache_t = SBaseInstance;
30+
31+
// A pipeline base with exactly one stage is treated as the compute pipeline base.
_NBL_STATIC_INLINE_CONSTEXPR bool IsComputePipelineBase = (_STAGE_COUNT == 1u);
32+
// Bytes reserved per context for the base instance cache; none are reserved for the compute pipeline base.
// NOTE(review): the constructor still assigns a default base_instance_cache_t through getBaseInstanceState()
// when this is 0, so that write lands on the first bytes of the stage 0 push constant cache - confirm this is intended.
_NBL_STATIC_INLINE_CONSTEXPR uint32_t BaseInstancePerContextCacheSize = IsComputePipelineBase ? 0ull : sizeof(base_instance_cache_t);
33+
// Total bytes of m_uniformValues owned by one context: one push constant block per stage plus the base instance cache.
_NBL_STATIC_INLINE_CONSTEXPR uint32_t UniformsPerContextCacheSize = _STAGE_COUNT*IGPUMeshBuffer::MAX_PUSH_CONSTANT_BYTESIZE + BaseInstancePerContextCacheSize;
34+
35+
// Byte offset at which the cache slice of context `_ctxId` begins;
// the base instance cache is placed at the very start of that slice.
static uint32_t baseInstanceCacheByteoffsetForCtx(uint32_t _ctxId)
36+
{
37+
return UniformsPerContextCacheSize*_ctxId;
38+
}
39+
// Byte offset of the first push constant block of context `_ctxId`:
// it directly follows the BaseInstancePerContextCacheSize bytes (possibly 0) of the base instance cache.
static uint32_t uniformsCacheByteoffsetForCtx(uint32_t _ctxId)
40+
{
41+
return baseInstanceCacheByteoffsetForCtx(_ctxId) + BaseInstancePerContextCacheSize;
42+
}
43+
// Byte offset of the push constant block of stage `_stage` within context `_ctxId`;
// stages are laid out back to back, IGPUMeshBuffer::MAX_PUSH_CONSTANT_BYTESIZE bytes each.
static uint32_t uniformsCacheByteoffsetForCtxAndStage(uint32_t _ctxId, uint32_t _stage)
44+
{
45+
return uniformsCacheByteoffsetForCtx(_ctxId) + _stage*IGPUMeshBuffer::MAX_PUSH_CONSTANT_BYTESIZE;
46+
}
47+
2048
public:
2149
IOpenGLPipeline(uint32_t _ctxCount, uint32_t _ctxID, const GLuint _GLnames[_STAGE_COUNT], const COpenGLSpecializedShader::SProgramBinary _binaries[_STAGE_COUNT]) :
2250
m_GLprograms(core::make_refctd_dynamic_array<decltype(m_GLprograms)>(_ctxCount*_STAGE_COUNT))
@@ -34,8 +62,10 @@ class IOpenGLPipeline
3462
(*m_GLprograms)[i*_STAGE_COUNT+j].GLname = GLname;
3563
}
3664

37-
const size_t uVals_sz = _STAGE_COUNT*_ctxCount*IGPUMeshBuffer::MAX_PUSH_CONSTANT_BYTESIZE;
65+
const size_t uVals_sz = UniformsPerContextCacheSize * _ctxCount;
3866
m_uniformValues = reinterpret_cast<uint8_t*>(_NBL_ALIGNED_MALLOC(uVals_sz, 128));
67+
for (uint32_t i = 0u; i < _ctxCount; ++i)
68+
getBaseInstanceState(i)[0] = base_instance_cache_t{};
3969
}
4070
~IOpenGLPipeline()
4171
{
@@ -46,7 +76,8 @@ class IOpenGLPipeline
4676
_NBL_ALIGNED_FREE(m_uniformValues);
4777
}
4878

49-
uint8_t* getPushConstantsStateForStage(uint32_t _stageIx, uint32_t _ctxID) const { return const_cast<uint8_t*>(m_uniformValues + ((_STAGE_COUNT*_ctxID + _stageIx)*IGPUMeshBuffer::MAX_PUSH_CONSTANT_BYTESIZE)); }
79+
// Returns a writable pointer to the cached push constant bytes of stage `_stageIx` in context `_ctxID`.
uint8_t* getPushConstantsStateForStage(uint32_t _stageIx, uint32_t _ctxID) const
{
    const auto stageByteOffset = uniformsCacheByteoffsetForCtxAndStage(_ctxID, _stageIx);
    return const_cast<uint8_t*>(m_uniformValues + stageByteOffset);
}
80+
// Returns a writable pointer to the base instance cache of context `_ctxID`,
// which lives at the start of that context's slice of m_uniformValues.
base_instance_cache_t* getBaseInstanceState(uint32_t _ctxID) const
{
    // const_cast may only add/remove cv-qualifiers; viewing the raw byte storage
    // as base_instance_cache_t requires a reinterpret_cast.
    uint8_t* const rawBytes = const_cast<uint8_t*>(m_uniformValues + baseInstanceCacheByteoffsetForCtx(_ctxID));
    return reinterpret_cast<base_instance_cache_t*>(rawBytes);
}
5081

5182
protected:
5283
void setUniformsImitatingPushConstants(uint32_t _stageIx, uint32_t _ctxID, const uint8_t* _pcData, const core::SRange<const COpenGLSpecializedShader::SUniform>& _uniforms, const core::SRange<const GLint>& _locations) const
@@ -80,6 +111,7 @@ class IOpenGLPipeline
80111
{
81112
// 1N for scalar types, 2N for gvec2, 4N for gvec3 and gvec4
82113
// N==sizeof(float)
114+
// WARNING / TODO: this needs adjusting if we ever want to support `double` push constants
83115
if (is_scalar_or_vec())
84116
arrayStride = (m.mtxRowCnt==1u) ? m.size : core::roundUpToPoT(m.mtxRowCnt)*sizeof(float);
85117
// same as size in case of matrices

0 commit comments

Comments
 (0)