Skip to content

Commit f7ecdba

Browse files
Merge pull request #39 from Crisspl/spirv-optimizer
Bug fixes
2 parents 855b55e + 70ee483 commit f7ecdba

File tree

17 files changed

+173
-69
lines changed

17 files changed

+173
-69
lines changed

examples_tests/18.MitsubaLoader/main.cpp

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ struct SLight
3131
vec3 intensity;
3232
};
3333
34+
layout (push_constant) uniform Block {
35+
float camTformDeterminant;
36+
} PC;
37+
3438
layout (set = 2, binding = 0, std430) readonly restrict buffer Lights
3539
{
3640
SLight lights[];
@@ -94,6 +98,29 @@ vec3 nbl_computeLighting(inout nbl_glsl_AnisotropicViewSurfaceInteraction out_in
9498
return color+emissive;
9599
}
96100
)";
101+
constexpr const char* GLSL_FRAG_MAIN = R"(
102+
#define _NBL_FRAG_MAIN_DEFINED_
103+
void main()
104+
{
105+
mat2 dUV = mat2(dFdx(UV),dFdy(UV));
106+
107+
// "The sign of this computation is negated when the value of GL_CLIP_ORIGIN (the clip volume origin, set with glClipControl) is GL_UPPER_LEFT."
108+
const bool front = (!gl_FrontFacing) != (PC.camTformDeterminant*InstData.data[InstanceIndex].determinant < 0.0);
109+
nbl_glsl_MC_precomputed_t precomp = nbl_glsl_precomputeData(front);
110+
#ifdef TEX_PREFETCH_STREAM
111+
nbl_glsl_runTexPrefetchStream(getTexPrefetchStream(precomp), UV, dUV);
112+
#endif
113+
#ifdef NORM_PRECOMP_STREAM
114+
nbl_glsl_runNormalPrecompStream(getNormalPrecompStream(precomp), dUV, precomp);
115+
#endif
116+
117+
118+
nbl_glsl_IsotropicViewSurfaceInteraction inter;
119+
vec3 color = nbl_computeLighting(inter, dUV, precomp);
120+
121+
OutColor = vec4(color, 1.0);
122+
}
123+
)";
97124
static core::smart_refctd_ptr<asset::ICPUSpecializedShader> createModifiedFragShader(const asset::ICPUSpecializedShader* _fs, uint32_t viewport_w, uint32_t viewport_h, uint32_t lightCnt, uint32_t smplCnt, float intensityScale)
98125
{
99126
const asset::ICPUShader* unspec = _fs->getUnspecialized();
@@ -110,6 +137,7 @@ static core::smart_refctd_ptr<asset::ICPUSpecializedShader> createModifiedFragSh
110137
GLSL_COMPUTE_LIGHTING;
111138

112139
glsl.insert(glsl.find("#ifndef _NBL_COMPUTE_LIGHTING_DEFINED_"), extra);
140+
glsl.insert(glsl.find("#ifndef _NBL_FRAG_MAIN_DEFINED_"), GLSL_FRAG_MAIN);
113141

114142
//auto* f = fopen("fs.glsl","w");
115143
//fwrite(glsl.c_str(), 1, glsl.size(), f);
@@ -273,9 +301,9 @@ int main()
273301
return 1;
274302

275303
bool leftHandedCamera = false;
304+
auto cameraTransform = sensor.transform.matrix.extractSub3x4();
276305
{
277-
auto relativeTransform = sensor.transform.matrix.extractSub3x4();
278-
if (relativeTransform.getPseudoDeterminant().x < 0.f)
306+
if (cameraTransform.getPseudoDeterminant().x < 0.f)
279307
leftHandedCamera = true;
280308
}
281309

@@ -429,7 +457,13 @@ int main()
429457
//modify pipeline layouts with our custom DS2 layout (DS2 will be used for lights buffer)
430458
for (uint32_t i = 0u; i < mesh->getMeshBufferCount(); ++i)
431459
{
432-
auto* pipeline = mesh->getMeshBuffer(i)->getPipeline();
460+
auto* meshbuffer = mesh->getMeshBuffer(i);
461+
auto* pipeline = meshbuffer->getPipeline();
462+
463+
asset::SPushConstantRange pcr;
464+
pcr.offset = 0u;
465+
pcr.size = sizeof(float);
466+
pcr.stageFlags = asset::ISpecializedShader::ESS_FRAGMENT;
433467
if (modifiedPipelines.find(pipeline) == modifiedPipelines.end())
434468
{
435469
//if (!pipeline->getLayout()->getDescriptorSetLayout(2u))
@@ -443,10 +477,14 @@ int main()
443477
modifiedShaders.insert({ core::smart_refctd_ptr<asset::ICPUSpecializedShader>(fs),newfs });
444478
pipeline->setShaderAtStage(asset::ICPUSpecializedShader::ESS_FRAGMENT, newfs.get());
445479
}
446-
// invert what is recognized as frontface in case of RH camera
447-
pipeline->getRasterizationParams().frontFaceIsCCW = !leftHandedCamera;
480+
481+
auto pc = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<asset::SPushConstantRange>>(1u);
482+
(*pc)[0] = pcr;
483+
pipeline->getLayout()->setPushConstantRanges(std::move(pc));
448484
modifiedPipelines.insert(pipeline);
449485
}
486+
487+
reinterpret_cast<float*>(meshbuffer->getPushConstantsDataPtr() + pcr.offset)[0] = cameraTransform.getPseudoDeterminant().x;
450488
}
451489
}
452490
modifiedShaders.clear();
@@ -728,7 +766,7 @@ int main()
728766
const video::IGPUDescriptorSet* ds[3]{ gpuds0.get(), gpuds1.get(), gpuds2.get() };
729767
driver->bindGraphicsPipeline(pipeline);
730768
driver->bindDescriptorSets(video::EPBP_GRAPHICS, pipeline->getLayout(), 0u, 3u, ds, nullptr);
731-
driver->pushConstants(pipeline->getLayout(), video::IGPUSpecializedShader::ESS_VERTEX|video::IGPUSpecializedShader::ESS_FRAGMENT, 0u, sizeof(uint32_t), mb->getPushConstantsDataPtr());
769+
driver->pushConstants(pipeline->getLayout(), video::IGPUSpecializedShader::ESS_VERTEX|video::IGPUSpecializedShader::ESS_FRAGMENT, 0u, sizeof(float), mb->getPushConstantsDataPtr());
732770

733771
driver->drawMeshBuffer(mb);
734772
}

examples_tests/22.RaytracedAO/dirty_source/ExtraCrap.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ constexpr uint32_t kOptiXPixelSize = sizeof(uint16_t)*3u;
2121
core::smart_refctd_ptr<ICPUSpecializedShader> specializedShaderFromFile(IAssetManager* assetManager, const char* path)
2222
{
2323
auto bundle = assetManager->getAsset(path, {});
24-
return core::move_and_static_cast<ICPUSpecializedShader>(*bundle.getContents().begin());
24+
return core::smart_refctd_ptr_static_cast<ICPUSpecializedShader>(*bundle.getContents().begin());
2525
}
2626
core::smart_refctd_ptr<IGPUSpecializedShader> gpuSpecializedShaderFromFile(IAssetManager* assetManager, IVideoDriver* driver, const char* path)
2727
{

examples_tests/22.RaytracedAO/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,4 +313,4 @@ int main()
313313
renderer = nullptr;
314314

315315
return 0;
316-
}
316+
}

examples_tests/22.RaytracedAO/raytraceCommon.glsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,13 @@ layout(set = 1, binding = 5, std430, row_major) restrict readonly buffer LightRa
138138
vec3 fetchAccumulation(in ivec2 coord)
139139
{
140140
const uvec2 data = imageLoad(accumulation,coord).rg;
141-
return vec4(unpackHalf2x16(data[0]),unpackHalf2x16(data[1])).rgb;
142-
//return nbl_glsl_decodeRGB19E7(data);
141+
//return vec4(unpackHalf2x16(data[0]),unpackHalf2x16(data[1])).rgb;
142+
return nbl_glsl_decodeRGB19E7(data);
143143
}
144144
void storeAccumulation(in vec3 color, in ivec2 coord)
145145
{
146-
//const uvec2 data = nbl_glsl_encodeRGB19E7(color);
147-
const uvec2 data = uvec2(packHalf2x16(color.rg),packHalf2x16(vec2(color.b,1.0)));
146+
const uvec2 data = nbl_glsl_encodeRGB19E7(color);
147+
//const uvec2 data = uvec2(packHalf2x16(color.rg),packHalf2x16(vec2(color.b,1.0)));
148148
imageStore(accumulation,coord,uvec4(data,0u,0u));
149149
}
150150

include/nbl/asset/IAsset.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -342,11 +342,6 @@ class SAssetBundle
342342
return core::SRange<const core::smart_refctd_ptr<IAsset>>(m_contents->begin(),m_contents->end());
343343
}
344344

345-
inline core::SRange<core::smart_refctd_ptr<IAsset>> getContents()
346-
{
347-
return core::SRange<core::smart_refctd_ptr<IAsset>>(m_contents->begin(),m_contents->end());
348-
}
349-
350345
//! Whether this asset bundle is in a cache and should be removed from cache to destroy
351346
inline bool isInAResourceCache() const { return m_isCached; }
352347

include/nbl/asset/ICPUPipelineLayout.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@ class ICPUPipelineLayout : public IAsset, public IPipelineLayout<ICPUDescriptorS
3434
void setDescriptorSetLayout(uint32_t _set, core::smart_refctd_ptr<ICPUDescriptorSetLayout>&& _dslayout)
3535
{
3636
assert(!isImmutable_debug());
37-
m_descSetLayouts[_set] = std::move(_dslayout);
37+
assert(_set < DESCRIPTOR_SET_COUNT);
38+
m_descSetLayouts[_set] = std::move(_dslayout);
39+
}
40+
41+
void setPushConstantRanges(core::smart_refctd_dynamic_array<SPushConstantRange>&& _ranges)
42+
{
43+
assert(!isImmutable_debug());
44+
m_pushConstantRanges = std::move(_ranges);
3845
}
3946

4047
core::smart_refctd_ptr<IAsset> clone(uint32_t _depth = ~0u) const override

include/nbl/asset/IVirtualTexture.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,11 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa
319319
assert(_pgTabSzxy_log2<=MAX_PAGE_TABLE_EXTENT_LOG2);//otherwise STextureData encoding falls apart
320320
assert(_pgTabLayers<=MAX_PAGE_TABLE_LAYERS);
321321

322+
_pgTabLayers = std::max(_pgTabLayers, 1u);
323+
322324
const uint32_t pgTabSzxy = 1u<<_pgTabSzxy_log2;
323325
typename image_t::SCreationParams params;
324-
params.arrayLayers = std::max(_pgTabLayers, 1u); // page table must always be present
326+
params.arrayLayers = _pgTabLayers; // page table must always be present
325327
params.extent = {pgTabSzxy,pgTabSzxy,1u};
326328
params.format = EF_R16G16_UINT;
327329
params.mipLevels = std::max<int32_t>(static_cast<int32_t>(_maxAllocatableTexSz_log2-_pgSzxy_log2+1u), 1);

include/nbl/asset/ShaderRes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,14 @@ struct SShaderMemoryBlock
108108

109109
struct SMember
110110
{
111-
//! count==1 implies not array
112111
union {
113112
uint32_t count;
114113
uint32_t count_specID;
115114
};
116115
bool countIsSpecConstant;
117116
uint32_t offset;
118117
uint32_t size;
118+
//! relevant only in case of array types
119119
uint32_t arrayStride;
120120
//! mtxStride==0 implies not matrix
121121
uint32_t mtxStride;
@@ -131,6 +131,8 @@ struct SShaderMemoryBlock
131131
size_t count;
132132
} members;
133133
std::string name;
134+
135+
bool isArray() const { return countIsSpecConstant || count > 1u; }
134136
};
135137

136138
SMember::SMembers members;

include/nbl/builtin/glsl/format/constants.glsl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
#define nbl_glsl_RGB19E7_EXP_BIAS 63
99
#define nbl_glsl_MAX_RGB19E7_EXP (nbl_glsl_RGB19E7_EXP_BIAS+1)
1010

11-
#define MAX_RGB19E7_MANTISSA_VALUES (0x1<<nbl_glsl_RGB19E7_MANTISSA_BITS)
12-
#define MAX_RGB19E7_MANTISSA (MAX_RGB19E7_MANTISSA_VALUES-1)
13-
#define nbl_glsl_MAX_RGB19E7 float(MAX_RGB19E7_MANTISSA)/float(MAX_RGB19E7_MANTISSA_VALUES)*exp2(float(nbl_glsl_MAX_RGB19E7_EXP-nbl_glsl_RGB19E7_MANTISSA_BITS))
11+
#define nbl_glsl_MAX_RGB19E7_MANTISSA_VALUES (0x1<<nbl_glsl_RGB19E7_MANTISSA_BITS)
12+
#define nbl_glsl_MAX_RGB19E7_MANTISSA (nbl_glsl_MAX_RGB19E7_MANTISSA_VALUES-1)
13+
#define nbl_glsl_MAX_RGB19E7 float(nbl_glsl_MAX_RGB19E7_MANTISSA)/float(nbl_glsl_MAX_RGB19E7_MANTISSA_VALUES)*exp2(float(nbl_glsl_MAX_RGB19E7_EXP))
1414

1515
#define nbl_glsl_RGB19E7_COMPONENT_INDICES ivec4(0,0,1,1)
1616
#define nbl_glsl_RGB19E7_COMPONENT_BITOFFSETS ivec4(0,nbl_glsl_RGB19E7_MANTISSA_BITS,(2*nbl_glsl_RGB19E7_MANTISSA_BITS)&31,(3*nbl_glsl_RGB19E7_MANTISSA_BITS)&31)

include/nbl/builtin/glsl/format/encode.glsl

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@ uvec2 nbl_glsl_encodeRGB19E7(in vec3 col)
99
const float maxrgb = max(max(clamped.r,clamped.g),clamped.b);
1010

1111
const int f32_exp = ((floatBitsToInt(maxrgb)>>23) & 0xff) - 127;
12-
const int shared_exp = clamp(f32_exp,-nbl_glsl_RGB19E7_EXP_BIAS-1,nbl_glsl_MAX_RGB19E7_EXP) + 1;
13-
14-
const uvec3 mantissas = uvec3(clamped*exp2(nbl_glsl_RGB19E7_MANTISSA_BITS-shared_exp) + 0.5);
12+
int shared_exp = clamp(f32_exp,-nbl_glsl_RGB19E7_EXP_BIAS-1,nbl_glsl_MAX_RGB19E7_EXP) + 1;
13+
14+
float scale = exp2(nbl_glsl_RGB19E7_MANTISSA_BITS - shared_exp);
15+
const uint maxm = uint(maxrgb*scale + 0.5);
16+
const bool need = (maxm == nbl_glsl_MAX_RGB19E7_MANTISSA_VALUES);
17+
scale = need ? 0.5*scale : scale;
18+
shared_exp = need ? (shared_exp+1) : shared_exp;
19+
const uvec3 mantissas = uvec3(clamped*scale + 0.5);
1520

1621
uvec2 encoded;
1722
encoded.x = bitfieldInsert(mantissas.x,mantissas.y,nbl_glsl_RGB19E7_COMPONENT_BITOFFSETS[1],nbl_glsl_RGB19E7_G_COMPONENT_SPLIT);

0 commit comments

Comments
 (0)