Skip to content

Commit 58f97aa

Browse files
use workgroup reductions for LumaMeter
1 parent 1bf8bc2 commit 58f97aa

File tree

3 files changed

+88
-98
lines changed

3 files changed

+88
-98
lines changed

examples_tests/23.Autoexposure/main.cpp

Lines changed: 81 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -38,69 +38,69 @@ int main()
3838
device->setEventReceiver(&receiver);
3939

4040
IVideoDriver* driver = device->getVideoDriver();
41-
42-
nbl::io::IFileSystem* filesystem = device->getFileSystem();
43-
IAssetManager* am = device->getAssetManager();
44-
45-
IAssetLoader::SAssetLoadParams lp;
46-
auto imageBundle = am->getAsset("../../media/noises/spp_benchmark_4k_512.exr", lp);
47-
48-
E_FORMAT inFormat;
49-
constexpr auto outFormat = EF_R8G8B8A8_SRGB;
50-
smart_refctd_ptr<IGPUImage> outImg;
51-
smart_refctd_ptr<IGPUImageView> imgToTonemapView,outImgView;
52-
{
53-
auto cpuImg = IAsset::castDown<ICPUImage>(imageBundle.getContents().begin()[0]);
54-
IGPUImage::SCreationParams imgInfo = cpuImg->getCreationParameters();
55-
inFormat = imgInfo.format;
56-
57-
auto gpuImages = driver->getGPUObjectsFromAssets(&cpuImg.get(),&cpuImg.get()+1);
58-
auto gpuImage = gpuImages->operator[](0u);
59-
60-
IGPUImageView::SCreationParams imgViewInfo;
61-
imgViewInfo.flags = static_cast<IGPUImageView::E_CREATE_FLAGS>(0u);
62-
imgViewInfo.image = std::move(gpuImage);
63-
imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY;
64-
imgViewInfo.format = inFormat;
65-
imgViewInfo.subresourceRange.aspectMask = static_cast<IImage::E_ASPECT_FLAGS>(0u);
66-
imgViewInfo.subresourceRange.baseMipLevel = 0;
67-
imgViewInfo.subresourceRange.levelCount = 1;
68-
imgViewInfo.subresourceRange.baseArrayLayer = 0;
69-
imgViewInfo.subresourceRange.layerCount = 1;
70-
imgToTonemapView = driver->createGPUImageView(IGPUImageView::SCreationParams(imgViewInfo));
71-
72-
imgInfo.format = outFormat;
73-
outImg = driver->createDeviceLocalGPUImageOnDedMem(std::move(imgInfo));
74-
75-
imgViewInfo.image = outImg;
76-
imgViewInfo.format = outFormat;
77-
outImgView = driver->createGPUImageView(IGPUImageView::SCreationParams(imgViewInfo));
78-
}
79-
80-
auto glslCompiler = am->getGLSLCompiler();
81-
const auto inputColorSpace = std::make_tuple(inFormat,ECP_SRGB,EOTF_IDENTITY);
82-
83-
using LumaMeterClass = ext::LumaMeter::CLumaMeter;
84-
constexpr auto MeterMode = LumaMeterClass::EMM_MEDIAN;
85-
const float minLuma = 1.f/2048.f;
86-
const float maxLuma = 65536.f;
87-
41+
42+
nbl::io::IFileSystem* filesystem = device->getFileSystem();
43+
IAssetManager* am = device->getAssetManager();
44+
45+
IAssetLoader::SAssetLoadParams lp;
46+
auto imageBundle = am->getAsset("../../media/noises/spp_benchmark_4k_512.exr", lp);
47+
48+
E_FORMAT inFormat;
49+
constexpr auto outFormat = EF_R8G8B8A8_SRGB;
50+
smart_refctd_ptr<IGPUImage> outImg;
51+
smart_refctd_ptr<IGPUImageView> imgToTonemapView,outImgView;
52+
{
53+
auto cpuImg = IAsset::castDown<ICPUImage>(imageBundle.getContents().begin()[0]);
54+
IGPUImage::SCreationParams imgInfo = cpuImg->getCreationParameters();
55+
inFormat = imgInfo.format;
56+
57+
auto gpuImages = driver->getGPUObjectsFromAssets(&cpuImg.get(),&cpuImg.get()+1);
58+
auto gpuImage = gpuImages->operator[](0u);
59+
60+
IGPUImageView::SCreationParams imgViewInfo;
61+
imgViewInfo.flags = static_cast<IGPUImageView::E_CREATE_FLAGS>(0u);
62+
imgViewInfo.image = std::move(gpuImage);
63+
imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY;
64+
imgViewInfo.format = inFormat;
65+
imgViewInfo.subresourceRange.aspectMask = static_cast<IImage::E_ASPECT_FLAGS>(0u);
66+
imgViewInfo.subresourceRange.baseMipLevel = 0;
67+
imgViewInfo.subresourceRange.levelCount = 1;
68+
imgViewInfo.subresourceRange.baseArrayLayer = 0;
69+
imgViewInfo.subresourceRange.layerCount = 1;
70+
imgToTonemapView = driver->createGPUImageView(IGPUImageView::SCreationParams(imgViewInfo));
71+
72+
imgInfo.format = outFormat;
73+
outImg = driver->createDeviceLocalGPUImageOnDedMem(std::move(imgInfo));
74+
75+
imgViewInfo.image = outImg;
76+
imgViewInfo.format = outFormat;
77+
outImgView = driver->createGPUImageView(IGPUImageView::SCreationParams(imgViewInfo));
78+
}
79+
80+
auto glslCompiler = am->getGLSLCompiler();
81+
const auto inputColorSpace = std::make_tuple(inFormat,ECP_SRGB,EOTF_IDENTITY);
82+
83+
using LumaMeterClass = ext::LumaMeter::CLumaMeter;
84+
constexpr auto MeterMode = LumaMeterClass::EMM_GEOM_MEAN;
85+
const float minLuma = 1.f/2048.f;
86+
const float maxLuma = 65536.f;
87+
8888
auto cpuLumaMeasureSpecializedShader = LumaMeterClass::createShader(glslCompiler,inputColorSpace,MeterMode,minLuma,maxLuma);
8989
auto gpuLumaMeasureShader = driver->createGPUShader(smart_refctd_ptr<const ICPUShader>(cpuLumaMeasureSpecializedShader->getUnspecialized()));
9090
auto gpuLumaMeasureSpecializedShader = driver->createGPUSpecializedShader(gpuLumaMeasureShader.get(), cpuLumaMeasureSpecializedShader->getSpecializationInfo());
9191

9292
const float meteringMinUV[2] = { 0.1f,0.1f };
9393
const float meteringMaxUV[2] = { 0.9f,0.9f };
9494
LumaMeterClass::Uniforms_t<MeterMode> uniforms;
95-
auto lumaDispatchInfo = LumaMeterClass::buildParameters(uniforms, outImg->getCreationParameters().extent, meteringMinUV, meteringMaxUV);
95+
auto lumaDispatchInfo = LumaMeterClass::buildParameters(uniforms, outImg->getCreationParameters().extent, meteringMinUV, meteringMaxUV);
96+
97+
auto uniformBuffer = driver->createFilledDeviceLocalGPUBufferOnDedMem(sizeof(uniforms),&uniforms);
98+
9699

97-
auto uniformBuffer = driver->createFilledDeviceLocalGPUBufferOnDedMem(sizeof(uniforms),&uniforms);
98-
99-
100100
using ToneMapperClass = ext::ToneMapper::CToneMapper;
101-
constexpr auto TMO = ToneMapperClass::EO_ACES;
102-
constexpr bool usingLumaMeter = MeterMode<LumaMeterClass::EMM_COUNT;
103-
constexpr bool usingTemporalAdapatation = true;
101+
constexpr auto TMO = ToneMapperClass::EO_ACES;
102+
constexpr bool usingLumaMeter = MeterMode<LumaMeterClass::EMM_COUNT;
103+
constexpr bool usingTemporalAdapatation = true;
104104

105105
auto cpuTonemappingSpecializedShader = ToneMapperClass::createShader(am->getGLSLCompiler(),
106106
inputColorSpace,
@@ -115,30 +115,30 @@ int main()
115115
auto parameterBuffer = driver->createDeviceLocalGPUBufferOnDedMem(ToneMapperClass::getParameterBufferSize<TMO,MeterMode>());
116116
constexpr float Exposure = 0.f;
117117
constexpr float Key = 0.18;
118-
auto params = ToneMapperClass::Params_t<TMO>(Exposure, Key, 0.85f);
119-
{
120-
params.setAdaptationFactorFromFrameDelta(0.f);
118+
auto params = ToneMapperClass::Params_t<TMO>(Exposure, Key, 0.85f);
119+
{
120+
params.setAdaptationFactorFromFrameDelta(0.f);
121121
driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(),0u,sizeof(params),&params);
122122
}
123-
124-
auto commonPipelineLayout = ToneMapperClass::getDefaultPipelineLayout(driver,usingLumaMeter);
125-
126-
auto lumaMeteringPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuLumaMeasureSpecializedShader));
127-
auto toneMappingPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuTonemappingSpecializedShader));
123+
124+
auto commonPipelineLayout = ToneMapperClass::getDefaultPipelineLayout(driver,usingLumaMeter);
125+
126+
auto lumaMeteringPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuLumaMeasureSpecializedShader));
127+
auto toneMappingPipeline = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuTonemappingSpecializedShader));
128128

129129
auto commonDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(commonPipelineLayout->getDescriptorSetLayout(0u)));
130-
ToneMapperClass::updateDescriptorSet<TMO,MeterMode>(driver,commonDescriptorSet.get(),parameterBuffer,imgToTonemapView,outImgStorage,1u,2u,usingLumaMeter ? 3u:0u,uniformBuffer,0u,usingTemporalAdapatation);
130+
ToneMapperClass::updateDescriptorSet<TMO,MeterMode>(driver,commonDescriptorSet.get(),parameterBuffer,imgToTonemapView,outImgStorage,1u,2u,usingLumaMeter ? 3u:0u,uniformBuffer,0u,usingTemporalAdapatation);
131131

132132

133133
constexpr auto dynOffsetArrayLen = usingLumaMeter ? 2u : 1u;
134134

135-
auto lumaDynamicOffsetArray = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<uint32_t> >(dynOffsetArrayLen,0u);
135+
auto lumaDynamicOffsetArray = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<uint32_t> >(dynOffsetArrayLen,0u);
136136
lumaDynamicOffsetArray->back() = sizeof(ToneMapperClass::Params_t<TMO>);
137137

138-
auto toneDynamicOffsetArray = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<uint32_t> >(dynOffsetArrayLen,0u);
139-
140-
141-
auto blitFBO = driver->addFrameBuffer();
138+
auto toneDynamicOffsetArray = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<uint32_t> >(dynOffsetArrayLen,0u);
139+
140+
141+
auto blitFBO = driver->addFrameBuffer();
142142
blitFBO->attach(video::EFAP_COLOR_ATTACHMENT0, std::move(outImgView));
143143

144144
uint32_t outBufferIx = 0u;
@@ -155,21 +155,21 @@ int main()
155155
driver->bindComputePipeline(toneMappingPipeline.get());
156156
driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&toneDynamicOffsetArray);
157157
ToneMapperClass::dispatchHelper(driver,outImgStorage.get(),true);
158-
158+
159159
driver->blitRenderTargets(blitFBO, nullptr, false, false);
160160

161-
driver->endScene();
162-
if (usingTemporalAdapatation)
163-
{
164-
auto thisPresentStamp = std::chrono::high_resolution_clock::now();
165-
auto microsecondsElapsedBetweenPresents = std::chrono::duration_cast<std::chrono::microseconds>(thisPresentStamp-lastPresentStamp);
166-
lastPresentStamp = thisPresentStamp;
167-
168-
params.setAdaptationFactorFromFrameDelta(float(microsecondsElapsedBetweenPresents.count())/1000000.f);
169-
// dont override shader output
170-
constexpr auto offsetPastLumaHistory = offsetof(decltype(params),lastFrameExtraEVAsHalf)+sizeof(decltype(params)::lastFrameExtraEVAsHalf);
171-
auto* paramPtr = reinterpret_cast<const uint8_t*>(&params);
172-
driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(), offsetPastLumaHistory, sizeof(params)-offsetPastLumaHistory, paramPtr+offsetPastLumaHistory);
161+
driver->endScene();
162+
if (usingTemporalAdapatation)
163+
{
164+
auto thisPresentStamp = std::chrono::high_resolution_clock::now();
165+
auto microsecondsElapsedBetweenPresents = std::chrono::duration_cast<std::chrono::microseconds>(thisPresentStamp-lastPresentStamp);
166+
lastPresentStamp = thisPresentStamp;
167+
168+
params.setAdaptationFactorFromFrameDelta(float(microsecondsElapsedBetweenPresents.count())/1000000.f);
169+
// don't overwrite the shader's output: only upload the bytes located past `lastFrameExtraEVAsHalf`
170+
constexpr auto offsetPastLumaHistory = offsetof(decltype(params),lastFrameExtraEVAsHalf)+sizeof(decltype(params)::lastFrameExtraEVAsHalf);
171+
auto* paramPtr = reinterpret_cast<const uint8_t*>(&params);
172+
driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(), offsetPastLumaHistory, sizeof(params)-offsetPastLumaHistory, paramPtr+offsetPastLumaHistory);
173173
}
174174
}
175175

include/nbl/builtin/glsl/ext/LumaMeter/common.glsl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ struct nbl_glsl_ext_LumaMeter_Uniforms_t
6464
};
6565
#elif _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_GEOM_MEAN
6666
#ifdef _NBL_GLSL_EXT_LUMA_METER_FIRST_PASS_DEFINED_
67-
#define _NBL_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ NBL_GLSL_EVAL(_NBL_GLSL_WORKGROUP_SIZE_)
67+
#include "nbl/builtin/glsl/workgroup/shared_arithmetic.glsl"
68+
#define _NBL_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ NBL_GLSL_EVAL(_NBL_GLSL_WORKGROUP_ARITHMETIC_SHARED_SIZE_NEEDED_)
6869
#else
6970
#define _NBL_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ 0
7071
#endif

include/nbl/builtin/glsl/ext/LumaMeter/impl.glsl

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ float nbl_glsl_ext_LumaMeter_local_process(in bool wgExecutionMask, in vec3 colo
7272
return scaledLogLuma;
7373
}
7474

75+
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_GEOM_MEAN
76+
#include "nbl/builtin/glsl/workgroup/arithmetic.glsl"
77+
#endif
78+
7579
nbl_glsl_ext_LumaMeter_WriteOutValue_t nbl_glsl_ext_LumaMeter_workgroup_process(in bool wgExecutionMask, in float scaledLogLuma)
7680
{
7781
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
@@ -81,22 +85,7 @@ nbl_glsl_ext_LumaMeter_WriteOutValue_t nbl_glsl_ext_LumaMeter_workgroup_process(
8185
writeOutVal += _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex+i*_NBL_GLSL_EXT_LUMA_METER_PADDED_BIN_COUNT];
8286
return writeOutVal;
8387
#elif _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_GEOM_MEAN
84-
// TODO: use nbl_glsl_workgroupAdd reduction
85-
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = wgExecutionMask ? floatBitsToUint(scaledLogLuma):0u;
86-
for (int i=NBL_GLSL_WORKGROUP_SIZE_>>1; i>1; i>>=1)
87-
{
88-
barrier();
89-
if (gl_LocalInvocationIndex<i)
90-
{
91-
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = floatBitsToUint
92-
(
93-
uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex])+
94-
uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex+i])
95-
);
96-
}
97-
}
98-
barrier();
99-
return uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[0])+uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[1]);
88+
return nbl_glsl_workgroupAdd(wgExecutionMask ? scaledLogLuma:0.f);
10089
#endif
10190
}
10291

0 commit comments

Comments
 (0)