Skip to content

Commit 395ac58

Browse files
author
devsh
committed
start using the asset converter to make Blit shaders
1 parent 067e8a3 commit 395ac58

File tree

5 files changed

+188
-331
lines changed

5 files changed

+188
-331
lines changed

include/nbl/asset/IGraphicsPipeline.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ class IGraphicsPipeline : public IPipeline<PipelineLayoutType>, public IGraphics
155155

156156
protected:
157157
explicit IGraphicsPipeline(const SCreationParams& _params) :
158-
IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<PipelineLayoutType>(_params.layout)),
158+
IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<const PipelineLayoutType>(_params.layout)),
159159
m_params(_params.cached), m_renderpass(core::smart_refctd_ptr<renderpass_t>(_params.renderpass)) {}
160160

161161
SCachedCreationParams m_params;

include/nbl/video/utilities/CComputeBlit.h

Lines changed: 43 additions & 250 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,16 @@
88
namespace nbl::video
99
{
1010

11-
class NBL_API2 CComputeBlit : public core::IReferenceCounted
11+
class CComputeBlit : public core::IReferenceCounted
1212
{
1313
public:
14+
constexpr static inline asset::SPushConstantRange DefaultPushConstantRange = {
15+
.stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
16+
.offset = 0ull,
17+
.size = sizeof(hlsl::blit::parameters2_t)
18+
};
19+
constexpr static inline std::span<const asset::SPushConstantRange> DefaultPushConstantRanges = {&DefaultPushConstantRange,1};
20+
1421
// Coverage adjustment needs alpha to be stored in HDR with high precision
1522
static inline asset::E_FORMAT getCoverageAdjustmentIntermediateFormat(const asset::E_FORMAT format)
1623
{
@@ -41,7 +48,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
4148
}
4249

4350
// ctor
44-
CComputeBlit(
51+
NBL_API2 CComputeBlit(
4552
core::smart_refctd_ptr<ILogicalDevice>&& logicalDevice,
4653
core::smart_refctd_ptr<asset::IShaderCompiler::CCache>&& cache=nullptr,
4754
core::smart_refctd_ptr<system::ILogger>&& logger=nullptr
@@ -52,6 +59,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
5259
{
5360
core::smart_refctd_ptr<IGPUComputePipeline> blit;
5461
core::smart_refctd_ptr<IGPUComputePipeline> coverage;
62+
uint16_t workgroupSize;
5563
};
5664
struct SPipelinesCreateInfo
5765
{
@@ -67,13 +75,13 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
6775
hlsl::SBindingInfo samplers;
6876
// must be Storage Image descriptor type
6977
hlsl::SBindingInfo outputs;
70-
//! If you set the balues too small, we'll correct them ourselves anyway
78+
//! If you set the values too small, we'll correct them ourselves anyway; a default value of 0 means we guess and provide our own defaults
7179
// needs to be at least as big as the maximum subgroup size
72-
uint32_t workgroupSizeLog2 : 4 = 0;
73-
//
74-
uint32_t sharedMemoryPerInvocation : 6 = 0;
80+
uint16_t workgroupSizeLog2 : 4 = 0;
81+
// in bytes, needs to be at least enough to store two full input pixels per invocation
82+
uint16_t sharedMemoryPerInvocation : 6 = 0;
7583
};
76-
SPipelines createAndCachePipelines(const SPipelinesCreateInfo& info);
84+
NBL_API2 SPipelines createAndCachePipelines(const SPipelinesCreateInfo& info);
7785

7886
//! Returns the original format if it supports STORAGE_IMAGE, otherwise returns a format in its compat class which supports STORAGE_IMAGE.
7987
inline asset::E_FORMAT getOutputViewFormat(const asset::E_FORMAT format)
@@ -99,101 +107,38 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
99107
}
100108
}
101109

102-
#if 0
103-
// @param `alphaBinCount` is only required to size the histogram present in the default nbl_glsl_blit_AlphaStatistics_t in default_compute_common.comp
104-
core::smart_refctd_ptr<video::IGPUShader> createAlphaTestSpecializedShader(const asset::IImage::E_TYPE inImageType, const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
105-
106-
core::smart_refctd_ptr<video::IGPUComputePipeline> getAlphaTestPipeline(const uint32_t alphaBinCount, const asset::IImage::E_TYPE imageType)
107-
{
108-
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
109-
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
110-
111-
assert(paddedAlphaBinCount >= asset::IBlitUtilities::MinAlphaBinCount);
112-
const auto pipelineIndex = (paddedAlphaBinCount / asset::IBlitUtilities::MinAlphaBinCount) - 1;
113-
114-
if (m_alphaTestPipelines[pipelineIndex][imageType])
115-
return m_alphaTestPipelines[pipelineIndex][imageType];
116-
117-
auto specShader = createAlphaTestSpecializedShader(imageType, paddedAlphaBinCount);
118-
IGPUComputePipeline::SCreationParams creationParams;
119-
creationParams.shader.shader = specShader.get();
120-
creationParams.shader.entryPoint = "main";
121-
creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
122-
assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_alphaTestPipelines[pipelineIndex][imageType]));
123-
124-
return m_alphaTestPipelines[pipelineIndex][imageType];
125-
}
126-
127-
// @param `outFormat` dictates encoding.
128-
core::smart_refctd_ptr<video::IGPUShader> createNormalizationSpecializedShader(const asset::IImage::E_TYPE inImageType, const asset::E_FORMAT outFormat,
129-
const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
130-
131-
core::smart_refctd_ptr<video::IGPUComputePipeline> getNormalizationPipeline(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
132-
const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
110+
// Use the return values of `getOutputViewFormat` and `getCoverageAdjustmentIntermediateFormat` for this
111+
static inline uint32_t getAlphaBinCount(const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
133112
{
134-
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
135-
const uint32_t paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
136-
const SNormalizationCacheKey key = { imageType, paddedAlphaBinCount, outFormat };
137-
138-
if (m_normalizationPipelines.find(key) == m_normalizationPipelines.end())
113+
uint16_t baseBucketCount;
114+
using format_t = nbl::asset::E_FORMAT;
115+
switch (intermediateAlpha)
139116
{
140-
auto specShader = createNormalizationSpecializedShader(imageType, outFormat, paddedAlphaBinCount);
141-
IGPUComputePipeline::SCreationParams creationParams;
142-
creationParams.shader.shader = specShader.get();
143-
creationParams.shader.entryPoint = "main";
144-
creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
145-
assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_normalizationPipelines[key]));
117+
case format_t::EF_R8_UNORM: [[fallthrough]];
118+
case format_t::EF_R8_SNORM:
119+
baseBucketCount = 256;
120+
break;
121+
case format_t::EF_R16_SFLOAT:
122+
baseBucketCount = 512;
123+
break;
124+
case format_t::EF_R16_UNORM: [[fallthrough]];
125+
case format_t::EF_R16_SNORM: // last label of this group: the assignment follows directly, so no [[fallthrough]] (it is only valid immediately before another case/default label)
126+
baseBucketCount = 1024;
127+
break;
128+
case format_t::EF_R32_SFLOAT:
129+
baseBucketCount = 2048;
130+
break;
131+
default:
132+
return 0;
146133
}
147-
148-
return m_normalizationPipelines[key];
134+
// the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135+
constexpr auto singlePixelStorage = 4*sizeof(hlsl::float32_t);
136+
constexpr auto ratio = singlePixelStorage/sizeof(uint16_t);
137+
const auto paddedAlphaBinCount = core::min(core::roundUp(baseBucketCount,workgroupSize),workgroupSize*ratio);
138+
return paddedAlphaBinCount*layersToBlit;
149139
}
150140

151-
template <typename BlitUtilities>
152-
core::smart_refctd_ptr<video::IGPUComputePipeline> getBlitPipeline(
153-
const asset::E_FORMAT outFormat,
154-
const asset::IImage::E_TYPE imageType,
155-
const core::vectorSIMDu32& inExtent,
156-
const core::vectorSIMDu32& outExtent,
157-
const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
158-
const typename BlitUtilities::convolution_kernels_t& kernels,
159-
const uint32_t workgroupSize = 256,
160-
const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
161-
{
162-
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(core::vectorSIMDu32(workgroupSize, 1, 1, 1), alphaBinCount);
163-
164-
const SBlitCacheKey key =
165-
{
166-
.wgSize = workgroupSize,
167-
.imageType = imageType,
168-
.alphaBinCount = paddedAlphaBinCount,
169-
.outFormat = outFormat,
170-
.smemSize = m_availableSharedMemory,
171-
.coverageAdjustment = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
172-
};
173-
174-
if (m_blitPipelines.find(key) == m_blitPipelines.end())
175-
{
176-
const auto blitType = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) ? EBT_COVERAGE_ADJUSTMENT : EBT_REGULAR;
177-
178-
auto specShader = createBlitSpecializedShader<BlitUtilities>(
179-
outFormat,
180-
imageType,
181-
inExtent,
182-
outExtent,
183-
alphaSemantic,
184-
kernels,
185-
workgroupSize,
186-
paddedAlphaBinCount);
187-
188-
IGPUComputePipeline::SCreationParams creationParams;
189-
creationParams.shader.shader = specShader.get();
190-
creationParams.shader.entryPoint = "main";
191-
creationParams.layout = m_blitPipelineLayout[blitType].get();
192-
m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_blitPipelines[key]);
193-
}
194-
195-
return m_blitPipelines[key];
196-
}
141+
#if 0
197142

198143
//! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
199144
//! @param outImageFormat is the format of output (of the blit step) image.
@@ -368,152 +313,10 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
368313
outDispatchInfo.wgCount[2] = workgroupCount[2];
369314
}
370315

371-
static inline core::vectorSIMDu32 getDefaultWorkgroupDims(const asset::IImage::E_TYPE imageType)
372-
{
373-
switch (imageType)
374-
{
375-
case asset::IImage::ET_1D:
376-
return core::vectorSIMDu32(256, 1, 1, 1);
377-
case asset::IImage::ET_2D:
378-
return core::vectorSIMDu32(16, 16, 1, 1);
379-
case asset::IImage::ET_3D:
380-
return core::vectorSIMDu32(8, 8, 4, 1);
381-
default:
382-
return core::vectorSIMDu32(1, 1, 1, 1);
383-
}
384-
}
385-
386-
static inline size_t getCoverageAdjustmentScratchSize(const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount, const uint32_t layersToBlit)
387-
{
388-
if (alphaSemantic != asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
389-
return 0;
390-
391-
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
392-
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
393-
const auto requiredSize = (sizeof(uint32_t) + paddedAlphaBinCount * sizeof(uint32_t)) * layersToBlit;
394-
return requiredSize;
395-
}
396-
397-
bool updateDescriptorSet(
398-
video::IGPUDescriptorSet* blitDS,
399-
video::IGPUDescriptorSet* kernelWeightsDS,
400-
core::smart_refctd_ptr<video::IGPUImageView> inImageView,
401-
core::smart_refctd_ptr<video::IGPUImageView> outImageView,
402-
core::smart_refctd_ptr<video::IGPUBuffer> coverageAdjustmentScratchBuffer,
403-
core::smart_refctd_ptr<video::IGPUBufferView> kernelWeightsUTB,
404-
const asset::ISampler::E_TEXTURE_CLAMP wrapU = asset::ISampler::ETC_CLAMP_TO_EDGE,
405-
const asset::ISampler::E_TEXTURE_CLAMP wrapV = asset::ISampler::ETC_CLAMP_TO_EDGE,
406-
const asset::ISampler::E_TEXTURE_CLAMP wrapW = asset::ISampler::ETC_CLAMP_TO_EDGE,
407-
const asset::ISampler::E_TEXTURE_BORDER_COLOR borderColor = asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK)
408-
{
409-
constexpr auto MAX_DESCRIPTOR_COUNT = 3;
410-
411-
auto updateDS = [this, coverageAdjustmentScratchBuffer](video::IGPUDescriptorSet* ds, video::IGPUDescriptorSet::SDescriptorInfo* infos) -> bool
412-
{
413-
const auto bindingCount = ds->getLayout()->getTotalBindingCount();
414-
if ((bindingCount == 3) && !coverageAdjustmentScratchBuffer)
415-
return false;
416-
417-
video::IGPUDescriptorSet::SWriteDescriptorSet writes[MAX_DESCRIPTOR_COUNT] = {};
418-
419-
uint32_t infoIdx = 0;
420-
uint32_t writeCount = 0;
421-
for (uint32_t t = 0; t < static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); ++t)
422-
{
423-
const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
424-
const auto& redirect = ds->getLayout()->getDescriptorRedirect(type);
425-
const auto declaredBindingCount = redirect.getBindingCount();
426-
427-
for (uint32_t i = 0; i < declaredBindingCount; ++i)
428-
{
429-
auto& write = writes[writeCount++];
430-
write.dstSet = ds;
431-
write.binding = redirect.getBinding(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }).data;
432-
write.arrayElement = 0u;
433-
write.count = redirect.getCount(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i });
434-
write.info = &infos[infoIdx];
435-
436-
infoIdx += write.count;
437-
}
438-
}
439-
assert(writeCount == bindingCount);
440-
m_device->updateDescriptorSets(writeCount, writes, 0u, nullptr);
441-
442-
return true;
443-
};
444-
445-
if (blitDS)
446-
{
447-
if (!inImageView || !outImageView)
448-
return false;
449-
450-
video::IGPUDescriptorSet::SDescriptorInfo infos[MAX_DESCRIPTOR_COUNT] = {};
451-
452-
if (!samplers[wrapU][wrapV][wrapW][borderColor])
453-
{
454-
video::IGPUSampler::SParams params = {};
455-
params.TextureWrapU = wrapU;
456-
params.TextureWrapV = wrapV;
457-
params.TextureWrapW = wrapW;
458-
params.BorderColor = borderColor;
459-
params.MinFilter = asset::ISampler::ETF_NEAREST;
460-
params.MaxFilter = asset::ISampler::ETF_NEAREST;
461-
params.MipmapMode = asset::ISampler::ESMM_NEAREST;
462-
params.AnisotropicFilter = 0u;
463-
params.CompareEnable = 0u;
464-
params.CompareFunc = asset::ISampler::ECO_ALWAYS;
465-
466-
samplers[wrapU][wrapV][wrapW][borderColor] = m_device->createSampler(params);
467-
if (!samplers[wrapU][wrapV][wrapW][borderColor])
468-
return false;
469-
}
470-
471-
infos[0].desc = inImageView;
472-
infos[0].info.image.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL;
473-
infos[0].info.combinedImageSampler.sampler = samplers[wrapU][wrapV][wrapW][borderColor];
474-
475-
infos[1].desc = outImageView;
476-
infos[1].info.image.imageLayout = asset::IImage::LAYOUT::GENERAL;
477-
infos[1].info.combinedImageSampler.sampler = nullptr;
478-
479-
if (coverageAdjustmentScratchBuffer)
480-
{
481-
infos[2].desc = coverageAdjustmentScratchBuffer;
482-
infos[2].info.buffer.offset = 0;
483-
infos[2].info.buffer.size = coverageAdjustmentScratchBuffer->getSize();
484-
}
485-
486-
if (!updateDS(blitDS, infos))
487-
return false;
488-
}
489-
490-
if (kernelWeightsDS)
491-
{
492-
video::IGPUDescriptorSet::SDescriptorInfo info = {};
493-
info.desc = kernelWeightsUTB;
494-
info.info.buffer.offset = 0ull;
495-
info.info.buffer.size = kernelWeightsUTB->getUnderlyingBuffer()->getSize();
496-
497-
if (!updateDS(kernelWeightsDS, &info))
498-
return false;
499-
}
500-
501-
return true;
502-
}
503-
504316
//! User is responsible for the memory barriers between previous writes and the first
505317
//! dispatch on the input image, and future reads of output image and the last dispatch.
506318
template <typename BlitUtilities>
507319
inline void blit(
508-
video::IGPUCommandBuffer* cmdbuf,
509-
const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
510-
video::IGPUDescriptorSet* alphaTestDS,
511-
video::IGPUComputePipeline* alphaTestPipeline,
512-
video::IGPUDescriptorSet* blitDS,
513-
video::IGPUDescriptorSet* blitWeightsDS,
514-
video::IGPUComputePipeline* blitPipeline,
515-
video::IGPUDescriptorSet* normalizationDS,
516-
video::IGPUComputePipeline* normalizationPipeline,
517320
const core::vectorSIMDu32& inImageExtent,
518321
const asset::IImage::E_TYPE inImageType,
519322
const asset::E_FORMAT inImageFormat,
@@ -627,7 +430,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
627430
}
628431

629432
//! Query shared memory size for a given `outputTexelsPerWG`.
630-
size_t getRequiredSharedMemorySize(
433+
inline size_t getRequiredSharedMemorySize(
631434
const core::vectorSIMDu32& outputTexelsPerWG,
632435
const core::vectorSIMDu32& outExtent,
633436
const asset::IImage::E_TYPE imageType,
@@ -641,16 +444,6 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
641444
const size_t requiredSmem = (core::max(preloadRegion.x * preloadRegion.y * preloadRegion.z, outputTexelsPerWG.x * outputTexelsPerWG.y * preloadRegion.z) + outputTexelsPerWG.x * preloadRegion.y * preloadRegion.z) * channelCount * sizeof(float);
642445
return requiredSmem;
643446
};
644-
645-
static inline uint32_t getPaddedAlphaBinCount(const core::vectorSIMDu32& workgroupDims, const uint32_t oldAlphaBinCount)
646-
{
647-
// For the normalization shader, it should be that:
648-
// alphaBinCount = k*workGroupSize, k is integer, k >= 1,
649-
assert(workgroupDims.x != 0 && workgroupDims.y != 0 && workgroupDims.z != 0);
650-
const auto wgSize = workgroupDims.x * workgroupDims.y * workgroupDims.z;
651-
const auto paddedAlphaBinCount = core::roundUp(oldAlphaBinCount, wgSize);
652-
return paddedAlphaBinCount;
653-
}
654447
};
655448

656449
}

src/nbl/builtin/CMakeLists.txt

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -207,24 +207,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/com
207207
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/push_constants_struct_common.h")
208208
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/depth_pyramid_generator_impl.glsl")
209209
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/virtual_work_group.glsl")
210-
# blit
211-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/formats_encode.glsl")
212-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/parameters.glsl")
213-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/multi_dimensional_array_addressing.glsl")
214-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_common.comp")
215-
216-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_blit.comp")
217-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/blit/blit.glsl")
218-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/blit/descriptors.glsl")
219-
220-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_alpha_test.comp")
221-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/alpha_test/alpha_test.glsl")
222-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/alpha_test/descriptors.glsl")
223-
224-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_normalization.comp")
225-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/normalization.glsl")
226-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/descriptors.glsl")
227-
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_normalization.glsl")
228210

229211
# HLSL
230212
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h")

0 commit comments

Comments
 (0)