8
8
namespace nbl ::video
9
9
{
10
10
11
- class NBL_API2 CComputeBlit : public core::IReferenceCounted
11
+ class CComputeBlit : public core ::IReferenceCounted
12
12
{
13
13
public:
14
// Default push constant range declared by CComputeBlit: visible to the compute stage only,
// starts at offset 0 and is exactly as large as one `hlsl::blit::parameters2_t`.
+ constexpr static inline asset::SPushConstantRange DefaultPushConstantRange = {
15
+ .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
16
+ .offset = 0ull ,
17
+ .size = sizeof (hlsl::blit::parameters2_t )
18
+ };
19
// Single-element span viewing `DefaultPushConstantRange`, for APIs that take a range of push constant ranges.
+ constexpr static inline std::span<const asset::SPushConstantRange> DefaultPushConstantRanges = {&DefaultPushConstantRange,1 };
20
+
14
21
// Coverage adjustment needs alpha to be stored in HDR with high precision
15
22
static inline asset::E_FORMAT getCoverageAdjustmentIntermediateFormat (const asset::E_FORMAT format)
16
23
{
@@ -41,7 +48,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
41
48
}
42
49
43
50
// ctor
44
- CComputeBlit (
51
+ NBL_API2 CComputeBlit (
45
52
core::smart_refctd_ptr<ILogicalDevice>&& logicalDevice,
46
53
core::smart_refctd_ptr<asset::IShaderCompiler::CCache>&& cache=nullptr ,
47
54
core::smart_refctd_ptr<system::ILogger>&& logger=nullptr
@@ -52,6 +59,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
52
59
{
53
60
core::smart_refctd_ptr<IGPUComputePipeline> blit;
54
61
core::smart_refctd_ptr<IGPUComputePipeline> coverage;
62
+ uint16_t workgroupSize;
55
63
};
56
64
struct SPipelinesCreateInfo
57
65
{
@@ -67,13 +75,13 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
67
75
hlsl::SBindingInfo samplers;
68
76
// must be Storage Image descriptor type
69
77
hlsl::SBindingInfo outputs;
70
- // ! If you set the balues too small, we'll correct them ourselves anyway
78
+ // ! If you set the values too small, we'll correct them ourselves anyway; a default value of 0 means we guess and provide our own defaults
71
79
// needs to be at least as big as the maximum subgroup size
72
- uint32_t workgroupSizeLog2 : 4 = 0 ;
73
- //
74
- uint32_t sharedMemoryPerInvocation : 6 = 0 ;
80
+ uint16_t workgroupSizeLog2 : 4 = 0 ;
81
+ // in bytes, needs to be at least enough to store two full input pixels per invocation
82
+ uint16_t sharedMemoryPerInvocation : 6 = 0 ;
75
83
};
76
- SPipelines createAndCachePipelines (const SPipelinesCreateInfo& info);
84
+ NBL_API2 SPipelines createAndCachePipelines (const SPipelinesCreateInfo& info);
77
85
78
86
// ! Returns the original format if it supports STORAGE_IMAGE, otherwise returns a format in its compat class which supports STORAGE_IMAGE.
79
87
inline asset::E_FORMAT getOutputViewFormat (const asset::E_FORMAT format)
@@ -99,101 +107,38 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
99
107
}
100
108
}
101
109
102
- #if 0
103
- // @param `alphaBinCount` is only required to size the histogram present in the default nbl_glsl_blit_AlphaStatistics_t in default_compute_common.comp
104
- core::smart_refctd_ptr<video::IGPUShader> createAlphaTestSpecializedShader(const asset::IImage::E_TYPE inImageType, const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
105
-
106
- core::smart_refctd_ptr<video::IGPUComputePipeline> getAlphaTestPipeline(const uint32_t alphaBinCount, const asset::IImage::E_TYPE imageType)
107
- {
108
- const auto workgroupDims = getDefaultWorkgroupDims(imageType);
109
- const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
110
-
111
- assert(paddedAlphaBinCount >= asset::IBlitUtilities::MinAlphaBinCount);
112
- const auto pipelineIndex = (paddedAlphaBinCount / asset::IBlitUtilities::MinAlphaBinCount) - 1;
113
-
114
- if (m_alphaTestPipelines[pipelineIndex][imageType])
115
- return m_alphaTestPipelines[pipelineIndex][imageType];
116
-
117
- auto specShader = createAlphaTestSpecializedShader(imageType, paddedAlphaBinCount);
118
- IGPUComputePipeline::SCreationParams creationParams;
119
- creationParams.shader.shader = specShader.get();
120
- creationParams.shader.entryPoint = "main";
121
- creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
122
- assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_alphaTestPipelines[pipelineIndex][imageType]));
123
-
124
- return m_alphaTestPipelines[pipelineIndex][imageType];
125
- }
126
-
127
- // @param `outFormat` dictates encoding.
128
- core::smart_refctd_ptr<video::IGPUShader> createNormalizationSpecializedShader(const asset::IImage::E_TYPE inImageType, const asset::E_FORMAT outFormat,
129
- const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount);
130
-
131
- core::smart_refctd_ptr<video::IGPUComputePipeline> getNormalizationPipeline(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
132
- const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
110
+ // Use the return values of `getOutputViewFormat` and `getCoverageAdjustmentIntermediateFormat` for this
111
+ static inline uint32_t getAlphaBinCount (const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
133
112
{
134
- const auto workgroupDims = getDefaultWorkgroupDims(imageType);
135
- const uint32_t paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
136
- const SNormalizationCacheKey key = { imageType, paddedAlphaBinCount, outFormat };
137
-
138
- if (m_normalizationPipelines.find(key) == m_normalizationPipelines.end())
113
+ uint16_t baseBucketCount;
114
+ using format_t = nbl::asset::E_FORMAT;
115
+ switch (intermediateAlpha)
139
116
{
140
- auto specShader = createNormalizationSpecializedShader(imageType, outFormat, paddedAlphaBinCount);
141
- IGPUComputePipeline::SCreationParams creationParams;
142
- creationParams.shader.shader = specShader.get();
143
- creationParams.shader.entryPoint = "main";
144
- creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
145
- assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_normalizationPipelines[key]));
117
+ case format_t ::EF_R8_UNORM: [[fallthrough]];
118
+ case format_t ::EF_R8_SNORM:
119
+ baseBucketCount = 256 ;
120
+ break ;
121
+ case format_t ::EF_R16_SFLOAT:
122
+ baseBucketCount = 512 ;
123
+ break ;
124
+ case format_t ::EF_R16_UNORM: [[fallthrough]];
125
+ case format_t ::EF_R16_SNORM: [[fallthrough]];
126
+ baseBucketCount = 1024 ;
127
+ break ;
128
+ case format_t ::EF_R32_SFLOAT:
129
+ baseBucketCount = 2048 ;
130
+ break ;
131
+ default :
132
+ return 0 ;
146
133
}
147
-
148
- return m_normalizationPipelines[key];
134
+ // the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135
+ constexpr auto singlePixelStorage = 4 *sizeof (hlsl::float32_t );
136
+ constexpr auto ratio = singlePixelStorage/sizeof (uint16_t );
137
+ const auto paddedAlphaBinCount = core::min (core::roundUp (baseBucketCount,workgroupSize),workgroupSize*ratio);
138
+ return paddedAlphaBinCount*layersToBlit;
149
139
}
150
140
151
- template <typename BlitUtilities>
152
- core::smart_refctd_ptr<video::IGPUComputePipeline> getBlitPipeline(
153
- const asset::E_FORMAT outFormat,
154
- const asset::IImage::E_TYPE imageType,
155
- const core::vectorSIMDu32& inExtent,
156
- const core::vectorSIMDu32& outExtent,
157
- const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
158
- const typename BlitUtilities::convolution_kernels_t& kernels,
159
- const uint32_t workgroupSize = 256,
160
- const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
161
- {
162
- const auto paddedAlphaBinCount = getPaddedAlphaBinCount(core::vectorSIMDu32(workgroupSize, 1, 1, 1), alphaBinCount);
163
-
164
- const SBlitCacheKey key =
165
- {
166
- .wgSize = workgroupSize,
167
- .imageType = imageType,
168
- .alphaBinCount = paddedAlphaBinCount,
169
- .outFormat = outFormat,
170
- .smemSize = m_availableSharedMemory,
171
- .coverageAdjustment = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
172
- };
173
-
174
- if (m_blitPipelines.find(key) == m_blitPipelines.end())
175
- {
176
- const auto blitType = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) ? EBT_COVERAGE_ADJUSTMENT : EBT_REGULAR;
177
-
178
- auto specShader = createBlitSpecializedShader<BlitUtilities>(
179
- outFormat,
180
- imageType,
181
- inExtent,
182
- outExtent,
183
- alphaSemantic,
184
- kernels,
185
- workgroupSize,
186
- paddedAlphaBinCount);
187
-
188
- IGPUComputePipeline::SCreationParams creationParams;
189
- creationParams.shader.shader = specShader.get();
190
- creationParams.shader.entryPoint = "main";
191
- creationParams.layout = m_blitPipelineLayout[blitType].get();
192
- m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_blitPipelines[key]);
193
- }
194
-
195
- return m_blitPipelines[key];
196
- }
141
+ #if 0
197
142
198
143
//! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
199
144
//! @param outImageFormat is the format of output (of the blit step) image.
@@ -368,152 +313,10 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
368
313
outDispatchInfo.wgCount[2] = workgroupCount[2];
369
314
}
370
315
371
- static inline core::vectorSIMDu32 getDefaultWorkgroupDims(const asset::IImage::E_TYPE imageType)
372
- {
373
- switch (imageType)
374
- {
375
- case asset::IImage::ET_1D:
376
- return core::vectorSIMDu32(256, 1, 1, 1);
377
- case asset::IImage::ET_2D:
378
- return core::vectorSIMDu32(16, 16, 1, 1);
379
- case asset::IImage::ET_3D:
380
- return core::vectorSIMDu32(8, 8, 4, 1);
381
- default:
382
- return core::vectorSIMDu32(1, 1, 1, 1);
383
- }
384
- }
385
-
386
- static inline size_t getCoverageAdjustmentScratchSize(const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount, const uint32_t layersToBlit)
387
- {
388
- if (alphaSemantic != asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
389
- return 0;
390
-
391
- const auto workgroupDims = getDefaultWorkgroupDims(imageType);
392
- const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
393
- const auto requiredSize = (sizeof(uint32_t) + paddedAlphaBinCount * sizeof(uint32_t)) * layersToBlit;
394
- return requiredSize;
395
- }
396
-
397
- bool updateDescriptorSet(
398
- video::IGPUDescriptorSet* blitDS,
399
- video::IGPUDescriptorSet* kernelWeightsDS,
400
- core::smart_refctd_ptr<video::IGPUImageView> inImageView,
401
- core::smart_refctd_ptr<video::IGPUImageView> outImageView,
402
- core::smart_refctd_ptr<video::IGPUBuffer> coverageAdjustmentScratchBuffer,
403
- core::smart_refctd_ptr<video::IGPUBufferView> kernelWeightsUTB,
404
- const asset::ISampler::E_TEXTURE_CLAMP wrapU = asset::ISampler::ETC_CLAMP_TO_EDGE,
405
- const asset::ISampler::E_TEXTURE_CLAMP wrapV = asset::ISampler::ETC_CLAMP_TO_EDGE,
406
- const asset::ISampler::E_TEXTURE_CLAMP wrapW = asset::ISampler::ETC_CLAMP_TO_EDGE,
407
- const asset::ISampler::E_TEXTURE_BORDER_COLOR borderColor = asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK)
408
- {
409
- constexpr auto MAX_DESCRIPTOR_COUNT = 3;
410
-
411
- auto updateDS = [this, coverageAdjustmentScratchBuffer](video::IGPUDescriptorSet* ds, video::IGPUDescriptorSet::SDescriptorInfo* infos) -> bool
412
- {
413
- const auto bindingCount = ds->getLayout()->getTotalBindingCount();
414
- if ((bindingCount == 3) && !coverageAdjustmentScratchBuffer)
415
- return false;
416
-
417
- video::IGPUDescriptorSet::SWriteDescriptorSet writes[MAX_DESCRIPTOR_COUNT] = {};
418
-
419
- uint32_t infoIdx = 0;
420
- uint32_t writeCount = 0;
421
- for (uint32_t t = 0; t < static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); ++t)
422
- {
423
- const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
424
- const auto& redirect = ds->getLayout()->getDescriptorRedirect(type);
425
- const auto declaredBindingCount = redirect.getBindingCount();
426
-
427
- for (uint32_t i = 0; i < declaredBindingCount; ++i)
428
- {
429
- auto& write = writes[writeCount++];
430
- write.dstSet = ds;
431
- write.binding = redirect.getBinding(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }).data;
432
- write.arrayElement = 0u;
433
- write.count = redirect.getCount(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i });
434
- write.info = &infos[infoIdx];
435
-
436
- infoIdx += write.count;
437
- }
438
- }
439
- assert(writeCount == bindingCount);
440
- m_device->updateDescriptorSets(writeCount, writes, 0u, nullptr);
441
-
442
- return true;
443
- };
444
-
445
- if (blitDS)
446
- {
447
- if (!inImageView || !outImageView)
448
- return false;
449
-
450
- video::IGPUDescriptorSet::SDescriptorInfo infos[MAX_DESCRIPTOR_COUNT] = {};
451
-
452
- if (!samplers[wrapU][wrapV][wrapW][borderColor])
453
- {
454
- video::IGPUSampler::SParams params = {};
455
- params.TextureWrapU = wrapU;
456
- params.TextureWrapV = wrapV;
457
- params.TextureWrapW = wrapW;
458
- params.BorderColor = borderColor;
459
- params.MinFilter = asset::ISampler::ETF_NEAREST;
460
- params.MaxFilter = asset::ISampler::ETF_NEAREST;
461
- params.MipmapMode = asset::ISampler::ESMM_NEAREST;
462
- params.AnisotropicFilter = 0u;
463
- params.CompareEnable = 0u;
464
- params.CompareFunc = asset::ISampler::ECO_ALWAYS;
465
-
466
- samplers[wrapU][wrapV][wrapW][borderColor] = m_device->createSampler(params);
467
- if (!samplers[wrapU][wrapV][wrapW][borderColor])
468
- return false;
469
- }
470
-
471
- infos[0].desc = inImageView;
472
- infos[0].info.image.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL;
473
- infos[0].info.combinedImageSampler.sampler = samplers[wrapU][wrapV][wrapW][borderColor];
474
-
475
- infos[1].desc = outImageView;
476
- infos[1].info.image.imageLayout = asset::IImage::LAYOUT::GENERAL;
477
- infos[1].info.combinedImageSampler.sampler = nullptr;
478
-
479
- if (coverageAdjustmentScratchBuffer)
480
- {
481
- infos[2].desc = coverageAdjustmentScratchBuffer;
482
- infos[2].info.buffer.offset = 0;
483
- infos[2].info.buffer.size = coverageAdjustmentScratchBuffer->getSize();
484
- }
485
-
486
- if (!updateDS(blitDS, infos))
487
- return false;
488
- }
489
-
490
- if (kernelWeightsDS)
491
- {
492
- video::IGPUDescriptorSet::SDescriptorInfo info = {};
493
- info.desc = kernelWeightsUTB;
494
- info.info.buffer.offset = 0ull;
495
- info.info.buffer.size = kernelWeightsUTB->getUnderlyingBuffer()->getSize();
496
-
497
- if (!updateDS(kernelWeightsDS, &info))
498
- return false;
499
- }
500
-
501
- return true;
502
- }
503
-
504
316
//! User is responsible for the memory barriers between previous writes and the first
505
317
//! dispatch on the input image, and future reads of output image and the last dispatch.
506
318
template <typename BlitUtilities>
507
319
inline void blit(
508
- video::IGPUCommandBuffer* cmdbuf,
509
- const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
510
- video::IGPUDescriptorSet* alphaTestDS,
511
- video::IGPUComputePipeline* alphaTestPipeline,
512
- video::IGPUDescriptorSet* blitDS,
513
- video::IGPUDescriptorSet* blitWeightsDS,
514
- video::IGPUComputePipeline* blitPipeline,
515
- video::IGPUDescriptorSet* normalizationDS,
516
- video::IGPUComputePipeline* normalizationPipeline,
517
320
const core::vectorSIMDu32& inImageExtent,
518
321
const asset::IImage::E_TYPE inImageType,
519
322
const asset::E_FORMAT inImageFormat,
@@ -627,7 +430,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
627
430
}
628
431
629
432
// ! Query shared memory size for a given `outputTexelsPerWG`.
630
- size_t getRequiredSharedMemorySize (
433
+ inline size_t getRequiredSharedMemorySize (
631
434
const core::vectorSIMDu32& outputTexelsPerWG,
632
435
const core::vectorSIMDu32& outExtent,
633
436
const asset::IImage::E_TYPE imageType,
@@ -641,16 +444,6 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted
641
444
const size_t requiredSmem = (core::max (preloadRegion.x * preloadRegion.y * preloadRegion.z , outputTexelsPerWG.x * outputTexelsPerWG.y * preloadRegion.z ) + outputTexelsPerWG.x * preloadRegion.y * preloadRegion.z ) * channelCount * sizeof (float );
642
445
return requiredSmem;
643
446
};
644
-
645
- static inline uint32_t getPaddedAlphaBinCount (const core::vectorSIMDu32& workgroupDims, const uint32_t oldAlphaBinCount)
646
- {
647
- // For the normalization shader, it should be that:
648
- // alphaBinCount = k*workGroupSize, k is integer, k >= 1,
649
- assert (workgroupDims.x != 0 && workgroupDims.y != 0 && workgroupDims.z != 0 );
650
- const auto wgSize = workgroupDims.x * workgroupDims.y * workgroupDims.z ;
651
- const auto paddedAlphaBinCount = core::roundUp (oldAlphaBinCount, wgSize);
652
- return paddedAlphaBinCount;
653
- }
654
447
};
655
448
656
449
}
0 commit comments