@@ -5,6 +5,79 @@ using namespace nbl::system;
5
5
using namespace nbl ::asset;
6
6
using namespace nbl ::video;
7
7
8
+
9
+ CComputeBlit::CComputeBlit (smart_refctd_ptr<ILogicalDevice>&& logicalDevice, smart_refctd_ptr<IShaderCompiler::CCache>&& cache, smart_refctd_ptr<ILogger>&& logger) : m_device(std::move(logicalDevice)), m_logger(nullptr )
10
+ {
11
+ if (logger)
12
+ m_logger = std::move (logger);
13
+ else if (auto debugCb=m_device->getPhysicalDevice ()->getDebugCallback (); debugCb->getLogger ())
14
+ m_logger = smart_refctd_ptr<system::ILogger>(debugCb->getLogger ());
15
+
16
+ if (cache)
17
+ m_shaderCache = std::move (cache);
18
+ else
19
+ m_shaderCache = make_smart_refctd_ptr<IShaderCompiler::CCache>();
20
+ }
21
+
22
+ void CComputeBlit::createAndCachePipelines (CAssetConverter* converter, smart_refctd_ptr<IGPUComputePipeline>* pipelines, const std::span<const STask> tasks)
23
+ {
24
+ core::vector<smart_refctd_ptr<ICPUComputePipeline>> cpuPplns;
25
+ cpuPplns.reserve (tasks.size ());
26
+
27
+ const auto & limits = m_device->getPhysicalDevice ()->getLimits ();
28
+ for (auto task : tasks)
29
+ {
30
+ // adjust task default values
31
+ {
32
+ if (task.workgroupSizeLog2 <limits.maxSubgroupSize )
33
+ task.workgroupSizeLog2 = core::roundDownToPoT (limits.maxComputeWorkGroupInvocations );
34
+ bool useFloat16 = false ;
35
+ uint16_t channels = 4 ;
36
+ using namespace hlsl ::format;
37
+ if (task.outputFormat !=TexelBlockFormat::TBF_UNKNOWN)
38
+ {
39
+ channels = getTraits (task.outputFormat ).Channels ;
40
+ const auto precisionAt1 = getFormatPrecision (static_cast <E_FORMAT>(task.outputFormat ),3 ,1 .f );
41
+ const auto precisionAt0 = getFormatPrecision (static_cast <E_FORMAT>(task.outputFormat ),3 ,0 .f );
42
+ if (limits.workgroupMemoryExplicitLayout16BitAccess && limits.shaderFloat16 && precisionAt1>=std::exp2f (-11 .f ) && precisionAt0>=std::numeric_limits<hlsl::float16_t >::min ())
43
+ useFloat16 = true ;
44
+ }
45
+ // the absolute minimum needed to store a single pixel
46
+ const auto singlePixelStorage = channels*(useFloat16 ? sizeof (hlsl::float16_t ):sizeof (hlsl::float32_t ));
47
+ // also slightly more memory is needed
48
+ task.sharedMemoryPerInvocation = core::max (singlePixelStorage*2 ,task.sharedMemoryPerInvocation );
49
+ }
50
+ // create blit pipeline
51
+ cpuPplns.emplace_back (nullptr );
52
+ // create optional coverage normalization pipeline
53
+ cpuPplns.emplace_back (nullptr );
54
+ }
55
+
56
+ CAssetConverter::SInputs inputs = {};
57
+ inputs.readCache = converter;
58
+ inputs.logger = m_logger.getRaw ();
59
+ std::get<CAssetConverter::SInputs::asset_span_t <ICPUComputePipeline>>(inputs.assets ) = {&cpuPplns.data ()->get (),cpuPplns.size ()};
60
+ inputs.readShaderCache = m_shaderCache.get ();
61
+ inputs.writeShaderCache = m_shaderCache.get ();
62
+ // no pipeline cache, because we only make the same pipeline once, ever
63
+ auto reserveResults = converter->reserve (inputs);
64
+ assert (reserveResults.getRequiredQueueFlags ().value ==IQueue::FAMILY_FLAGS::NONE);
65
+ // copy over the results
66
+ {
67
+ auto rIt = reserveResults.getGPUObjects <ICPUComputePipeline>().data ();
68
+ // TODO: redo
69
+ for (size_t i=0 ; i<tasks.size (); i++)
70
+ *(pipelines++) = (rIt++)->value ;
71
+ }
72
+
73
+ // this just inserts the pipelines into the cache
74
+ {
75
+ CAssetConverter::SConvertParams params = {};
76
+ auto convertResults = reserveResults.convert (params);
77
+ assert (!convertResults.blocking ());
78
+ }
79
+ }
80
+
8
81
#if 0
9
82
core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
10
83
{
@@ -39,21 +112,14 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializ
39
112
"}\n";
40
113
41
114
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader");
42
-
43
- return m_device->createShader(std::move(cpuShader.get()));
44
115
}
45
116
46
- core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
47
- const uint32_t alphaBinCount)
117
+ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
48
118
{
49
119
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
50
120
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
51
121
const uint32_t blitDimCount = static_cast<uint32_t>(imageType) + 1;
52
122
53
- const auto castedFormat = getOutImageViewFormat(outFormat);
54
- assert(outFormat == castedFormat);
55
- const char* formatQualifier = asset::CHLSLCompiler::getStorageImageFormatQualifier(castedFormat);
56
-
57
123
std::ostringstream shaderSourceStream;
58
124
59
125
shaderSourceStream
@@ -67,7 +133,7 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
67
133
"[[vk::binding(0, 0)]]\n"
68
134
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::combined_sampler_t inCS;\n"
69
135
70
- "[[vk::image_format(\"" << formatQualifier << " \")]]\n"
136
+ "[[vk::image_format(\"unknown \")]]\n"
71
137
"[[vk::binding(1, 0)]]\n"
72
138
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::image_t outImg;\n"
73
139
@@ -90,7 +156,5 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
90
156
"}\n";
91
157
92
158
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader");
93
-
94
- return m_device->createShader(std::move(cpuShader.get()));
95
159
}
96
160
#endif
0 commit comments