Devsh-Graphics-Programming
diff --git a/‎include/nbl/asset/IDescriptorSet.h
Lines changed: 1 addition & 0 deletions b/‎include/nbl/asset/IDescriptorSet.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎include/nbl/asset/IDescriptorSetLayout.h
Lines changed: 2 additions & 0 deletions b/‎include/nbl/asset/IDescriptorSetLayout.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎include/nbl/asset/IImage.h
Lines changed: 3 additions & 2 deletions b/‎include/nbl/asset/IImage.h
Lines changed: 3 additions & 2 deletions
diff --git a/‎include/nbl/asset/filters/CBlitImageFilter.h
Lines changed: 256 additions & 184 deletions b/‎include/nbl/asset/filters/CBlitImageFilter.h
Lines changed: 256 additions & 184 deletions
diff --git a/‎include/nbl/asset/filters/CBlitUtilities.h
Lines changed: 196 additions & 0 deletions b/‎include/nbl/asset/filters/CBlitUtilities.h
Lines changed: 196 additions & 0 deletions
diff --git a/‎include/nbl/asset/filters/CMipMapGenerationImageFilter.h
Lines changed: 2 additions & 0 deletions b/‎include/nbl/asset/filters/CMipMapGenerationImageFilter.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎include/nbl/asset/filters/kernels/kernels.h
Lines changed: 0 additions & 30 deletions b/‎include/nbl/asset/filters/kernels/kernels.h
Lines changed: 0 additions & 30 deletions
diff --git a/‎include/nbl/asset/utils/ICPUVirtualTexture.h
Lines changed: 5 additions & 0 deletions b/‎include/nbl/asset/utils/ICPUVirtualTexture.h
Lines changed: 5 additions & 0 deletions
diff --git a/‎include/nbl/asset/utils/IGLSLCompiler.h
Lines changed: 58 additions & 0 deletions b/‎include/nbl/asset/utils/IGLSLCompiler.h
Lines changed: 58 additions & 0 deletions
diff --git a/‎include/nbl/builtin/glsl/blit/alpha_test/alpha_test.glsl
Lines changed: 33 additions & 0 deletions b/‎include/nbl/builtin/glsl/blit/alpha_test/alpha_test.glsl
Lines changed: 33 additions & 0 deletions
@@ -51,6 +51,7 @@ class NBL_API IDescriptorSet : public virtual core::IReferenceCounted
                 };
                 struct SImageInfo
                 {
+					// This will be ignored if the DS layout already has an immutable sampler specified for the binding.
                     core::smart_refctd_ptr<typename layout_t::sampler_type> sampler;
                     //! Irrelevant in OpenGL backend
                     E_IMAGE_LAYOUT imageLayout;
 
@@ -87,6 +87,8 @@ class NBL_API IDescriptorSetLayout : public virtual core::IReferenceCounted
 			E_DESCRIPTOR_TYPE type;
 			uint32_t count;
 			IShader::E_SHADER_STAGE stageFlags;
+			// Use this if you want an immutable sampler that is baked into the DS layout itself.
+			// If it is `nullptr` then the sampler used is mutable and can be specified when writing the image descriptor to a binding while updating the DS.
 			const core::smart_refctd_ptr<sampler_type>* samplers;
 
 			bool operator<(const SBinding& rhs) const
 
@@ -104,9 +104,10 @@ class NBL_API IImage : public IDescriptor
 		};
 		enum E_TYPE : uint32_t
 		{
-			ET_1D,
+			ET_1D = 0,
 			ET_2D,
-			ET_3D
+			ET_3D,
+			ET_COUNT
 		};
 		enum E_SAMPLE_COUNT_FLAGS : uint32_t
 		{
 
@@ -0,0 +1,196 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef __NBL_ASSET_C_BLIT_UTILITIES_H_INCLUDED__
+#define __NBL_ASSET_C_BLIT_UTILITIES_H_INCLUDED__
+
+#include "nbl/asset/filters/kernels/kernels.h"
+
+namespace nbl::asset
+{
+class IBlitUtilities
+{
+public:
+	static constexpr uint32_t MinAlphaBinCount = 256u;
+	static constexpr uint32_t MaxAlphaBinCount = 4096u;
+	static constexpr uint32_t DefaultAlphaBinCount = MinAlphaBinCount;
+
+	enum E_ALPHA_SEMANTIC : uint32_t
+	{
+		EAS_NONE_OR_PREMULTIPLIED = 0u, // just filter the channels independently (also works for a texture for blending equation `dstCol*(1-srcAlpha)+srcCol`)
+		EAS_REFERENCE_OR_COVERAGE, // try to preserve coverage (percentage of pixels above a threshold value) across mipmap levels
+		EAS_SEPARATE_BLEND, // compute a new alpha value for a texture to be used with the blending equation `mix(dstCol,srcCol,srcAlpha)`
+		EAS_COUNT
+	};
+
+	//! Computes the number of kernel phases along each axis when resampling from `inExtent` to `outExtent`.
+	//! @param inExtent extent of the input image
+	//! @param outExtent extent of the output image
+	//! @param inImageType only axes `0..inImageType` (X for ET_1D, X and Y for ET_2D, X, Y and Z for ET_3D) are computed
+	//! @return per-axis `outExtent/gcd(inExtent,outExtent)`; components of axes that were not computed stay 0
+	static inline core::vectorSIMDu32 getPhaseCount(const core::vectorSIMDu32& inExtent, const core::vectorSIMDu32& outExtent, const asset::IImage::E_TYPE inImageType)
+	{
+		core::vectorSIMDu32 result(0u);
+		for (uint32_t i = 0u; i <= inImageType; ++i)
+		{
+			// `std::gcd(0,0)` is 0, dividing by it would be undefined behaviour,
+			// so an axis with both extents empty reports 0 phases (callers treat a 0 phase count as invalid)
+			const auto divisor = std::gcd(inExtent[i], outExtent[i]);
+			if (divisor != 0u)
+				result[i] = outExtent[i] / divisor;
+		}
+		return result;
+	}
+
+	// The kernel support has to stay relative to the output image while being expressed in the input image coordinate system
+	// (a support of 3 pixels must cover 3 output texels, but is measured in input texels),
+	// hence the kernel gets wrapped in a `CScaledImageFilterKernel` with a per-axis scale of `inExtent/outExtent`.
+	template<class Kernel>
+	static inline auto constructScaledKernel(const Kernel& kernel, const core::vectorSIMDu32& inExtent, const core::vectorSIMDu32& outExtent)
+	{
+		const auto inOverOut = core::vectorSIMDf(inExtent).preciseDivision(core::vectorSIMDf(outExtent));
+		return CScaledImageFilterKernel<Kernel>(inOverOut, kernel);
+	}
+};
+
+template <class KernelX = CBoxImageFilterKernel, class KernelY = KernelX, class KernelZ = KernelX>
+class CBlitUtilities : public IBlitUtilities
+{
+	static_assert(std::is_same<typename KernelX::value_type, typename KernelY::value_type>::value&& std::is_same<typename KernelZ::value_type, typename KernelY::value_type>::value, "Kernel value_type need to be identical");
+
+public:
+	_NBL_STATIC_INLINE_CONSTEXPR auto MaxChannels = std::max<decltype(KernelX::MaxChannels)>(std::max<decltype(KernelX::MaxChannels)>(KernelX::MaxChannels, KernelY::MaxChannels), KernelZ::MaxChannels);
+
+	//! Returns the size in bytes of the phased LUT of scaled kernel weights for the given resampling.
+	//! For every axis the LUT holds `phaseCount*windowSize` weights with `MaxChannels` values of `lut_value_type` each,
+	//! axes above `inImageType` contribute nothing because `getPhaseCount` leaves their phase count at 0.
+	//! @tparam lut_value_type type a single weight is stored as
+	template <typename lut_value_type = typename KernelX::value_type>
+	static inline size_t getScaledKernelPhasedLUTSize(const core::vectorSIMDu32& inExtent, const core::vectorSIMDu32& outExtent, const asset::IImage::E_TYPE inImageType,
+		const KernelX& kernelX, const KernelY& kernelY, const KernelZ& kernelZ)
+	{
+		const auto scaledKernelX = constructScaledKernel(kernelX, inExtent, outExtent);
+		const auto scaledKernelY = constructScaledKernel(kernelY, inExtent, outExtent);
+		const auto scaledKernelZ = constructScaledKernel(kernelZ, inExtent, outExtent);
+
+		const auto phaseCount = getPhaseCount(inExtent, outExtent, inImageType);
+
+		return ((phaseCount[0] * scaledKernelX.getWindowSize().x) + (phaseCount[1] * scaledKernelY.getWindowSize().y) + (phaseCount[2] * scaledKernelZ.getWindowSize().z)) * sizeof(lut_value_type) * MaxChannels;
+	}
+
+	//! Fills `outKernelWeights` with the phased LUT of scaled kernel weights, one axis after another.
+	//! For every axis used by `inImageType` and every phase of that axis, `windowSize` kernel evaluations are stored,
+	//! each as `MaxChannels` values of `lut_value_type` (`uint16_t` means the weight is stored as a compressed half float).
+	//! The data of an axis starts at the byte offset given by `getScaledKernelPhasedLUTAxisOffsets`.
+	//! @param outKernelWeights destination memory, presumably sized with `getScaledKernelPhasedLUTSize` - TODO confirm with callers
+	//! @return false if `outKernelWeights` is `nullptr` or any used axis has a phase count of 0, true otherwise
+	template <typename lut_value_type = typename KernelX::value_type>
+	static bool computeScaledKernelPhasedLUT(void* outKernelWeights, const core::vectorSIMDu32& inExtent, const core::vectorSIMDu32& outExtent, const asset::IImage::E_TYPE inImageType,
+		const KernelX& kernelX, const KernelY& kernelY, const KernelZ& kernelZ)
+	{
+		// there is nowhere to write the weights to
+		if (!outKernelWeights)
+			return false;
+
+		const core::vectorSIMDu32 phaseCount = getPhaseCount(inExtent, outExtent, inImageType);
+
+		// every axis the image type makes use of needs at least one phase
+		for (uint32_t i = 0u; i <= inImageType; ++i)
+		{
+			if (phaseCount[i] == 0)
+				return false;
+		}
+
+		const auto scaledKernelX = constructScaledKernel(kernelX, inExtent, outExtent);
+		const auto scaledKernelY = constructScaledKernel(kernelY, inExtent, outExtent);
+		const auto scaledKernelZ = constructScaledKernel(kernelZ, inExtent, outExtent);
+
+		const auto windowDims = getRealWindowSize(inImageType, scaledKernelX, scaledKernelY, scaledKernelZ);
+		const auto axisOffsets = getScaledKernelPhasedLUTAxisOffsets<lut_value_type>(phaseCount, windowDims);
+
+		const core::vectorSIMDf fInExtent(inExtent);
+		const core::vectorSIMDf fOutExtent(outExtent);
+		const auto fScale = fInExtent.preciseDivision(fOutExtent);
+
+		// a dummy load functor
+		// it does nothing but fill up the `windowSample` with 1s (identity) so we can preserve the value of kernel
+		// weights when eventually `windowSample` gets multiplied by them later in
+		// `CFloatingPointSeparableImageFilterKernelBase<CRTP>::sample_functor_t<PreFilter,PostFilter>::operator()`
+		// this exists only because `evaluateImpl` expects a pre filtering step.
+		auto dummyLoad = [](double* windowSample, const core::vectorSIMDf&, const core::vectorSIMDi32&, const IImageFilterKernel::UserData*) -> void
+		{
+			for (auto h = 0; h < MaxChannels; h++)
+				windowSample[h] = 1.0;
+		};
+
+		double kernelWeight[MaxChannels];
+		// actually used to put values in the LUT
+		auto dummyEvaluate = [&kernelWeight](const double* windowSample, const core::vectorSIMDf&, const core::vectorSIMDi32&, const IImageFilterKernel::UserData*) -> void
+		{
+			for (auto h = 0; h < MaxChannels; h++)
+				kernelWeight[h] = windowSample[h];
+		};
+
+		auto computeForAxis = [&](const asset::IImage::E_TYPE axis, const auto& scaledKernel)
+		{
+			// axes the image type does not have get no LUT entries
+			if (axis > inImageType)
+				return;
+
+			const auto windowSize = scaledKernel.getWindowSize()[axis];
+
+			// start from the inverse of the in/out scale and fold in the scale factor the unscaled kernel of this axis carries (if any)
+			IImageFilterKernel::ScaleFactorUserData scale(1.f / fScale[axis]);
+			const IImageFilterKernel::ScaleFactorUserData* otherScale = nullptr;
+			switch (axis)
+			{
+			case IImage::ET_1D:
+				otherScale = IImageFilterKernel::ScaleFactorUserData::cast(kernelX.getUserData());
+				break;
+			case IImage::ET_2D:
+				otherScale = IImageFilterKernel::ScaleFactorUserData::cast(kernelY.getUserData());
+				break;
+			case IImage::ET_3D:
+				otherScale = IImageFilterKernel::ScaleFactorUserData::cast(kernelZ.getUserData());
+				break;
+			}
+			if (otherScale)
+			{
+				for (auto k = 0; k < MaxChannels; k++)
+					scale.factor[k] *= otherScale->factor[k];
+			}
+
+			// this axis' part of the LUT begins `axisOffsets[axis]` bytes into the output
+			lut_value_type* outKernelWeightsPixel = reinterpret_cast<lut_value_type*>(reinterpret_cast<uint8_t*>(outKernelWeights) + axisOffsets[axis]);
+			for (uint32_t i = 0u; i < phaseCount[axis]; ++i)
+			{
+				core::vectorSIMDf tmp(0.f);
+				tmp[axis] = float(i) + 0.5f;
+
+				// NOTE(review): `tmp` is passed as the second argument and read right after, it looks like `getWindowMinCoord` writes to it - confirm against the kernel API
+				const int32_t windowCoord = scaledKernel.getWindowMinCoord(tmp * fScale, tmp)[axis];
+
+				float relativePos = tmp[axis] - float(windowCoord); // relative position of the last pixel in window from current (ith) output pixel having a unique phase sequence of kernel evaluation points
+
+				for (int32_t j = 0; j < windowSize; ++j)
+				{
+					// the evaluation position is always handed over in the x component, regardless of the axis
+					core::vectorSIMDf evalPos(relativePos, 0.f, 0.f);
+					scaledKernel.evaluateImpl(dummyLoad, dummyEvaluate, kernelWeight, evalPos, core::vectorSIMDi32(), &scale);
+					for (uint32_t ch = 0; ch < MaxChannels; ++ch)
+					{
+						if constexpr (std::is_same_v<lut_value_type, uint16_t>)
+							outKernelWeightsPixel[(i * windowSize + j) * MaxChannels + ch] = core::Float16Compressor::compress(float(kernelWeight[ch]));
+						else
+							outKernelWeightsPixel[(i * windowSize + j) * MaxChannels + ch] = lut_value_type(kernelWeight[ch]);
+
+					}
+					relativePos -= 1.f;
+				}
+			}
+		};
+
+		computeForAxis(asset::IImage::ET_1D, scaledKernelX);
+		computeForAxis(asset::IImage::ET_2D, scaledKernelY);
+		computeForAxis(asset::IImage::ET_3D, scaledKernelZ);
+
+		return true;
+	}
+
+	//! Gathers the window size of each scaled kernel along its own axis (X from `kernelX`, Y from `kernelY`, Z from `kernelZ`),
+	//! components of axes which `inImageType` does not have are left at 0.
+	static inline core::vectorSIMDi32 getRealWindowSize(const IImage::E_TYPE inImageType,
+		const CScaledImageFilterKernel<KernelX>& kernelX,
+		const CScaledImageFilterKernel<KernelY>& kernelY,
+		const CScaledImageFilterKernel<KernelZ>& kernelZ)
+	{
+		// the X axis is present for every image type
+		core::vectorSIMDi32 windowSize(kernelX.getWindowSize().x, 0, 0, 0);
+		if (inImageType < IImage::ET_2D)
+			return windowSize;
+
+		windowSize.y = kernelY.getWindowSize().y;
+		if (inImageType < IImage::ET_3D)
+			return windowSize;
+
+		windowSize.z = kernelZ.getWindowSize().z;
+		return windowSize;
+	}
+
+	//! Returns, for each axis, the byte offset into the phased LUT at which the weights of that axis start.
+	//! X starts at 0, Y follows the `phaseCount[0]*real_window_size.x` entries of X, and Z follows the entries of X and Y,
+	//! an entry being `MaxChannels` values of `lut_value_type`.
+	//! @tparam lut_value_type type a single weight is stored as
+	template <typename lut_value_type = typename KernelX::value_type>
+	static inline core::vectorSIMDu32 getScaledKernelPhasedLUTAxisOffsets(const core::vectorSIMDu32& phaseCount, const core::vectorSIMDi32& real_window_size)
+	{
+		core::vectorSIMDu32 result;
+		result.x = 0u;
+		result.y = (phaseCount[0] * real_window_size.x);
+		result.z = ((phaseCount[0] * real_window_size.x) + (phaseCount[1] * real_window_size.y));
+		// offsets so far are counted in entries, convert them to bytes
+		return result * sizeof(lut_value_type) * MaxChannels;
+	}
+};
+}
+
+#endif
@@ -122,6 +122,8 @@ class NBL_API CMipMapGenerationImageFilter : public CImageFilter<CMipMapGenerati
 			//not all kernels are default-constructible, this is going to be a problem (i already added appropriate ctor for blit filter state class though)
 			//blit.kernel = Kernel(); // gets default constructed, we should probably do a `static_assert` about this property
 			static_cast<state_base_t&>(blit) = *static_cast<const state_base_t*>(state);
+
+			pseudo_base_t::blit_utils_t::computeScaledKernelPhasedLUT(blit.scratchMemory + pseudo_base_t::getScratchOffset(&blit, pseudo_base_t::ESU_SCALED_KERNEL_PHASED_LUT), blit.inExtentLayerCount, blit.outExtentLayerCount, blit.inImage->getCreationParameters().type, blit.kernelX, blit.kernelY, blit.kernelZ);
 			return blit;
 		}
 };
 
@@ -13,36 +13,6 @@ namespace nbl
 {
 namespace asset
 {
-	
-/*
-// caches weights, also should we call it Polyphase?
-template<class Kernel>
-class CMultiphaseKernel : public CImageFilterKernel<CMultiphaseKernel<Kernel> >, private Kernel
-{
-	public:
-		_NBL_STATIC_INLINE_CONSTEXPR bool is_separable = Kernel;
-
-		CMultiphaseKernel(Kernel&& k) : Kernel(std::move(k)
-		{
-		}
-		
-	protected:
-		static inline core::vectorSIMDu32 computePhases(const core::vectorSIMDu32& from, const core::vectorSIMDu32& to)
-		{
-			assert(!(to>from).any()); // Convolution Kernel cannot be used for upscaling!
-			return from/core::gcd(to,from);
-		}
-		static inline uint32_t computePhaseStorage(const core::vectorSIMDu32& from, const core::vectorSIMDu32& to)
-		{
-			auto phases = computePhases(from,to);
-			auto samplesInSupports = ceil();
-			if constexpr(is_separable)
-			{
-
-			}
-		}
-};
-*/
 
 // to be inline this function relies on any kernel's `create_sample_functor_t` being defined
 template<class CRTP, typename value_type>
 
@@ -145,6 +145,11 @@ class NBL_API ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, I
         blit.scratchMemoryByteSize = blit_filter_t::getRequiredScratchByteSize(&blit);
         blit.scratchMemory = reinterpret_cast<uint8_t*>(_NBL_ALIGNED_MALLOC(blit.scratchMemoryByteSize, _NBL_SIMD_ALIGNMENT));
 
+        const core::vectorSIMDu32 inExtent(blit.inExtent.width, blit.inExtent.height, blit.inExtent.depth, 1);
+        const core::vectorSIMDu32 outExtent(blit.outExtent.width, blit.outExtent.height, blit.outExtent.depth, 1);
+        // precompute the scaled kernel phased LUT into its slot of the scratch memory before the blit is executed
+        if (!blit_filter_t::blit_utils_t::computeScaledKernelPhasedLUT(blit.scratchMemory + blit_filter_t::getScratchOffset(&blit, blit_filter_t::ESU_SCALED_KERNEL_PHASED_LUT), inExtent, outExtent, blit.inImage->getCreationParameters().type, blit.kernelX, blit.kernelY, blit.kernelZ))
+        {
+            // release the scratch memory allocated just above, it would leak on this early exit otherwise
+            _NBL_ALIGNED_FREE(blit.scratchMemory);
+            return nullptr;
+        }
+
         const bool blit_succeeded = blit_filter_t::execute(&blit);
         _NBL_ALIGNED_FREE(blit.scratchMemory);
         if (!blit_succeeded)
 
@@ -193,6 +193,64 @@ class NBL_API IGLSLCompiler final : public core::IReferenceCounted
 			return nbl::core::make_smart_refctd_ptr<ICPUShader>(std::move(outBuffer), IShader::buffer_contains_glsl_t{}, original->getStage(), std::string(original->getFilepathHint()));
 		}
 
+		//! Gives the GLSL image format layout qualifier (e.g. "rgba8") matching `format`.
+		//! Formats without an entry below trigger an `assert` and yield an empty string.
+		static inline const char* getStorageImageFormatQualifier(const asset::E_FORMAT format)
+		{
+			switch (format)
+			{
+			// floating point formats
+			case asset::EF_R32G32B32A32_SFLOAT: return "rgba32f";
+			case asset::EF_R16G16B16A16_SFLOAT: return "rgba16f";
+			case asset::EF_R32G32_SFLOAT: return "rg32f";
+			case asset::EF_R16G16_SFLOAT: return "rg16f";
+			case asset::EF_B10G11R11_UFLOAT_PACK32: return "r11f_g11f_b10f";
+			case asset::EF_R32_SFLOAT: return "r32f";
+			case asset::EF_R16_SFLOAT: return "r16f";
+			// unsigned normalized formats
+			case asset::EF_R16G16B16A16_UNORM: return "rgba16";
+			case asset::EF_A2B10G10R10_UNORM_PACK32: return "rgb10_a2";
+			case asset::EF_R8G8B8A8_UNORM: return "rgba8";
+			case asset::EF_R16G16_UNORM: return "rg16";
+			case asset::EF_R8G8_UNORM: return "rg8";
+			case asset::EF_R16_UNORM: return "r16";
+			case asset::EF_R8_UNORM: return "r8";
+			// signed normalized formats
+			case asset::EF_R16G16B16A16_SNORM: return "rgba16_snorm";
+			case asset::EF_R8G8B8A8_SNORM: return "rgba8_snorm";
+			case asset::EF_R16G16_SNORM: return "rg16_snorm";
+			case asset::EF_R8G8_SNORM: return "rg8_snorm";
+			case asset::EF_R16_SNORM: return "r16_snorm";
+			// unsigned integer formats
+			case asset::EF_R8_UINT: return "r8ui";
+			case asset::EF_R16_UINT: return "r16ui";
+			case asset::EF_R32_UINT: return "r32ui";
+			case asset::EF_R32G32_UINT: return "rg32ui";
+			case asset::EF_R32G32B32A32_UINT: return "rgba32ui";
+			// anything else has no qualifier assigned here
+			default:
+				assert(false);
+				return "";
+			}
+		}
+
 	private:
 		core::smart_refctd_ptr<IIncludeHandler> m_inclHandler;
 		system::ISystem* m_system;
 
@@ -0,0 +1,33 @@
+#ifndef _NBL_GLSL_BLIT_ALPHA_TEST_INCLUDED_
+#define _NBL_GLSL_BLIT_ALPHA_TEST_INCLUDED_
+
+#ifndef _NBL_GLSL_BLIT_ALPHA_TEST_MAIN_DEFINED_
+
+#include <nbl/builtin/glsl/blit/parameters.glsl>
+
+#ifndef _NBL_GLSL_BLIT_ALPHA_TEST_PASSED_COUNTER_DESCRIPTOR_DEFINED_
+#error _NBL_GLSL_BLIT_ALPHA_TEST_PASSED_COUNTER_DESCRIPTOR_DEFINED_ must be defined
+#endif
+
+nbl_glsl_blit_parameters_t nbl_glsl_blit_getParameters();
+
+float nbl_glsl_blit_alpha_test_getData(in uvec3 coord, in uint layerIdx);
+
+// Bumps the passed pixel counter of the layer (`gl_WorkGroupID.z`) by one for every invocation
+// lying within the input image dimensions whose alpha is greater than the reference alpha.
+void nbl_glsl_blit_alpha_test_main()
+{
+	const uvec3 inDim = nbl_glsl_blit_parameters_getInputImageDimensions();
+	const nbl_glsl_blit_parameters_t params = nbl_glsl_blit_getParameters();
+
+	// invocations outside of the input image have nothing to test
+	if (any(greaterThanEqual(gl_GlobalInvocationID, inDim)))
+		return;
+
+	const float alpha = nbl_glsl_blit_alpha_test_getData(gl_GlobalInvocationID, gl_WorkGroupID.z);
+	if (alpha > params.referenceAlpha)
+		atomicAdd(_NBL_GLSL_BLIT_ALPHA_TEST_PASSED_COUNTER_DESCRIPTOR_DEFINED_.data[gl_WorkGroupID.z].passedPixelCount, 1u);
+}
+
+#define _NBL_GLSL_BLIT_ALPHA_TEST_MAIN_DEFINED_
+#endif
+
+#endif
+
Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,7 @@ class NBL_API IDescriptorSet : public virtual core::IReferenceCounted`
`51`	`51`	`};`
`52`	`52`	`struct SImageInfo`
`53`	`53`	`{`
	`54`	`+ // This will be ignored if the DS layout already has an immutable sampler specified for the binding.`
`54`	`55`	`core::smart_refctd_ptr<typename layout_t::sampler_type> sampler;`
`55`	`56`	`//! Irrelevant in OpenGL backend`
`56`	`57`	`E_IMAGE_LAYOUT imageLayout;`
Original file line number	Diff line number	Diff line change
`@@ -104,9 +104,10 @@ class NBL_API IImage : public IDescriptor`
`104`	`104`	`};`
`105`	`105`	`enum E_TYPE : uint32_t`
`106`	`106`	`{`
`107`		`- ET_1D,`
	`107`	`+ ET_1D = 0,`
`108`	`108`	`ET_2D,`
`109`		`- ET_3D`
	`109`	`+ ET_3D,`
	`110`	`+ ET_COUNT`
`110`	`111`	`};`
`111`	`112`	`enum E_SAMPLE_COUNT_FLAGS : uint32_t`
`112`	`113`	`{`
Original file line number	Diff line number	Diff line change
`@@ -122,6 +122,8 @@ class NBL_API CMipMapGenerationImageFilter : public CImageFilter<CMipMapGenerati`
`122`	`122`	`//not all kernels are default-constructible, this is going to be a problem (i already added appropriate ctor for blit filter state class though)`
`123`	`123`	//blit.kernel = Kernel(); // gets default constructed, we should probably do a `static_assert` about this property
`124`	`124`	`static_cast<state_base_t&>(blit) = *static_cast<const state_base_t*>(state);`
	`125`	`+`
	`126`	`+ pseudo_base_t::blit_utils_t::computeScaledKernelPhasedLUT(blit.scratchMemory + pseudo_base_t::getScratchOffset(&blit, pseudo_base_t::ESU_SCALED_KERNEL_PHASED_LUT), blit.inExtentLayerCount, blit.outExtentLayerCount, blit.inImage->getCreationParameters().type, blit.kernelX, blit.kernelY, blit.kernelZ);`
`125`	`127`	`return blit;`
`126`	`128`	`}`
`127`	`129`	`};`