Skip to content

Commit 2edba89

Browse files
AnastaZIukCrisspl
authored andcommitted
Add derivative normal map filter
1 parent 01de56c commit 2edba89

File tree

1 file changed

+275
-0
lines changed

1 file changed

+275
-0
lines changed
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
2+
// This file is part of the "Nabla Engine".
3+
// For conditions of distribution and use, see copyright notice in nabla.h
4+
5+
#ifndef __NBL_ASSET_C_NORMAL_MAP_TO_DERIVATIVE_FILTER_H_INCLUDED__
6+
#define __NBL_ASSET_C_NORMAL_MAP_TO_DERIVATIVE_FILTER_H_INCLUDED__
7+
8+
#include "nbl/core/core.h"
9+
10+
#include <type_traits>
11+
#include <functional>
12+
13+
#include "nbl/asset/filters/CMatchedSizeInOutImageFilterCommon.h"
14+
#include "CConvertFormatImageFilter.h"
15+
16+
namespace nbl
17+
{
18+
namespace asset
19+
{
20+
21+
class CNormalMapToDerivativeFilterBase
22+
{
23+
public:
24+
class CNormalMapToDerivativeStateBase
25+
{
26+
public:
27+
28+
static inline constexpr size_t decodeTypeByteSize = sizeof(double);
29+
static inline constexpr size_t forcedScratchChannelAmount = 4;
30+
uint8_t* scratchMemory = nullptr; //!< memory covering all regions used for temporary filling within computation of sum values
31+
size_t scratchMemoryByteSize = {}; //!< required byte size for entire scratch memory
32+
33+
static inline size_t getRequiredScratchByteSize(asset::VkExtent3D extent)
34+
{
35+
size_t retval = extent.width * extent.height * extent.depth * decodeTypeByteSize * forcedScratchChannelAmount;
36+
37+
return retval;
38+
}
39+
40+
/*
41+
Layer ID is relative to outBaseLayer in state
42+
*/
43+
44+
const std::array<double, forcedScratchChannelAmount>& getAbsoluteLayerScaleValue(size_t layer)
45+
{
46+
if (!maxAbsLayerScaleValues.empty())
47+
return maxAbsLayerScaleValues[layer];
48+
else
49+
return {};
50+
}
51+
52+
protected:
53+
std::vector<std::array<double, forcedScratchChannelAmount>> maxAbsLayerScaleValues; //!< scales gained by the filter (each layer handled) for derivative map shader usage
54+
};
55+
56+
protected:
57+
CNormalMapToDerivativeFilterBase() {}
58+
virtual ~CNormalMapToDerivativeFilterBase() {}
59+
60+
static inline bool validate(CNormalMapToDerivativeStateBase* state)
61+
{
62+
if (!state)
63+
return false;
64+
65+
return true;
66+
}
67+
};
68+
69+
//! Convert Normal Map to Derivative Normal Map
70+
/*
71+
72+
*/
73+
74+
class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon, public CNormalMapToDerivativeFilterBase
75+
{
76+
public:
77+
virtual ~CNormalMapToDerivativeFilter() {}
78+
79+
class CStateBase : public CMatchedSizeInOutImageFilterCommon::state_type, public CNormalMapToDerivativeFilterBase::CNormalMapToDerivativeStateBase
80+
{
81+
public:
82+
CStateBase() = default;
83+
virtual ~CStateBase() = default;
84+
85+
private:
86+
87+
void resetLayerScaleValues()
88+
{
89+
maxAbsLayerScaleValues.clear();
90+
}
91+
92+
friend class CNormalMapToDerivativeFilter;
93+
};
94+
using state_type = CStateBase; //!< full combined state
95+
96+
static inline bool validate(state_type* state)
97+
{
98+
if (!CMatchedSizeInOutImageFilterCommon::validate(state))
99+
return false;
100+
101+
if (!CNormalMapToDerivativeFilterBase::validate(state))
102+
return false;
103+
104+
const ICPUImage::SCreationParams& inParams = state->inImage->getCreationParameters();
105+
const ICPUImage::SCreationParams& outParams = state->outImage->getCreationParameters();
106+
const auto inFormat = inParams.format;
107+
const auto outFormat = outParams.format;
108+
109+
if (outFormat != asset::EF_R8G8_SNORM)
110+
return false;
111+
112+
if (state->scratchMemoryByteSize < state_type::getRequiredScratchByteSize(state->extent))
113+
return false;
114+
115+
if (asset::getFormatChannelCount(inFormat) < 3 )
116+
return false;
117+
118+
return true;
119+
}
120+
121+
static inline bool execute(state_type* state)
122+
{
123+
if (!validate(state))
124+
return false;
125+
126+
state->resetLayerScaleValues();
127+
128+
auto checkFormat = state->inImage->getCreationParameters().format;
129+
if (isIntegerFormat(checkFormat))
130+
return executeInterprated(state, reinterpret_cast<uint64_t*>(state->scratchMemory));
131+
else
132+
return executeInterprated(state, reinterpret_cast<double*>(state->scratchMemory));
133+
}
134+
135+
private:
136+
137+
template<typename decodeType> //!< double or uint64_t
138+
static inline bool executeInterprated(state_type* state, decodeType* scratchMemory)
139+
{
140+
const asset::E_FORMAT inFormat = state->inImage->getCreationParameters().format;
141+
const auto inTexelByteSize = asset::getTexelOrBlockBytesize(inFormat);
142+
const auto currentChannelCount = asset::getFormatChannelCount(inFormat);
143+
const auto arrayLayers = state->inImage->getCreationParameters().arrayLayers;
144+
static constexpr auto maxChannels = 4u;
145+
146+
#ifdef _NBL_DEBUG
147+
memset(scratchMemory, 0, state->scratchMemoryByteSize);
148+
#endif // _NBL_DEBUG
149+
150+
const core::vector3du32_SIMD scratchByteStrides = TexelBlockInfo(asset::E_FORMAT::EF_R64G64B64A64_SFLOAT).convert3DTexelStridesTo1DByteStrides(state->extentLayerCount);
151+
const auto scratchTexelByteSize = scratchByteStrides[0];
152+
153+
// I wonder if we should let somebody pass through more than 1 layer, though I find it cool
154+
155+
const auto&& [copyInBaseLayer, copyOutBaseLayer, copyLayerCount] = std::make_tuple(state->inBaseLayer, state->outBaseLayer, state->layerCount);
156+
state->layerCount = 1u;
157+
158+
auto resetState = [&]()
159+
{
160+
state->inBaseLayer = copyInBaseLayer;
161+
state->outBaseLayer = copyOutBaseLayer;
162+
state->layerCount = copyLayerCount;
163+
};
164+
165+
for (uint16_t w = 0u; w < copyLayerCount; ++w)
166+
{
167+
std::array<decodeType, maxChannels> maxAbsoluteDecodeValues = {};
168+
169+
{
170+
const uint8_t* inData = reinterpret_cast<const uint8_t*>(state->inImage->getBuffer()->getPointer());
171+
const auto blockDims = asset::getBlockDimensions(state->inImage->getCreationParameters().format);
172+
static constexpr uint8_t maxPlanes = 4;
173+
174+
auto decode = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
175+
{
176+
core::vectorSIMDu32 localOutPos = readBlockPos * blockDims - core::vectorSIMDu32(state->inOffset.x, state->inOffset.y, state->inOffset.z);
177+
178+
auto* inDataAdress = inData + readBlockArrayOffset;
179+
const void* inSourcePixels[maxPlanes] = { inDataAdress, nullptr, nullptr, nullptr };
180+
181+
decodeType decodeBuffer[maxChannels] = {};
182+
for (auto blockY = 0u; blockY < blockDims.y; blockY++)
183+
for (auto blockX = 0u; blockX < blockDims.x; blockX++)
184+
{
185+
asset::decodePixelsRuntime(inFormat, inSourcePixels, decodeBuffer, blockX, blockY);
186+
const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset(core::vector3du32_SIMD(localOutPos.x + blockX, localOutPos.y + blockY, localOutPos.z), scratchByteStrides);
187+
memcpy(reinterpret_cast<uint8_t*>(scratchMemory) + offset, decodeBuffer, scratchTexelByteSize);
188+
}
189+
};
190+
191+
IImage::SSubresourceLayers subresource = { static_cast<IImage::E_ASPECT_FLAGS>(0u), state->inMipLevel, state->inBaseLayer, 1 };
192+
CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { state->inOffset,state->extent };
193+
CBasicImageFilterCommon::clip_region_functor_t clipFunctor(subresource, range, inFormat);
194+
195+
auto& inRegions = state->inImage->getRegions(state->inMipLevel);
196+
CBasicImageFilterCommon::executePerRegion(state->inImage, decode, inRegions.begin(), inRegions.end(), clipFunctor);
197+
}
198+
199+
{
200+
auto getScratchPixel = [&](core::vector4di32_SIMD readBlockPos) -> decodeType*
201+
{
202+
const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset(core::vector3du32_SIMD(readBlockPos.x, readBlockPos.y, readBlockPos.z, 0), scratchByteStrides);
203+
return reinterpret_cast<decodeType*>(reinterpret_cast<uint8_t*>(scratchMemory) + scratchOffset);
204+
};
205+
206+
auto computeDerivativeTexel = [&](core::vectorSIMDi32 readBlockPos) -> void
207+
{
208+
decodeType* current = getScratchPixel(readBlockPos);
209+
auto& [x, y, z, a] = std::make_tuple(*current, *(current + 1), *(current + 2), *(current + 3));
210+
211+
std::for_each(current, current + currentChannelCount,
212+
[&](const decodeType& itrValue)
213+
{
214+
uint8_t offset = &itrValue - current;
215+
const decodeType absoluteValue = core::abs(itrValue);
216+
217+
if (maxAbsoluteDecodeValues[offset] < absoluteValue)
218+
maxAbsoluteDecodeValues[offset] = absoluteValue;
219+
}
220+
);
221+
222+
x = -x / z;
223+
y = -y / z;
224+
};
225+
226+
{
227+
core::vector3du32_SIMD localCoord;
228+
for (auto& z = localCoord[2] = 0u; z < state->extent.depth; ++z)
229+
for (auto& y = localCoord[1] = 0u; y < state->extent.height; ++y)
230+
for (auto& x = localCoord[0] = 0u; x < state->extent.width; ++x)
231+
computeDerivativeTexel(core::vectorSIMDu32(x, y, z));
232+
}
233+
234+
auto& maxAbsLayerScaleValues = state->maxAbsLayerScaleValues.emplace_back();
235+
for (auto& absLayerScaleValue : maxAbsLayerScaleValues)
236+
absLayerScaleValue = maxAbsoluteDecodeValues[&absLayerScaleValue - &maxAbsLayerScaleValues[0]];
237+
238+
// what about normalize, should it be done like SAT ?
239+
240+
{
241+
uint8_t* outData = reinterpret_cast<uint8_t*>(state->outImage->getBuffer()->getPointer());
242+
243+
auto encode = [&](uint32_t writeBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
244+
{
245+
// encoding format cannot be block compressed so in this case block==texel
246+
auto localOutPos = readBlockPos - core::vectorSIMDu32(state->outOffset.x, state->outOffset.y, state->outOffset.z, readBlockPos.w); // force 0 on .w compoment to obtain valid offset
247+
uint8_t* outDataAdress = outData + writeBlockArrayOffset;
248+
249+
const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset(localOutPos, scratchByteStrides);
250+
auto* data = reinterpret_cast<uint8_t*>(scratchMemory) + offset;
251+
asset::encodePixels<asset::EF_R8G8_SNORM, double>(outDataAdress, reinterpret_cast<double*>(data)); // overrrides texels, so region-overlapping case is fine
252+
};
253+
254+
IImage::SSubresourceLayers subresource = { static_cast<IImage::E_ASPECT_FLAGS>(0u), state->outMipLevel, state->outBaseLayer, 1 };
255+
CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { state->outOffset,state->extent };
256+
CBasicImageFilterCommon::clip_region_functor_t clipFunctor(subresource, range, asset::EF_R8G8_SNORM);
257+
258+
auto& outRegions = state->outImage->getRegions(state->outMipLevel);
259+
CBasicImageFilterCommon::executePerRegion(state->outImage, encode, outRegions.begin(), outRegions.end(), clipFunctor);
260+
}
261+
}
262+
263+
++state->inBaseLayer;
264+
++state->outBaseLayer;
265+
}
266+
267+
resetState();
268+
return true;
269+
}
270+
};
271+
272+
} // end namespace asset
273+
} // end namespace nbl
274+
275+
#endif // __NBL_ASSET_C_NORMAL_MAP_TO_DERIVATIVE_FILTER_H_INCLUDED__

0 commit comments

Comments
 (0)