Skip to content

Commit 3f03135

Browse files
4d morton code leads to much better batching of triangles
1 parent 7a4e252 commit 3f03135

File tree

2 files changed

+74
-21
lines changed

2 files changed

+74
-21
lines changed

include/nbl/asset/utils/IMeshPacker.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,20 +218,18 @@ class IMeshPacker : public IMeshPackerBase
218218

219219
MortonTriangle(uint16_t fixedPointPos[3], float area)
220220
{
221-
key = core::Float16Compressor::compress(area);
222-
key <<= 48ull;
223-
224-
key |= core::morton3d_encode(fixedPointPos[0], fixedPointPos[1], fixedPointPos[2]);
221+
auto tmp = reinterpret_cast<uint16_t*>(key);
222+
std::copy_n(fixedPointPos,3u,tmp);
223+
tmp[3] = core::Float16Compressor::compress(area);
225224
}
226225

227-
//TODO: maybe investigate morton 4d, where `logRelArea` is "4th" coord
228226
void complete(float maxArea)
229227
{
230-
const float area = core::Float16Compressor::decompress(key >> 48ull);
231-
key &= 0x0000ffffFFFFffffu;
232-
const float scale = -0.5f; // square root
233-
uint64_t logRelArea = uint64_t(65535.5f - core::clamp(scale * std::log2f(area / maxArea), 0.f, 65535.5f));
234-
key |= logRelArea << 48ull;
228+
auto tmp = reinterpret_cast<const uint16_t*>(key);
229+
const float area = core::Float16Compressor::decompress(tmp[3]);
230+
const float scale = 0.5f; // square root
231+
uint16_t logRelArea = uint16_t(65535.5f+core::clamp(scale*std::log2f(area/maxArea),-65535.5f,0.f));
232+
key = core::morton4d_encode(tmp[0],tmp[1],tmp[2],logRelArea);
235233
}
236234

237235
uint64_t key;

include/nbl/core/math/morton.h

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace core
1616
namespace impl
1717
{
1818
template <typename T>
19-
constexpr T morton2d_mask(uint32_t _n)
19+
constexpr T morton2d_mask(uint8_t _n)
2020
{
2121
constexpr uint64_t mask[5] =
2222
{
@@ -28,6 +28,31 @@ namespace impl
2828
};
2929
return static_cast<T>(mask[_n]);
3030
}
31+
// Returns the bit mask applied after the `_n`-th bit-spreading pass of the 3D Morton
// encoding, truncated to `T`. Index 0 is the final (finest) pass, index 4 the first
// (coarsest) one used for 64-bit codes. `_n` must be in [0,4]; it is not range checked.
template <typename T>
constexpr T morton3d_mask(uint8_t _n)
{
    constexpr uint64_t mask[5] =
    {
        0x1249249249249249ull, // after `<< 2`: one bit kept out of every three
        0x10C30C30C30C30C3ull, // after `<< 4`: bit pairs
        // after `<< 8`: nibbles; the topmost kept bit is bit 60 (source bit 52 shifted by 8),
        // so the constant has to start with 0x100F and not 0x010F
        0x100F00F00F00F00Full,
        0x001F0000FF0000FFull, // after `<< 16`: bytes
        0x001F00000000FFFFull  // after `<< 32`: low 16 bits + next 5 bits moved to bit 48
    };
    return static_cast<T>(mask[_n]);
}
44+
// Returns the bit mask applied after the `_n`-th bit-spreading pass of the 4D Morton
// encoding, truncated to `T`. Index 0 belongs to the final pass, index 3 to the first
// pass used for 64-bit codes. `_n` must be in [0,3]; it is not range checked.
template <typename T>
constexpr T morton4d_mask(uint8_t _n)
{
    constexpr uint64_t stageMasks[4] =
    {
        0x1111111111111111ull, // one bit kept out of every four
        0x0303030303030303ull, // bit pairs, one per byte
        0x000F000F000F000Full, // nibbles, one per 16 bits
        0x000000FF000000FFull  // bytes, one per 32 bits
    };
    const uint64_t selected = stageMasks[_n];
    return static_cast<T>(selected);
}
3156

3257
template <typename T, uint32_t bitDepth>
3358
inline T morton2d_decode(T x)
@@ -58,7 +83,7 @@ namespace impl
5883
{
5984
x = (x | (x << 16)) & morton2d_mask<T>(4);
6085
}
61-
if constexpr (bitDepth > 16u)
86+
if constexpr (bitDepth>16u)
6287
{
6388
x = (x | (x << 8)) & morton2d_mask<T>(3);
6489
}
@@ -71,15 +96,43 @@ namespace impl
7196

7297
return x;
7398
}
99+
template <typename T, uint32_t bitDepth>
100+
inline T separate_bits_3d(T x)
101+
{
102+
if constexpr (bitDepth>32u)
103+
{
104+
x = (x | (x << 32)) & morton3d_mask<T>(4);
105+
}
106+
if constexpr (bitDepth>16u)
107+
{
108+
x = (x | (x << 16)) & morton3d_mask<T>(3);
109+
}
110+
if constexpr (bitDepth>8u)
111+
{
112+
x = (x | (x << 8)) & morton3d_mask<T>(2);
113+
}
114+
x = (x | (x << 4)) & morton3d_mask<T>(1);
115+
x = (x | (x << 2)) & morton3d_mask<T>(0);
74116

75-
inline uint64_t separate_bits_3d(uint64_t x)
117+
return x;
118+
}
119+
template <typename T, uint32_t bitDepth>
120+
inline T separate_bits_4d(T x)
76121
{
77-
x &= 0x00000000001fffff;
78-
x = (x | x << 32) & 0x001f00000000ffff;
79-
x = (x | x << 16) & 0x001f0000ff0000ff;
80-
x = (x | x << 8) & 0x010f00f00f00f00f;
81-
x = (x | x << 4) & 0x10c30c30c30c30c3;
82-
x = (x | x << 2) & 0x1249249249249249;
122+
if constexpr (bitDepth>32u)
123+
{
124+
x = (x | (x << 24)) & morton4d_mask<T>(3);
125+
}
126+
if constexpr (bitDepth>16u)
127+
{
128+
x = (x | (x << 12)) & morton4d_mask<T>(2);
129+
}
130+
if constexpr (bitDepth>8u)
131+
{
132+
x = (x | (x << 6)) & morton4d_mask<T>(1);
133+
}
134+
x = (x | (x << 3)) & morton4d_mask<T>(0);
135+
83136
return x;
84137
}
85138
}
@@ -91,8 +144,10 @@ T morton2d_decode_y(T _morton) { return impl::morton2d_decode<T,bitDepth>(_morto
91144

92145
template<typename T, uint32_t bitDepth=sizeof(T)*8u>
93146
T morton2d_encode(T x, T y) { return impl::separate_bits_2d<T,bitDepth>(x) | (impl::separate_bits_2d<T,bitDepth>(y)<<1); }
94-
95-
inline uint64_t morton3d_encode(uint64_t x, uint64_t y, uint64_t z) { return impl::separate_bits_3d(x) | (impl::separate_bits_3d(y) << 1) | (impl::separate_bits_3d(z) << 2); }
147+
template<typename T, uint32_t bitDepth=sizeof(T)*8u>
148+
T morton3d_encode(T x, T y, T z) { return impl::separate_bits_3d<T,bitDepth>(x) | (impl::separate_bits_3d<T,bitDepth>(y)<<1) | (impl::separate_bits_3d<T,bitDepth>(z)<<2); }
149+
template<typename T, uint32_t bitDepth=sizeof(T)*8u>
150+
T morton4d_encode(T x, T y, T z, T w) { return impl::separate_bits_4d<T,bitDepth>(x) | (impl::separate_bits_4d<T,bitDepth>(y)<<1) | (impl::separate_bits_4d<T,bitDepth>(z)<<2) | (impl::separate_bits_4d<T,bitDepth>(w)<<3); }
96151

97152
}}
98153

0 commit comments

Comments
 (0)