Skip to content

Commit 5b3becd

Browse files
authored
Merge pull request #1003 from zeux/meshletcodec
Implement experimental meshlet codec
2 parents f105bb5 + 8605ff9 commit 5b3becd

File tree

9 files changed

+1010
-13
lines changed

9 files changed

+1010
-13
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ set(SOURCES
3232
src/indexanalyzer.cpp
3333
src/indexcodec.cpp
3434
src/indexgenerator.cpp
35+
src/meshletcodec.cpp
3536
src/overdrawoptimizer.cpp
3637
src/partition.cpp
3738
src/quantization.cpp

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ ifeq ($(config),coverage)
8686
LDFLAGS+=-coverage
8787
endif
8888

89+
ifeq ($(config),release-avx)
90+
CXXFLAGS+=-O3 -DNDEBUG -mavx
91+
endif
92+
8993
ifeq ($(config),release-avx512)
9094
CXXFLAGS+=-O3 -DNDEBUG -mavx512vl -mavx512vbmi -mavx512vbmi2
9195
endif
@@ -227,7 +231,7 @@ codecbench-simd.wasm: tools/codecbench.cpp $(LIBRARY_SOURCES)
227231
codectest: tools/codectest.cpp $(LIBRARY)
228232
$(CXX) $^ $(CXXFLAGS) $(LDFLAGS) -o $@
229233

230-
codecfuzz: tools/codecfuzz.cpp src/vertexcodec.cpp src/indexcodec.cpp
234+
codecfuzz: tools/codecfuzz.cpp src/vertexcodec.cpp src/indexcodec.cpp src/meshletcodec.cpp
231235
$(CXX) $^ -fsanitize=fuzzer,address,undefined -O1 -g -o $@
232236

233237
clusterfuzz: tools/clusterfuzz.cpp src/clusterizer.cpp

demo/main.cpp

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,118 @@ void encodeIndexSequence(const std::vector<unsigned int>& data, size_t vertex_co
760760
(double(result.size() * 4) / 1e9) / (end - middle));
761761
}
762762

763+
void encodeMeshlets(const Mesh& mesh, size_t max_vertices, size_t max_triangles, bool reorder = true)
764+
{
765+
size_t max_meshlets = meshopt_buildMeshletsBound(mesh.indices.size(), max_vertices, max_triangles);
766+
std::vector<meshopt_Meshlet> meshlets(max_meshlets);
767+
std::vector<unsigned int> meshlet_vertices(mesh.indices.size());
768+
std::vector<unsigned char> meshlet_triangles(mesh.indices.size());
769+
770+
meshlets.resize(meshopt_buildMeshlets(&meshlets[0], &meshlet_vertices[0], &meshlet_triangles[0], &mesh.indices[0], mesh.indices.size(), &mesh.vertices[0].px, mesh.vertices.size(), sizeof(Vertex), max_vertices, max_triangles, 0.f));
771+
772+
if (meshlets.size())
773+
{
774+
const meshopt_Meshlet& last = meshlets.back();
775+
776+
// this is an example of how to trim the vertex/triangle arrays when copying data out to GPU storage
777+
meshlet_vertices.resize(last.vertex_offset + last.vertex_count);
778+
meshlet_triangles.resize(last.triangle_offset + last.triangle_count * 3);
779+
780+
// TODO: over-allocate meshlet_vertices to multiple of 3 to make meshopt_optimizeVertexFetch below work without assertions
781+
meshlet_vertices.resize((meshlet_vertices.size() + 2) / 3 * 3);
782+
}
783+
784+
std::vector<unsigned char> cbuf(meshopt_encodeMeshletBound(max_vertices, max_triangles));
785+
786+
// optimize each meshlet for locality; this is important for performance, and critical for good compression
787+
for (size_t i = 0; i < meshlets.size(); ++i)
788+
meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset], &meshlet_triangles[meshlets[i].triangle_offset], meshlets[i].triangle_count, meshlets[i].vertex_count);
789+
790+
// optimize the order of vertex references within each meshlet and globally; this is valuable for access locality and critical for compression of vertex references
791+
// note that this reorders the vertex buffer too, so if a traditional index buffer is required it would need to be reconstructed from the meshlet data for optimal locality
792+
std::vector<Vertex> vertices = mesh.vertices;
793+
if (reorder)
794+
meshopt_optimizeVertexFetch(&vertices[0], &meshlet_vertices[0], meshlet_vertices.size(), &mesh.vertices[0], mesh.vertices.size(), sizeof(Vertex));
795+
796+
size_t mbst = 0;
797+
798+
std::vector<unsigned char> packed;
799+
800+
for (size_t i = 0; i < meshlets.size(); ++i)
801+
{
802+
const meshopt_Meshlet& meshlet = meshlets[i];
803+
804+
size_t mbs = meshopt_encodeMeshlet(&cbuf[0], cbuf.size(), &meshlet_vertices[meshlet.vertex_offset], meshlet.vertex_count, &meshlet_triangles[meshlet.triangle_offset], meshlet.triangle_count);
805+
assert(mbs > 0);
806+
807+
packed.push_back((unsigned char)meshlet.vertex_count);
808+
packed.push_back((unsigned char)meshlet.triangle_count);
809+
packed.push_back((unsigned char)(mbs & 0xff));
810+
packed.push_back((unsigned char)((mbs >> 8) & 0xff));
811+
packed.insert(packed.end(), &cbuf[0], &cbuf[mbs]);
812+
813+
unsigned int rv[256];
814+
unsigned int rt[256];
815+
int rc = meshopt_decodeMeshlet(rv, meshlet.vertex_count, rt, meshlet.triangle_count, &cbuf[0], mbs);
816+
assert(rc == 0);
817+
818+
for (size_t j = 0; j < meshlet.vertex_count; ++j)
819+
assert(rv[j] == meshlet_vertices[meshlet.vertex_offset + j]);
820+
821+
for (size_t j = 0; j < meshlet.triangle_count; ++j)
822+
{
823+
unsigned int a = meshlet_triangles[meshlet.triangle_offset + j * 3 + 0];
824+
unsigned int b = meshlet_triangles[meshlet.triangle_offset + j * 3 + 1];
825+
unsigned int c = meshlet_triangles[meshlet.triangle_offset + j * 3 + 2];
826+
827+
unsigned int abc = (a << 0) | (b << 8) | (c << 16);
828+
unsigned int bca = (b << 0) | (c << 8) | (a << 16);
829+
unsigned int cba = (c << 0) | (a << 8) | (b << 16);
830+
831+
assert(rt[j] == abc || rt[j] == bca || rt[j] == cba);
832+
}
833+
834+
mbst += mbs;
835+
}
836+
837+
size_t mbc = compress(packed);
838+
839+
printf("MeshletCodec (%d/%d): %d meshlets, %d bytes/meshlet; %d bytes, %.1f bits/triangle\n",
840+
int(max_vertices), int(max_triangles),
841+
int(meshlets.size()),
842+
int(mbst / meshlets.size()),
843+
int(mbst), double(mbst * 8) / double(mesh.indices.size() / 3));
844+
printf("MeshletCodec (%d/%d, packed): %d bytes/meshlet, %.1f bits/triangle; post-deflate: %d bytes/meshlet, %.1f bits/triangle)\n",
845+
int(max_vertices), int(max_triangles),
846+
int(packed.size() / meshlets.size()), double(packed.size() * 8) / double(mesh.indices.size() / 3),
847+
int(mbc / meshlets.size()), double(mbc * 8) / double(mesh.indices.size() / 3));
848+
849+
#if !TRACE
850+
double mbtime = 0;
851+
852+
for (int i = 0; i < 10; ++i)
853+
{
854+
unsigned int rv[256];
855+
unsigned int rt[256];
856+
double t0 = timestamp();
857+
unsigned char* p = &packed[0];
858+
for (size_t j = 0; j < meshlets.size(); ++j)
859+
{
860+
size_t size = p[2] | (p[3] << 8);
861+
meshopt_decodeMeshlet(rv, p[0], rt, p[1], p + 4, size);
862+
p += 4 + size;
863+
}
864+
double t1 = timestamp();
865+
866+
mbtime = (mbtime == 0 || t1 - t0 < mbtime) ? (t1 - t0) : mbtime;
867+
}
868+
869+
printf("MeshletCodec (%d/%d, packed): decode time %.3f msec, %.3fB tri/sec, %.1f ns/meshlet\n",
870+
int(max_vertices), int(max_triangles),
871+
mbtime * 1000, double(mesh.indices.size() / 3) / 1e9 / mbtime, mbtime * 1e9 / double(meshlets.size()));
872+
#endif
873+
}
874+
763875
template <typename PV>
764876
void packVertex(const Mesh& mesh, const char* pvn)
765877
{
@@ -1425,6 +1537,8 @@ void process(const char* path)
14251537
encodeVertex<PackedVertex>(copy, "");
14261538
encodeVertex<PackedVertexOct>(copy, "O");
14271539

1540+
encodeMeshlets(mesh, 64, 96);
1541+
14281542
simplify(mesh);
14291543
simplify(mesh, 0.1f, meshopt_SimplifyPrune);
14301544
simplifyAttr(mesh);
@@ -1453,7 +1567,9 @@ void processDev(const char* path)
14531567
if (!loadMesh(mesh, path))
14541568
return;
14551569

1456-
simplifyUpdate(mesh, 0.1f, meshopt_SimplifyPrune | meshopt_SimplifyPermissive);
1570+
encodeMeshlets(mesh, 32, 48);
1571+
encodeMeshlets(mesh, 64, 64);
1572+
encodeMeshlets(mesh, 64, 96);
14571573
}
14581574

14591575
void processNanite(const char* path)

src/clusterizer.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,6 +1743,55 @@ void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* mesh
17431743
cache[c] = cache_last;
17441744
}
17451745

1746+
// rotate triangles to maximize compressibility
1747+
memset(cache, 0, vertex_count);
1748+
1749+
for (size_t i = 0; i < triangle_count; ++i)
1750+
{
1751+
unsigned char a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
1752+
1753+
// if only the middle vertex has been used, rotate triangle to ensure new vertices are always sequential
1754+
if (!cache[a] && cache[b] && !cache[c])
1755+
{
1756+
// abc -> bca
1757+
unsigned char t = a;
1758+
a = b, b = c, c = t;
1759+
}
1760+
else if (!cache[a] && !cache[b] && !cache[c])
1761+
{
1762+
// out of three edges, the edge ab can not be reused by subsequent triangles in some encodings
1763+
// if subsequent triangles don't share edges ca or bc, we can rotate the triangle to fix this
1764+
bool needab = false, needbc = false, needca = false;
1765+
1766+
for (size_t j = i + 1; j < triangle_count && j <= i + cache_cutoff; ++j)
1767+
{
1768+
unsigned char oa = indices[j * 3 + 0], ob = indices[j * 3 + 1], oc = indices[j * 3 + 2];
1769+
1770+
// note: edge comparisons are reversed as reused edges are flipped
1771+
needab |= (oa == b && ob == a) || (ob == b && oc == a) || (oc == b && oa == a);
1772+
needbc |= (oa == c && ob == b) || (ob == c && oc == b) || (oc == c && oa == b);
1773+
needca |= (oa == a && ob == c) || (ob == a && oc == c) || (oc == a && oa == c);
1774+
}
1775+
1776+
if (needab && !needbc)
1777+
{
1778+
// abc -> bca
1779+
unsigned char t = a;
1780+
a = b, b = c, c = t;
1781+
}
1782+
else if (needab && !needca)
1783+
{
1784+
// abc -> cab
1785+
unsigned char t = c;
1786+
c = b, b = a, a = t;
1787+
}
1788+
}
1789+
1790+
indices[i * 3 + 0] = a, indices[i * 3 + 1] = b, indices[i * 3 + 2] = c;
1791+
1792+
cache[a] = cache[b] = cache[c] = 1;
1793+
}
1794+
17461795
// reorder meshlet vertices for access locality assuming index buffer is scanned sequentially
17471796
unsigned int order[kMeshletMaxVertices];
17481797

src/indexcodec.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,6 @@ const int kDecodeIndexVersion = 1;
1919
typedef unsigned int VertexFifo[16];
2020
typedef unsigned int EdgeFifo[16][2];
2121

22-
static const unsigned int kTriangleIndexOrder[3][3] = {
23-
{0, 1, 2},
24-
{1, 2, 0},
25-
{2, 0, 1},
26-
};
27-
2822
static const unsigned char kCodeAuxEncodingTable[16] = {
2923
0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
3024
0, 0, // last two entries aren't used for encoding
@@ -194,6 +188,8 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons
194188

195189
int fecmax = version >= 1 ? 13 : 15;
196190

191+
static const int rotations[] = {0, 1, 2, 0, 1};
192+
197193
// use static encoding table; it's possible to pack the result and then build an optimal table and repack
198194
// for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
199195
const unsigned char* codeaux_table = kCodeAuxEncodingTable;
@@ -211,7 +207,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons
211207
if (fer >= 0 && (fer >> 2) < 15)
212208
{
213209
// note: getEdgeFifo implicitly rotates triangles by matching a/b to existing edge
214-
const unsigned int* order = kTriangleIndexOrder[fer & 3];
210+
const int* order = rotations + (fer & 3);
215211

216212
unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
217213

@@ -247,7 +243,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons
247243
else
248244
{
249245
int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
250-
const unsigned int* order = kTriangleIndexOrder[rotation];
246+
const int* order = rotations + rotation;
251247

252248
unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
253249

0 commit comments

Comments
 (0)