@@ -238,7 +238,7 @@ static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int
238238
239239 bool result = false ;
240240
241- unsigned int used_extra = (av == 0xff ) + (bv == 0xff ) + (cv == 0xff );
241+ int used_extra = (av == 0xff ) + (bv == 0xff ) + (cv == 0xff );
242242
243243 if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
244244 {
@@ -283,10 +283,10 @@ static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int
283283 return result;
284284}
285285
286- static unsigned int getNeighborTriangle (const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int * meshlet_vertices, const unsigned int * indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int * live_triangles, const unsigned char * used, float meshlet_expected_radius, float cone_weight, unsigned int * out_extra )
286+ static unsigned int getNeighborTriangle (const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int * meshlet_vertices, const unsigned int * indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int * live_triangles, const unsigned char * used, float meshlet_expected_radius, float cone_weight)
287287{
288288 unsigned int best_triangle = ~0u ;
289- unsigned int best_extra = 5 ;
289+ int best_priority = 5 ;
290290 float best_score = FLT_MAX;
291291
292292 for (size_t i = 0 ; i < meshlet.vertex_count ; ++i)
@@ -301,20 +301,26 @@ static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Co
301301 unsigned int triangle = neighbors[j];
302302 unsigned int a = indices[triangle * 3 + 0 ], b = indices[triangle * 3 + 1 ], c = indices[triangle * 3 + 2 ];
303303
304- unsigned int extra = (used[a] == 0xff ) + (used[b] == 0xff ) + (used[c] == 0xff );
304+ int extra = (used[a] == 0xff ) + (used[b] == 0xff ) + (used[c] == 0xff );
305+ assert (extra <= 2 );
305306
306- // triangles that don't add new vertices to meshlets are max. priority
307- if (extra != 0 )
308- {
309- // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
310- if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1 )
311- extra = 0 ;
307+ int priority = -1 ;
312308
313- extra++;
314- }
309+ // triangles that don't add new vertices to meshlets are max. priority
310+ if (extra == 0 )
311+ priority = 0 ;
312+ // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
313+ else if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1 )
314+ priority = 1 ;
315+ // if two vertices have live count of 2, removing this triangle will make another triangle dangling which is good for overall flow
316+ else if ((live_triangles[a] == 2 ) + (live_triangles[b] == 2 ) + (live_triangles[c] == 2 ) >= 2 )
317+ priority = 1 + extra;
318+ // otherwise adjust priority to be after the above cases, 3 or 4 based on used[] count
319+ else
320+ priority = 2 + extra;
315321
316322 // since topology-based priority is always more important than the score, we can skip scoring in some cases
317- if (extra > best_extra )
323+ if (priority > best_priority )
318324 continue ;
319325
320326 float score = 0 ;
@@ -341,18 +347,15 @@ static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Co
341347
342348 // note that topology-based priority is always more important than the score
343349 // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
344- if (extra < best_extra || score < best_score)
350+ if (priority < best_priority || score < best_score)
345351 {
346352 best_triangle = triangle;
347- best_extra = extra ;
353+ best_priority = priority ;
348354 best_score = score;
349355 }
350356 }
351357 }
352358
353- if (out_extra)
354- *out_extra = best_extra;
355-
356359 return best_triangle;
357360}
358361
@@ -441,7 +444,7 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const
441444 }
442445
443446 // split axis is one where the variance is largest
444- unsigned int axis = vars[0 ] >= vars[1 ] && vars[0 ] >= vars[2 ] ? 0 : vars[1 ] >= vars[2 ] ? 1 : 2 ;
447+ unsigned int axis = ( vars[0 ] >= vars[1 ] && vars[0 ] >= vars[2 ]) ? 0 : ( vars[1 ] >= vars[2 ] ? 1 : 2 ) ;
445448
446449 float split = mean[axis];
447450 size_t middle = kdtreePartition (indices, count, points, stride, axis, split);
@@ -588,13 +591,13 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve
588591 {
589592 Cone meshlet_cone = getMeshletCone (meshlet_cone_acc, meshlet.triangle_count );
590593
591- unsigned int best_extra = 0 ;
592- unsigned int best_triangle = getNeighborTriangle (meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight, &best_extra );
594+ unsigned int best_triangle = getNeighborTriangle (meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight) ;
595+ int best_extra = best_triangle == ~ 0u ? - 1 : (used[indices[best_triangle * 3 + 0 ]] == 0xff ) + (used[ indices[best_triangle * 3 + 1 ]] == 0xff ) + ( used[indices[best_triangle * 3 + 2 ]] == 0xff );
593596
594597 // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring
595598 if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles))
596599 {
597- best_triangle = getNeighborTriangle (meshlet, NULL , meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0 .f , NULL );
600+ best_triangle = getNeighborTriangle (meshlet, NULL , meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0 .f );
598601 }
599602
600603 // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity
@@ -882,3 +885,93 @@ meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices
882885
883886 return meshopt_computeClusterBounds (indices, triangle_count * 3 , vertex_positions, vertex_count, vertex_positions_stride);
884887}
888+
889+ void meshopt_optimizeMeshlet (unsigned int * meshlet_vertices, unsigned char * meshlet_triangles, size_t triangle_count, size_t vertex_count)
890+ {
891+ using namespace meshopt ;
892+
893+ assert (triangle_count <= kMeshletMaxTriangles );
894+ assert (vertex_count <= kMeshletMaxVertices );
895+
896+ unsigned char * indices = meshlet_triangles;
897+ unsigned int * vertices = meshlet_vertices;
898+
899+ // cache tracks vertex timestamps (corresponding to triangle index! all 3 vertices are added at the same time and never removed)
900+ unsigned char cache[kMeshletMaxVertices ];
901+ memset (cache, 0 , vertex_count);
902+
903+ // note that we start from a value that means all vertices aren't in cache
904+ unsigned char cache_last = 128 ;
905+ const unsigned char cache_cutoff = 3 ; // 3 triangles = ~5..9 vertices depending on reuse
906+
907+ for (size_t i = 0 ; i < triangle_count; ++i)
908+ {
909+ int next = -1 ;
910+ int next_match = -1 ;
911+
912+ for (size_t j = i; j < triangle_count; ++j)
913+ {
914+ unsigned char a = indices[j * 3 + 0 ], b = indices[j * 3 + 1 ], c = indices[j * 3 + 2 ];
915+ assert (a < vertex_count && b < vertex_count && c < vertex_count);
916+
917+ // score each triangle by how many vertices are in cache
918+ // note: the distance is computed using unsigned 8-bit values, so cache timestamp overflow is handled gracefully
919+ int aok = (unsigned char )(cache_last - cache[a]) < cache_cutoff;
920+ int bok = (unsigned char )(cache_last - cache[b]) < cache_cutoff;
921+ int cok = (unsigned char )(cache_last - cache[c]) < cache_cutoff;
922+
923+ if (aok + bok + cok > next_match)
924+ {
925+ next = (int )j;
926+ next_match = aok + bok + cok;
927+
928+ // note that we could end up with all 3 vertices in the cache, but 2 is enough for ~strip traversal
929+ if (next_match >= 2 )
930+ break ;
931+ }
932+ }
933+
934+ assert (next >= 0 );
935+
936+ unsigned char a = indices[next * 3 + 0 ], b = indices[next * 3 + 1 ], c = indices[next * 3 + 2 ];
937+
938+ // shift triangles before the next one forward so that we always keep an ordered partition
939+ // note: this could have swapped triangles [i] and [next] but that distorts the order and may skew the output sequence
940+ memmove (indices + (i + 1 ) * 3 , indices + i * 3 , (next - i) * 3 * sizeof (unsigned char ));
941+
942+ indices[i * 3 + 0 ] = a;
943+ indices[i * 3 + 1 ] = b;
944+ indices[i * 3 + 2 ] = c;
945+
946+ // cache timestamp is the same between all vertices of each triangle to reduce overflow
947+ cache_last++;
948+ cache[a] = cache_last;
949+ cache[b] = cache_last;
950+ cache[c] = cache_last;
951+ }
952+
953+ // reorder meshlet vertices for access locality assuming index buffer is scanned sequentially
954+ unsigned int order[kMeshletMaxVertices ];
955+
956+ unsigned char remap[kMeshletMaxVertices ];
957+ memset (remap, -1 , vertex_count);
958+
959+ size_t vertex_offset = 0 ;
960+
961+ for (size_t i = 0 ; i < triangle_count * 3 ; ++i)
962+ {
963+ unsigned char & r = remap[indices[i]];
964+
965+ if (r == 0xff )
966+ {
967+ r = (unsigned char )(vertex_offset);
968+ order[vertex_offset] = vertices[indices[i]];
969+ vertex_offset++;
970+ }
971+
972+ indices[i] = r;
973+ }
974+
975+ assert (vertex_offset <= vertex_count);
976+ memcpy (vertices, order, vertex_offset * sizeof (unsigned int ));
977+ }
0 commit comments