Skip to content

Commit 260287b

Browse files
committed
Rewrite index optimization code for maximum efficiency
While all the previous fixes to the optimizeVertexCache invocation fixed the vertex transform efficiency, the import code was still missing two crucial recommendations from the meshoptimizer documentation:

- All meshes should be optimized for vertex fetch (this reorders vertices for maximum fetch efficiency).
- When LODs are used with a shared vertex buffer, the vertex order should be generated by doing a vertex fetch optimization on the concatenated index buffer from coarse to fine LODs; this maximizes fetch efficiency for coarse LODs.

The last point is especially crucial for Mali GPUs; unlike other GPUs, where vertex order affects fetch efficiency but not shading, these GPUs have various shading quirks (depending on the GPU generation) that really require consecutive index ranges for each LOD, which requires the second optimization mentioned above. However, all of these optimizations also help desktop GPUs and other mobile GPUs.

Because this optimization is "global" in the sense that it affects all LODs and all vertex arrays in concert, I've taken this opportunity to isolate all optimization code in a single function (optimize_indices) and pull it out of generate_lods and create_shadow_mesh; this doesn't change the vertex cache efficiency, but makes the code cleaner. Consequently, optimize_indices should be called after other functions like create_shadow_mesh / generate_lods.

This required exposing meshopt_optimizeVertexFetchRemap; as a drive-by, meshopt_simplifySloppy was never used, so it is no longer exposed - this will simplify future meshopt upgrades if they end up changing the function's interface.
1 parent 1bffd6c commit 260287b

File tree

7 files changed

+98
-29
lines changed

7 files changed

+98
-29
lines changed

editor/import/3d/resource_importer_obj.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,8 +535,6 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
535535
}
536536
}
537537

538-
mesh->optimize_indices_for_cache();
539-
540538
if (p_generate_lods) {
541539
// Use normal merge/split angles that match the defaults used for 3D scene importing.
542540
mesh->generate_lods(60.0f, {});
@@ -546,6 +544,8 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
546544
mesh->create_shadow_mesh();
547545
}
548546

547+
mesh->optimize_indices();
548+
549549
if (p_single_mesh && mesh->get_surface_count() > 0) {
550550
r_meshes.push_back(mesh);
551551
}

editor/import/3d/resource_importer_scene.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2567,8 +2567,6 @@ Node *ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_
25672567
}
25682568
}
25692569

2570-
src_mesh_node->get_mesh()->optimize_indices_for_cache();
2571-
25722570
if (generate_lods) {
25732571
Array skin_pose_transform_array = _get_skinned_pose_transforms(src_mesh_node);
25742572
src_mesh_node->get_mesh()->generate_lods(merge_angle, skin_pose_transform_array);
@@ -2578,6 +2576,8 @@ Node *ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_
25782576
src_mesh_node->get_mesh()->create_shadow_mesh();
25792577
}
25802578

2579+
src_mesh_node->get_mesh()->optimize_indices();
2580+
25812581
if (!save_to_file.is_empty()) {
25822582
Ref<Mesh> existing = ResourceCache::get_ref(save_to_file);
25832583
if (existing.is_valid()) {

modules/meshoptimizer/register_types.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ void initialize_meshoptimizer_module(ModuleInitializationLevel p_level) {
4040
}
4141

4242
SurfaceTool::optimize_vertex_cache_func = meshopt_optimizeVertexCache;
43+
SurfaceTool::optimize_vertex_fetch_remap_func = meshopt_optimizeVertexFetchRemap;
4344
SurfaceTool::simplify_func = meshopt_simplify;
4445
SurfaceTool::simplify_with_attrib_func = meshopt_simplifyWithAttributes;
4546
SurfaceTool::simplify_scale_func = meshopt_simplifyScale;
46-
SurfaceTool::simplify_sloppy_func = meshopt_simplifySloppy;
4747
SurfaceTool::generate_remap_func = meshopt_generateVertexRemap;
4848
SurfaceTool::remap_vertex_func = meshopt_remapVertexBuffer;
4949
SurfaceTool::remap_index_func = meshopt_remapIndexBuffer;
@@ -55,9 +55,9 @@ void uninitialize_meshoptimizer_module(ModuleInitializationLevel p_level) {
5555
}
5656

5757
SurfaceTool::optimize_vertex_cache_func = nullptr;
58+
SurfaceTool::optimize_vertex_fetch_remap_func = nullptr;
5859
SurfaceTool::simplify_func = nullptr;
5960
SurfaceTool::simplify_scale_func = nullptr;
60-
SurfaceTool::simplify_sloppy_func = nullptr;
6161
SurfaceTool::generate_remap_func = nullptr;
6262
SurfaceTool::remap_vertex_func = nullptr;
6363
SurfaceTool::remap_index_func = nullptr;

scene/resources/3d/importer_mesh.cpp

Lines changed: 85 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,56 @@ void ImporterMesh::set_surface_material(int p_surface, const Ref<Material> &p_ma
168168
mesh.unref();
169169
}
170170

171-
void ImporterMesh::optimize_indices_for_cache() {
171+
// Reorders the per-vertex data in `p_array` according to `p_remap` and returns the result.
//
// `p_array` is received by value; the function edits that copy in place (the same pointer is
// passed to remap_vertex_func as both destination and source) and returns it.
// `p_remap` has one entry per original vertex; `p_vertex_count` is the vertex count after
// remapping and determines the size of the returned array.
// If the array length is not a whole multiple of the remap length, an error is raised and the
// array is returned unchanged.
template <typename T>
172+
static Vector<T> _remap_array(Vector<T> p_array, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) {
173+
ERR_FAIL_COND_V(p_array.size() % p_remap.size() != 0, p_array);
174+
// Number of T elements stored per vertex (e.g. several floats per vertex in a float array).
int num_elements = p_array.size() / p_remap.size();
175+
T *data = p_array.ptrw();
176+
// Each vertex is treated as one opaque record of sizeof(T) * num_elements bytes.
SurfaceTool::remap_vertex_func(data, data, p_remap.size(), sizeof(T) * num_elements, p_remap.ptr());
177+
// Resize to the post-remap vertex count.
// NOTE(review): presumably p_vertex_count <= p_remap.size(), so this trims vertices no index references - confirm.
p_array.resize(p_vertex_count * num_elements);
178+
return p_array;
179+
}
180+
181+
// Applies `p_remap` to every vertex attribute array stored in `r_arrays`, in place.
//
// The RS::ARRAY_INDEX slot is skipped (this function only touches per-vertex data) and NIL
// slots are left untouched. Each remaining slot is dispatched to _remap_array<T> based on its
// packed array type; `p_vertex_count` is forwarded as the post-remap vertex count.
// A slot holding any other Variant type reports "Unhandled array type." through ERR_FAIL_MSG.
// NOTE(review): ERR_FAIL_MSG presumably returns from the function, leaving later slots
// unprocessed - confirm.
static void _remap_arrays(Array &r_arrays, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) {
182+
for (int i = 0; i < r_arrays.size(); i++) {
183+
// The index slot is not per-vertex data, so it is not remapped here.
if (i == RS::ARRAY_INDEX) {
184+
continue;
185+
}
186+
187+
switch (r_arrays[i].get_type()) {
188+
// Empty slot: nothing to remap.
case Variant::NIL:
189+
break;
190+
case Variant::PACKED_VECTOR3_ARRAY:
191+
r_arrays[i] = _remap_array<Vector3>(r_arrays[i], p_remap, p_vertex_count);
192+
break;
193+
case Variant::PACKED_VECTOR2_ARRAY:
194+
r_arrays[i] = _remap_array<Vector2>(r_arrays[i], p_remap, p_vertex_count);
195+
break;
196+
case Variant::PACKED_FLOAT32_ARRAY:
197+
r_arrays[i] = _remap_array<float>(r_arrays[i], p_remap, p_vertex_count);
198+
break;
199+
case Variant::PACKED_INT32_ARRAY:
200+
r_arrays[i] = _remap_array<int32_t>(r_arrays[i], p_remap, p_vertex_count);
201+
break;
202+
case Variant::PACKED_BYTE_ARRAY:
203+
r_arrays[i] = _remap_array<uint8_t>(r_arrays[i], p_remap, p_vertex_count);
204+
break;
205+
case Variant::PACKED_COLOR_ARRAY:
206+
r_arrays[i] = _remap_array<Color>(r_arrays[i], p_remap, p_vertex_count);
207+
break;
208+
// Any other Variant type is unexpected in a surface array slot.
default:
209+
ERR_FAIL_MSG("Unhandled array type.");
210+
}
211+
}
212+
}
213+
214+
void ImporterMesh::optimize_indices() {
172215
if (!SurfaceTool::optimize_vertex_cache_func) {
173216
return;
174217
}
218+
if (!SurfaceTool::optimize_vertex_fetch_remap_func || !SurfaceTool::remap_vertex_func || !SurfaceTool::remap_index_func) {
219+
return;
220+
}
175221

176222
for (int i = 0; i < surfaces.size(); i++) {
177223
if (surfaces[i].primitive != Mesh::PRIMITIVE_TRIANGLES) {
@@ -188,10 +234,48 @@ void ImporterMesh::optimize_indices_for_cache() {
188234
continue;
189235
}
190236

237+
// Optimize indices for vertex cache to establish final triangle order.
191238
int *indices_ptr = indices.ptrw();
192239
SurfaceTool::optimize_vertex_cache_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, vertex_count);
240+
surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices;
241+
242+
for (int j = 0; j < surfaces[i].lods.size(); ++j) {
243+
Surface::LOD &lod = surfaces.write[i].lods.write[j];
244+
int *lod_indices_ptr = lod.indices.ptrw();
245+
SurfaceTool::optimize_vertex_cache_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), vertex_count);
246+
}
193247

248+
// Concatenate indices for all LODs in the order of coarse->fine; this establishes the effective order of vertices,
249+
// and is important to optimize for vertex fetch (all GPUs) and shading (Mali GPUs)
250+
PackedInt32Array merged_indices;
251+
for (int j = surfaces[i].lods.size() - 1; j >= 0; --j) {
252+
merged_indices.append_array(surfaces[i].lods[j].indices);
253+
}
254+
merged_indices.append_array(indices);
255+
256+
// Generate remap array that establishes optimal vertex order according to the order of indices above.
257+
Vector<uint32_t> remap;
258+
remap.resize(vertex_count);
259+
unsigned int new_vertex_count = SurfaceTool::optimize_vertex_fetch_remap_func(remap.ptrw(), (const unsigned int *)merged_indices.ptr(), merged_indices.size(), vertex_count);
260+
261+
// We need to remap all vertex and index arrays in lockstep according to the remap.
262+
SurfaceTool::remap_index_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, remap.ptr());
194263
surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices;
264+
265+
for (int j = 0; j < surfaces[i].lods.size(); ++j) {
266+
Surface::LOD &lod = surfaces.write[i].lods.write[j];
267+
int *lod_indices_ptr = lod.indices.ptrw();
268+
SurfaceTool::remap_index_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), remap.ptr());
269+
}
270+
271+
_remap_arrays(surfaces.write[i].arrays, remap, new_vertex_count);
272+
for (int j = 0; j < surfaces[i].blend_shape_data.size(); j++) {
273+
_remap_arrays(surfaces.write[i].blend_shape_data.write[j].arrays, remap, new_vertex_count);
274+
}
275+
}
276+
277+
if (shadow_mesh.is_valid()) {
278+
shadow_mesh->optimize_indices();
195279
}
196280
}
197281

@@ -215,9 +299,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
215299
if (!SurfaceTool::simplify_with_attrib_func) {
216300
return;
217301
}
218-
if (!SurfaceTool::optimize_vertex_cache_func) {
219-
return;
220-
}
221302

222303
LocalVector<Transform3D> bone_transform_vector;
223304
for (int i = 0; i < p_bone_transform_array.size(); i++) {
@@ -431,12 +512,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
431512
}
432513

433514
surfaces.write[i].lods.sort_custom<Surface::LODComparator>();
434-
435-
for (int j = 0; j < surfaces.write[i].lods.size(); j++) {
436-
Surface::LOD &lod = surfaces.write[i].lods.write[j];
437-
unsigned int *lod_indices_ptr = (unsigned int *)lod.indices.ptrw();
438-
SurfaceTool::optimize_vertex_cache_func(lod_indices_ptr, lod_indices_ptr, lod.indices.size(), vertex_count);
439-
}
440515
}
441516
}
442517

@@ -574,10 +649,6 @@ void ImporterMesh::create_shadow_mesh() {
574649
index_wptr[j] = vertex_remap[index];
575650
}
576651

577-
if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
578-
SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size());
579-
}
580-
581652
new_surface[RS::ARRAY_INDEX] = new_indices;
582653

583654
// Make sure the same LODs as the full version are used.
@@ -596,10 +667,6 @@ void ImporterMesh::create_shadow_mesh() {
596667
index_wptr[k] = vertex_remap[index];
597668
}
598669

599-
if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
600-
SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size());
601-
}
602-
603670
lods[surfaces[i].lods[j].distance] = new_indices;
604671
}
605672
}

scene/resources/3d/importer_mesh.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ class ImporterMesh : public Resource {
113113

114114
void set_surface_material(int p_surface, const Ref<Material> &p_material);
115115

116-
void optimize_indices_for_cache();
116+
void optimize_indices();
117117

118118
void generate_lods(float p_normal_merge_angle, Array p_skin_pose_transform_array);
119119

scene/resources/surface_tool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
#define EQ_VERTEX_DIST 0.00001
3434

3535
SurfaceTool::OptimizeVertexCacheFunc SurfaceTool::optimize_vertex_cache_func = nullptr;
36+
SurfaceTool::OptimizeVertexFetchRemapFunc SurfaceTool::optimize_vertex_fetch_remap_func = nullptr;
3637
SurfaceTool::SimplifyFunc SurfaceTool::simplify_func = nullptr;
3738
SurfaceTool::SimplifyWithAttribFunc SurfaceTool::simplify_with_attrib_func = nullptr;
3839
SurfaceTool::SimplifyScaleFunc SurfaceTool::simplify_scale_func = nullptr;
39-
SurfaceTool::SimplifySloppyFunc SurfaceTool::simplify_sloppy_func = nullptr;
4040
SurfaceTool::GenerateRemapFunc SurfaceTool::generate_remap_func = nullptr;
4141
SurfaceTool::RemapVertexFunc SurfaceTool::remap_vertex_func = nullptr;
4242
SurfaceTool::RemapIndexFunc SurfaceTool::remap_index_func = nullptr;

scene/resources/surface_tool.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,14 @@ class SurfaceTool : public RefCounted {
9090

9191
typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
9292
static OptimizeVertexCacheFunc optimize_vertex_cache_func;
93+
typedef size_t (*OptimizeVertexFetchRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
94+
static OptimizeVertexFetchRemapFunc optimize_vertex_fetch_remap_func;
9395
typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float *r_error);
9496
static SimplifyFunc simplify_func;
9597
typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, const float *attributes, size_t attribute_stride, const float *attribute_weights, size_t attribute_count, const unsigned char *vertex_lock, size_t target_index_count, float target_error, unsigned int options, float *result_error);
9698
static SimplifyWithAttribFunc simplify_with_attrib_func;
9799
typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
98100
static SimplifyScaleFunc simplify_scale_func;
99-
typedef size_t (*SimplifySloppyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *out_result_error);
100-
static SimplifySloppyFunc simplify_sloppy_func;
101101
typedef size_t (*GenerateRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const void *vertices, size_t vertex_count, size_t vertex_size);
102102
static GenerateRemapFunc generate_remap_func;
103103
typedef void (*RemapVertexFunc)(void *destination, const void *vertices, size_t vertex_count, size_t vertex_size, const unsigned int *remap);
@@ -222,7 +222,9 @@ class SurfaceTool : public RefCounted {
222222

223223
void clear();
224224

225-
LocalVector<Vertex> &get_vertex_array() { return vertex_array; }
225+
LocalVector<Vertex> &get_vertex_array() {
226+
return vertex_array;
227+
}
226228

227229
void create_from_triangle_arrays(const Array &p_arrays);
228230
void create_from_arrays(const Array &p_arrays, Mesh::PrimitiveType p_primitive_type = Mesh::PRIMITIVE_TRIANGLES);

0 commit comments

Comments
 (0)