@@ -168,10 +168,56 @@ void ImporterMesh::set_surface_material(int p_surface, const Ref<Material> &p_ma
168168 mesh.unref ();
169169}
170170
171- void ImporterMesh::optimize_indices_for_cache () {
171+ template <typename T>
172+ static Vector<T> _remap_array (Vector<T> p_array, const Vector<uint32_t > &p_remap, uint32_t p_vertex_count) {
173+ ERR_FAIL_COND_V (p_array.size () % p_remap.size () != 0 , p_array);
174+ int num_elements = p_array.size () / p_remap.size ();
175+ T *data = p_array.ptrw ();
176+ SurfaceTool::remap_vertex_func (data, data, p_remap.size (), sizeof (T) * num_elements, p_remap.ptr ());
177+ p_array.resize (p_vertex_count * num_elements);
178+ return p_array;
179+ }
180+
181+ static void _remap_arrays (Array &r_arrays, const Vector<uint32_t > &p_remap, uint32_t p_vertex_count) {
182+ for (int i = 0 ; i < r_arrays.size (); i++) {
183+ if (i == RS::ARRAY_INDEX) {
184+ continue ;
185+ }
186+
187+ switch (r_arrays[i].get_type ()) {
188+ case Variant::NIL:
189+ break ;
190+ case Variant::PACKED_VECTOR3_ARRAY:
191+ r_arrays[i] = _remap_array<Vector3>(r_arrays[i], p_remap, p_vertex_count);
192+ break ;
193+ case Variant::PACKED_VECTOR2_ARRAY:
194+ r_arrays[i] = _remap_array<Vector2>(r_arrays[i], p_remap, p_vertex_count);
195+ break ;
196+ case Variant::PACKED_FLOAT32_ARRAY:
197+ r_arrays[i] = _remap_array<float >(r_arrays[i], p_remap, p_vertex_count);
198+ break ;
199+ case Variant::PACKED_INT32_ARRAY:
200+ r_arrays[i] = _remap_array<int32_t >(r_arrays[i], p_remap, p_vertex_count);
201+ break ;
202+ case Variant::PACKED_BYTE_ARRAY:
203+ r_arrays[i] = _remap_array<uint8_t >(r_arrays[i], p_remap, p_vertex_count);
204+ break ;
205+ case Variant::PACKED_COLOR_ARRAY:
206+ r_arrays[i] = _remap_array<Color>(r_arrays[i], p_remap, p_vertex_count);
207+ break ;
208+ default :
209+ ERR_FAIL_MSG (" Unhandled array type." );
210+ }
211+ }
212+ }
213+
214+ void ImporterMesh::optimize_indices () {
172215 if (!SurfaceTool::optimize_vertex_cache_func) {
173216 return ;
174217 }
218+ if (!SurfaceTool::optimize_vertex_fetch_remap_func || !SurfaceTool::remap_vertex_func || !SurfaceTool::remap_index_func) {
219+ return ;
220+ }
175221
176222 for (int i = 0 ; i < surfaces.size (); i++) {
177223 if (surfaces[i].primitive != Mesh::PRIMITIVE_TRIANGLES) {
@@ -188,10 +234,48 @@ void ImporterMesh::optimize_indices_for_cache() {
188234 continue ;
189235 }
190236
237+ // Optimize indices for vertex cache to establish final triangle order.
191238 int *indices_ptr = indices.ptrw ();
192239 SurfaceTool::optimize_vertex_cache_func ((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, vertex_count);
240+ surfaces.write [i].arrays [RS::ARRAY_INDEX] = indices;
241+
242+ for (int j = 0 ; j < surfaces[i].lods .size (); ++j) {
243+ Surface::LOD &lod = surfaces.write [i].lods .write [j];
244+ int *lod_indices_ptr = lod.indices .ptrw ();
245+ SurfaceTool::optimize_vertex_cache_func ((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices .size (), vertex_count);
246+ }
193247
248+ // Concatenate indices for all LODs in the order of coarse->fine; this establishes the effective order of vertices,
249+ // and is important to optimize for vertex fetch (all GPUs) and shading (Mali GPUs)
250+ PackedInt32Array merged_indices;
251+ for (int j = surfaces[i].lods .size () - 1 ; j >= 0 ; --j) {
252+ merged_indices.append_array (surfaces[i].lods [j].indices );
253+ }
254+ merged_indices.append_array (indices);
255+
256+ // Generate remap array that establishes optimal vertex order according to the order of indices above.
257+ Vector<uint32_t > remap;
258+ remap.resize (vertex_count);
259+ unsigned int new_vertex_count = SurfaceTool::optimize_vertex_fetch_remap_func (remap.ptrw (), (const unsigned int *)merged_indices.ptr (), merged_indices.size (), vertex_count);
260+
261+ // We need to remap all vertex and index arrays in lockstep according to the remap.
262+ SurfaceTool::remap_index_func ((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, remap.ptr ());
194263 surfaces.write [i].arrays [RS::ARRAY_INDEX] = indices;
264+
265+ for (int j = 0 ; j < surfaces[i].lods .size (); ++j) {
266+ Surface::LOD &lod = surfaces.write [i].lods .write [j];
267+ int *lod_indices_ptr = lod.indices .ptrw ();
268+ SurfaceTool::remap_index_func ((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices .size (), remap.ptr ());
269+ }
270+
271+ _remap_arrays (surfaces.write [i].arrays , remap, new_vertex_count);
272+ for (int j = 0 ; j < surfaces[i].blend_shape_data .size (); j++) {
273+ _remap_arrays (surfaces.write [i].blend_shape_data .write [j].arrays , remap, new_vertex_count);
274+ }
275+ }
276+
277+ if (shadow_mesh.is_valid ()) {
278+ shadow_mesh->optimize_indices ();
195279 }
196280}
197281
@@ -215,9 +299,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
215299 if (!SurfaceTool::simplify_with_attrib_func) {
216300 return ;
217301 }
218- if (!SurfaceTool::optimize_vertex_cache_func) {
219- return ;
220- }
221302
222303 LocalVector<Transform3D> bone_transform_vector;
223304 for (int i = 0 ; i < p_bone_transform_array.size (); i++) {
@@ -431,12 +512,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf
431512 }
432513
433514 surfaces.write [i].lods .sort_custom <Surface::LODComparator>();
434-
435- for (int j = 0 ; j < surfaces.write [i].lods .size (); j++) {
436- Surface::LOD &lod = surfaces.write [i].lods .write [j];
437- unsigned int *lod_indices_ptr = (unsigned int *)lod.indices .ptrw ();
438- SurfaceTool::optimize_vertex_cache_func (lod_indices_ptr, lod_indices_ptr, lod.indices .size (), vertex_count);
439- }
440515 }
441516}
442517
@@ -574,10 +649,6 @@ void ImporterMesh::create_shadow_mesh() {
574649 index_wptr[j] = vertex_remap[index];
575650 }
576651
577- if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
578- SurfaceTool::optimize_vertex_cache_func ((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size ());
579- }
580-
581652 new_surface[RS::ARRAY_INDEX] = new_indices;
582653
583654 // Make sure the same LODs as the full version are used.
@@ -596,10 +667,6 @@ void ImporterMesh::create_shadow_mesh() {
596667 index_wptr[k] = vertex_remap[index];
597668 }
598669
599- if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) {
600- SurfaceTool::optimize_vertex_cache_func ((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size ());
601- }
602-
603670 lods[surfaces[i].lods [j].distance ] = new_indices;
604671 }
605672 }
0 commit comments