Skip to content

Commit b49aea8

Browse files
committed
Reorganize the render surface sorting key to optimize API performance.
1 parent 36b9212 commit b49aea8

File tree

5 files changed

+44
-12
lines changed

5 files changed

+44
-12
lines changed

servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,7 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
844844

845845
RenderElementInfo &element_info = rl->element_info[p_offset + i];
846846

847-
element_info.value = uint32_t((surface->sort.sort_key2 & 0x0FFF00000000) >> 32u);
847+
element_info.value = uint32_t(surface->sort.sort_key1 & 0xFFF);
848848

849849
if (cant_repeat) {
850850
prev_surface = nullptr;
@@ -4075,7 +4075,8 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet
40754075
sdcache->sort.sort_key2 = 0;
40764076

40774077
sdcache->sort.surface_index = p_surface;
4078-
sdcache->sort.material_id = p_material_id;
4078+
sdcache->sort.material_id_hi = (p_material_id & 0xFF000000) >> 24;
4079+
sdcache->sort.material_id_lo = (p_material_id & 0x00FFFFFF);
40794080
sdcache->sort.shader_id = p_shader_id;
40804081
sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway
40814082
sdcache->sort.uses_forward_gi = ginstance->can_sdfgi;

servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,17 +498,23 @@ class RenderForwardClustered : public RendererSceneRenderRD {
498498
uint64_t sort_key2;
499499
};
500500
struct {
501-
uint64_t geometry_id : 32;
502-
uint64_t material_id : 32;
503-
504-
uint64_t shader_id : 32;
501+
// These fields must stay grouped together at the start of the key: RenderElementInfo's value is obtained by masking sort_key1 directly (see _fill_instance_data).
505502
uint64_t lod_index : 8;
506503
uint64_t uses_softshadow : 1;
507504
uint64_t uses_projector : 1;
508505
uint64_t uses_forward_gi : 1;
509506
uint64_t uses_lightmap : 1;
507+
508+
// Sorted based on optimal order for respecting priority and reducing the amount of rebinding of shaders, materials,
509+
// and geometry. The current order was found to perform best in large projects. If you wish to measure
510+
// differences, refer to RenderingDeviceGraph and the methods available to print statistics for draw lists.
510511
uint64_t depth_layer : 4;
511512
uint64_t surface_index : 8;
513+
uint64_t geometry_id : 32;
514+
uint64_t material_id_hi : 8;
515+
516+
uint64_t material_id_lo : 24;
517+
uint64_t shader_id : 32;
512518
uint64_t priority : 8;
513519
};
514520
} sort;

servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1981,7 +1981,7 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
19811981
RenderElementInfo &element_info = rl->element_info[p_offset + i];
19821982

19831983
// Sets lod_index and uses_lightmap at once.
1984-
element_info.value = uint32_t((surface->sort.sort_key2 & 0x01FF00000000) >> 32u);
1984+
element_info.value = uint32_t(surface->sort.sort_key1 & 0x1FF);
19851985
}
19861986

19871987
if (p_update_buffer) {
@@ -2764,7 +2764,8 @@ void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryI
27642764
sdcache->sort.sort_key2 = 0;
27652765

27662766
sdcache->sort.surface_index = p_surface;
2767-
sdcache->sort.material_id = p_material_id;
2767+
sdcache->sort.material_id_hi = (p_material_id & 0xFF000000) >> 24;
2768+
sdcache->sort.material_id_lo = (p_material_id & 0x00FFFFFF);
27682769
sdcache->sort.shader_id = p_shader_id;
27692770
sdcache->sort.geometry_id = p_mesh.get_local_index();
27702771
sdcache->sort.priority = p_material->priority;

servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -478,15 +478,21 @@ class RenderForwardMobile : public RendererSceneRenderRD {
478478
uint64_t sort_key2;
479479
};
480480
struct {
481-
uint64_t geometry_id : 32;
482-
uint64_t material_id : 32;
483-
484-
uint64_t shader_id : 32;
481+
// These fields must stay grouped together at the start of the key: RenderElementInfo's value is obtained by masking sort_key1 directly (see _fill_instance_data).
485482
uint64_t lod_index : 8;
486483
uint64_t uses_lightmap : 1;
487484
uint64_t pad : 3;
485+
486+
// Sorted based on optimal order for respecting priority and reducing the amount of rebinding of shaders, materials,
487+
// and geometry. The current order was found to perform best in large projects. If you wish to measure
488+
// differences, refer to RenderingDeviceGraph and the methods available to print statistics for draw lists.
488489
uint64_t depth_layer : 4;
489490
uint64_t surface_index : 8;
491+
uint64_t geometry_id : 32;
492+
uint64_t material_id_hi : 8;
493+
494+
uint64_t material_id_lo : 24;
495+
uint64_t shader_id : 32;
490496
uint64_t priority : 8;
491497
};
492498
} sort;

servers/rendering/rendering_device_graph.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
#define PRINT_RESOURCE_TRACKER_TOTAL 0
3636
#define PRINT_COMMAND_RECORDING 0
3737

38+
// Prints the total number of bytes used for draw lists in a frame.
39+
#define PRINT_DRAW_LIST_STATS 0
40+
3841
RenderingDeviceGraph::RenderingDeviceGraph() {
3942
driver_honors_barriers = false;
4043
driver_clears_with_copy_engine = false;
@@ -835,7 +838,15 @@ void RenderingDeviceGraph::_get_draw_list_render_pass_and_framebuffer(const Reco
835838
r_framebuffer = it->value.framebuffer;
836839
}
837840

841+
#if PRINT_DRAW_LIST_STATS
842+
static uint32_t draw_list_total_size = 0;
843+
#endif
844+
838845
void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) {
846+
#if PRINT_DRAW_LIST_STATS
847+
draw_list_total_size += p_instruction_data_size;
848+
#endif
849+
839850
uint32_t instruction_data_cursor = 0;
840851
while (instruction_data_cursor < p_instruction_data_size) {
841852
DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size);
@@ -2366,6 +2377,10 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD
23662377
workarounds_state.draw_list_found = false;
23672378
}
23682379

2380+
#if PRINT_DRAW_LIST_STATS
2381+
draw_list_total_size = 0;
2382+
#endif
2383+
23692384
if (p_reorder_commands) {
23702385
#if PRINT_RENDER_GRAPH
23712386
print_line("BEFORE SORT");
@@ -2416,6 +2431,9 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD
24162431

24172432
_run_label_command_change(r_command_buffer, -1, -1, false, false, nullptr, 0, current_label_index, current_label_level);
24182433

2434+
#if PRINT_DRAW_LIST_STATS
2435+
print_line(vformat("Draw list %d bytes", draw_list_total_size));
2436+
#endif
24192437
#if PRINT_COMMAND_RECORDING
24202438
print_line(vformat("Recorded %d commands", command_count));
24212439
#endif

0 commit comments

Comments
 (0)