Skip to content

Commit 235d112

Browse files
committed
Merge pull request #112481 from stuartcarnie/2d_canvas_vbos
Massively optimize canvas 2D rendering by using vertex buffers
2 parents 90413da + 90c0e6a commit 235d112

25 files changed

+893
-256
lines changed

core/templates/hash_map.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class HashMap : private Allocator {
7171
static constexpr uint32_t MIN_CAPACITY_INDEX = 2; // Use a prime.
7272
static constexpr float MAX_OCCUPANCY = 0.75;
7373
static constexpr uint32_t EMPTY_HASH = 0;
74+
using KV = KeyValue<TKey, TValue>; // Type alias for easier access to KeyValue.
7475

7576
private:
7677
HashMapElement<TKey, TValue> **_elements = nullptr;
@@ -590,6 +591,22 @@ class HashMap : private Allocator {
590591
}
591592
}
592593

594+
// Move constructor: takes over p_other's element/hash arrays and head/tail
// element pointers without copying, then leaves p_other empty (null storage,
// zero size, capacity index reset to MIN_CAPACITY_INDEX).
HashMap(HashMap &&p_other) {
	// Take the element and hash arrays, detaching them from p_other.
	_elements = p_other._elements;
	p_other._elements = nullptr;
	_hashes = p_other._hashes;
	p_other._hashes = nullptr;

	// Take the head/tail element pointers.
	_head_element = p_other._head_element;
	p_other._head_element = nullptr;
	_tail_element = p_other._tail_element;
	p_other._tail_element = nullptr;

	// Carry over the bookkeeping and reset p_other's.
	_capacity_idx = p_other._capacity_idx;
	p_other._capacity_idx = MIN_CAPACITY_INDEX;
	_size = p_other._size;
	p_other._size = 0;
}
609+
593610
void operator=(const HashMap &p_other) {
594611
if (this == &p_other) {
595612
return; // Ignore self assignment.
@@ -609,6 +626,36 @@ class HashMap : private Allocator {
609626
}
610627
}
611628

629+
// Move assignment: releases whatever this map currently holds, then takes
// over p_other's storage and leaves p_other empty. Self-assignment is a no-op.
HashMap &operator=(HashMap &&p_other) {
	if (this != &p_other) {
		// Drop our current contents first: clear() when there are elements,
		// then free the element and hash arrays if they were allocated.
		if (_size != 0) {
			clear();
		}
		if (_elements != nullptr) {
			Memory::free_static(_elements);
			Memory::free_static(_hashes);
		}

		// Take the element and hash arrays, detaching them from p_other.
		_elements = p_other._elements;
		p_other._elements = nullptr;
		_hashes = p_other._hashes;
		p_other._hashes = nullptr;

		// Take the head/tail element pointers.
		_head_element = p_other._head_element;
		p_other._head_element = nullptr;
		_tail_element = p_other._tail_element;
		p_other._tail_element = nullptr;

		// Carry over the bookkeeping and reset p_other's.
		_capacity_idx = p_other._capacity_idx;
		p_other._capacity_idx = MIN_CAPACITY_INDEX;
		_size = p_other._size;
		p_other._size = 0;
	}

	return *this;
}
658+
612659
HashMap(uint32_t p_initial_capacity) {
613660
// Capacity can't be 0.
614661
_capacity_idx = 0;

doc/classes/RDVertexAttribute.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
<tutorials>
1010
</tutorials>
1111
<members>
12+
<member name="binding" type="int" setter="set_binding" getter="get_binding" default="4294967295">
13+
The index of the buffer in the vertex buffer array to bind this vertex attribute to. When set to [code]-1[/code], it defaults to the index of the attribute.
14+
[b]Note:[/b] You cannot mix attributes that have an explicitly assigned binding with implicitly assigned ones (i.e. [code]-1[/code]). Either all attributes must have their binding set to [code]-1[/code], or all must have explicit bindings.
15+
</member>
1216
<member name="format" type="int" setter="set_format" getter="get_format" enum="RenderingDevice.DataFormat" default="232">
1317
The way that this attribute's data is interpreted when sent to a shader.
1418
</member>

doc/classes/RenderingDevice.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,17 @@
335335
Binds [param vertex_array] to the specified [param draw_list].
336336
</description>
337337
</method>
338+
<method name="draw_list_bind_vertex_buffers_format">
339+
<return type="void" />
340+
<param index="0" name="draw_list" type="int" />
341+
<param index="1" name="vertex_format" type="int" />
342+
<param index="2" name="vertex_count" type="int" />
343+
<param index="3" name="vertex_buffers" type="RID[]" />
344+
<param index="4" name="offsets" type="PackedInt64Array" default="PackedInt64Array()" />
345+
<description>
346+
Binds a set of [param vertex_buffers] directly to the specified [param draw_list] using [param vertex_format] without creating a vertex array RID. Provide the number of vertices in [param vertex_count]; optional per-buffer byte [param offsets] may also be supplied.
347+
</description>
348+
</method>
338349
<method name="draw_list_disable_scissor">
339350
<return type="void" />
340351
<param index="0" name="draw_list" type="int" />

drivers/d3d12/rendering_device_driver_d3d12.cpp

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,24 @@ uint8_t *RenderingDeviceDriverD3D12::buffer_persistent_map_advance(BufferID p_bu
11651165
return buf_info->persistent_ptr + buf_info->frame_idx * buf_info->size;
11661166
}
11671167

1168+
// Packs the frame index of every dynamic buffer in p_buffers into one 64-bit
// value: 2 bits per dynamic buffer, in iteration order, starting at the least
// significant bits. Buffers for which is_dynamic() is false are skipped and
// consume no bits.
uint64_t RenderingDeviceDriverD3D12::buffer_get_dynamic_offsets(Span<BufferID> p_buffers) {
	uint64_t mask = 0u;
	uint64_t shift = 0u;

	for (const BufferID &buf : p_buffers) {
		const BufferInfo *buf_info = (const BufferInfo *)buf.id;
		if (!buf_info->is_dynamic()) {
			continue;
		}
		const BufferDynamicInfo *dyn_buf = (const BufferDynamicInfo *)buf.id;
		// Widen before shifting: a shift is evaluated in the type of its left
		// operand, so this keeps the operation 64-bit like `mask` regardless of
		// how frame_idx is declared (NOTE(review): presumably narrower than
		// 64 bits - confirm against BufferDynamicInfo).
		mask |= uint64_t(dyn_buf->frame_idx) << shift;
		// We can encode the frame index in 2 bits since frame_count won't be > 4.
		shift += 2UL;
	}

	return mask;
}
1185+
11681186
uint64_t RenderingDeviceDriverD3D12::buffer_get_device_address(BufferID p_buffer) {
11691187
const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
11701188
return buf_info->resource->GetGPUVirtualAddress();
@@ -2183,27 +2201,36 @@ bool RenderingDeviceDriverD3D12::sampler_is_format_supported_for_filter(DataForm
21832201
/**** VERTEX ARRAY ****/
21842202
/**********************/
21852203

2186-
RDD::VertexFormatID RenderingDeviceDriverD3D12::vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) {
2204+
RDD::VertexFormatID RenderingDeviceDriverD3D12::vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) {
21872205
VertexFormatInfo *vf_info = VersatileResource::allocate<VertexFormatInfo>(resources_allocator);
2188-
21892206
vf_info->input_elem_descs.resize(p_vertex_attribs.size());
2190-
vf_info->vertex_buffer_strides.resize(p_vertex_attribs.size());
2207+
2208+
uint32_t max_binding = 0;
21912209
for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) {
2192-
vf_info->input_elem_descs[i] = {};
2193-
vf_info->input_elem_descs[i].SemanticName = "TEXCOORD";
2194-
vf_info->input_elem_descs[i].SemanticIndex = p_vertex_attribs[i].location;
2195-
vf_info->input_elem_descs[i].Format = RD_TO_D3D12_FORMAT[p_vertex_attribs[i].format].general_format;
2196-
vf_info->input_elem_descs[i].InputSlot = i; // TODO: Can the same slot be used if data comes from the same buffer (regardless format)?
2197-
vf_info->input_elem_descs[i].AlignedByteOffset = p_vertex_attribs[i].offset;
2198-
if (p_vertex_attribs[i].frequency == VERTEX_FREQUENCY_INSTANCE) {
2199-
vf_info->input_elem_descs[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
2200-
vf_info->input_elem_descs[i].InstanceDataStepRate = 1;
2210+
D3D12_INPUT_ELEMENT_DESC &input_element_desc = vf_info->input_elem_descs[i];
2211+
const VertexAttribute &vertex_attrib = p_vertex_attribs[i];
2212+
const VertexAttributeBinding &vertex_binding = p_vertex_bindings[vertex_attrib.binding];
2213+
2214+
input_element_desc = {};
2215+
input_element_desc.SemanticName = "TEXCOORD";
2216+
input_element_desc.SemanticIndex = vertex_attrib.location;
2217+
input_element_desc.Format = RD_TO_D3D12_FORMAT[vertex_attrib.format].general_format;
2218+
input_element_desc.InputSlot = vertex_attrib.binding;
2219+
input_element_desc.AlignedByteOffset = vertex_attrib.offset;
2220+
if (vertex_binding.frequency == VERTEX_FREQUENCY_INSTANCE) {
2221+
input_element_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
2222+
input_element_desc.InstanceDataStepRate = 1;
22012223
} else {
2202-
vf_info->input_elem_descs[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
2203-
vf_info->input_elem_descs[i].InstanceDataStepRate = 0;
2224+
input_element_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
2225+
input_element_desc.InstanceDataStepRate = 0;
22042226
}
22052227

2206-
vf_info->vertex_buffer_strides[i] = p_vertex_attribs[i].stride;
2228+
max_binding = MAX(max_binding, vertex_attrib.binding + 1);
2229+
}
2230+
2231+
vf_info->vertex_buffer_strides.resize(max_binding);
2232+
for (const VertexAttributeBindingsMap::KV &vertex_binding_pair : p_vertex_bindings) {
2233+
vf_info->vertex_buffer_strides[vertex_binding_pair.key] = vertex_binding_pair.value.stride;
22072234
}
22082235

22092236
return VertexFormatID(vf_info);
@@ -5378,7 +5405,7 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe
53785405
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
53795406
}
53805407

5381-
void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {
5408+
void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
53825409
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
53835410

53845411
DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX);
@@ -5390,8 +5417,15 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe
53905417
for (uint32_t i = 0; i < p_binding_count; i++) {
53915418
BufferInfo *buffer_info = (BufferInfo *)p_buffers[i].id;
53925419

5420+
uint32_t dynamic_offset = 0;
5421+
if (buffer_info->is_dynamic()) {
5422+
uint64_t buffer_frame_idx = p_dynamic_offsets & 0x3; // Assuming max 4 frames.
5423+
p_dynamic_offsets >>= 2;
5424+
dynamic_offset = buffer_frame_idx * buffer_info->size;
5425+
}
5426+
53935427
cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {};
5394-
cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i];
5428+
cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + dynamic_offset + p_offsets[i];
53955429
cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i];
53965430
if (!barrier_capabilities.enhanced_barriers_supported) {
53975431
_resource_transition_batch(cmd_buf_info, buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);

drivers/d3d12/rendering_device_driver_d3d12.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
347347
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
348348
virtual void buffer_unmap(BufferID p_buffer) override final;
349349
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final;
350+
virtual uint64_t buffer_get_dynamic_offsets(Span<BufferID> p_buffers) override final;
350351
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
351352

352353
/*****************/
@@ -429,7 +430,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
429430
};
430431

431432
public:
432-
virtual VertexFormatID vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) override final;
433+
virtual VertexFormatID vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) override final;
433434
virtual void vertex_format_free(VertexFormatID p_vertex_format) override final;
434435

435436
/******************/
@@ -862,7 +863,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
862863
virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
863864

864865
// Buffer binding.
865-
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final;
866+
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override final;
866867
virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final;
867868

868869
private:

drivers/metal/metal_objects.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
663663
uint32_t p_instance_count,
664664
uint32_t p_base_vertex,
665665
uint32_t p_first_instance);
666-
void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets);
666+
void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets);
667667
void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset);
668668

669669
void render_draw_indexed(uint32_t p_index_count,

drivers/metal/metal_objects.mm

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -783,10 +783,12 @@ static bool isArrayTexture(MTLTextureType p_type) {
783783

784784
if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) {
785785
uint32_t p_binding_count = render.vertex_buffers.size();
786-
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
787-
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
788-
offsets:render.vertex_offsets.ptr()
789-
withRange:NSMakeRange(first, p_binding_count)];
786+
if (p_binding_count > 0) {
787+
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
788+
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
789+
offsets:render.vertex_offsets.ptr()
790+
withRange:NSMakeRange(first, p_binding_count)];
791+
}
790792
}
791793

792794
render.resource_tracker.encode(render.encoder);
@@ -1252,24 +1254,47 @@ static bool isArrayTexture(MTLTextureType p_type) {
12521254
baseInstance:p_first_instance];
12531255
}
12541256

1255-
void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets) {
1257+
void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
12561258
DEV_ASSERT(type == MDCommandBufferStateType::Render);
12571259

12581260
render.vertex_buffers.resize(p_binding_count);
12591261
render.vertex_offsets.resize(p_binding_count);
12601262

1263+
// Are the existing buffer bindings the same?
1264+
bool same = true;
1265+
12611266
// Reverse the buffers, as their bindings are assigned in descending order.
12621267
for (uint32_t i = 0; i < p_binding_count; i += 1) {
12631268
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id;
1264-
render.vertex_buffers[i] = buf_info->metal_buffer;
1265-
render.vertex_offsets[i] = p_offsets[p_binding_count - i - 1];
1269+
1270+
NSUInteger dynamic_offset = 0;
1271+
if (buf_info->is_dynamic()) {
1272+
const MetalBufferDynamicInfo *dyn_buf = (const MetalBufferDynamicInfo *)buf_info;
1273+
uint64_t frame_idx = p_dynamic_offsets & 0x3;
1274+
p_dynamic_offsets >>= 2;
1275+
dynamic_offset = frame_idx * dyn_buf->size_bytes;
1276+
}
1277+
if (render.vertex_buffers[i] != buf_info->metal_buffer) {
1278+
render.vertex_buffers[i] = buf_info->metal_buffer;
1279+
same = false;
1280+
}
1281+
1282+
render.vertex_offsets[i] = dynamic_offset + p_offsets[p_binding_count - i - 1];
12661283
}
12671284

12681285
if (render.encoder) {
12691286
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
1270-
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
1271-
offsets:render.vertex_offsets.ptr()
1272-
withRange:NSMakeRange(first, p_binding_count)];
1287+
if (same) {
1288+
NSUInteger *offset_ptr = render.vertex_offsets.ptr();
1289+
for (uint32_t i = first; i < first + p_binding_count; i++) {
1290+
[render.encoder setVertexBufferOffset:*offset_ptr atIndex:i];
1291+
offset_ptr++;
1292+
}
1293+
} else {
1294+
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
1295+
offsets:render.vertex_offsets.ptr()
1296+
withRange:NSMakeRange(first, p_binding_count)];
1297+
}
12731298
render.dirty.clear_flag(RenderState::DIRTY_VERTEX);
12741299
} else {
12751300
render.dirty.set_flag(RenderState::DIRTY_VERTEX);
@@ -1394,7 +1419,9 @@ static bool isArrayTexture(MTLTextureType p_type) {
13941419
viewports.clear();
13951420
scissors.clear();
13961421
blend_constants.reset();
1422+
bzero(vertex_buffers.ptr(), sizeof(id<MTLBuffer> __unsafe_unretained) * vertex_buffers.size());
13971423
vertex_buffers.clear();
1424+
bzero(vertex_offsets.ptr(), sizeof(NSUInteger) * vertex_offsets.size());
13981425
vertex_offsets.clear();
13991426
resource_tracker.reset();
14001427
}

drivers/metal/rendering_device_driver_metal.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
130130
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
131131
virtual void buffer_unmap(BufferID p_buffer) override final;
132132
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final;
133+
virtual uint64_t buffer_get_dynamic_offsets(Span<BufferID> p_buffers) override final;
133134
virtual void buffer_flush(BufferID p_buffer) override final;
134135
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
135136

@@ -164,7 +165,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
164165

165166
private:
166167
public:
167-
virtual VertexFormatID vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) override final;
168+
virtual VertexFormatID vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) override final;
168169
virtual void vertex_format_free(VertexFormatID p_vertex_format) override final;
169170

170171
#pragma mark - Barriers
@@ -403,7 +404,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
403404
virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
404405

405406
// Buffer binding.
406-
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final;
407+
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override final;
407408
virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final;
408409

409410
// Dynamic state.

0 commit comments

Comments
 (0)