Skip to content

Commit 1be4193

Browse files
committed
[backend|renderer] refactor command_stream
Extract le_command_stream and make it available to both the backend and the renderer. Command streams are owned by the Backend's Frame and are growable containers for raw command stream data. Command streams can be re-used, and they can be cheaply reset (they work as bump allocators) when we clear the frame.
1 parent b4b7b69 commit 1be4193

File tree

7 files changed

+180
-125
lines changed

7 files changed

+180
-125
lines changed

modules/le_backend_vk/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ set (SOURCES ${SOURCES} "le_backend_vk_settings.inl")
2020
set (SOURCES ${SOURCES} "le_backend_types_internal.h")
2121
set (SOURCES ${SOURCES} "private/le_backend_vk/le_backend_types_pipeline.inl")
2222
set (SOURCES ${SOURCES} "private/le_backend_vk/vk_to_str_helpers.inl")
23+
set (SOURCES ${SOURCES} "private/le_backend_vk/le_command_stream_t.h")
2324
set (SOURCES ${SOURCES} "le_instance_vk.cpp")
2425
set (SOURCES ${SOURCES} "le_pipeline.cpp")
2526
set (SOURCES ${SOURCES} "le_device_vk.cpp")
@@ -55,4 +56,4 @@ ENDIF()
5556
target_link_libraries(${TARGET} PUBLIC ${LINKER_FLAGS})
5657

5758

58-
source_group(${TARGET} FILES ${SOURCES})
59+
source_group(${TARGET} FILES ${SOURCES})

modules/le_backend_vk/le_backend_vk.cpp

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "le_core.h"
22
#include "le_backend_vk.h"
33
#include "le_log.h"
4+
#include "private/le_backend_vk/le_command_stream_t.h"
45
#include "util/vk_mem_alloc/vk_mem_alloc.h" // for allocation
56
#include "le_backend_types_internal.h" // includes vulkan.hpp
67
#include "le_swapchain_vk.h"
@@ -612,6 +613,8 @@ struct BackendFrameData {
612613

613614
le_staging_allocator_o* stagingAllocator; // owning: allocator for large objects to GPU memory
614615

616+
std::vector<le_command_stream_t*> command_streams; // owning; these must be destroyed when frame gets destroyed.
617+
615618
bool must_create_queues_dot_graph = false;
616619
};
617620

@@ -830,28 +833,36 @@ static void backend_destroy( le_backend_o* self ) {
830833

831834
vmaDestroyPool( self->mAllocator, frameData.allocationPool );
832835

833-
// destroy staging allocator
836+
// Destroy staging allocator
834837
le_staging_allocator_i.destroy( frameData.stagingAllocator );
835838

836-
// remove any binned resources
837-
for ( auto& a : frameData.binnedResources ) {
839+
{ // Remove any binned resources
840+
for ( auto& a : frameData.binnedResources ) {
838841

839-
if ( a.second.info.isBuffer() ) {
840-
vkDestroyBuffer( device, a.second.as.buffer, nullptr );
841-
} else {
842-
vkDestroyImage( device, a.second.as.image, nullptr );
843-
}
844-
if ( a.second.info.isBlas() ) {
845-
vkDestroyBuffer( device, a.second.info.blasInfo.buffer, nullptr );
846-
vkDestroyAccelerationStructureKHR( device, a.second.as.blas, nullptr );
842+
if ( a.second.info.isBuffer() ) {
843+
vkDestroyBuffer( device, a.second.as.buffer, nullptr );
844+
} else {
845+
vkDestroyImage( device, a.second.as.image, nullptr );
846+
}
847+
if ( a.second.info.isBlas() ) {
848+
vkDestroyBuffer( device, a.second.info.blasInfo.buffer, nullptr );
849+
vkDestroyAccelerationStructureKHR( device, a.second.as.blas, nullptr );
850+
}
851+
if ( a.second.info.isTlas() ) {
852+
vkDestroyBuffer( device, a.second.info.tlasInfo.buffer, nullptr );
853+
vkDestroyAccelerationStructureKHR( device, a.second.as.tlas, nullptr );
854+
}
855+
vmaFreeMemory( self->mAllocator, a.second.allocation );
847856
}
848-
if ( a.second.info.isTlas() ) {
849-
vkDestroyBuffer( device, a.second.info.tlasInfo.buffer, nullptr );
850-
vkDestroyAccelerationStructureKHR( device, a.second.as.tlas, nullptr );
857+
frameData.binnedResources.clear();
858+
}
859+
860+
{ // Clear command streams
861+
for ( auto& cs : frameData.command_streams ) {
862+
delete ( cs );
851863
}
852-
vmaFreeMemory( self->mAllocator, a.second.allocation );
864+
frameData.command_streams.clear();
853865
}
854-
frameData.binnedResources.clear();
855866
}
856867

857868
self->mFrames.clear();
@@ -2105,6 +2116,11 @@ static bool backend_clear_frame( le_backend_o* self, size_t frameIndex ) {
21052116
}
21062117
frame.passes.clear();
21072118

2119+
// Reset command streams
2120+
for ( auto cs : frame.command_streams ) {
2121+
cs->reset();
2122+
}
2123+
21082124
frame.frameNumber = self->mFramesCount++; // note post-increment
21092125

21102126
return true;
@@ -4237,6 +4253,23 @@ static le_allocator_o** backend_get_transient_allocators( le_backend_o* self, si
42374253
return self->mFrames[ frameIndex ].allocators.data();
42384254
}
42394255

4256+
// ----------------------------------------------------------------------
4257+
4258+
/// Returns the command stream pool of frame `frameIndex`, grown on demand.
///
/// If the frame currently holds fewer than `num_command_streams` streams,
/// new ones are heap-allocated and appended until the pool is large enough.
/// Existing streams are left untouched; the pool never shrinks here.
///
/// The returned pointer addresses the frame's internal array of stream
/// pointers. A later call that grows the pool may reallocate that array, so
/// callers should not hold on to the returned pointer across such calls.
static le_command_stream_t** backend_get_frame_command_streams( le_backend_o* self, size_t frameIndex, size_t num_command_streams ) {

    auto& frame_streams = self->mFrames[ frameIndex ].command_streams;

    // Top up the pool: one freshly constructed stream per missing slot.
    for ( size_t count = frame_streams.size(); count < num_command_streams; ++count ) {
        frame_streams.push_back( new le_command_stream_t() );
    }

    return frame_streams.data();
}
4272+
42404273
// ----------------------------------------------------------------------
42414274
static le_allocator_o** backend_create_transient_allocators( le_backend_o* self, size_t frameIndex, size_t numAllocators ) {
42424275

@@ -7803,6 +7836,7 @@ LE_MODULE_REGISTER_IMPL( le_backend_vk, api_ ) {
78037836
vk_backend_i.get_data_frames_count = backend_get_data_frames_count;
78047837
vk_backend_i.get_transient_allocators = backend_get_transient_allocators;
78057838
vk_backend_i.get_staging_allocator = backend_get_staging_allocator;
7839+
vk_backend_i.get_frame_command_streams = backend_get_frame_command_streams;
78067840
vk_backend_i.poll_frame_fence = backend_poll_frame_fence;
78077841
vk_backend_i.clear_frame = backend_clear_frame;
78087842
vk_backend_i.acquire_physical_resources = backend_acquire_physical_resources;

modules/le_backend_vk/le_backend_vk.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct le_buffer_o;
1717
struct le_allocator_o;
1818
struct le_staging_allocator_o;
1919
struct le_resource_handle_t; // defined in renderer_types
20+
struct le_command_stream_t;
2021

2122
struct le_pipeline_manager_o;
2223

@@ -132,6 +133,7 @@ struct le_backend_vk_api {
132133

133134
bool ( *dispatch_frame ) ( le_backend_o *self, size_t frameIndex );
134135
le_allocator_o** ( *get_transient_allocators ) ( le_backend_o* self, size_t frameIndex);
136+
le_command_stream_t** ( *get_frame_command_streams ) ( le_backend_o* self, size_t frameIndex, size_t num_command_streams);
135137
le_staging_allocator_o*( *get_staging_allocator ) ( le_backend_o* self, size_t frameIndex);
136138

137139
le_shader_module_handle( *create_shader_module ) ( le_backend_o* self, char const * path, const LeShaderSourceLanguageEnum& shader_source_language, const le::ShaderStageFlagBits& moduleType, char const * macro_definitions, le_shader_module_handle handle, VkSpecializationMapEntry const * specialization_map_entries, uint32_t specialization_map_entries_count, void * specialization_map_data, uint32_t specialization_map_data_num_bytes);
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#pragma once
2+
3+
#include <cstdlib>
#include <new>
#include <stddef.h>
5+
6+
/*
7+
* The Command Stream is where the renderer stores the bytecode for
8+
* our encoded command stream; Some data such as scissor dimensions and
9+
* push constants will also be encoded into the command stream.
10+
*
11+
* The Backend reads from the command stream and decodes it into
12+
* Vulkan commands.
13+
*
14+
* We keep the command stream in a header file that is shared by renderer
15+
* and backend so that the methods herein can be inlined, as this is all
16+
* happening on the hot path; we want renderer and backend to each have
17+
* direct access to the data.
18+
*
19+
* A command stream maps 1:1 to a renderpass. As such, there should be no
20+
* threading contention, as only ever one thread may access a renderpass,
21+
* and only ever the backend or the renderer access the command stream.
22+
*
23+
* Command streams are stored with and owned by the Backend Frame. The
24+
* Backend Frame creates new Command Streams so that there is one command
25+
* stream per renderpass. Command Streams are reset when a frame gets cleared.
26+
*
27+
* Command streams work as bump, or arena-allocators. This saves us allocating
28+
* and de-allocating command streams per-frame. At the same time, command
29+
* streams may grow, if there are a large number of commands to record.
30+
*
31+
*/
32+
33+
struct le_command_stream_t {
34+
char* data = nullptr;
35+
size_t size = 0;
36+
size_t capacity = 0;
37+
size_t cmd_count = 0;
38+
39+
le_command_stream_t()
40+
: size( 0 )
41+
, capacity( 8 ) {
42+
data = ( char* )malloc( capacity );
43+
}
44+
45+
~le_command_stream_t() {
46+
47+
if ( data ) {
48+
free( data );
49+
size = 0;
50+
cmd_count = 0;
51+
data = nullptr;
52+
}
53+
}
54+
55+
void reset() {
56+
this->cmd_count = 0;
57+
this->size = 0;
58+
}
59+
60+
template <typename T>
61+
inline T* emplace_cmd( size_t payload_sz = 0 ) {
62+
63+
size_t old_sz = this->size;
64+
size_t new_sz = old_sz + sizeof( T ) + payload_sz;
65+
66+
while ( new_sz > this->capacity ) {
67+
this->capacity *= 2;
68+
this->data = ( char* )realloc( this->data, this->capacity );
69+
}
70+
71+
this->size = new_sz;
72+
this->cmd_count++;
73+
return new ( this->data + old_sz )( T );
74+
}
75+
};
76+
77+
// ----------------------------------------------------------------------

0 commit comments

Comments
 (0)