Skip to content

Commit 13ada4a

Browse files
Insert barriers between subpasses when using enhanced barriers on D3D12.
1 parent 0fdb93c commit 13ada4a

File tree

2 files changed

+173
-6
lines changed

2 files changed

+173
-6
lines changed

drivers/d3d12/rendering_device_driver_d3d12.cpp

Lines changed: 162 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2485,8 +2485,8 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_
24852485

24862486
// The command list must support the required interface.
24872487
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id);
2488-
ID3D12GraphicsCommandList7 *cmd_list_7 = nullptr;
2489-
HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(&cmd_list_7));
2488+
ComPtr<ID3D12GraphicsCommandList7> cmd_list_7;
2489+
HRESULT res = cmd_buf_info->cmd_list->QueryInterface(cmd_list_7.GetAddressOf());
24902490
ERR_FAIL_COND(FAILED(res));
24912491

24922492
// Convert the RDD barriers to D3D12 enhanced barriers.
@@ -4795,8 +4795,17 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
47954795
cmd_buf_info->render_pass_state.region_rect.right == fb_info->size.x &&
47964796
cmd_buf_info->render_pass_state.region_rect.bottom == fb_info->size.y);
47974797

4798+
cmd_buf_info->render_pass_state.attachment_layouts.resize(pass_info->attachments.size());
4799+
47984800
for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {
4799-
if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_DONT_CARE) {
4801+
const Attachment &attachment = pass_info->attachments[i];
4802+
4803+
for (RenderPassState::AttachmentLayout::AspectLayout &aspect_layout : cmd_buf_info->render_pass_state.attachment_layouts[i].aspect_layouts) {
4804+
aspect_layout.cur_layout = attachment.initial_layout;
4805+
aspect_layout.expected_layout = attachment.initial_layout;
4806+
}
4807+
4808+
if (attachment.load_op == ATTACHMENT_LOAD_OP_DONT_CARE) {
48004809
const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id;
48014810
_discard_texture_subresources(tex_info, cmd_buf_info);
48024811
}
@@ -4857,6 +4866,91 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
48574866
}
48584867
}
48594868

4869+
// Subpass dependencies cannot be specified by the end user, and by default they are very aggressive.
4870+
// We can be more lenient by just looking at the texture layout and specifying appropriate access and stage bits.
4871+
4872+
// Access bits to use in a render pass barrier for each texture layout; the table is indexed by the layout value.
// As a fallback, layouts we don't expect to see in a render pass get a full memory read/write barrier.
4873+
static const BitField<RDD::BarrierAccessBits> RD_RENDER_PASS_LAYOUT_TO_ACCESS_BITS[RDD::TEXTURE_LAYOUT_MAX] = {
4874+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_UNDEFINED
4875+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_GENERAL
4876+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
4877+
RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
4878+
RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
4879+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
4880+
RDD::BARRIER_ACCESS_SHADER_READ_BIT, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
4881+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
4882+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
4883+
RDD::BARRIER_ACCESS_RESOLVE_READ_BIT, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
4884+
RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
4885+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL
4886+
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL
4887+
};
4888+
4889+
// Pipeline stage bits to use in a render pass barrier for each texture layout; the table is indexed by the layout value.
// As a fallback, layouts we don't expect to see in a render pass synchronize against all commands.
4890+
static const BitField<RDD::PipelineStageBits> RD_RENDER_PASS_LAYOUT_TO_STAGE_BITS[RDD::TEXTURE_LAYOUT_MAX] = {
4891+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_UNDEFINED
4892+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_GENERAL
4893+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
4894+
RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
4895+
RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
4896+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
4897+
RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
4898+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
4899+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
4900+
RDD::PIPELINE_STAGE_RESOLVE_BIT, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
4901+
RDD::PIPELINE_STAGE_RESOLVE_BIT, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
4902+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL
4903+
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL
4904+
};
4905+
4906+
// Issues the texture barriers required to bring every render pass attachment aspect from
// its current layout to the layout the render pass expects next.
//
// Returns immediately when enhanced barriers are not supported. Every aspect whose current
// and expected layouts differ contributes one texture barrier covering the mip and layer
// range stored in the attachment's TextureInfo, and its current layout is then set to the
// expected one. All collected barriers go out in a single command_pipeline_barrier() call;
// no call is made when nothing needs to change.
void RenderingDeviceDriverD3D12::_render_pass_enhanced_barriers_flush(CommandBufferID p_cmd_buffer) {
	if (!barrier_capabilities.enhanced_barriers_supported) {
		return;
	}

	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
	RenderPassState &pass_state = cmd_buf_info->render_pass_state;

	// Scratch storage reused across calls on the same thread.
	thread_local LocalVector<TextureBarrier> pending_barriers;
	pending_barriers.clear();

	// Union of the stages implied by every layout we transition from / to.
	BitField<PipelineStageBits> stages_before = {};
	BitField<PipelineStageBits> stages_after = {};

	for (uint32_t attachment_idx = 0; attachment_idx < pass_state.attachment_layouts.size(); attachment_idx++) {
		RenderPassState::AttachmentLayout &layout_state = pass_state.attachment_layouts[attachment_idx];
		TextureID texture = pass_state.fb_info->attachments[attachment_idx];
		TextureInfo *texture_info = (TextureInfo *)texture.id;

		for (uint32_t aspect_idx = 0; aspect_idx < TEXTURE_ASPECT_MAX; aspect_idx++) {
			RenderPassState::AttachmentLayout::AspectLayout &aspect = layout_state.aspect_layouts[aspect_idx];

			// Already in the expected layout; nothing to transition.
			if (aspect.cur_layout == aspect.expected_layout) {
				continue;
			}

			const TextureLayout from_layout = aspect.cur_layout;
			const TextureLayout to_layout = aspect.expected_layout;

			stages_before = stages_before | RD_RENDER_PASS_LAYOUT_TO_STAGE_BITS[from_layout];
			stages_after = stages_after | RD_RENDER_PASS_LAYOUT_TO_STAGE_BITS[to_layout];

			TextureBarrier barrier;
			barrier.texture = texture;
			barrier.src_access = RD_RENDER_PASS_LAYOUT_TO_ACCESS_BITS[from_layout];
			barrier.dst_access = RD_RENDER_PASS_LAYOUT_TO_ACCESS_BITS[to_layout];
			barrier.prev_layout = from_layout;
			barrier.next_layout = to_layout;
			// Aspect bits are single-bit flags, one per aspect index.
			barrier.subresources.aspect = (TextureAspectBits)(1 << aspect_idx);
			barrier.subresources.base_mipmap = texture_info->base_mip;
			barrier.subresources.mipmap_count = texture_info->mipmaps;
			barrier.subresources.base_layer = texture_info->base_layer;
			barrier.subresources.layer_count = texture_info->layers;
			pending_barriers.push_back(barrier);

			// Record the transition so the next flush starts from the new layout.
			aspect.cur_layout = to_layout;
		}
	}

	if (pending_barriers.is_empty()) {
		return;
	}

	command_pipeline_barrier(p_cmd_buffer, stages_before, stages_after, VectorView<MemoryAccessBarrier>(), VectorView<BufferBarrier>(), pending_barriers);
}
4953+
48604954
void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) {
48614955
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
48624956

@@ -4895,11 +4989,22 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer)
48954989

48964990
TextureInfo *src_tex_info = (TextureInfo *)fb_info->attachments[color_index].id;
48974991
uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());
4898-
_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
4992+
4993+
if (barrier_capabilities.enhanced_barriers_supported) {
4994+
cmd_buf_info->render_pass_state.attachment_layouts[color_index].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout = TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL;
4995+
} else {
4996+
_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
4997+
}
48994998

49004999
TextureInfo *dst_tex_info = (TextureInfo *)fb_info->attachments[resolve_index].id;
49015000
uint32_t dst_subresource = D3D12CalcSubresource(dst_tex_info->base_mip, dst_tex_info->base_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize());
4902-
_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
5001+
5002+
if (barrier_capabilities.enhanced_barriers_supported) {
5003+
// This should have already been done when beginning the subpass.
5004+
DEV_ASSERT(cmd_buf_info->render_pass_state.attachment_layouts[resolve_index].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout == TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL);
5005+
} else {
5006+
_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
5007+
}
49035008

49045009
resolves[num_resolves].src_res = src_tex_info->resource;
49055010
resolves[num_resolves].src_subres = src_subresource;
@@ -4911,6 +5016,11 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer)
49115016

49125017
_resource_transitions_flush(cmd_buf_info);
49135018

5019+
// There can be enhanced barriers to flush only when we need to resolve textures.
5020+
if (num_resolves != 0) {
5021+
_render_pass_enhanced_barriers_flush(p_cmd_buffer);
5022+
}
5023+
49145024
for (uint32_t i = 0; i < num_resolves; i++) {
49155025
cmd_buf_info->cmd_list->ResolveSubresource(resolves[i].dst_res, resolves[i].dst_subres, resolves[i].src_res, resolves[i].src_subres, resolves[i].format);
49165026
}
@@ -4933,6 +5043,16 @@ void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_b
49335043
}
49345044
}
49355045

5046+
for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {
5047+
const Attachment &attachment = pass_info->attachments[i];
5048+
5049+
for (RenderPassState::AttachmentLayout::AspectLayout &aspect_layout : cmd_buf_info->render_pass_state.attachment_layouts[i].aspect_layouts) {
5050+
aspect_layout.expected_layout = attachment.final_layout;
5051+
}
5052+
}
5053+
5054+
_render_pass_enhanced_barriers_flush(p_cmd_buffer);
5055+
49365056
for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {
49375057
if (pass_info->attachments[i].store_op == ATTACHMENT_STORE_OP_DONT_CARE) {
49385058
const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id;
@@ -4957,10 +5077,27 @@ void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_c
49575077
const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info;
49585078
const Subpass &subpass = pass_info->subpasses[cmd_buf_info->render_pass_state.current_subpass];
49595079

5080+
for (uint32_t i = 0; i < subpass.input_references.size(); i++) {
5081+
const AttachmentReference &input_reference = subpass.input_references[i];
5082+
uint32_t attachment = input_reference.attachment;
5083+
5084+
if (attachment != AttachmentReference::UNUSED) {
5085+
RenderPassState::AttachmentLayout &attachment_layout = cmd_buf_info->render_pass_state.attachment_layouts[attachment];
5086+
5087+
// Vulkan cares about aspect bits only for input attachments.
5088+
for (uint32_t j = 0; j < TEXTURE_ASPECT_MAX; j++) {
5089+
if (input_reference.aspect & (1 << j)) {
5090+
attachment_layout.aspect_layouts[j].expected_layout = input_reference.layout;
5091+
}
5092+
}
5093+
}
5094+
}
5095+
49605096
D3D12_CPU_DESCRIPTOR_HANDLE *rtv_handles = ALLOCA_ARRAY(D3D12_CPU_DESCRIPTOR_HANDLE, subpass.color_references.size());
49615097
CPUDescriptorsHeapWalker rtv_heap_walker = fb_info->rtv_heap.make_walker();
49625098
for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
4963-
uint32_t attachment = subpass.color_references[i].attachment;
5099+
const AttachmentReference &color_reference = subpass.color_references[i];
5100+
uint32_t attachment = color_reference.attachment;
49645101
if (attachment == AttachmentReference::UNUSED) {
49655102
if (!frames[frame_idx].null_rtv_handle.ptr) {
49665103
// No null descriptor-handle created for this frame yet.
@@ -4988,6 +5125,8 @@ void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_c
49885125
rtv_heap_walker.rewind();
49895126
rtv_heap_walker.advance(rt_index);
49905127
rtv_handles[i] = rtv_heap_walker.get_curr_cpu_handle();
5128+
5129+
cmd_buf_info->render_pass_state.attachment_layouts[attachment].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout = color_reference.layout;
49915130
}
49925131
}
49935132

@@ -4999,9 +5138,26 @@ void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_c
49995138
dsv_heap_walker.rewind();
50005139
dsv_heap_walker.advance(ds_index);
50015140
dsv_handle = dsv_heap_walker.get_curr_cpu_handle();
5141+
5142+
RenderPassState::AttachmentLayout &attachment_layout = cmd_buf_info->render_pass_state.attachment_layouts[subpass.depth_stencil_reference.attachment];
5143+
attachment_layout.aspect_layouts[TEXTURE_ASPECT_DEPTH].expected_layout = subpass.depth_stencil_reference.layout;
5144+
attachment_layout.aspect_layouts[TEXTURE_ASPECT_STENCIL].expected_layout = subpass.depth_stencil_reference.layout;
50025145
}
50035146
}
50045147

5148+
for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) {
5149+
const AttachmentReference &resolve_reference = subpass.resolve_references[i];
5150+
uint32_t attachment = resolve_reference.attachment;
5151+
5152+
if (attachment != AttachmentReference::UNUSED) {
5153+
// Vulkan expects the layout to be in color attachment layout, but D3D12 wants resolve destination.
5154+
DEV_ASSERT(resolve_reference.layout == TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
5155+
cmd_buf_info->render_pass_state.attachment_layouts[attachment].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout = TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL;
5156+
}
5157+
}
5158+
5159+
_render_pass_enhanced_barriers_flush(p_cmd_buffer);
5160+
50055161
cmd_buf_info->cmd_list->OMSetRenderTargets(subpass.color_references.size(), rtv_handles, false, dsv_handle.ptr ? &dsv_handle : nullptr);
50065162
}
50075163

drivers/d3d12/rendering_device_driver_d3d12.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,11 +515,21 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
515515
struct FramebufferInfo;
516516
struct RenderPassInfo;
517517
struct RenderPassState {
518+
// Layout tracking for one render pass attachment, kept separately for each texture aspect.
struct AttachmentLayout {
519+
// Layout state of a single aspect of the attachment.
struct AspectLayout {
520+
// Layout the aspect is in as of the last barrier flush.
TextureLayout cur_layout = TEXTURE_LAYOUT_UNDEFINED;
521+
// Layout the aspect must be in next; a barrier is emitted on flush when it differs from cur_layout.
TextureLayout expected_layout = TEXTURE_LAYOUT_UNDEFINED;
522+
};
523+
524+
// One entry per texture aspect, indexed by aspect.
AspectLayout aspect_layouts[TEXTURE_ASPECT_MAX];
525+
};
526+
518527
uint32_t current_subpass = UINT32_MAX;
519528
const FramebufferInfo *fb_info = nullptr;
520529
const RenderPassInfo *pass_info = nullptr;
521530
CD3DX12_RECT region_rect = {};
522531
bool region_is_all = false;
532+
LocalVector<AttachmentLayout> attachment_layouts;
523533

524534
const VertexFormatInfo *vf_info = nullptr;
525535
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_views[8] = {};
@@ -828,6 +838,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
828838
virtual void command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) override final;
829839

830840
private:
841+
void _render_pass_enhanced_barriers_flush(CommandBufferID p_cmd_buffer);
831842
void _end_render_pass(CommandBufferID p_cmd_buffer);
832843

833844
public:

0 commit comments

Comments
 (0)