diff --git a/include/private/vulkan_private_extensions.h b/include/private/vulkan_private_extensions.h index 03746346fd..8615818799 100644 --- a/include/private/vulkan_private_extensions.h +++ b/include/private/vulkan_private_extensions.h @@ -1,6 +1,6 @@ #ifndef __VULKAN_PRIVATE_EXTENSIONS_H__ #define __VULKAN_PRIVATE_EXTENSIONS_H__ -/* Nothing here at the moment. Add hacks here! */ +/* Add hacks here! */ #endif diff --git a/include/vkd3d.h b/include/vkd3d.h index 69b1d68d1f..59ec8e4518 100644 --- a/include/vkd3d.h +++ b/include/vkd3d.h @@ -90,12 +90,11 @@ extern "C" { #define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30) #define VKD3D_CONFIG_FLAG_PLACED_TEXTURE_ALIASING (1ull << 31) #define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32) -#define VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS (1ull << 33) +/* Bit 33 vacant */ #define VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION (1ull << 34) #define VKD3D_CONFIG_FLAG_FORCE_DEDICATED_IMAGE_ALLOCATION (1ull << 35) #define VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE (1ull << 36) #define VKD3D_CONFIG_FLAG_DISABLE_SIMULTANEOUS_UAV_COMPRESSION (1ull << 37) -#define VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES (1ull << 38) #define VKD3D_CONFIG_FLAG_SKIP_DRIVER_WORKAROUNDS (1ull << 39) #define VKD3D_CONFIG_FLAG_RETAIN_PSOS (1ull << 40) #define VKD3D_CONFIG_FLAG_ENABLE_EXPERIMENTAL_FEATURES (1ull << 41) diff --git a/include/vkd3d_shader.h b/include/vkd3d_shader.h index 3233e148ba..41a89dc5a6 100644 --- a/include/vkd3d_shader.h +++ b/include/vkd3d_shader.h @@ -146,6 +146,7 @@ enum vkd3d_shader_binding_flag VKD3D_SHADER_BINDING_FLAG_BINDLESS = 0x00000008, VKD3D_SHADER_BINDING_FLAG_RAW_VA = 0x00000010, VKD3D_SHADER_BINDING_FLAG_RAW_SSBO = 0x00000020, + VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING = 0x00000040, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), }; @@ -211,10 +212,7 @@ struct vkd3d_shader_resource_binding unsigned int register_space; unsigned int register_index; unsigned int register_count; 
- unsigned int descriptor_table; - unsigned int descriptor_offset; enum vkd3d_shader_visibility shader_visibility; - unsigned int flags; /* vkd3d_shader_binding_flags */ struct vkd3d_shader_descriptor_binding binding; }; @@ -231,12 +229,6 @@ struct vkd3d_shader_push_constant_buffer unsigned int size; /* in bytes */ }; -struct vkd3d_shader_descriptor_table_buffer -{ - unsigned int offset; /* in bytes */ - unsigned int count; /* number of tables */ -}; - struct vkd3d_shader_root_parameter_mapping { unsigned int root_parameter; @@ -249,9 +241,6 @@ struct vkd3d_shader_root_parameter_mapping enum vkd3d_shader_interface_flag { VKD3D_SHADER_INTERFACE_PUSH_CONSTANTS_AS_UNIFORM_BUFFER = 0x00000001u, - VKD3D_SHADER_INTERFACE_BINDLESS_CBV_AS_STORAGE_BUFFER = 0x00000002u, - VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER = 0x00000004u, - VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER = 0x00000008u, VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER = 0x00000010u, /* In this model, use descriptor_size_cbv_srv_uav as array stride for raw VA buffer. 
*/ VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER = 0x00000020u, @@ -261,6 +250,7 @@ enum vkd3d_shader_interface_flag VKD3D_SHADER_INTERFACE_INSTRUCTION_QA_BUFFER_EXPECT_ASSUME = 0x00000200u, VKD3D_SHADER_INTERFACE_INSTRUCTION_QA_BUFFER_SYNC = 0x00000400u, VKD3D_SHADER_INTERFACE_INSTRUCTION_QA_BUFFER_SYNC_COMPUTE = 0x00000800u, + VKD3D_SHADER_INTERFACE_INLINE_REDZONE_CBV = 0x00001000u, }; struct vkd3d_shader_stage_io_entry @@ -291,15 +281,18 @@ struct vkd3d_shader_interface_info unsigned int min_ssbo_alignment; unsigned int patch_location_offset; - struct vkd3d_shader_descriptor_table_buffer descriptor_tables; const struct vkd3d_shader_resource_binding *bindings; unsigned int binding_count; const struct vkd3d_shader_push_constant_buffer *push_constant_buffers; unsigned int push_constant_buffer_count; + unsigned int descriptor_table_offset_words; + unsigned int num_root_descriptors; + unsigned int num_root_constants; + /* Ignored unless VKD3D_SHADER_INTERFACE_PUSH_CONSTANTS_AS_UNIFORM_BUFFER is set */ - const struct vkd3d_shader_descriptor_binding *push_constant_ubo_binding; + struct vkd3d_shader_descriptor_binding push_constant_ubo_binding; /* Ignored unless VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER or TYPED_OFFSET_BUFFER is set */ const struct vkd3d_shader_descriptor_binding *offset_buffer_binding; @@ -319,6 +312,7 @@ struct vkd3d_shader_interface_info /* Used for either VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER or local root signatures. */ uint32_t descriptor_size_cbv_srv_uav; + uint32_t descriptor_raw_va_offset; uint32_t descriptor_size_sampler; /* Purely for debug. Only non-NULL when running with EXTENDED_DEBUG_UTILS. 
*/ @@ -360,12 +354,12 @@ struct vkd3d_shader_root_parameter struct vkd3d_shader_interface_local_info { + const struct vkd3d_shader_resource_binding *bindings; + unsigned int binding_count; const struct vkd3d_shader_root_parameter *local_root_parameters; unsigned int local_root_parameter_count; const struct vkd3d_shader_push_constant_buffer *shader_record_constant_buffers; unsigned int shader_record_buffer_count; - const struct vkd3d_shader_resource_binding *bindings; - unsigned int binding_count; }; struct vkd3d_shader_transform_feedback_element @@ -1045,6 +1039,7 @@ struct vkd3d_shader_node_input_push_signature VkDeviceAddress local_root_signature_bda; uint32_t node_payload_output_offset; uint32_t node_remaining_recursion_levels; + VkDeviceAddress root_parameter_bda; }; struct vkd3d_shader_node_input_data @@ -1201,6 +1196,39 @@ bool vkd3d_shader_hash_range_parse_line(char *line, vkd3d_shader_hash_t *lo, vkd3d_shader_hash_t *hi, char **trail); +/* In EXT_descriptor_heap, sets and bindings are non-physical concepts. + * Agree on a convention so that we can link SPIR-V to PSO creation. 
*/ +enum +{ + VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET = 100, + VKD3D_SHADER_STATIC_SAMPLERS_VIRTUAL_DESCRIPTOR_SET = 101, + VKD3D_SHADER_ROOT_CONSTANTS_VIRTUAL_DESCRIPTOR_SET = 102, + VKD3D_SHADER_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET = 103, + VKD3D_SHADER_STATIC_LOCAL_SAMPLERS_VIRTUAL_DESCRIPTOR_SET = 104, + VKD3D_SHADER_LOCAL_ROOT_CONSTANTS_VIRTUAL_DESCRIPTOR_SET = 105, + VKD3D_SHADER_LOCAL_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET = 106, + VKD3D_SHADER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE = 200, + + VKD3D_SHADER_UAV_COUNTER_SET_OFFSET = 1000, + + VKD3D_SHADER_UAV_COUNTER_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE = + VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET + VKD3D_SHADER_UAV_COUNTER_SET_OFFSET, + VKD3D_SHADER_UAV_COUNTER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE = + VKD3D_SHADER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE + VKD3D_SHADER_UAV_COUNTER_SET_OFFSET, + + VKD3D_SHADER_GLOBAL_HEAP_BINDING = 0, + VKD3D_SHADER_UAV_COUNTER_GLOBAL_HEAP_BINDING, + + /* These are either plain SSBOs or magic UBOs. */ + VKD3D_SHADER_GLOBAL_HEAP_BINDING_AUX_BINDINGS, + VKD3D_SHADER_RAW_VIEW_GLOBAL_HEAP_BINDING = VKD3D_SHADER_GLOBAL_HEAP_BINDING_AUX_BINDINGS, + VKD3D_SHADER_GLOBAL_HEAP_SIZE_BINDING, + VKD3D_SHADER_GLOBAL_HEAP_BINDING_AUX_BINDINGS_COUNT = 4 +}; + +#define VKD3D_FORCE_RAW_UAV_COUNTER 0 +#define VKD3D_FORCE_HEAP_ROBUSTNESS 0 + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index ce7ba4cb82..261d87a2de 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -68,15 +68,8 @@ static unsigned dxil_resource_flags_from_kind(dxil_spv_resource_kind kind, bool return VKD3D_SHADER_BINDING_FLAG_BUFFER; case DXIL_SPV_RESOURCE_KIND_TYPED_BUFFER: - return VKD3D_SHADER_BINDING_FLAG_BUFFER; - case DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE: - /* Acceleration structures use aux buffer to store raw AS pointers. - * As root descriptors, we should check for buffer flag instead. 
*/ - if (ssbo) - return VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER; - else - return VKD3D_SHADER_BINDING_FLAG_BUFFER; + return VKD3D_SHADER_BINDING_FLAG_BUFFER; default: return VKD3D_SHADER_BINDING_FLAG_IMAGE; @@ -97,6 +90,23 @@ static bool vkd3d_shader_resource_binding_is_global_heap(const struct vkd3d_shad binding->register_count == UINT32_MAX; } +static bool vkd3d_virtual_descriptor_set_is_single(uint32_t desc_set) +{ + switch (desc_set) + { + case VKD3D_SHADER_STATIC_SAMPLERS_VIRTUAL_DESCRIPTOR_SET: + case VKD3D_SHADER_ROOT_CONSTANTS_VIRTUAL_DESCRIPTOR_SET: + case VKD3D_SHADER_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET: + case VKD3D_SHADER_STATIC_LOCAL_SAMPLERS_VIRTUAL_DESCRIPTOR_SET: + case VKD3D_SHADER_LOCAL_ROOT_CONSTANTS_VIRTUAL_DESCRIPTOR_SET: + case VKD3D_SHADER_LOCAL_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET: + return true; + + default: + return false; + } +} + static bool dxil_resource_is_in_range(const struct vkd3d_shader_resource_binding *binding, const dxil_spv_d3d_binding *d3d_binding) { @@ -124,31 +134,24 @@ static bool dxil_resource_is_in_range(const struct vkd3d_shader_resource_binding * This is normally benign, however, as a special case we need to demote unsized array or static descriptors * in some cases. dxil-spirv will retry the binding query with range of 1. 
*/ if (d3d_binding->range_size == UINT32_MAX) - return (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS) || binding->register_count != 1; + return !vkd3d_virtual_descriptor_set_is_single(binding->binding.set) || binding->register_count != 1; available_bindings = binding->register_count - (d3d_binding->register_index - binding->register_index); return d3d_binding->range_size <= available_bindings; } -static bool vkd3d_shader_binding_is_root_descriptor(const struct vkd3d_shader_resource_binding *binding) -{ - const uint32_t relevant_flags = VKD3D_SHADER_BINDING_FLAG_RAW_VA | - VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER; - const uint32_t expected_flags = VKD3D_SHADER_BINDING_FLAG_RAW_VA; - return (binding->flags & relevant_flags) == expected_flags; -} - struct vkd3d_dxil_remap_userdata { const struct vkd3d_shader_interface_info *shader_interface_info; const struct vkd3d_shader_interface_local_info *shader_interface_local_info; - unsigned int num_root_descriptors; }; struct vkd3d_dxil_remap_info { const struct vkd3d_shader_resource_binding *bindings; unsigned int binding_count; + uint32_t shader_interface_flags; + unsigned int num_root_descriptors; unsigned int descriptor_table_offset_words; }; @@ -160,77 +163,109 @@ static dxil_spv_bool dxil_remap_inner( dxil_spv_vulkan_binding *vk_binding, uint32_t resource_flags) { - unsigned int root_descriptor_index = 0; unsigned int i; + /* SM 6.6 heap is redirected to virtual descriptor set. 
*/ + if (dxil_resource_is_global_heap(d3d_binding)) + { + memset(vk_binding, 0, sizeof(*vk_binding)); + vk_binding->set = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + vk_binding->binding = VKD3D_SHADER_GLOBAL_HEAP_BINDING; + vk_binding->bindless.use_heap = DXIL_SPV_TRUE; + if (resource_flags & VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER) + { + if (remap->shader_interface_flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER) + { + vk_binding->binding = VKD3D_SHADER_RAW_VIEW_GLOBAL_HEAP_BINDING; + vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS; + } + else + { + vk_binding->binding = VKD3D_SHADER_UAV_COUNTER_GLOBAL_HEAP_BINDING; + vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO; + } + } + + return DXIL_SPV_TRUE; + } + for (i = 0; i < remap->binding_count; i++) { const struct vkd3d_shader_resource_binding *binding = &remap->bindings[i]; - const uint32_t mask = ~(VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_RAW_VA); - uint32_t match_flags = binding->flags & mask; if (binding->type == descriptor_type && dxil_resource_is_in_range(binding, d3d_binding) && - (match_flags & resource_flags) == resource_flags && dxil_match_shader_visibility(binding->shader_visibility, d3d_binding->stage)) { + bool lower_explicit_heap; + bool uav_counter; + bool raw_va; + memset(vk_binding, 0, sizeof(*vk_binding)); - if (vkd3d_shader_binding_is_root_descriptor(binding)) + vk_binding->set = binding->binding.set; + vk_binding->binding = binding->binding.binding + d3d_binding->register_index - binding->register_index; + + uav_counter = !!(resource_flags & VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER); + raw_va = uav_counter && (remap->shader_interface_flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER); + lower_explicit_heap = (resource_flags & VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING) || raw_va; + +#if 0 + /* Test path to lower to explicit indexing. 
*/ + if (vk_binding->set < 100) + lower_explicit_heap = true; +#endif + +#define LOWER_SBT_ACCESS 0 + + /* If we're lowering SBTs. This depends on descriptor QA being used or similar. */ + if (LOWER_SBT_ACCESS) { - vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS; - vk_binding->root_constant_index = root_descriptor_index; + if ((vk_binding->set >= VKD3D_SHADER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE && + vk_binding->set < VKD3D_SHADER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE + 100) || + (vk_binding->set >= VKD3D_SHADER_UAV_COUNTER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE && + vk_binding->set < VKD3D_SHADER_UAV_COUNTER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE + 100)) + { + lower_explicit_heap = true; + vk_binding->set %= 100; + } } - else if (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS) + + if (lower_explicit_heap) { - vk_binding->bindless.use_heap = DXIL_SPV_TRUE; - vk_binding->set = binding->binding.set; - vk_binding->binding = binding->binding.binding; + /* Rewrite to explicit SM 6.6 model. */ + uint32_t table_offset = vk_binding->binding; + uint32_t table_index = vk_binding->set; - if (dxil_resource_is_global_heap(d3d_binding)) + vk_binding->set = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + if (raw_va) { - vk_binding->bindless.heap_root_offset = 0; /* No constant offset. */ - vk_binding->root_constant_index = UINT32_MAX; /* No push offset. */ + vk_binding->binding = VKD3D_SHADER_RAW_VIEW_GLOBAL_HEAP_BINDING; + vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS; } - else - { - vk_binding->bindless.heap_root_offset = binding->descriptor_offset + - d3d_binding->register_index - binding->register_index; - vk_binding->root_constant_index = binding->descriptor_table + remap->descriptor_table_offset_words; - - if (vk_binding->root_constant_index < 2 * remap->num_root_descriptors) - { - ERR("Bindless push constant table offset is impossible. 
%u < 2 * %u\n", - vk_binding->root_constant_index, remap->num_root_descriptors); - return DXIL_SPV_FALSE; - } - vk_binding->root_constant_index -= 2 * remap->num_root_descriptors; - } - - /* Acceleration structures are mapped to SSBO uvec2[] array instead of normal heap. */ - if (d3d_binding->kind == DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE) + else if (uav_counter) { + vk_binding->binding = VKD3D_SHADER_UAV_COUNTER_GLOBAL_HEAP_BINDING; vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO; } - else if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && - (binding->flags & VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER) && - !(binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA)) + else { - /* Force texel buffer path for UAV counters if we need to. */ - vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_TEXEL_BUFFER; + vk_binding->binding = VKD3D_SHADER_GLOBAL_HEAP_BINDING; } + + vk_binding->bindless.use_heap = DXIL_SPV_TRUE; + vk_binding->root_constant_index = table_index + + remap->descriptor_table_offset_words - 2 * remap->num_root_descriptors; + vk_binding->bindless.heap_root_offset = table_offset; } - else + else if (uav_counter) { - vk_binding->set = binding->binding.set; - vk_binding->binding = binding->binding.binding + d3d_binding->register_index - binding->register_index; + vk_binding->set += VKD3D_SHADER_UAV_COUNTER_SET_OFFSET; + vk_binding->descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO; } return DXIL_SPV_TRUE; } - - if (vkd3d_shader_binding_is_root_descriptor(binding)) - root_descriptor_index++; } return DXIL_SPV_FALSE; @@ -249,8 +284,9 @@ static dxil_spv_bool dxil_remap(const struct vkd3d_dxil_remap_userdata *remap, remap_info.bindings = shader_interface_info->bindings; remap_info.binding_count = shader_interface_info->binding_count; - remap_info.descriptor_table_offset_words = shader_interface_info->descriptor_tables.offset / sizeof(uint32_t); - remap_info.num_root_descriptors = remap->num_root_descriptors; + 
remap_info.shader_interface_flags = shader_interface_info->flags; + remap_info.num_root_descriptors = shader_interface_info->num_root_descriptors; + remap_info.descriptor_table_offset_words = shader_interface_info->descriptor_table_offset_words; if (!dxil_remap_inner(&remap_info, descriptor_type, d3d_binding, vk_binding, resource_flags)) { @@ -261,10 +297,10 @@ static dxil_spv_bool dxil_remap(const struct vkd3d_dxil_remap_userdata *remap, * Root descriptor and constants are resolved internally in dxil-spirv. */ remap_info.bindings = shader_interface_local_info->bindings; remap_info.binding_count = shader_interface_local_info->binding_count; - /* Not relevant. */ - remap_info.descriptor_table_offset_words = 0; - remap_info.num_root_descriptors = 0; - return dxil_remap_inner(&remap_info, descriptor_type, d3d_binding, vk_binding, resource_flags); + + /* TODO: Force lowering? */ + return dxil_remap_inner(&remap_info, descriptor_type, d3d_binding, vk_binding, + resource_flags); } else return DXIL_SPV_FALSE; @@ -276,39 +312,58 @@ static dxil_spv_bool dxil_remap(const struct vkd3d_dxil_remap_userdata *remap, static dxil_spv_bool dxil_srv_remap(void *userdata, const dxil_spv_d3d_binding *d3d_binding, dxil_spv_srv_vulkan_binding *vk_binding) { - const struct vkd3d_shader_interface_info *shader_interface_info; const struct vkd3d_dxil_remap_userdata *remap = userdata; unsigned int resource_flags, resource_flags_ssbo; bool use_ssbo; - shader_interface_info = remap->shader_interface_info; resource_flags_ssbo = dxil_resource_flags_from_kind(d3d_binding->kind, true); resource_flags = dxil_resource_flags_from_kind(d3d_binding->kind, false); use_ssbo = resource_flags_ssbo != resource_flags; + if (use_ssbo) + use_ssbo = d3d_binding->alignment >= remap->shader_interface_info->min_ssbo_alignment; + +#if VKD3D_FORCE_HEAP_ROBUSTNESS + /* If we have enabled descriptor QA, we force lowering for all arrayed resources. 
*/ + if (d3d_binding->range_size > 1) + { + resource_flags_ssbo |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; + resource_flags |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; + } +#endif + if (use_ssbo && dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, d3d_binding, &vk_binding->buffer_binding, resource_flags_ssbo)) { vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO; - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER) - { - vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; - vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; - } return DXIL_SPV_TRUE; } else { - vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_TEXEL_BUFFER; - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER) + if (d3d_binding->kind == DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE) { - vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; - vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; + vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_IDENTITY; + return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, + d3d_binding, &vk_binding->buffer_binding, resource_flags); } - } + else + { + vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_TEXEL_BUFFER; + + if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, + d3d_binding, &vk_binding->buffer_binding, resource_flags)) + return DXIL_SPV_FALSE; - return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, - d3d_binding, &vk_binding->buffer_binding, resource_flags); + if (vk_binding->buffer_binding.set == VKD3D_SHADER_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET) + { + vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS; + vk_binding->buffer_binding.bindless.use_heap 
= DXIL_SPV_FALSE; + vk_binding->buffer_binding.root_constant_index = vk_binding->buffer_binding.binding; + } + + return DXIL_SPV_TRUE; + } + } } static dxil_spv_bool dxil_sampler_remap(void *userdata, const dxil_spv_d3d_binding *d3d_binding, @@ -420,27 +475,40 @@ static dxil_spv_bool dxil_shader_stage_input_remap(void *userdata, const dxil_sp static dxil_spv_bool dxil_uav_remap(void *userdata, const dxil_spv_uav_d3d_binding *d3d_binding, dxil_spv_uav_vulkan_binding *vk_binding) { - const struct vkd3d_shader_interface_info *shader_interface_info; const struct vkd3d_dxil_remap_userdata *remap = userdata; unsigned int resource_flags, resource_flags_ssbo; bool use_ssbo; - shader_interface_info = remap->shader_interface_info; resource_flags_ssbo = dxil_resource_flags_from_kind(d3d_binding->d3d_binding.kind, true); resource_flags = dxil_resource_flags_from_kind(d3d_binding->d3d_binding.kind, false); use_ssbo = resource_flags != resource_flags_ssbo; + if (use_ssbo) + use_ssbo = d3d_binding->d3d_binding.alignment >= remap->shader_interface_info->min_ssbo_alignment; + + /* We can use this flag any time we need to lower to manual heap addressing. */ + if (d3d_binding->has_counter && + (remap->shader_interface_info->flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER)) + { + resource_flags_ssbo |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; + resource_flags |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; + } + +#if VKD3D_FORCE_HEAP_ROBUSTNESS + /* If we have enabled descriptor QA, we force lowering for all arrayed resources. 
*/ + if (d3d_binding->d3d_binding.range_size > 1) + { + resource_flags_ssbo |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; + resource_flags |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; + } +#endif + if (use_ssbo) { if (dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &d3d_binding->d3d_binding, &vk_binding->buffer_binding, resource_flags_ssbo)) { vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO; - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER) - { - vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; - vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; - } } else if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &d3d_binding->d3d_binding, &vk_binding->buffer_binding, resource_flags)) @@ -452,11 +520,6 @@ static dxil_spv_bool dxil_uav_remap(void *userdata, const dxil_spv_uav_d3d_bindi /* By default, we use TEXEL_BUFFER unless dxil_remap remaps it to BDA. * We won't trigger SSBO path when using BDA. 
*/ vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_TEXEL_BUFFER; - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER) - { - vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; - vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; - } } } else @@ -468,17 +531,24 @@ static dxil_spv_bool dxil_uav_remap(void *userdata, const dxil_spv_uav_d3d_bindi return DXIL_SPV_FALSE; } - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER) + if (vk_binding->buffer_binding.set == VKD3D_SHADER_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET) { - vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; - vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; + vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS; + vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE; + vk_binding->buffer_binding.root_constant_index = vk_binding->buffer_binding.binding; } } if (d3d_binding->has_counter) { + resource_flags = VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER; +#if VKD3D_FORCE_HEAP_ROBUSTNESS + if (d3d_binding->d3d_binding.range_size > 1) + resource_flags |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; +#endif + if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &d3d_binding->d3d_binding, - &vk_binding->counter_binding, VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER)) + &vk_binding->counter_binding, resource_flags)) { return DXIL_SPV_FALSE; } @@ -492,36 +562,47 @@ static dxil_spv_bool dxil_cbv_remap(void *userdata, const dxil_spv_d3d_binding * { const struct vkd3d_shader_interface_info *shader_interface_info; const struct vkd3d_dxil_remap_userdata *remap = userdata; + uint32_t num_root_descriptors; + uint32_t resource_flags; unsigned int i; shader_interface_info = remap->shader_interface_info; + num_root_descriptors = 
shader_interface_info->num_root_descriptors; /* Try to map to root constant -> push constant. */ for (i = 0; i < shader_interface_info->push_constant_buffer_count; i++) { const struct vkd3d_shader_push_constant_buffer *push = &shader_interface_info->push_constant_buffers[i]; if (push->register_space == d3d_binding->register_space && - push->register_index == d3d_binding->register_index && - dxil_match_shader_visibility(push->shader_visibility, d3d_binding->stage)) + push->register_index == d3d_binding->register_index && + dxil_match_shader_visibility(push->shader_visibility, d3d_binding->stage)) { memset(vk_binding, 0, sizeof(*vk_binding)); vk_binding->push_constant = DXIL_SPV_TRUE; vk_binding->vulkan.push_constant.offset_in_words = push->offset / sizeof(uint32_t); - if (vk_binding->vulkan.push_constant.offset_in_words < remap->num_root_descriptors * 2) + if (vk_binding->vulkan.push_constant.offset_in_words < num_root_descriptors * 2) { ERR("Root descriptor offset of %u is impossible with %u root descriptors.\n", - vk_binding->vulkan.push_constant.offset_in_words, remap->num_root_descriptors); + vk_binding->vulkan.push_constant.offset_in_words, num_root_descriptors); return DXIL_SPV_FALSE; } - vk_binding->vulkan.push_constant.offset_in_words -= remap->num_root_descriptors * 2; + vk_binding->vulkan.push_constant.offset_in_words -= num_root_descriptors * 2; return DXIL_SPV_TRUE; } } + resource_flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; + +#if VKD3D_FORCE_HEAP_ROBUSTNESS + /* If we have enabled descriptor QA, we force lowering for all arrayed resources. 
*/ + if (d3d_binding->range_size > 1) + resource_flags |= VKD3D_SHADER_BINDING_FLAG_FORCE_EXPLICIT_HEAP_INDEXING; +#endif + vk_binding->push_constant = DXIL_SPV_FALSE; return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, d3d_binding, &vk_binding->vulkan.uniform_binding, - VKD3D_SHADER_BINDING_FLAG_BUFFER); + resource_flags); } static void vkd3d_dxil_log_callback(void *userdata, dxil_spv_log_level level, const char *msg) @@ -651,8 +732,7 @@ static bool vkd3d_dxil_converter_set_quirks(dxil_spv_converter converter, } } - if ((quirks & VKD3D_SHADER_QUIRK_DESCRIPTOR_HEAP_ROBUSTNESS) && - (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER)) + if (quirks & VKD3D_SHADER_QUIRK_DESCRIPTOR_HEAP_ROBUSTNESS) { /* Checking for RAW_VA_ALIAS_DESCRIPTOR_BUFFER is technically not needed, * but only RADV is affected here and NV miscompiles shaders if you only query OpArrayLength @@ -692,8 +772,8 @@ static int vkd3d_dxil_converter_set_options(dxil_spv_converter converter, /* If we don't have an offset buffer, never enter a situation where it may be used by dxil-spirv. * This is relevant for e.g. 16-bit structured buffers with awkward alignments. 
*/ - if ((shader_interface_info->flags & VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER) == 0) - helper.alignment = 1; + //if ((shader_interface_info->flags & VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER) == 0) + helper.alignment = 1; if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS) { @@ -706,24 +786,12 @@ static int vkd3d_dxil_converter_set_options(dxil_spv_converter converter, { const struct dxil_spv_option_root_constant_inline_uniform_block helper = { { DXIL_SPV_OPTION_ROOT_CONSTANT_INLINE_UNIFORM_BLOCK }, - shader_interface_info->push_constant_ubo_binding->set, - shader_interface_info->push_constant_ubo_binding->binding, - DXIL_SPV_TRUE }; - if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS) - { - ERR("dxil-spirv does not support PUSH_CONSTANTS_AS_UNIFORM_BUFFER.\n"); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - } - - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_BINDLESS_CBV_AS_STORAGE_BUFFER) - { - static const struct dxil_spv_option_bindless_cbv_ssbo_emulation helper = - { { DXIL_SPV_OPTION_BINDLESS_CBV_SSBO_EMULATION }, + shader_interface_info->push_constant_ubo_binding.set, + shader_interface_info->push_constant_ubo_binding.binding, DXIL_SPV_TRUE }; if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS) { - ERR("dxil-spirv does not support BINDLESS_CBV_AS_STORAGE_BUFFER.\n"); + ERR("dxil-spirv does not support PUSH_CONSTANTS_AS_UNIFORM_BUFFER.\n"); return VKD3D_ERROR_NOT_IMPLEMENTED; } } @@ -739,18 +807,6 @@ static int vkd3d_dxil_converter_set_options(dxil_spv_converter converter, } } - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER) - { - const struct dxil_spv_option_bindless_typed_buffer_offsets helper = - { { DXIL_SPV_OPTION_BINDLESS_TYPED_BUFFER_OFFSETS }, - DXIL_SPV_TRUE }; - if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS) - { - ERR("dxil-spirv does not support BINDLESS_TYPED_BUFFER_OFFSETS.\n"); - 
return VKD3D_ERROR_NOT_IMPLEMENTED; - } - } - #ifdef VKD3D_ENABLE_DESCRIPTOR_QA if (shader_interface_info->descriptor_qa_control_binding && shader_interface_info->descriptor_qa_payload_binding) @@ -826,12 +882,12 @@ static int vkd3d_dxil_converter_set_options(dxil_spv_converter converter, } } - if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER) + //if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER) { const struct dxil_spv_option_physical_address_descriptor_indexing helper = { { DXIL_SPV_OPTION_PHYSICAL_ADDRESS_DESCRIPTOR_INDEXING }, shader_interface_info->descriptor_size_cbv_srv_uav / sizeof(VkDeviceAddress), - 0 }; + shader_interface_info->descriptor_raw_va_offset / sizeof(VkDeviceAddress) }; if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS) { @@ -1242,6 +1298,30 @@ static int vkd3d_dxil_converter_set_options(dxil_spv_converter converter, } } + if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_INLINE_REDZONE_CBV) + { + /* Redirect these through PUSH_DATA CBV. 
*/ + dxil_spv_converter_set_meta_descriptor(converter, + DXIL_SPV_META_DESCRIPTOR_RESOURCE_DESCRIPTOR_HEAP_SIZE, + DXIL_SPV_META_DESCRIPTOR_KIND_UBO_CONTAINING_CONSTANT, + VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET, + VKD3D_SHADER_GLOBAL_HEAP_SIZE_BINDING); + + dxil_spv_converter_set_meta_descriptor(converter, + DXIL_SPV_META_DESCRIPTOR_RAW_DESCRIPTOR_HEAP_VIEW, + DXIL_SPV_META_DESCRIPTOR_KIND_UBO_CONTAINING_BDA, + VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET, + VKD3D_SHADER_RAW_VIEW_GLOBAL_HEAP_BINDING); + } + else + { + dxil_spv_converter_set_meta_descriptor(converter, + DXIL_SPV_META_DESCRIPTOR_RAW_DESCRIPTOR_HEAP_VIEW, + DXIL_SPV_META_DESCRIPTOR_KIND_READONLY_SSBO, + VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET, + VKD3D_SHADER_RAW_VIEW_GLOBAL_HEAP_BINDING); + } + return VKD3D_OK; } @@ -1253,9 +1333,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc, { uint32_t wave_size_min, wave_size_max, wave_size_preferred; struct vkd3d_dxil_remap_userdata remap_userdata; - unsigned int raw_va_binding_count = 0; - unsigned int num_root_descriptors = 0; - unsigned int root_constant_words = 0; unsigned int heuristic_min_wave_size; unsigned int heuristic_max_wave_size; dxil_spv_converter converter = NULL; @@ -1263,7 +1340,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc, dxil_spv_compiled_spirv compiled; vkd3d_shader_quirks_t quirks; dxil_spv_shader_stage stage; - unsigned int i, max_size; vkd3d_shader_hash_t hash; int ret = VKD3D_OK; void *code; @@ -1318,45 +1394,16 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc, goto end; } - /* Figure out how many words we need for push constants. 
*/ - for (i = 0; i < shader_interface_info->push_constant_buffer_count; i++) - { - max_size = shader_interface_info->push_constant_buffers[i].offset + - shader_interface_info->push_constant_buffers[i].size; - max_size = (max_size + 3) / 4; - if (max_size > root_constant_words) - root_constant_words = max_size; - } - - max_size = shader_interface_info->descriptor_tables.offset / sizeof(uint32_t) + - shader_interface_info->descriptor_tables.count; - if (max_size > root_constant_words) - root_constant_words = max_size; - - for (i = 0; i < shader_interface_info->binding_count; i++) - { - if (shader_interface_info->bindings[i].flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA) - raw_va_binding_count++; - - if (vkd3d_shader_binding_is_root_descriptor(&shader_interface_info->bindings[i])) - num_root_descriptors++; - } - - /* Root constants come after root descriptors. Offset the counts. */ - if (root_constant_words < num_root_descriptors * 2) - root_constant_words = num_root_descriptors * 2; - root_constant_words -= num_root_descriptors * 2; - if ((ret = vkd3d_dxil_converter_set_options(converter, shader_interface_info, compiler_args, quirks, - spirv->meta.hash, NULL, raw_va_binding_count || num_root_descriptors))) + spirv->meta.hash, NULL, + !!(shader_interface_info->flags & VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER)))) goto end; remap_userdata.shader_interface_info = shader_interface_info; remap_userdata.shader_interface_local_info = NULL; - remap_userdata.num_root_descriptors = num_root_descriptors; - dxil_spv_converter_set_root_constant_word_count(converter, root_constant_words); - dxil_spv_converter_set_root_descriptor_count(converter, num_root_descriptors); + dxil_spv_converter_set_root_constant_word_count(converter, shader_interface_info->num_root_constants); + dxil_spv_converter_set_root_descriptor_count(converter, shader_interface_info->num_root_descriptors); dxil_spv_converter_set_srv_remapper(converter, dxil_srv_remap, &remap_userdata); 
dxil_spv_converter_set_sampler_remapper(converter, dxil_sampler_remap, &remap_userdata); dxil_spv_converter_set_uav_remapper(converter, dxil_uav_remap, &remap_userdata); @@ -1499,13 +1546,11 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil, const struct vkd3d_shader_resource_binding *resource_binding; const struct vkd3d_shader_root_parameter *root_parameter; struct vkd3d_dxil_remap_userdata remap_userdata; - unsigned int num_root_descriptors = 0; - unsigned int root_constant_words = 0; dxil_spv_converter converter = NULL; dxil_spv_parsed_blob blob = NULL; dxil_spv_compiled_spirv compiled; vkd3d_shader_quirks_t quirks; - unsigned int i, j, max_size; + unsigned int i, j; //, max_size; vkd3d_shader_hash_t hash; int ret = VKD3D_OK; void *code; @@ -1554,34 +1599,13 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil, goto end; } - /* Figure out how many words we need for push constants. */ - for (i = 0; i < shader_interface_info->push_constant_buffer_count; i++) - { - max_size = shader_interface_info->push_constant_buffers[i].offset + - shader_interface_info->push_constant_buffers[i].size; - max_size = (max_size + 3) / 4; - if (max_size > root_constant_words) - root_constant_words = max_size; - } - - max_size = shader_interface_info->descriptor_tables.offset / sizeof(uint32_t) + - shader_interface_info->descriptor_tables.count; - if (max_size > root_constant_words) - root_constant_words = max_size; - - for (i = 0; i < shader_interface_info->binding_count; i++) - if (vkd3d_shader_binding_is_root_descriptor(&shader_interface_info->bindings[i])) - num_root_descriptors++; - - /* Push local root parameters. We cannot rely on callbacks here - * since the local root signature has a physical layout in ShaderRecordKHR - * which needs to be precisely specified up front. 
*/ for (i = 0; i < shader_interface_local_info->local_root_parameter_count; i++) { root_parameter = &shader_interface_local_info->local_root_parameters[i]; switch (root_parameter->parameter_type) { case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + /* Only this path will actually end up being used. But have to declare everything so we compute proper offset. */ record_constant_buffer = &shader_interface_local_info->shader_record_constant_buffers[root_parameter->constant.constant_index]; dxil_spv_converter_add_local_root_constants(converter, @@ -1589,18 +1613,41 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil, root_parameter->constant.constant_count); break; - case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: - if (dxil_spv_converter_begin_local_root_descriptor_table(converter) != DXIL_SPV_SUCCESS) + case D3D12_ROOT_PARAMETER_TYPE_CBV: + /* Dummy parameters to make dxil-spirv happy. These are mapped with proper API elsewhere. */ + dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, UINT32_MAX, UINT32_MAX); + break; + + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + if (shader_interface_info->min_ssbo_alignment > 1) { - ret = VKD3D_ERROR_INVALID_ARGUMENT; - goto end; + dxil_spv_converter_add_local_root_descriptor(converter, + root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV ? + DXIL_SPV_RESOURCE_CLASS_SRV : DXIL_SPV_RESOURCE_CLASS_UAV, + root_parameter->descriptor.binding->register_space, + root_parameter->descriptor.binding->register_index); + } + else + { + /* Dummy parameters to make dxil-spirv happy. These are mapped with proper API elsewhere. 
*/ + dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, + UINT32_MAX, UINT32_MAX); } + break; + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + { + bool has_entry = false; + dxil_spv_converter_begin_local_root_descriptor_table(converter); for (j = 0; j < root_parameter->descriptor_table.binding_count; j++) { dxil_spv_resource_class resource_class; resource_binding = &root_parameter->descriptor_table.first_binding[j]; + if (!LOWER_SBT_ACCESS) + continue; + switch (resource_binding->type) { case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: @@ -1627,45 +1674,25 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil, dxil_spv_converter_add_local_root_descriptor_table(converter, resource_class, resource_binding->register_space, resource_binding->register_index, - resource_binding->register_count, resource_binding->descriptor_offset); + resource_binding->register_count, resource_binding->binding.binding); + has_entry = true; } - if (dxil_spv_converter_end_local_root_descriptor_table(converter) != DXIL_SPV_SUCCESS) + if (!has_entry) { - ret = VKD3D_ERROR_INVALID_ARGUMENT; - goto end; + /* Dummy parameters to make dxil-spirv happy. These are mapped with proper API elsewhere. 
*/ + dxil_spv_converter_add_local_root_descriptor_table(converter, + DXIL_SPV_RESOURCE_CLASS_UAV, UINT32_MAX, 0, 0, 0); } - break; - case D3D12_ROOT_PARAMETER_TYPE_CBV: - dxil_spv_converter_add_local_root_descriptor(converter, - DXIL_SPV_RESOURCE_CLASS_CBV, root_parameter->descriptor.binding->register_space, - root_parameter->descriptor.binding->register_index); - break; - - case D3D12_ROOT_PARAMETER_TYPE_SRV: - dxil_spv_converter_add_local_root_descriptor(converter, - DXIL_SPV_RESOURCE_CLASS_SRV, root_parameter->descriptor.binding->register_space, - root_parameter->descriptor.binding->register_index); - break; - - case D3D12_ROOT_PARAMETER_TYPE_UAV: - dxil_spv_converter_add_local_root_descriptor(converter, - DXIL_SPV_RESOURCE_CLASS_UAV, root_parameter->descriptor.binding->register_space, - root_parameter->descriptor.binding->register_index); + dxil_spv_converter_end_local_root_descriptor_table(converter); break; + } default: - ret = VKD3D_ERROR_INVALID_ARGUMENT; - goto end; + break; } } - - /* Root constants come after root descriptors. Offset the counts. 
*/ - if (root_constant_words < num_root_descriptors * 2) - root_constant_words = num_root_descriptors * 2; - root_constant_words -= num_root_descriptors * 2; - if ((ret = vkd3d_dxil_converter_set_options(converter, shader_interface_info, compiler_args, quirks, spirv->meta.hash, export, true))) goto end; @@ -1674,10 +1701,9 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil, remap_userdata.shader_interface_info = shader_interface_info; remap_userdata.shader_interface_local_info = shader_interface_local_info; - remap_userdata.num_root_descriptors = num_root_descriptors; - dxil_spv_converter_set_root_constant_word_count(converter, root_constant_words); - dxil_spv_converter_set_root_descriptor_count(converter, num_root_descriptors); + dxil_spv_converter_set_root_constant_word_count(converter, shader_interface_info->num_root_constants); + dxil_spv_converter_set_root_descriptor_count(converter, shader_interface_info->num_root_descriptors); dxil_spv_converter_set_srv_remapper(converter, dxil_srv_remap, &remap_userdata); dxil_spv_converter_set_sampler_remapper(converter, dxil_sampler_remap, &remap_userdata); dxil_spv_converter_set_uav_remapper(converter, dxil_uav_remap, &remap_userdata); diff --git a/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d-shader/vkd3d_shader_main.c index 2ec38c1ef5..4ffb7a7def 100644 --- a/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d-shader/vkd3d_shader_main.c @@ -490,6 +490,10 @@ vkd3d_shader_quirks_t vkd3d_shader_compile_arguments_select_quirks( } } +#if VKD3D_FORCE_HEAP_ROBUSTNESS + quirks |= VKD3D_SHADER_QUIRK_DESCRIPTOR_HEAP_ROBUSTNESS; +#endif + if (compile_args && compile_args->quirks) { for (i = 0; i < compile_args->quirks->num_hashes; i++) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 8402be8d50..b4b5910680 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -787,16 +787,6 @@ static const struct vkd3d_shader_root_constant *root_signature_get_32bit_constan return 
&p->constant; } -static const struct vkd3d_shader_root_parameter *root_signature_get_root_descriptor( - const struct d3d12_root_signature *root_signature, unsigned int index) -{ - const struct vkd3d_shader_root_parameter *p = root_signature_get_parameter(root_signature, index); - assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV - || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV - || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV); - return p; -} - /* ID3D12Fence */ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) { @@ -2277,7 +2267,7 @@ static void d3d12_command_list_begin_new_sequence(struct d3d12_command_list *lis d3d12_command_list_invalidate_all_state(list); /* Extra special consideration since we're starting a fresh command buffer. */ - list->descriptor_heap.buffers.heap_dirty = true; + list->descriptor_heap.heap_dirty = true; d3d12_command_list_debug_mark_label(list, "Split", 0.0f, 0.0f, 0.0f, 1.0f); } @@ -2600,6 +2590,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato d3d12_pipeline_state_dec_ref(allocator->pipelines[i]); } allocator->pipelines_count = 0; + + for (i = 0; i < allocator->meta_allocs_count; i++) + d3d12_descriptor_heap_free_meta_index(allocator->meta_allocs[i].heap, allocator->meta_allocs[i].index); + allocator->meta_allocs_count = 0; } static void d3d12_command_allocator_set_name(struct d3d12_command_allocator *allocator, const char *name) @@ -2708,6 +2702,7 @@ static ULONG d3d12_command_allocator_dec_ref(struct d3d12_command_allocator *all vkd3d_free(allocator->buffer_views); vkd3d_free(allocator->views); vkd3d_free(allocator->pipelines); + vkd3d_free(allocator->meta_allocs); if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS) { @@ -3329,6 +3324,103 @@ static struct d3d12_command_allocator *d3d12_command_allocator_from_iface(ID3D12 return impl_from_ID3D12CommandAllocator(iface); } +uint32_t d3d12_command_allocator_allocate_meta_index( + struct 
d3d12_command_allocator *allocator, struct d3d12_descriptor_heap *heap) +{ + uint32_t index = d3d12_descriptor_heap_allocate_meta_index(heap); + if (index == UINT32_MAX) + { + WARN("Meta descriptor pressure! Falling back to global heap (potentially slow) ...\n"); + return index; + } + + vkd3d_array_reserve((void**)&allocator->meta_allocs, &allocator->meta_allocs_size, + allocator->meta_allocs_count + 1, sizeof(*allocator->meta_allocs)); + allocator->meta_allocs[allocator->meta_allocs_count].heap = heap; + allocator->meta_allocs[allocator->meta_allocs_count].index = index; + allocator->meta_allocs_count++; + return index; +} + +static uint32_t d3d12_command_allocator_allocate_meta_buffer_view( + struct d3d12_command_allocator *allocator, struct d3d12_descriptor_heap *heap, + VkDeviceAddress va, VkDeviceSize range, VkFormat vk_format) +{ + const struct vkd3d_vk_device_procs *vk_procs = &allocator->device->vk_procs; + VkTexelBufferDescriptorInfoEXT texel_buffer_info; + VkResourceDescriptorInfoEXT desc_info; + VkHostAddressRangeEXT desc_range; + uint32_t heap_index; + + heap_index = d3d12_command_allocator_allocate_meta_index(allocator, heap); + if (heap_index == UINT32_MAX) + return heap_index; + + memset(&texel_buffer_info, 0, sizeof(texel_buffer_info)); + texel_buffer_info.sType = VK_STRUCTURE_TYPE_TEXEL_BUFFER_DESCRIPTOR_INFO_EXT; + texel_buffer_info.addressRange.address = va; + texel_buffer_info.addressRange.size = range; + texel_buffer_info.format = vk_format; + + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + desc_info.data.pTexelBuffer = &texel_buffer_info; + + desc_range.address = heap->descriptor_buffer.host_allocation + + heap_index * allocator->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + desc_range.size = allocator->device->bindless_state.storage_texel_buffer_size; + 
VK_CALL(vkWriteResourceDescriptorsEXT(allocator->device->vk_device, 1, &desc_info, &desc_range)); + return heap_index; +} + +static uint32_t d3d12_command_allocator_allocate_meta_image_view( + struct d3d12_command_allocator *allocator, struct d3d12_descriptor_heap *heap, + const struct vkd3d_texture_view_desc *view_desc, VkImageLayout vk_image_layout) +{ + const struct vkd3d_vk_device_procs *vk_procs = &allocator->device->vk_procs; + struct vkd3d_texture_view_create_info vk_view_info; + VkResourceDescriptorInfoEXT desc_info; + VkImageDescriptorInfoEXT image_info; + VkHostAddressRangeEXT desc_range; + uint32_t heap_index; + + assert(view_desc->image_usage == VK_IMAGE_USAGE_SAMPLED_BIT || + view_desc->image_usage == VK_IMAGE_USAGE_STORAGE_BIT); + + heap_index = d3d12_command_allocator_allocate_meta_index(allocator, heap); + if (heap_index == UINT32_MAX) + return heap_index; + + vkd3d_setup_texture_view(allocator->device, view_desc, &vk_view_info); + + memset(&image_info, 0, sizeof(image_info)); + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_DESCRIPTOR_INFO_EXT; + image_info.layout = vk_image_layout; + image_info.pView = &vk_view_info.view_desc; + + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; + desc_info.data.pImage = &image_info; + + desc_range.address = heap->descriptor_buffer.host_allocation + + heap_index * allocator->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + + if (view_desc->image_usage == VK_IMAGE_USAGE_SAMPLED_BIT) + { + desc_info.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + desc_range.size = allocator->device->bindless_state.sampled_image_size; + } + else + { + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + desc_range.size = allocator->device->bindless_state.storage_image_size; + } + + VK_CALL(vkWriteResourceDescriptorsEXT(allocator->device->vk_device, 1, &desc_info, &desc_range)); + return heap_index; +} + /* ID3D12CommandList */ static inline struct d3d12_command_list 
*impl_from_ID3D12GraphicsCommandList(d3d12_command_list_iface *iface) { @@ -4970,11 +5062,56 @@ static size_t get_query_heap_stride(D3D12_QUERY_HEAP_TYPE heap_type) return sizeof(uint64_t); } +void d3d12_command_list_meta_push_data(struct d3d12_command_list *list, + VkCommandBuffer vk_command_buffer, + VkPipelineLayout vk_pipeline_layout, VkShaderStageFlags stages, + uint32_t size, const void *data) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + + if (vk_pipeline_layout != VK_NULL_HANDLE) + { + VK_CALL(vkCmdPushConstants(vk_command_buffer, + vk_pipeline_layout, + stages, 0, size, data)); + + if (list->cmd.vk_command_buffer == vk_command_buffer) + d3d12_command_list_invalidate_descriptor_heap(list); + } + else + { + VkPushDataInfoEXT push; + memset(&push, 0, sizeof(push)); + push.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + push.data.address = data; + push.data.size = size; + VK_CALL(vkCmdPushDataEXT(vk_command_buffer, &push)); + } + + if (list->cmd.vk_command_buffer == vk_command_buffer) + d3d12_command_list_invalidate_root_parameters(list); +} + +void d3d12_command_list_meta_push_descriptor_index(struct d3d12_command_list *list, + VkCommandBuffer vk_command_buffer, uint32_t binding, uint32_t heap_index) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + VkPushDataInfoEXT push; + memset(&push, 0, sizeof(push)); + push.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + push.data.address = &heap_index; + push.data.size = sizeof(heap_index); + push.offset = 128 + binding * sizeof(uint32_t); + + d3d12_command_list_update_descriptor_heaps(list); + VK_CALL(vkCmdPushDataEXT(vk_command_buffer, &push)); +} + static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list *list) { /* TODO allocate arrays from command allocator in case * games hit this path multiple times per frame */ - VkDeviceSize resolve_buffer_size, resolve_buffer_stride, ssbo_alignment, entry_buffer_size; + VkDeviceSize 
resolve_buffer_size, resolve_buffer_stride, entry_buffer_size; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_scratch_allocation resolve_buffer, entry_buffer; struct vkd3d_query_gather_info gather_pipeline; @@ -5041,7 +5178,6 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list qsort(list->pending_queries, list->pending_queries_count, sizeof(*list->pending_queries), &vkd3d_compare_pending_query); - ssbo_alignment = d3d12_device_get_ssbo_alignment(list->device); resolve_buffer_size = 0; resolve_buffer_stride = 0; resolve_index = 0; @@ -5065,7 +5201,7 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list * doesn't get overly complicated when we need to deal with potential * SSBO alignment issues on some hardware. */ resolve_buffer_stride = get_query_heap_stride(q->heap->desc.Type); - resolve_buffer_size = align(resolve_buffer_size, ssbo_alignment); + resolve_buffer_size = align(resolve_buffer_size, 16); resolve_index = 0; d = &dispatches[dispatch_count++]; @@ -5113,7 +5249,7 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list /* Allocate scratch buffer and resolve virtual Vulkan queries into it */ if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, - resolve_buffer_size, max(ssbo_alignment, sizeof(uint64_t)), ~0u, &resolve_buffer)) + resolve_buffer_size, 16, ~0u, &resolve_buffer)) goto cleanup; for (i = 0; i < resolve_count; i++) @@ -5131,7 +5267,7 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, - entry_buffer_size, ssbo_alignment, ~0u, &entry_buffer)) + entry_buffer_size, 16, ~0u, &entry_buffer)) goto cleanup; for (i = 0; i < dispatch_count; i++) @@ -5241,9 +5377,8 @@ static bool d3d12_command_list_gather_pending_queries(struct 
d3d12_command_list entry_offset += d->virtual_query_count; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - gather_pipeline.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + gather_pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(args), &args); workgroup_count = vkd3d_compute_workgroup_count(d->unique_query_count, VKD3D_QUERY_OP_WORKGROUP_SIZE); VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, workgroup_count, 1, 1)); @@ -5260,7 +5395,6 @@ static bool d3d12_command_list_gather_pending_queries(struct d3d12_command_list result = true; d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); VKD3D_BREADCRUMB_COMMAND(GATHER_VIRTUAL_QUERY); @@ -5876,11 +6010,11 @@ void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list, static void d3d12_command_list_invalidate_push_constants(struct vkd3d_pipeline_bindings *bindings) { if (bindings->root_signature->descriptor_table_count) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; + bindings->dirty_table_offsets = true; + if (bindings->root_signature->redzone_style == VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_INLINE) + bindings->dirty_inline_redzone = true; - bindings->root_descriptor_dirty_mask = - bindings->root_signature->root_descriptor_raw_va_mask | - bindings->root_signature->root_descriptor_push_mask; + bindings->root_descriptor_dirty_mask = bindings->root_signature->root_descriptor_raw_va_mask; if (vkd3d_descriptor_debug_active_instruction_qa_checks()) { @@ -5891,34 +6025,18 @@ static void d3d12_command_list_invalidate_push_constants(struct vkd3d_pipeline_b bindings->root_constant_dirty_mask = bindings->root_signature->root_constant_mask; } -void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list, - struct 
vkd3d_pipeline_bindings *bindings, bool invalidate_descriptor_heaps, - struct vkd3d_pipeline_bindings *sibling_push_domain) +void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list) { - /* For scenarios where we're emitting push constants to one bind point in meta shaders, - * this will invalidate push constants for the other bind points as well. */ - if (sibling_push_domain && sibling_push_domain->root_signature) - d3d12_command_list_invalidate_push_constants(sibling_push_domain); - - if (!bindings->root_signature) - return; - - /* Previously dirty states may no longer be dirty - * if the new root signature does not use them */ - bindings->dirty_flags = 0; - - if (bindings->root_signature->vk_sampler_descriptor_layout) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; - if (bindings->root_signature->hoist_info.num_desc) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; - - d3d12_command_list_invalidate_push_constants(bindings); + if (list->graphics_bindings.root_signature) + d3d12_command_list_invalidate_push_constants(&list->graphics_bindings); + if (list->compute_bindings.root_signature) + d3d12_command_list_invalidate_push_constants(&list->compute_bindings); +} - if (invalidate_descriptor_heaps) - { - struct d3d12_device *device = bindings->root_signature->device; - bindings->descriptor_heap_dirty_mask = (1ull << device->bindless_state.set_count) - 1; - } +void d3d12_command_list_invalidate_descriptor_heap(struct d3d12_command_list *list) +{ + list->descriptor_heap.heap_dirty = true; + WARN("Invalidating descriptor heap due to meta command which is incompatible with heaps.\n"); } static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d12_command_list *list, @@ -5947,9 +6065,6 @@ static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d *stages |= queue_shader_stages; *access |= VK_ACCESS_2_UNIFORM_READ_BIT; - if (device->bindless_state.flags & 
(VKD3D_BINDLESS_CBV_AS_SSBO | VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV)) - *access |= VK_ACCESS_2_SHADER_READ_BIT; - if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) { *stages |= VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT; @@ -6052,8 +6167,7 @@ static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d *access |= VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT; /* We might use preprocessing. */ - if (list->device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands || - list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) + if (list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) { *stages |= VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT; *access |= VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT; @@ -6531,8 +6645,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(d3d12_command_list_ifa d3d12_command_list_debug_mark_end_region(list); /* CommandList region */ /* Ensure that any non-temporal writes from CopyDescriptors are ordered properly. 
*/ - if (d3d12_device_use_embedded_mutable_descriptors(list->device)) + //if (d3d12_device_use_embedded_mutable_descriptors(list->device)) + { vkd3d_memcpy_non_temporal_barrier(); + } list->rendering_info.state_flags |= VKD3D_RENDERING_END_OF_COMMAND_LIST; d3d12_command_list_decay_tracked_state(list); @@ -6754,17 +6870,6 @@ bool d3d12_command_list_reset_query(struct d3d12_command_list *list, return true; } -static void d3d12_command_list_init_default_descriptor_buffers(struct d3d12_command_list *list) -{ - if (d3d12_device_uses_descriptor_buffers(list->device)) - { - list->descriptor_heap.buffers.heap_va_resource = list->device->global_descriptor_buffer.resource.va; - list->descriptor_heap.buffers.heap_va_sampler = list->device->global_descriptor_buffer.sampler.va; - list->descriptor_heap.buffers.vk_buffer_resource = list->device->global_descriptor_buffer.resource.vk_buffer; - list->descriptor_heap.buffers.heap_dirty = true; - } -} - static void d3d12_command_list_reset_rtv_resolves(struct d3d12_command_list *list) { list->rtv_resolve_count = 0; @@ -6831,8 +6936,6 @@ static void d3d12_command_list_reset_api_state(struct d3d12_command_list *list, list->compute_bindings.root_descriptor_dirty_mask = UINT64_MAX; } - d3d12_command_list_init_default_descriptor_buffers(list); - list->state = NULL; list->rt_state = NULL; memset(&list->wg_state, 0, sizeof(list->wg_state)); @@ -6844,7 +6947,6 @@ static void d3d12_command_list_reset_api_state(struct d3d12_command_list *list, memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); - list->cbv_srv_uav_descriptors_view = NULL; list->vrs_image = NULL; ID3D12GraphicsCommandList10_SetPipelineState(iface, initial_pipeline_state); @@ -6896,8 +6998,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, void d3d12_command_list_invalidate_all_state(struct d3d12_command_list *list) { 
d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->graphics_bindings, true, NULL); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, NULL); + d3d12_command_list_invalidate_root_parameters(list); list->index_buffer.is_dirty = true; } @@ -7324,199 +7425,125 @@ static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_lis return true; } -static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, VkPipelineLayout layout, VkShaderStageFlags push_stages) +static void d3d12_command_list_update_inline_redzone(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - const struct vkd3d_shader_descriptor_table *table; - uint32_t table_offsets[D3D12_MAX_ROOT_COST]; - unsigned int root_parameter_index; - uint64_t descriptor_table_mask; + VkPushDataInfoEXT info; - assert(root_signature->descriptor_table_count); - descriptor_table_mask = root_signature->descriptor_table_mask; - - while (descriptor_table_mask) + if (list->descriptor_heap.resource.heap && + root_signature->redzone_style == VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_INLINE) { - root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask); - table = root_signature_get_descriptor_table(root_signature, root_parameter_index); - table_offsets[table->table_index] = bindings->descriptor_tables[root_parameter_index]; - } + memset(&info, 0, sizeof(info)); + info.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + info.data.address = &list->descriptor_heap.resource.heap->desc.NumDescriptors; + info.data.size = sizeof(uint32_t); + info.offset = root_signature->heap_redzone_inline_heap_count_offset; + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, 
&info)); - /* Set descriptor offsets */ - if (push_stages) - { - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - layout, push_stages, - root_signature->descriptor_table_offset, - root_signature->descriptor_table_count * sizeof(uint32_t), - table_offsets)); + if (root_signature->heap_redzone_inline_heap_va_offset != UINT32_MAX) + { + VkDeviceAddress va = list->descriptor_heap.resource.va + list->device->bindless_state.heap_redzone_size; + info.data.address = &va; + info.data.size = sizeof(va); + info.offset = root_signature->heap_redzone_inline_heap_va_offset; + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &info)); + } } - bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; -} - -static void vk_write_descriptor_set_from_root_descriptor(struct d3d12_command_list *list, - VkWriteDescriptorSet *vk_descriptor_write, const struct vkd3d_shader_root_parameter *root_parameter, - const struct vkd3d_root_descriptor_info *descriptor) -{ - vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_write->pNext = NULL; - vk_descriptor_write->dstSet = VK_NULL_HANDLE; - vk_descriptor_write->dstBinding = root_parameter->descriptor.binding->binding.binding; - vk_descriptor_write->dstArrayElement = 0; - vk_descriptor_write->descriptorType = descriptor->vk_descriptor_type; - vk_descriptor_write->descriptorCount = 1; - vk_descriptor_write->pImageInfo = NULL; - vk_descriptor_write->pBufferInfo = &descriptor->info.buffer; - vk_descriptor_write->pTexelBufferView = &descriptor->info.buffer_view; -} - -static void vk_write_descriptor_set_from_scratch_push_ubo(VkWriteDescriptorSet *vk_descriptor_write, - VkDescriptorBufferInfo *vk_buffer_info, - const struct vkd3d_scratch_allocation *alloc, - VkDeviceSize size, uint32_t vk_binding) -{ - vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_write->pNext = NULL; - vk_descriptor_write->dstSet = VK_NULL_HANDLE; - vk_descriptor_write->descriptorType = 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - vk_descriptor_write->dstArrayElement = 0; - vk_descriptor_write->dstBinding = vk_binding; - vk_descriptor_write->descriptorCount = 1; - vk_descriptor_write->pBufferInfo = vk_buffer_info; - vk_descriptor_write->pImageInfo = NULL; - vk_descriptor_write->pTexelBufferView = NULL; - - vk_buffer_info->buffer = alloc->buffer; - vk_buffer_info->offset = alloc->offset; - vk_buffer_info->range = size; + bindings->dirty_inline_redzone = false; } -/* This is a big stall on some GPUs so need to track this separately. */ -void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_list *list) +static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings) { + const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - VkDescriptorBufferBindingPushDescriptorBufferHandleEXT buffer_handle; - VkDescriptorBufferBindingInfoEXT global_buffers[2]; + const struct vkd3d_shader_descriptor_table *table; + uint32_t table_offsets[D3D12_MAX_ROOT_COST]; + unsigned int root_parameter_index; + uint64_t descriptor_table_mask; + VkPushDataInfoEXT info; - if (d3d12_device_uses_descriptor_buffers(list->device) && - list->descriptor_heap.buffers.heap_dirty) + if (root_signature->descriptor_table_count) { - global_buffers[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; - global_buffers[0].pNext = NULL; - global_buffers[0].usage = list->device->global_descriptor_buffer.resource.usage; - global_buffers[0].address = list->descriptor_heap.buffers.heap_va_resource; + descriptor_table_mask = root_signature->descriptor_table_mask; - if (global_buffers[0].usage & VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT) + while (descriptor_table_mask) { - global_buffers[0].pNext = &buffer_handle; - buffer_handle.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_PUSH_DESCRIPTOR_BUFFER_HANDLE_EXT; - buffer_handle.pNext = NULL; - buffer_handle.buffer = list->descriptor_heap.buffers.vk_buffer_resource; + root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask); + table = root_signature_get_descriptor_table(root_signature, root_parameter_index); + table_offsets[table->table_index] = bindings->descriptor_tables[root_parameter_index]; } - global_buffers[1].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; - global_buffers[1].pNext = NULL; - global_buffers[1].usage = list->device->global_descriptor_buffer.sampler.usage; - global_buffers[1].address = list->descriptor_heap.buffers.heap_va_sampler; - - VK_CALL(vkCmdBindDescriptorBuffersEXT(list->cmd.vk_command_buffer, - ARRAY_SIZE(global_buffers), global_buffers)); - - list->descriptor_heap.buffers.heap_dirty = false; + memset(&info, 0, sizeof(info)); + info.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + info.data.address = table_offsets; + info.data.size = root_signature->descriptor_table_count * sizeof(uint32_t); + info.offset = root_signature->descriptor_table_offset; + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &info)); } + + bindings->dirty_table_offsets = false; } -static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, VkPipelineBindPoint vk_bind_point, - VkPipelineLayout layout) +/* This is a big stall on some GPUs so need to track this separately. 
*/ +void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list *list) { - const struct vkd3d_bindless_state *bindless_state = &list->device->bindless_state; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - - if (!bindings->descriptor_heap_dirty_mask) + VkBindHeapInfoEXT resource_heap_info; + if (!list->descriptor_heap.heap_dirty) return; - if (d3d12_device_uses_descriptor_buffers(list->device)) - { - d3d12_command_list_update_descriptor_buffers(list); + memset(&resource_heap_info, 0, sizeof(resource_heap_info)); + resource_heap_info.sType = VK_STRUCTURE_TYPE_BIND_HEAP_INFO_EXT; - /* Prefer binding everything in one go. There is no risk of null descriptor sets here. */ - if (bindings->descriptor_heap_dirty_mask) - { - VK_CALL(vkCmdSetDescriptorBufferOffsetsEXT(list->cmd.vk_command_buffer, vk_bind_point, - layout, 0, bindless_state->set_count, - bindless_state->vk_descriptor_buffer_indices, - list->descriptor_heap.buffers.vk_offsets)); - bindings->descriptor_heap_dirty_mask = 0; - } - } - else + if (list->descriptor_heap.resource.va) { - while (bindings->descriptor_heap_dirty_mask) - { - unsigned int heap_index = vkd3d_bitmask_iter64(&bindings->descriptor_heap_dirty_mask); - - if (list->descriptor_heap.sets.vk_sets[heap_index]) - { - VK_CALL(vkCmdBindDescriptorSets(list->cmd.vk_command_buffer, vk_bind_point, - layout, heap_index, 1, - &list->descriptor_heap.sets.vk_sets[heap_index], 0, NULL)); - } - } + resource_heap_info.heapRange.address = list->descriptor_heap.resource.va; + resource_heap_info.heapRange.size = list->descriptor_heap.resource.size; + resource_heap_info.reservedRangeOffset = list->descriptor_heap.resource.reserved_offset; + resource_heap_info.reservedRangeSize = + list->device->device_info.descriptor_heap_properties.minResourceHeapReservedRange; + VK_CALL(vkCmdBindResourceHeapEXT(list->cmd.vk_command_buffer, &resource_heap_info)); } -} - -static void d3d12_command_list_update_static_samplers(struct 
d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, VkPipelineBindPoint vk_bind_point, - VkPipelineLayout layout) -{ - const struct d3d12_root_signature *root_signature = bindings->root_signature; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - if (bindings->static_sampler_set) + if (list->descriptor_heap.sampler.va) { - VK_CALL(vkCmdBindDescriptorSets(list->cmd.vk_command_buffer, vk_bind_point, - layout, - root_signature->sampler_descriptor_set, - 1, &bindings->static_sampler_set, 0, NULL)); - } - else if (root_signature->vk_sampler_descriptor_layout) - { - VK_CALL(vkCmdBindDescriptorBufferEmbeddedSamplersEXT(list->cmd.vk_command_buffer, vk_bind_point, - layout, root_signature->sampler_descriptor_set)); + resource_heap_info.heapRange.address = list->descriptor_heap.sampler.va; + resource_heap_info.heapRange.size = list->descriptor_heap.sampler.size; + resource_heap_info.reservedRangeOffset = list->descriptor_heap.sampler.reserved_offset; + resource_heap_info.reservedRangeSize = + list->device->device_info.descriptor_heap_properties.minSamplerHeapReservedRangeWithEmbedded; + VK_CALL(vkCmdBindSamplerHeapEXT(list->cmd.vk_command_buffer, &resource_heap_info)); } - bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; + list->descriptor_heap.heap_dirty = false; } static void d3d12_command_list_update_root_constants(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, - VkPipelineLayout layout, VkShaderStageFlags push_stages) + struct vkd3d_pipeline_bindings *bindings) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const struct vkd3d_shader_root_constant *root_constant; unsigned int root_parameter_index; - if (!push_stages) - { - bindings->root_constant_dirty_mask = 0; - return; - } - while (bindings->root_constant_dirty_mask) { + VkPushDataInfoEXT info; + root_parameter_index = 
vkd3d_bitmask_iter64(&bindings->root_constant_dirty_mask); root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - layout, push_stages, - root_constant->constant_index * sizeof(uint32_t), - root_constant->constant_count * sizeof(uint32_t), - &bindings->root_constants[root_constant->constant_index])); + memset(&info, 0, sizeof(info)); + info.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + info.data.address = &bindings->root_constants[root_constant->constant_index]; + info.data.size = root_constant->constant_count * sizeof(uint32_t); + info.offset = root_constant->constant_index * sizeof(uint32_t); + + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &info)); } } @@ -7533,7 +7560,7 @@ static unsigned int d3d12_command_list_fetch_root_descriptor_vas(struct d3d12_co while (root_descriptor_mask) { unsigned int root_parameter_index = vkd3d_bitmask_iter64(&root_descriptor_mask); - dst_data->root_descriptor_vas[va_idx++] = bindings->root_descriptors[root_parameter_index].info.va; + dst_data->root_descriptor_vas[va_idx++] = bindings->root_descriptors_va[root_parameter_index]; } return va_idx; @@ -7583,88 +7610,33 @@ void d3d12_command_list_fetch_root_parameter_data(struct d3d12_command_list *lis } static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, VkPipelineBindPoint vk_bind_point, - VkPipelineLayout layout, VkShaderStageFlags push_stages, uint32_t root_signature_flags) + struct vkd3d_pipeline_bindings *bindings) { const struct d3d12_root_signature *root_signature = bindings->root_signature; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - VkWriteDescriptorSet descriptor_writes[D3D12_MAX_ROOT_COST / 2]; - const struct vkd3d_shader_root_parameter *root_parameter; - union vkd3d_root_parameter_data *ptr_root_parameter_data; union vkd3d_root_parameter_data root_parameter_data; 
- unsigned int descriptor_write_count = 0; - struct vkd3d_scratch_allocation alloc; - VkDescriptorBufferInfo buffer_info; - unsigned int root_parameter_index; unsigned int va_count = 0; - uint64_t dirty_push_mask; - - if (root_signature_flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - d3d12_command_allocator_allocate_scratch_memory(list->allocator, - VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, sizeof(root_parameter_data), - D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, ~0u, &alloc); - ptr_root_parameter_data = alloc.host_ptr; - - /* Dirty all state that enters push UBO block to make sure it's emitted. - * Push descriptors that are not raw VA can be emitted on a partial basis. - * Root constants and tables are always considered dirty here, so omit that. */ - bindings->root_descriptor_dirty_mask |= root_signature->root_descriptor_raw_va_mask; - } - else - ptr_root_parameter_data = &root_parameter_data; if (bindings->root_descriptor_dirty_mask) { /* If any raw VA descriptor is dirty, we need to update all of them. */ if (root_signature->root_descriptor_raw_va_mask & bindings->root_descriptor_dirty_mask) - va_count = d3d12_command_list_fetch_root_descriptor_vas(list, bindings, ptr_root_parameter_data); - - /* TODO bind null descriptors for inactive root descriptors. 
*/ - dirty_push_mask = - bindings->root_descriptor_dirty_mask & - root_signature->root_descriptor_push_mask & - bindings->root_descriptor_active_mask; - - while (dirty_push_mask) - { - root_parameter_index = vkd3d_bitmask_iter64(&dirty_push_mask); - root_parameter = root_signature_get_root_descriptor(root_signature, root_parameter_index); - - vk_write_descriptor_set_from_root_descriptor(list, - &descriptor_writes[descriptor_write_count], root_parameter, - &bindings->root_descriptors[root_parameter_index]); - - descriptor_write_count += 1; - } - + va_count = d3d12_command_list_fetch_root_descriptor_vas(list, bindings, &root_parameter_data); bindings->root_descriptor_dirty_mask = 0; } - if (root_signature_flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - d3d12_command_list_fetch_root_parameter_uniform_block_data(list, bindings, ptr_root_parameter_data); - - /* Reset dirty flags to avoid redundant updates in the future. - * We consume all constants / tables here regardless of dirty state. 
*/ - bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; - bindings->root_constant_dirty_mask = 0; - - vk_write_descriptor_set_from_scratch_push_ubo(&descriptor_writes[descriptor_write_count], - &buffer_info, &alloc, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, - root_signature->push_constant_ubo_binding.binding); - - descriptor_write_count += 1; - } - else if (va_count && push_stages) + if (va_count) { - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - layout, push_stages, - 0, va_count * sizeof(*root_parameter_data.root_descriptor_vas), - root_parameter_data.root_descriptor_vas)); + VkPushDataInfoEXT info; + memset(&info, 0, sizeof(info)); + info.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + info.data.address = root_parameter_data.root_descriptor_vas; + info.data.size = va_count * sizeof(*root_parameter_data.root_descriptor_vas); + info.offset = 0; + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &info)); } -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA +#if defined(VKD3D_ENABLE_DESCRIPTOR_QA) && 0 if (vkd3d_descriptor_debug_active_instruction_qa_checks()) { VkWriteDescriptorSet *write = &descriptor_writes[descriptor_write_count]; @@ -7684,169 +7656,29 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list descriptor_write_count += 2; } #endif - - if (descriptor_write_count) - { - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, vk_bind_point, - layout, root_signature->root_descriptor_set, - descriptor_write_count, descriptor_writes)); - } -} - -static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings) -{ - const struct d3d12_root_signature *rs = bindings->root_signature; - const struct vkd3d_descriptor_hoist_desc *hoist_desc; - struct vkd3d_root_descriptor_info *root_parameter; - const struct vkd3d_descriptor_metadata_view *view; - const struct vkd3d_unique_resource *resource; - union vkd3d_descriptor_info *info; - 
unsigned int i; - - /* We don't track dirty table index, just update every hoisted descriptor. - * Uniform buffers tend to be updated all the time anyways, so this should be fine. */ - for (i = 0; i < rs->hoist_info.num_desc; i++) - { - hoist_desc = &rs->hoist_info.desc[i]; - - view = list->cbv_srv_uav_descriptors_view; - if (view) - view += bindings->descriptor_tables[hoist_desc->table_index] + hoist_desc->table_offset; - - root_parameter = &bindings->root_descriptors[hoist_desc->parameter_index]; - - bindings->root_descriptor_dirty_mask |= 1ull << hoist_desc->parameter_index; - bindings->root_descriptor_active_mask |= 1ull << hoist_desc->parameter_index; - root_parameter->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - info = &root_parameter->info; - - if (view && (view->info.buffer.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE)) - { - /* Buffer descriptors must be valid on recording time. */ - resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, view->info.buffer.va); - if (resource) - { - info->buffer.buffer = resource->vk_buffer; - info->buffer.offset = view->info.buffer.va - resource->va; - info->buffer.range = min(view->info.buffer.range, resource->size - info->buffer.offset); - } - else - { - info->buffer.buffer = VK_NULL_HANDLE; - info->buffer.offset = 0; - info->buffer.range = VK_WHOLE_SIZE; - } - } - else - { - info->buffer.buffer = VK_NULL_HANDLE; - info->buffer.offset = 0; - info->buffer.range = VK_WHOLE_SIZE; - } - } - - bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; } static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list) { struct vkd3d_pipeline_bindings *bindings = d3d12_command_list_get_bindings(list, list->active_pipeline_type); const struct d3d12_root_signature *rs = bindings->root_signature; - const struct d3d12_bind_point_layout *bind_point_layout; - VkPipelineBindPoint vk_bind_point; - VkShaderStageFlags push_stages; - VkPipelineLayout layout; if (!rs) return; - 
bind_point_layout = d3d12_root_signature_get_layout(rs, list->active_pipeline_type); - layout = bind_point_layout->vk_pipeline_layout; - push_stages = bind_point_layout->vk_push_stages; - - vk_bind_point = vk_bind_point_from_pipeline_type(list->active_pipeline_type); - - if (bindings->descriptor_heap_dirty_mask) - d3d12_command_list_update_descriptor_heaps(list, bindings, vk_bind_point, layout); - - if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET) - d3d12_command_list_update_static_samplers(list, bindings, vk_bind_point, layout); - - /* If we can, hoist descriptors from the descriptor heap into fake root parameters. */ - if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS) - d3d12_command_list_update_hoisted_descriptors(list, bindings); - - if (bind_point_layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - /* Root constants and descriptor table offsets are part of the root descriptor set */ - if (bindings->root_descriptor_dirty_mask || bindings->root_constant_dirty_mask - || (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS)) - { - d3d12_command_list_update_root_descriptors(list, bindings, vk_bind_point, layout, push_stages, - bind_point_layout->flags); - } - } - else - { - if (bindings->root_descriptor_dirty_mask) - { - d3d12_command_list_update_root_descriptors(list, bindings, vk_bind_point, layout, push_stages, - bind_point_layout->flags); - } - - if (bindings->root_constant_dirty_mask) - d3d12_command_list_update_root_constants(list, bindings, layout, push_stages); - - if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS) - d3d12_command_list_update_descriptor_table_offsets(list, bindings, layout, push_stages); - } -} + d3d12_command_list_update_descriptor_heaps(list); -static void d3d12_command_list_update_descriptors_post_indirect_buffer(struct d3d12_command_list *list) -{ - /* Pretend for a moment that the post indirect buffer is the main command buffer. 
- * Set all dirty bits so we force-flush state to a different command buffer. - * Not the most elegant solution, but avoids us having to reimplement everything - * just to plumb thorugh a different set of dirty masks, etc. */ - struct vkd3d_pipeline_bindings *bindings = d3d12_command_list_get_bindings(list, list->active_pipeline_type); - const struct d3d12_root_signature *rs = bindings->root_signature; - uint32_t old_root_descriptor_dirty_mask; - uint32_t old_descriptor_heap_dirty_mask; - uint32_t old_root_constant_dirty_mask; - VkCommandBuffer old_cmd_buffer; - bool old_heap_dirty = false; - uint32_t old_dirty_flags; + if (bindings->root_descriptor_dirty_mask) + d3d12_command_list_update_root_descriptors(list, bindings); - if (!rs) - return; + if (bindings->root_constant_dirty_mask) + d3d12_command_list_update_root_constants(list, bindings); - old_root_descriptor_dirty_mask = bindings->root_descriptor_dirty_mask; - old_descriptor_heap_dirty_mask = bindings->descriptor_heap_dirty_mask; - old_root_constant_dirty_mask = bindings->root_constant_dirty_mask; - old_dirty_flags = bindings->dirty_flags; - old_cmd_buffer = list->cmd.vk_command_buffer; - /* This is bad, but the current NV implementation does not actually - * do anything bad when rebinding descriptor buffers, so just roll with it. - * Can be fixed if necessary. */ - if (d3d12_device_uses_descriptor_buffers(list->device)) - old_heap_dirty = list->descriptor_heap.buffers.heap_dirty; - - /* Override state. */ - list->cmd.vk_command_buffer = list->cmd.vk_post_indirect_barrier_commands; - if (d3d12_device_uses_descriptor_buffers(list->device)) - list->descriptor_heap.buffers.heap_dirty = true; - d3d12_command_list_invalidate_root_parameters(list, bindings, true, NULL); - d3d12_command_list_update_descriptors(list); + if (bindings->dirty_table_offsets) + d3d12_command_list_update_descriptor_table_offsets(list, bindings); - /* Restore state. 
*/ - bindings->root_descriptor_dirty_mask = old_root_descriptor_dirty_mask; - bindings->descriptor_heap_dirty_mask = old_descriptor_heap_dirty_mask; - bindings->root_constant_dirty_mask = old_root_constant_dirty_mask; - bindings->dirty_flags = old_dirty_flags; - list->cmd.vk_command_buffer = old_cmd_buffer; - if (d3d12_device_uses_descriptor_buffers(list->device)) - list->descriptor_heap.buffers.heap_dirty = old_heap_dirty; + if (bindings->dirty_inline_redzone) + d3d12_command_list_update_inline_redzone(list, bindings); } static void d3d12_command_list_check_pre_compute_barrier( @@ -7871,7 +7703,6 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l static bool d3d12_command_list_update_raygen_state(struct d3d12_command_list *list) { - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; d3d12_command_list_end_current_render_pass(list, false); if (!d3d12_command_list_update_raygen_pipeline(list)) @@ -7882,28 +7713,6 @@ static bool d3d12_command_list_update_raygen_state(struct d3d12_command_list *li d3d12_command_list_check_pre_compute_barrier(list, VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR); d3d12_command_list_update_descriptors(list); - /* If we have a static sampler set for local root signatures, bind it now. - * Don't bother with dirty tracking of this for time being. - * Should be very rare that this path is even hit. 
*/ - if (list->rt_state_variant->local_static_sampler.set_layout) - { - if (list->rt_state_variant->local_static_sampler.desc_set) - { - VK_CALL(vkCmdBindDescriptorSets(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, - list->rt_state_variant->local_static_sampler.pipeline_layout, - list->rt_state_variant->local_static_sampler.set_index, - 1, &list->rt_state_variant->local_static_sampler.desc_set, - 0, NULL)); - } - else - { - VK_CALL(vkCmdBindDescriptorBufferEmbeddedSamplersEXT(list->cmd.vk_command_buffer, - VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, - list->rt_state_variant->local_static_sampler.pipeline_layout, - list->rt_state_variant->local_static_sampler.set_index)); - } - } - return true; } @@ -8469,7 +8278,6 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count(struct d3d12_c if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) { d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); } else { @@ -8485,9 +8293,9 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count(struct d3d12_c VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_info.vk_pipeline)); - VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, - pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(args), &args)); + + d3d12_command_list_meta_push_data(list, vk_patch_cmd_buffer, + pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(args), &args); VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, vkd3d_compute_workgroup_count(max_commands, vkd3d_meta_get_multi_dispatch_indirect_workgroup_size()), @@ -8514,135 +8322,42 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count(struct d3d12_c return true; } -static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d3d12_command_list *list, - struct d3d12_command_signature *signature, - 
VkDeviceAddress indirect_args, - uint32_t stride, uint32_t max_commands, - VkDeviceAddress count_arg, - struct vkd3d_scratch_allocation *dispatch_scratch, - struct vkd3d_scratch_allocation *ubo_scratch) +static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list *list, + enum vkd3d_predicate_command_type command_type, VkDeviceAddress indirect_args, + const union vkd3d_predicate_command_direct_args *direct_args, struct vkd3d_scratch_allocation *scratch) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - struct vkd3d_multi_dispatch_indirect_info pipeline_info; - struct vkd3d_multi_dispatch_indirect_state_args args; - struct vkd3d_scratch_allocation template_scratch; + struct vkd3d_predicate_command_info pipeline_info; + struct vkd3d_predicate_command_args args; VkCommandBuffer vk_patch_cmd_buffer; VkMemoryBarrier2 vk_barrier; VkDependencyInfo dep_info; - vkd3d_meta_get_multi_dispatch_indirect_state_pipeline(&list->device->meta_ops, &pipeline_info); - - if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, - D3D12_MAX_ROOT_COST * sizeof(uint32_t) + - sizeof(signature->state_template.compute.source_offsets), - sizeof(uint32_t), ~0u, &template_scratch)) - return false; - - if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, - sizeof(VkDispatchIndirectCommand) * max_commands, - sizeof(uint32_t), ~0u, dispatch_scratch)) - return false; + vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info); if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, - (D3D12_MAX_ROOT_COST * sizeof(uint32_t)) * max_commands, - D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, - ~0u, ubo_scratch)) + pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch)) return false; - d3d12_command_list_fetch_root_parameter_data(list, &list->compute_bindings, 
template_scratch.host_ptr); - memcpy(void_ptr_offset(template_scratch.host_ptr, D3D12_MAX_ROOT_COST * sizeof(uint32_t)), - signature->state_template.compute.source_offsets, - sizeof(signature->state_template.compute.source_offsets)); - - args.indirect_va = indirect_args; - args.count_va = count_arg; - args.dispatch_va = dispatch_scratch->va; - args.root_parameters_va = ubo_scratch->va; - args.root_parameter_template_va = template_scratch.va; - args.stride_words = stride / sizeof(uint32_t); - args.dispatch_offset_words = signature->state_template.compute.dispatch_offset_words; - d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list); vk_patch_cmd_buffer = list->cmd.vk_post_indirect_barrier_commands; if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) - { - d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); - } - else - { - list->cmd.indirect_meta->need_compute_to_indirect_barrier = true; - list->cmd.indirect_meta->need_compute_to_cbv_barrier = true; - } + d3d12_command_list_end_current_render_pass(list, true); + + args.predicate_va = list->predication.va; + args.dst_arg_va = scratch->va; + args.src_arg_va = indirect_args; + args.args = *direct_args; VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_info.vk_pipeline)); - VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, - pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(args), &args)); - VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, max_commands, 1, 1)); - - if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) - { - memset(&dep_info, 0, sizeof(dep_info)); - dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; - dep_info.memoryBarrierCount = 1; - dep_info.pMemoryBarriers = &vk_barrier; - - memset(&vk_barrier, 0, sizeof(vk_barrier)); - vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; - 
vk_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; - vk_barrier.srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT; - vk_barrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; - vk_barrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_2_UNIFORM_READ_BIT; - - VK_CALL(vkCmdPipelineBarrier2(vk_patch_cmd_buffer, &dep_info)); - } - - VKD3D_BREADCRUMB_COMMAND(EXECUTE_INDIRECT_PATCH_STATE_COMPUTE); - return true; -} - -static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list *list, - enum vkd3d_predicate_command_type command_type, VkDeviceAddress indirect_args, - const union vkd3d_predicate_command_direct_args *direct_args, struct vkd3d_scratch_allocation *scratch) -{ - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - struct vkd3d_predicate_command_info pipeline_info; - struct vkd3d_predicate_command_args args; - VkCommandBuffer vk_patch_cmd_buffer; - VkMemoryBarrier2 vk_barrier; - VkDependencyInfo dep_info; - - vkd3d_meta_get_predicate_pipeline(&list->device->meta_ops, command_type, &pipeline_info); - - if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, - pipeline_info.data_size, sizeof(uint32_t), ~0u, scratch)) - return false; - - d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list); - vk_patch_cmd_buffer = list->cmd.vk_post_indirect_barrier_commands; - - if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) - d3d12_command_list_end_current_render_pass(list, true); - - args.predicate_va = list->predication.va; - args.dst_arg_va = scratch->va; - args.src_arg_va = indirect_args; - args.args = *direct_args; - - VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_info.vk_pipeline)); - VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, - pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(args), 
&args)); - VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, 1, 1, 1)); + d3d12_command_list_meta_push_data(list, vk_patch_cmd_buffer, + pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(args), &args); + + VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, 1, 1, 1)); if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) { @@ -8661,7 +8376,6 @@ static bool d3d12_command_list_emit_predicated_command(struct d3d12_command_list VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info)); d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); } else list->cmd.indirect_meta->need_compute_to_indirect_barrier = true; @@ -9529,6 +9243,7 @@ static void d3d12_command_list_copy_image(struct d3d12_command_list *list, } else { + uint32_t src_index = UINT32_MAX; VKD3D_BREADCRUMB_TAG("CopyWithRenderpass"); dst_view = src_view = NULL; @@ -9547,16 +9262,7 @@ static void d3d12_command_list_copy_image(struct d3d12_command_list *list, pipeline_key.sample_count = vk_samples_from_dxgi_sample_desc(&dst_resource->desc.SampleDesc); pipeline_key.dst_aspect_mask = region->dstSubresource.aspectMask; - if (FAILED(hr = vkd3d_meta_get_copy_image_pipeline(&list->device->meta_ops, &pipeline_key, &pipeline_info))) - { - ERR("Failed to obtain pipeline, format %u, view_type %u, sample_count %u.\n", - pipeline_key.format->vk_format, pipeline_key.view_type, pipeline_key.sample_count); - goto cleanup; - } - d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->graphics_bindings, true, &list->compute_bindings); - d3d12_command_list_update_descriptor_buffers(list); memset(&dst_view_desc, 0, sizeof(dst_view_desc)); dst_view_desc.image = dst_resource->res.vk_image; @@ -9584,20 +9290,52 @@ static void d3d12_command_list_copy_image(struct d3d12_command_list *list, src_view_desc.image_usage = 
VK_IMAGE_USAGE_SAMPLED_BIT; src_view_desc.allowed_swizzle = false; - if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view) || - !vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + src_index = d3d12_command_allocator_allocate_meta_image_view(list->allocator, + list->descriptor_heap.resource.heap, + &src_view_desc, d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + } + + if (FAILED(hr = vkd3d_meta_get_copy_image_pipeline(&list->device->meta_ops, &pipeline_key, &pipeline_info, + src_index != UINT32_MAX))) + { + ERR("Failed to obtain pipeline, format %u, view_type %u, sample_count %u.\n", + pipeline_key.format->vk_format, pipeline_key.view_type, pipeline_key.sample_count); + goto cleanup; + } + + if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view)) { ERR("Failed to create image views.\n"); goto cleanup; } - if (!d3d12_command_allocator_add_view(list->allocator, dst_view) || - !d3d12_command_allocator_add_view(list->allocator, src_view)) + if (!d3d12_command_allocator_add_view(list->allocator, dst_view)) { ERR("Failed to add views.\n"); goto cleanup; } + if (src_index != UINT32_MAX) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, src_index); + } + else + { + if (!vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + { + ERR("Failed to create image views.\n"); + goto cleanup; + } + + if (!d3d12_command_allocator_add_view(list->allocator, src_view)) + { + ERR("Failed to add views.\n"); + goto cleanup; + } + } + memset(&attachment_info, 0, sizeof(attachment_info)); attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO; attachment_info.imageView = dst_view->vk_image_view; @@ -9642,28 +9380,31 @@ static void d3d12_command_list_copy_image(struct d3d12_command_list *list, push_args.offset.x = region->srcOffset.x - 
region->dstOffset.x; push_args.offset.y = region->srcOffset.y - region->dstOffset.y; - vk_image_info.sampler = VK_NULL_HANDLE; - vk_image_info.imageView = src_view->vk_image_view; - vk_image_info.imageLayout = barrier.src.layout; - - vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_write.pNext = NULL; - vk_descriptor_write.dstSet = VK_NULL_HANDLE; - vk_descriptor_write.dstBinding = 0; - vk_descriptor_write.dstArrayElement = 0; - vk_descriptor_write.descriptorCount = 1; - vk_descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - vk_descriptor_write.pImageInfo = &vk_image_info; - vk_descriptor_write.pBufferInfo = NULL; - vk_descriptor_write.pTexelBufferView = NULL; + if (pipeline_info.vk_pipeline_layout) + { + vk_image_info.sampler = VK_NULL_HANDLE; + vk_image_info.imageView = src_view->vk_image_view; + vk_image_info.imageLayout = barrier.src.layout; + + vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_write.pNext = NULL; + vk_descriptor_write.dstSet = VK_NULL_HANDLE; + vk_descriptor_write.dstBinding = 0; + vk_descriptor_write.dstArrayElement = 0; + vk_descriptor_write.descriptorCount = 1; + vk_descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + vk_descriptor_write.pImageInfo = &vk_image_info; + vk_descriptor_write.pBufferInfo = NULL; + vk_descriptor_write.pTexelBufferView = NULL; + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_info.vk_pipeline_layout, 0, 1, &vk_descriptor_write)); + } d3d12_command_list_debug_mark_begin_region(list, "CopyRenderPass"); VK_CALL(vkCmdBeginRendering(list->cmd.vk_command_buffer, &rendering_info)); VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_info.vk_pipeline)); VK_CALL(vkCmdSetViewport(list->cmd.vk_command_buffer, 0, 1, &viewport)); VK_CALL(vkCmdSetScissor(list->cmd.vk_command_buffer, 0, 1, &rendering_info.renderArea)); - 
VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_info.vk_pipeline_layout, 0, 1, &vk_descriptor_write)); if (pipeline_info.needs_stencil_mask) { @@ -9671,15 +9412,20 @@ static void d3d12_command_list_copy_image(struct d3d12_command_list *list, { push_args.bit_mask = 1u << i; VK_CALL(vkCmdSetStencilWriteMask(list->cmd.vk_command_buffer, VK_STENCIL_FACE_FRONT_AND_BACK, push_args.bit_mask)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, pipeline_info.vk_pipeline_layout, - VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(push_args), &push_args)); + + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(push_args), &push_args); + VK_CALL(vkCmdDraw(list->cmd.vk_command_buffer, 3, region->dstSubresource.layerCount, 0, 0)); } } else { - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, pipeline_info.vk_pipeline_layout, - VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(push_args), &push_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(push_args), &push_args); + VK_CALL(vkCmdDraw(list->cmd.vk_command_buffer, 3, region->dstSubresource.layerCount, 0, 0)); } @@ -11293,10 +11039,10 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, VkDescriptorImageInfo vk_src_image_info, vk_dst_image_info; struct vkd3d_resolve_image_info resolve_pipeline_info; struct vkd3d_resolve_image_compute_args compute_args; + struct vkd3d_view *dst_view = NULL, *src_view = NULL; struct vkd3d_resolve_image_args resolve_args; VkWriteDescriptorSet vk_descriptor_writes[2]; VkRenderingAttachmentInfo attachment_info; - struct vkd3d_view *dst_view, *src_view; const struct vkd3d_format *vk_format; VkResolveImageInfo2 resolve_info; VkRenderingInfo rendering_info; @@ -11319,9 +11065,6 @@ static void 
d3d12_command_list_execute_resolve(struct d3d12_command_list *list, else if (path == VKD3D_RESOLVE_IMAGE_PATH_COMPUTE_PIPELINE) { d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); - d3d12_command_list_update_descriptor_buffers(list); - vk_format = d3d12_command_list_get_resolve_format(list, dst_resource, src_resource, format); memset(&dst_view_desc, 0, sizeof(dst_view_desc)); @@ -11343,17 +11086,11 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, resolve_pipeline_key.compute.mode = mode; resolve_pipeline_key.compute.srgb = dst_view_desc.format != vk_format; - if (FAILED(vkd3d_meta_get_resolve_image_pipeline(&list->device->meta_ops, &resolve_pipeline_key, &resolve_pipeline_info))) - { - ERR("Failed to get resolve pipeline.\n"); - return; - } - - VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, resolve_pipeline_info.vk_pipeline)); - for (i = 0; i < region_count; i++) { + uint32_t dst_index = UINT32_MAX, src_index = UINT32_MAX; const VkImageResolve2 *region = ®ions[i]; + bool use_heap; dst_view_desc.miplevel_idx = region->dstSubresource.mipLevel; dst_view_desc.miplevel_count = 1; @@ -11367,45 +11104,81 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, src_view_desc.layer_count = region->srcSubresource.layerCount; src_view_desc.aspect_mask = region->srcSubresource.aspectMask; - if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view) || - !vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + /* If we have a bound heap, we can try to grab something to avoid creating a stall. + * If we've flipped back to global heap (i.e. heap is already dirty), we've already taken the hit, + * and there is no point trying to rebind the global heap. 
*/ + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) { - ERR("Failed to create image views.\n"); - goto cleanup_compute; + dst_index = d3d12_command_allocator_allocate_meta_image_view( + list->allocator, list->descriptor_heap.resource.heap, &dst_view_desc, + VK_IMAGE_LAYOUT_GENERAL); + + src_index = d3d12_command_allocator_allocate_meta_image_view( + list->allocator, list->descriptor_heap.resource.heap, &src_view_desc, + d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); } - if (!d3d12_command_allocator_add_view(list->allocator, dst_view) || - !d3d12_command_allocator_add_view(list->allocator, src_view)) + use_heap = dst_index != UINT32_MAX && src_index != UINT32_MAX; + + if (FAILED(vkd3d_meta_get_resolve_image_pipeline( + &list->device->meta_ops, &resolve_pipeline_key, &resolve_pipeline_info, use_heap))) { - ERR("Failed to add views.\n"); - goto cleanup_compute; + ERR("Failed to get resolve pipeline.\n"); + return; + } + + VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + resolve_pipeline_info.vk_pipeline)); + + if (use_heap) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, src_index); + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 1, dst_index); } + else + { + if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view) || + !vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + { + ERR("Failed to create image views.\n"); + goto cleanup_compute; + } + + if (!d3d12_command_allocator_add_view(list->allocator, dst_view) || + !d3d12_command_allocator_add_view(list->allocator, src_view)) + { + ERR("Failed to add views.\n"); + goto cleanup_compute; + } - memset(&vk_src_image_info, 0, sizeof(vk_src_image_info)); - vk_src_image_info.imageView = src_view->vk_image_view; - vk_src_image_info.imageLayout = d3d12_resource_pick_layout(src_resource, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + memset(&vk_src_image_info, 0, sizeof(vk_src_image_info)); + vk_src_image_info.imageView = src_view->vk_image_view; + vk_src_image_info.imageLayout = d3d12_resource_pick_layout(src_resource, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - memset(&vk_dst_image_info, 0, sizeof(vk_dst_image_info)); - vk_dst_image_info.imageView = dst_view->vk_image_view; - vk_dst_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + memset(&vk_dst_image_info, 0, sizeof(vk_dst_image_info)); + vk_dst_image_info.imageView = dst_view->vk_image_view; + vk_dst_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - memset(&vk_descriptor_writes, 0, sizeof(vk_descriptor_writes)); - vk_descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[0].dstBinding = 0; - vk_descriptor_writes[0].dstArrayElement = 0; - vk_descriptor_writes[0].descriptorCount = 1; - vk_descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - vk_descriptor_writes[0].pImageInfo = &vk_src_image_info; + memset(&vk_descriptor_writes, 0, sizeof(vk_descriptor_writes)); + vk_descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[0].dstBinding = 0; + vk_descriptor_writes[0].dstArrayElement = 0; + vk_descriptor_writes[0].descriptorCount = 1; + vk_descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + vk_descriptor_writes[0].pImageInfo = &vk_src_image_info; - vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[1].dstBinding = 1; - vk_descriptor_writes[1].dstArrayElement = 0; - vk_descriptor_writes[1].descriptorCount = 1; - vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - vk_descriptor_writes[1].pImageInfo = &vk_dst_image_info; + vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[1].dstBinding = 1; + vk_descriptor_writes[1].dstArrayElement = 0; + 
vk_descriptor_writes[1].descriptorCount = 1; + vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + vk_descriptor_writes[1].pImageInfo = &vk_dst_image_info; - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - resolve_pipeline_info.vk_pipeline_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + resolve_pipeline_info.vk_pipeline_layout, 0, ARRAY_SIZE(vk_descriptor_writes), + vk_descriptor_writes)); + } compute_args.src_offset.x = region->srcOffset.x; compute_args.src_offset.y = region->srcOffset.y; @@ -11414,8 +11187,9 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, compute_args.extent.width = region->extent.width; compute_args.extent.height = region->extent.height; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, resolve_pipeline_info.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(compute_args), &compute_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + resolve_pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(compute_args), &compute_args); VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, vkd3d_compute_workgroup_count(region->extent.width, 8), @@ -11432,8 +11206,6 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, else { d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->graphics_bindings, true, &list->compute_bindings); - d3d12_command_list_update_descriptor_buffers(list); vk_format = d3d12_command_list_get_resolve_format(list, dst_resource, src_resource, format); @@ -11470,6 +11242,8 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, for (i = 0; i < region_count; i++) { const VkImageResolve2 *region = ®ions[i]; + uint32_t src_index = 
UINT32_MAX; + bool use_heap; rendering_info.renderArea.offset.x = region->dstOffset.x; rendering_info.renderArea.offset.y = region->dstOffset.y; @@ -11513,22 +11287,48 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, src_view_desc.aspect_mask = region->srcSubresource.aspectMask; src_view_desc.image_usage = VK_IMAGE_USAGE_SAMPLED_BIT; src_view_desc.allowed_swizzle = true; + + /* If we can avoid a heap transition, go for it. */ + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + src_index = d3d12_command_allocator_allocate_meta_image_view(list->allocator, + list->descriptor_heap.resource.heap, &src_view_desc, + d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + } } - if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view) || - !vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + use_heap = src_index != UINT32_MAX; + if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view)) { ERR("Failed to create image views.\n"); goto cleanup_graphics; } - if (!d3d12_command_allocator_add_view(list->allocator, dst_view) || - !d3d12_command_allocator_add_view(list->allocator, src_view)) + if (!d3d12_command_allocator_add_view(list->allocator, dst_view)) { ERR("Failed to add views.\n"); goto cleanup_graphics; } + if (use_heap) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, src_index); + } + else + { + if (!vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + { + ERR("Failed to create image views.\n"); + goto cleanup_graphics; + } + + if (!d3d12_command_allocator_add_view(list->allocator, src_view)) + { + ERR("Failed to add views.\n"); + goto cleanup_graphics; + } + } + if (path == VKD3D_RESOLVE_IMAGE_PATH_RENDER_PASS_ATTACHMENT) { attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -11544,7 +11344,8 @@ static void d3d12_command_list_execute_resolve(struct 
d3d12_command_list *list, resolve_pipeline_key.graphics.dst_aspect = (VkImageAspectFlagBits)region->dstSubresource.aspectMask; resolve_pipeline_key.graphics.mode = mode; - if (FAILED(vkd3d_meta_get_resolve_image_pipeline(&list->device->meta_ops, &resolve_pipeline_key, &resolve_pipeline_info))) + if (FAILED(vkd3d_meta_get_resolve_image_pipeline( + &list->device->meta_ops, &resolve_pipeline_key, &resolve_pipeline_info, use_heap))) { ERR("Failed to get resolve pipeline.\n"); return; @@ -11571,23 +11372,27 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, resolve_args.offset.x = region->srcOffset.x - region->dstOffset.x; resolve_args.offset.y = region->srcOffset.y - region->dstOffset.y; - memset(&vk_src_image_info, 0, sizeof(vk_src_image_info)); - vk_src_image_info.imageView = src_view->vk_image_view; - vk_src_image_info.imageLayout = d3d12_resource_pick_layout(src_resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - memset(&vk_descriptor_writes, 0, sizeof(vk_descriptor_writes)); - vk_descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[0].dstBinding = 0; - vk_descriptor_writes[0].dstArrayElement = 0; - vk_descriptor_writes[0].descriptorCount = 1; - vk_descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - vk_descriptor_writes[0].pImageInfo = &vk_src_image_info; + if (resolve_pipeline_info.vk_pipeline_layout) + { + memset(&vk_src_image_info, 0, sizeof(vk_src_image_info)); + vk_src_image_info.imageView = src_view->vk_image_view; + vk_src_image_info.imageLayout = d3d12_resource_pick_layout(src_resource, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + memset(&vk_descriptor_writes, 0, sizeof(vk_descriptor_writes)); + vk_descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[0].dstBinding = 0; + vk_descriptor_writes[0].dstArrayElement = 0; + vk_descriptor_writes[0].descriptorCount = 1; + vk_descriptor_writes[0].descriptorType = 
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + vk_descriptor_writes[0].pImageInfo = &vk_src_image_info; + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + resolve_pipeline_info.vk_pipeline_layout, 0, 1, &vk_descriptor_writes[0])); + } VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, resolve_pipeline_info.vk_pipeline)); VK_CALL(vkCmdSetViewport(list->cmd.vk_command_buffer, 0, 1, &viewport)); VK_CALL(vkCmdSetScissor(list->cmd.vk_command_buffer, 0, 1, &rendering_info.renderArea)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - resolve_pipeline_info.vk_pipeline_layout, 0, 1, &vk_descriptor_writes[0])); if (resolve_pipeline_info.needs_stencil_mask) { @@ -11595,15 +11400,17 @@ static void d3d12_command_list_execute_resolve(struct d3d12_command_list *list, { resolve_args.bit_mask = 1u << j; VK_CALL(vkCmdSetStencilWriteMask(list->cmd.vk_command_buffer, VK_STENCIL_FACE_FRONT_AND_BACK, resolve_args.bit_mask)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, resolve_pipeline_info.vk_pipeline_layout, - VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(resolve_args), &resolve_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + resolve_pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(resolve_args), &resolve_args); VK_CALL(vkCmdDraw(list->cmd.vk_command_buffer, 3, region->dstSubresource.layerCount, 0, 0)); } } else { - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, resolve_pipeline_info.vk_pipeline_layout, - VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(resolve_args), &resolve_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + resolve_pipeline_info.vk_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(resolve_args), &resolve_args); VK_CALL(vkCmdDraw(list->cmd.vk_command_buffer, 3, region->dstSubresource.layerCount, 0, 0)); } } @@ -11906,7 +11713,6 @@ static void 
STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_ { struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(pipeline_state); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); - struct vkd3d_pipeline_bindings *bindings; unsigned int i; TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); @@ -12057,22 +11863,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_ if (!state || list->active_pipeline_type != state->pipeline_type) { if (state) - { - bindings = d3d12_command_list_get_bindings(list, state->pipeline_type); - if (bindings->root_signature) - { - /* We might have clobbered push constants in the new bind point, - * invalidate all state which can affect push constants. - * We might also change the pipeline layout, in case we switch between mesh and legacy graphics. - * In this scenario, the push constant layout will be incompatible due to stage - * differences, so everything must be rebound. */ - d3d12_command_list_invalidate_root_parameters(list, bindings, true, NULL); - } - - list->active_pipeline_type = state->pipeline_type; - } - else - list->active_pipeline_type = VKD3D_PIPELINE_TYPE_NONE; + d3d12_command_list_invalidate_root_parameters(list); + list->active_pipeline_type = state ? 
state->pipeline_type : VKD3D_PIPELINE_TYPE_NONE; } if (state->pipeline_type != VKD3D_PIPELINE_TYPE_COMPUTE) @@ -13121,99 +12913,51 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(d3d12_command_lis d3d12_bundle_execute(bundle, iface); } -static void vkd3d_pipeline_bindings_set_dirty_sets(struct vkd3d_pipeline_bindings *bindings, uint64_t dirty_mask) -{ - bindings->descriptor_heap_dirty_mask = dirty_mask; - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; -} - static void d3d12_command_list_set_descriptor_heaps_buffers(struct d3d12_command_list *list, unsigned int heap_count, ID3D12DescriptorHeap *const *heaps) { - struct vkd3d_bindless_state *bindless_state = &list->device->bindless_state; VkDeviceAddress current_resource_va, current_sampler_va; - struct d3d12_desc_split d; - unsigned int i, j; + unsigned int i; - current_resource_va = list->descriptor_heap.buffers.heap_va_resource; - current_sampler_va = list->descriptor_heap.buffers.heap_va_sampler; + current_resource_va = list->descriptor_heap.resource.va; + current_sampler_va = list->descriptor_heap.sampler.va; + + list->descriptor_heap.resource.heap = NULL; + list->descriptor_heap.sampler.heap = NULL; for (i = 0; i < heap_count; i++) { struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(heaps[i]); - unsigned int set_index = 0; if (!heap) continue; if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { - list->descriptor_heap.buffers.heap_va_resource = heap->descriptor_buffer.va; - list->descriptor_heap.buffers.vk_buffer_resource = heap->descriptor_buffer.vk_buffer; + list->descriptor_heap.resource.va = heap->descriptor_buffer.va; + list->descriptor_heap.resource.size = heap->descriptor_buffer.size; + list->descriptor_heap.resource.reserved_offset = heap->descriptor_buffer.reserved_offset; + list->descriptor_heap.resource.heap = heap; - if (!d3d12_device_use_embedded_mutable_descriptors(list->device)) - { - /* In case we need to hoist buffer 
descriptors. */ - d = d3d12_desc_decode_va(heap->cpu_va.ptr); - list->cbv_srv_uav_descriptors_view = d.view; - } + /* We might need to push new redzone inline data for example. */ + d3d12_command_list_invalidate_root_parameters(list); } else if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { - list->descriptor_heap.buffers.heap_va_sampler = heap->descriptor_buffer.va; + list->descriptor_heap.sampler.va = heap->descriptor_buffer.va; + list->descriptor_heap.sampler.size = heap->descriptor_buffer.size; + list->descriptor_heap.sampler.reserved_offset = heap->descriptor_buffer.reserved_offset; + list->descriptor_heap.sampler.heap = heap; } - - for (j = 0; j < bindless_state->set_count; j++) - if (bindless_state->set_info[j].heap_type == heap->desc.Type) - list->descriptor_heap.buffers.vk_offsets[j] = heap->descriptor_buffer.offsets[set_index++]; } - if (current_resource_va == list->descriptor_heap.buffers.heap_va_resource && - current_sampler_va == list->descriptor_heap.buffers.heap_va_sampler) + if (current_resource_va == list->descriptor_heap.resource.va && + current_sampler_va == list->descriptor_heap.sampler.va) return; - list->descriptor_heap.buffers.heap_dirty = true; - /* Invalidation is a bit more aggressive for descriptor buffers. - * We also need to invalidate any push descriptors. 
*/ - d3d12_command_list_invalidate_root_parameters(list, &list->graphics_bindings, true, NULL); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, NULL); -} - -static void d3d12_command_list_set_descriptor_heaps_sets(struct d3d12_command_list *list, - unsigned int heap_count, ID3D12DescriptorHeap *const *heaps) -{ - struct vkd3d_bindless_state *bindless_state = &list->device->bindless_state; - uint64_t dirty_mask = 0; - unsigned int i, j; - - for (i = 0; i < heap_count; i++) - { - struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(heaps[i]); - unsigned int set_index = 0; - - if (!heap) - continue; - - for (j = 0; j < bindless_state->set_count; j++) - { - if (bindless_state->set_info[j].heap_type != heap->desc.Type) - continue; - - list->descriptor_heap.sets.vk_sets[j] = heap->sets[set_index++].vk_descriptor_set; - dirty_mask |= 1ull << j; - } - - /* In case we need to hoist buffer descriptors. */ - if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - struct d3d12_desc_split d; - d = d3d12_desc_decode_va(heap->cpu_va.ptr); - list->cbv_srv_uav_descriptors_view = d.view; - } - } - - vkd3d_pipeline_bindings_set_dirty_sets(&list->graphics_bindings, dirty_mask); - vkd3d_pipeline_bindings_set_dirty_sets(&list->compute_bindings, dirty_mask); + /* Immediately bind the heap. This allows e.g. metacommands to more aggressively use the heap as intended. 
*/ + list->descriptor_heap.heap_dirty = true; + d3d12_command_list_update_descriptor_heaps(list); } static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_command_list_iface *iface, @@ -13223,10 +12967,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(d3d12_comman TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); - if (d3d12_device_uses_descriptor_buffers(list->device)) - d3d12_command_list_set_descriptor_heaps_buffers(list, heap_count, heaps); - else - d3d12_command_list_set_descriptor_heaps_sets(list, heap_count, heaps); + d3d12_command_list_set_descriptor_heaps_buffers(list, heap_count, heaps); } static void d3d12_command_list_set_root_signature(struct d3d12_command_list *list, @@ -13236,12 +12977,6 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis return; bindings->root_signature = root_signature; - bindings->static_sampler_set = VK_NULL_HANDLE; - - if (root_signature) - bindings->static_sampler_set = root_signature->vk_sampler_set; - - d3d12_command_list_invalidate_root_parameters(list, bindings, true, NULL); } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(d3d12_command_list_iface *iface, @@ -13254,6 +12989,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(d3d12_c d3d12_command_list_set_root_signature(list, &list->compute_bindings, impl_from_ID3D12RootSignature(root_signature)); + if (list->state && list->state->pipeline_type != VKD3D_PIPELINE_TYPE_GRAPHICS) + d3d12_command_list_invalidate_root_parameters(list); + /* Changing compute root signature means we might have to bind a different RTPSO variant. 
*/ if (list->rt_state) d3d12_command_list_invalidate_current_pipeline(list, false); @@ -13268,6 +13006,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(d3d12_ d3d12_command_list_set_root_signature(list, &list->graphics_bindings, impl_from_ID3D12RootSignature(root_signature)); + + if (list->state && list->state->pipeline_type == VKD3D_PIPELINE_TYPE_GRAPHICS) + d3d12_command_list_invalidate_root_parameters(list); } static inline void d3d12_command_list_set_descriptor_table_embedded(struct d3d12_command_list *list, @@ -13282,13 +13023,8 @@ static inline void d3d12_command_list_set_descriptor_table_embedded(struct d3d12 bindings->descriptor_tables[index] = d3d12_desc_heap_offset_from_embedded_gpu_handle( base_descriptor, cbv_srv_uav_size_log2, sampler_size_log2); - if (root_signature) - { - if (root_signature->descriptor_table_count) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; - if (root_signature->hoist_info.num_desc) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; - } + if (root_signature && root_signature->descriptor_table_count) + bindings->dirty_table_offsets = true; VKD3D_BREADCRUMB_AUX32(index); VKD3D_BREADCRUMB_AUX32(bindings->descriptor_tables[index]); @@ -13303,13 +13039,8 @@ static inline void d3d12_command_list_set_descriptor_table(struct d3d12_command_ assert(index < ARRAY_SIZE(bindings->descriptor_tables)); bindings->descriptor_tables[index] = d3d12_desc_heap_offset_from_gpu_handle(base_descriptor); - if (root_signature) - { - if (root_signature->descriptor_table_count) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; - if (root_signature->hoist_info.num_desc) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; - } + if (root_signature && root_signature->descriptor_table_count) + bindings->dirty_table_offsets = true; VKD3D_BREADCRUMB_AUX32(index); VKD3D_BREADCRUMB_AUX32(bindings->descriptor_tables[index]); @@ -13379,8 +13110,8 @@ 
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable_e d3d12_command_list_set_descriptor_table_embedded(list, &list->compute_bindings, root_parameter_index, base_descriptor, - list->device->bindless_state.descriptor_buffer_cbv_srv_uav_size_log2, - list->device->bindless_state.descriptor_buffer_sampler_size_log2); + list->device->bindless_state.descriptor_heap_cbv_srv_uav_size_log2, + list->device->bindless_state.descriptor_heap_sampler_size_log2); } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable_embedded_default( @@ -13394,8 +13125,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable_ d3d12_command_list_set_descriptor_table_embedded(list, &list->graphics_bindings, root_parameter_index, base_descriptor, - list->device->bindless_state.descriptor_buffer_cbv_srv_uav_size_log2, - list->device->bindless_state.descriptor_buffer_sampler_size_log2); + list->device->bindless_state.descriptor_heap_cbv_srv_uav_size_log2, + list->device->bindless_state.descriptor_heap_sampler_size_log2); } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable_default( @@ -13496,100 +13227,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(d root_parameter_index, dst_offset, constant_count, data); } -static void d3d12_command_list_set_push_descriptor_info(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) -{ - const struct d3d12_root_signature *root_signature = bindings->root_signature; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; - const struct vkd3d_shader_root_parameter *root_parameter; - struct vkd3d_root_descriptor_info *descriptor; - const struct vkd3d_unique_resource *resource; - VkBufferView vk_buffer_view; - VkDeviceSize max_range; - bool ssbo; - - ssbo = 
d3d12_device_use_ssbo_root_descriptors(list->device); - root_parameter = root_signature_get_root_descriptor(root_signature, index); - descriptor = &bindings->root_descriptors[index]; - - if (ssbo || root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) - { - descriptor->vk_descriptor_type = root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV - ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - - if (gpu_address) - { - max_range = descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER - ? vk_info->device_limits.maxUniformBufferRange - : vk_info->device_limits.maxStorageBufferRange; - - resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, gpu_address); - - if (resource) - { - descriptor->info.buffer.buffer = resource->vk_buffer; - descriptor->info.buffer.offset = gpu_address - resource->va; - descriptor->info.buffer.range = min(resource->size - descriptor->info.buffer.offset, max_range); - } - else - { - descriptor->info.buffer.buffer = VK_NULL_HANDLE; - descriptor->info.buffer.offset = 0; - descriptor->info.buffer.range = VK_WHOLE_SIZE; - } - } - else - { - descriptor->info.buffer.buffer = VK_NULL_HANDLE; - descriptor->info.buffer.offset = 0; - descriptor->info.buffer.range = VK_WHOLE_SIZE; - } - } - else - { - descriptor->vk_descriptor_type = root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV - ? 
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - - if (gpu_address) - { - if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) - { - ERR("Failed to create buffer view.\n"); - return; - } - - if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) - { - ERR("Failed to add buffer view.\n"); - VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL)); - return; - } - - descriptor->info.buffer_view = vk_buffer_view; - } - else - descriptor->info.buffer_view = VK_NULL_HANDLE; - } -} - -static void d3d12_command_list_set_root_descriptor_va(struct d3d12_command_list *list, - struct vkd3d_root_descriptor_info *descriptor, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) -{ - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_MAX_ENUM; - descriptor->info.va = gpu_address; -} - static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) { - struct vkd3d_root_descriptor_info *descriptor = &bindings->root_descriptors[index]; - - if (bindings->root_signature->root_descriptor_raw_va_mask & (1ull << index)) - d3d12_command_list_set_root_descriptor_va(list, descriptor, gpu_address); - else - d3d12_command_list_set_push_descriptor_info(list, bindings, index, gpu_address); - + bindings->root_descriptors_va[index] = gpu_address; bindings->root_descriptor_dirty_mask |= 1ull << index; bindings->root_descriptor_active_mask |= 1ull << index; @@ -14531,10 +14172,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(d3d12_com struct vkd3d_clear_uav_info { DXGI_FORMAT clear_dxgi_format; - bool has_view; + struct vkd3d_view *view; + uint32_t heap_index; + bool use_heap; + union { - struct vkd3d_view *view; + struct vkd3d_descriptor_metadata_image_view image; struct vkd3d_descriptor_metadata_buffer_view buffer; } u; }; @@ -14548,6 +14192,7 @@ static 
void d3d12_command_list_clear_uav(struct d3d12_command_list *list, struct vkd3d_clear_uav_pipeline pipeline; struct vkd3d_clear_uav_args clear_args; VkDescriptorBufferInfo buffer_info; + const struct vkd3d_format *format; VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; @@ -14560,8 +14205,6 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, d3d12_command_list_debug_mark_begin_region(list, "ClearUAV"); d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); - d3d12_command_list_update_descriptor_buffers(list); sampler_feedback_clear = d3d12_resource_desc_is_sampler_feedback(&resource->desc); @@ -14584,25 +14227,31 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, memset(&full_rect, 0, sizeof(full_rect)); + format = vkd3d_get_format(list->device, args->clear_dxgi_format, false); if (d3d12_resource_is_texture(resource)) { - assert(args->has_view); + VkImageSubresourceLayers vk_subresource; - image_info.sampler = VK_NULL_HANDLE; - image_info.imageView = args->u.view->vk_image_view; - image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + if (args->view) + { + assert(!args->use_heap); + image_info.sampler = VK_NULL_HANDLE; + image_info.imageView = args->view->vk_image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - write_set.pImageInfo = &image_info; + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write_set.pImageInfo = &image_info; + } - view_extent = d3d12_resource_get_view_subresource_extent(resource, args->u.view); + vk_subresource.aspectMask = format->vk_aspect_mask; + vk_subresource.mipLevel = args->u.image.mip_slice; + vk_subresource.baseArrayLayer = 0; + vk_subresource.layerCount = VK_REMAINING_ARRAY_LAYERS; + view_extent = 
d3d12_resource_desc_get_vk_subresource_extent(&resource->desc, format, &vk_subresource); full_rect.right = view_extent.width; full_rect.bottom = view_extent.height; - layer_count = args->u.view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D - ? view_extent.depth : args->u.view->info.texture.layer_count; - if (sampler_feedback_clear) { VkExtent3D padded = d3d12_resource_desc_get_padded_feedback_extent(&resource->desc); @@ -14610,50 +14259,61 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, full_rect.bottom = padded.height; } - /* Robustness would take care of it, but no reason to spam more threads than needed. */ - if (args->u.view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D) + if (resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D) { - layer_count = min(layer_count - args->u.view->info.texture.w_offset, args->u.view->info.texture.w_size); - if (layer_count >= 0x80000000u) - { - ERR("3D slice out of bounds.\n"); - layer_count = 0; - } + layer_count = min(resource->desc.DepthOrArraySize - args->u.image.first_array_slice, args->u.image.array_size); + } + else + { + layer_count = min(max(1, resource->desc.DepthOrArraySize >> args->u.image.mip_slice) - + args->u.image.first_array_slice, args->u.image.array_size); } pipeline = vkd3d_meta_get_clear_image_uav_pipeline( - &list->device->meta_ops, args->u.view->info.texture.vk_view_type, - args->u.view->format->type == VKD3D_FORMAT_TYPE_UINT); - workgroup_size = vkd3d_meta_get_clear_image_uav_workgroup_size(args->u.view->info.texture.vk_view_type); + &list->device->meta_ops, args->u.image.vk_dimension, + !format || format->type == VKD3D_FORMAT_TYPE_UINT, args->use_heap); + workgroup_size = vkd3d_meta_get_clear_image_uav_workgroup_size(args->u.image.vk_dimension); } else { full_rect.bottom = 1; - if (args->has_view) + if (args->view) { - VkDeviceSize byte_count = args->u.view->format->byte_count; - full_rect.right = args->u.view->info.buffer.size / byte_count; + /* Legacy 
path. */ + VkDeviceSize byte_count = args->view->format->byte_count; + assert(!args->use_heap); + full_rect.right = args->view->info.buffer.size / byte_count; write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - write_set.pTexelBufferView = &args->u.view->vk_buffer_view; + write_set.pTexelBufferView = &args->view->vk_buffer_view; + } + else if (format) + { + VkDeviceSize byte_count = format->byte_count; + assert(args->use_heap); + full_rect.right = args->u.buffer.range / byte_count; } else { + /* Plain SSBO clear. StructuredBuffers are not allowed, but RAW is (since it has R32_TYPELESS). */ full_rect.right = args->u.buffer.range / sizeof(uint32_t); - write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - write_set.pBufferInfo = &buffer_info; - /* resource heap offset is already in descriptor */ - buffer_info.buffer = resource->res.vk_buffer; - buffer_info.offset = resource->mem.offset + (args->u.buffer.va - resource->res.va); - buffer_info.range = args->u.buffer.range; + if (!args->use_heap) + { + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_set.pBufferInfo = &buffer_info; + /* resource heap offset is already in descriptor */ + buffer_info.buffer = resource->res.vk_buffer; + buffer_info.offset = resource->mem.offset + (args->u.buffer.va - resource->res.va); + buffer_info.range = args->u.buffer.range; + } } layer_count = 1; pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(&list->device->meta_ops, - !args->has_view || args->u.view->format->type == VKD3D_FORMAT_TYPE_UINT, - !args->has_view); + !format || format->type == VKD3D_FORMAT_TYPE_UINT, + format == NULL, args->use_heap); workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size(); } @@ -14662,8 +14322,18 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, 
VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline.vk_pipeline_layout, 0, 1, &write_set)); + + if (pipeline.vk_pipeline_layout) + { + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline.vk_pipeline_layout, 0, 1, &write_set)); + d3d12_command_list_invalidate_root_parameters(list); + d3d12_command_list_invalidate_descriptor_heap(list); + } + else + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, args->heap_index); + } for (i = 0; i < rect_count || !i; i++) { @@ -14693,9 +14363,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, clear_args.extent.width = curr_rect.right - clear_args.offset.x; clear_args.extent.height = curr_rect.bottom - clear_args.offset.y; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(clear_args), &clear_args)); + sizeof(clear_args), &clear_args); VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, min(workgroup_count.width - j, max_workgroup_count), @@ -14724,6 +14394,7 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int base_layer, layer_count, i, j; struct vkd3d_clear_uav_pipeline pipeline; + VkImageSubresourceLayers vk_subresource; struct vkd3d_scratch_allocation scratch; struct vkd3d_clear_uav_args clear_args; VkCopyBufferToImageInfo2 copy_info; @@ -14737,19 +14408,22 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li VkMemoryBarrier2 barrier; VkExtent3D view_extent; uint32_t element_count; + uint32_t heap_index; d3d12_command_list_track_resource_usage(list, resource, true); d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_debug_mark_begin_region(list, "ClearUAVWithCopy"); 
d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); - d3d12_command_list_update_descriptor_buffers(list); - assert(args->has_view); + assert(!args->view && !args->use_heap); assert(d3d12_resource_is_texture(resource)); - view_extent = d3d12_resource_get_view_subresource_extent(resource, args->u.view); + vk_subresource.aspectMask = format->vk_aspect_mask; + vk_subresource.mipLevel = args->u.image.mip_slice; + vk_subresource.baseArrayLayer = 0; + vk_subresource.layerCount = VK_REMAINING_ARRAY_LAYERS; + view_extent = d3d12_resource_desc_get_vk_subresource_extent(&resource->desc, format, &vk_subresource); full_rect.left = 0; full_rect.right = view_extent.width; @@ -14786,32 +14460,52 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li return; } - pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(&list->device->meta_ops, true, false); - workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size(); - - if (!vkd3d_create_vk_buffer_view(list->device, scratch.buffer, format, scratch.offset, scratch_buffer_size, &vk_buffer_view)) + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) { - ERR("Failed to create buffer view for UAV clear.\n"); - return; + heap_index = d3d12_command_allocator_allocate_meta_buffer_view(list->allocator, + list->descriptor_heap.resource.heap, + scratch.va, scratch_buffer_size, format->vk_format); } + else + { + heap_index = UINT32_MAX; + } + + pipeline = vkd3d_meta_get_clear_buffer_uav_pipeline(&list->device->meta_ops, true, false, heap_index != UINT32_MAX); + workgroup_size = vkd3d_meta_get_clear_buffer_uav_workgroup_size(); - if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + if (heap_index != UINT32_MAX) { - ERR("Failed to add buffer view.\n"); - VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL)); - return; + 
d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, heap_index); } + else + { + if (!vkd3d_create_vk_buffer_view(list->device, scratch.buffer, format, scratch.offset, scratch_buffer_size, + &vk_buffer_view)) + { + ERR("Failed to create buffer view for UAV clear.\n"); + return; + } - memset(&write_set, 0, sizeof(write_set)); - write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write_set.descriptorCount = 1; - write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - write_set.pTexelBufferView = &vk_buffer_view; + if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + { + ERR("Failed to add buffer view.\n"); + VK_CALL(vkDestroyBufferView(list->device->vk_device, vk_buffer_view, NULL)); + return; + } + + memset(&write_set, 0, sizeof(write_set)); + write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_set.descriptorCount = 1; + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + write_set.pTexelBufferView = &vk_buffer_view; + + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline.vk_pipeline_layout, 0, 1, &write_set)); + } VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline.vk_pipeline_layout, 0, 1, &write_set)); clear_args.clear_color = *clear_value; clear_args.offset.x = 0; @@ -14819,9 +14513,9 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li clear_args.extent.width = element_count; clear_args.extent.height = 1; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(clear_args), &clear_args)); + sizeof(clear_args), &clear_args); 
VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, vkd3d_compute_workgroup_count(element_count, workgroup_size.width), 1, 1)); @@ -14849,29 +14543,25 @@ static void d3d12_command_list_clear_uav_with_copy(struct d3d12_command_list *li copy_region.bufferRowLength = 0; copy_region.bufferImageHeight = 0; - copy_region.imageSubresource = vk_subresource_layers_from_view(args->u.view); + base_layer = args->u.image.first_array_slice; + + copy_region.imageSubresource.aspectMask = format->vk_aspect_mask; + copy_region.imageSubresource.mipLevel = args->u.image.mip_slice; + copy_region.imageSubresource.baseArrayLayer = base_layer; + copy_region.imageOffset.z = 0; + copy_region.imageExtent.depth = 1; + copy_region.imageSubresource.layerCount = 1; - if (args->u.view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D) + if (resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D) { - base_layer = args->u.view->info.texture.w_offset; - layer_count = view_extent.depth; - layer_count = min(layer_count - args->u.view->info.texture.w_offset, args->u.view->info.texture.w_size); - if (layer_count >= 0x80000000u) - { - ERR("3D slice out of bounds.\n"); - layer_count = 0; - } + layer_count = min(resource->desc.DepthOrArraySize - args->u.image.first_array_slice, args->u.image.array_size); } else { - copy_region.imageOffset.z = 0; - base_layer = copy_region.imageSubresource.baseArrayLayer; - layer_count = copy_region.imageSubresource.layerCount; + layer_count = min(max(1, resource->desc.DepthOrArraySize >> args->u.image.mip_slice) - + args->u.image.first_array_slice, args->u.image.array_size); } - copy_region.imageExtent.depth = 1; - copy_region.imageSubresource.layerCount = 1; - copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2; copy_info.pNext = NULL; copy_info.srcBuffer = scratch.buffer; @@ -14982,20 +14672,22 @@ static const struct vkd3d_format *vkd3d_clear_uav_find_uint_format(struct d3d12_ } static inline bool vkd3d_clear_uav_info_from_metadata(struct 
vkd3d_clear_uav_info *args, - struct d3d12_desc_split_metadata metadata) + const struct vkd3d_descriptor_metadata_view *metadata) { - if (metadata.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW) + args->view = NULL; + args->use_heap = false; + args->heap_index = UINT32_MAX; + + if (metadata->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW) { - args->has_view = true; - args->u.view = metadata.view->info.image.view; - args->clear_dxgi_format = metadata.view->info.image.view->format->dxgi_format; + args->u.image = metadata->info.image; + args->clear_dxgi_format = metadata->info.image.dxgi_format; return true; } - else if (metadata.view->info.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE) + else if (metadata->info.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE) { - args->u.buffer = metadata.view->info.buffer; - args->has_view = false; - args->clear_dxgi_format = metadata.view->info.buffer.dxgi_format; + args->u.buffer = metadata->info.buffer; + args->clear_dxgi_format = metadata->info.buffer.dxgi_format; return true; } else @@ -15086,12 +14778,43 @@ static bool vkd3d_clear_uav_synthesize_buffer_view(struct d3d12_command_list *li return true; } +static void vkd3d_texture_view_desc_convert_from_metadata( + struct d3d12_resource *resource, + const struct vkd3d_format *format, + struct vkd3d_texture_view_desc *desc, + const struct vkd3d_descriptor_metadata_image_view *meta) +{ + memset(desc, 0, sizeof(*desc)); + + desc->image = resource->res.vk_image; + desc->format = format; + desc->aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, meta->plane_slice); + desc->image_usage = VK_IMAGE_USAGE_STORAGE_BIT; + desc->miplevel_count = 1; + desc->layer_count = 1; + desc->w_size = 1; + desc->allowed_swizzle = false; + desc->view_type = meta->vk_dimension; + desc->miplevel_idx = meta->mip_slice; + + if (meta->vk_dimension == VK_IMAGE_VIEW_TYPE_3D) + { + desc->w_offset = meta->first_array_slice; + desc->w_size = meta->array_size; + } + else + { + desc->layer_idx = 
meta->first_array_slice; + desc->layer_count = meta->array_size; + } +} + static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3d12_command_list_iface *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); - struct d3d12_desc_split_metadata metadata; + struct vkd3d_descriptor_metadata_view *metadata; const struct vkd3d_format *clear_format; const struct vkd3d_format *uint_format; struct vkd3d_view *inline_view = NULL; @@ -15108,20 +14831,28 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3 memcpy(color.uint32, values, sizeof(color.uint32)); - metadata = d3d12_desc_decode_metadata(list->device, cpu_handle.ptr); + metadata = d3d12_desc_decode_embedded_resource_va( + cpu_handle.ptr, list->device->bindless_state.descriptor_heap_packed_metadata_offset).metadata; resource_impl = impl_from_ID3D12Resource(resource); - if (!resource_impl || !metadata.view) + if (!list->descriptor_heap.resource.heap) + WARN("No resource descriptor heap is bound. Forcing fallback.\n"); + + if (!resource_impl || !metadata) return; if (!vkd3d_clear_uav_info_from_metadata(&args, metadata)) return; - if (d3d12_resource_is_texture(resource_impl) && !args.has_view) + if (d3d12_resource_is_texture(resource_impl) && !(args.u.image.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW)) + { + WARN("Image clear mismatch.\n"); + return; + } + + if (d3d12_resource_is_buffer(resource_impl) && !(args.u.buffer.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE)) { - /* Theoretically possibly for buggy application that tries to clear a buffer view with a texture resource. - * Safeguard against crash. 
*/ - WARN("Attempted to clear buffer with image resource.\n"); + WARN("Buffer clear mismatch.\n"); return; } @@ -15132,28 +14863,49 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3 /* Handle formatted buffer clears. * Always defer creating the VkBufferView until this time. */ - if (!args.has_view && args.clear_dxgi_format) + if (d3d12_resource_is_buffer(resource_impl)) { - uint_format = vkd3d_clear_uav_find_uint_format(list->device, args.clear_dxgi_format); - if (!uint_format) + if (args.clear_dxgi_format) { - ERR("Unhandled format %d.\n", clear_format->dxgi_format); - return; - } + uint_format = vkd3d_clear_uav_find_uint_format(list->device, args.clear_dxgi_format); + if (!uint_format) + { + ERR("Unhandled format %d.\n", clear_format->dxgi_format); + return; + } - color = vkd3d_fixup_clear_uav_uint_color(list->device, clear_format->dxgi_format, color); - color = vkd3d_fixup_clear_uav_swizzle(list->device, clear_format, color); - vkd3d_mask_uint_clear_color(color.uint32, uint_format->vk_format); + color = vkd3d_fixup_clear_uav_uint_color(list->device, clear_format->dxgi_format, color); + color = vkd3d_fixup_clear_uav_swizzle(list->device, clear_format, color); + vkd3d_mask_uint_clear_color(color.uint32, uint_format->vk_format); + } + else + { + /* Structured buffer UAV clears are not allowed, so if it's raw, it's BAB, which is R32_UINT centric. */ + uint_format = vkd3d_get_format(list->device, DXGI_FORMAT_R32_UINT, false); + } - if (!vkd3d_clear_uav_synthesize_buffer_view(list, resource_impl, &args, uint_format, &inline_view)) - return; + /* It's not legal D3D12 for apps to hit this, but native drivers always seem to just use the CPU descriptor + * and push the descriptor somehow. Since we have to anticipate broken games in the wild, we cannot trust + * the bound GPU heap. 
*/ + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + args.heap_index = d3d12_command_allocator_allocate_meta_buffer_view( + list->allocator, list->descriptor_heap.resource.heap, + args.u.buffer.va, args.u.buffer.range, uint_format->vk_format); + args.use_heap = args.heap_index != UINT32_MAX; + } + args.clear_dxgi_format = uint_format->dxgi_format; - args.u.view = inline_view; - args.has_view = true; + /* We cannot allocate space in the heap, have to fallback to legacy :'( */ + if (!args.use_heap) + { + if (!vkd3d_clear_uav_synthesize_buffer_view(list, resource_impl, &args, uint_format, &inline_view)) + return; + args.view = inline_view; + } } else if (d3d12_resource_is_texture(resource_impl) && clear_format->type != VKD3D_FORMAT_TYPE_UINT) { - const struct vkd3d_view *base_view = metadata.view->info.image.view; uint_format = vkd3d_clear_uav_find_uint_format(list->device, clear_format->dxgi_format); color = vkd3d_fixup_clear_uav_uint_color(list->device, clear_format->dxgi_format, color); color = vkd3d_fixup_clear_uav_swizzle(list->device, clear_format, color); @@ -15166,49 +14918,55 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(d3 vkd3d_mask_uint_clear_color(color.uint32, uint_format->vk_format); - if (d3d12_resource_view_format_is_compatible(resource_impl, uint_format)) - { - struct vkd3d_texture_view_desc view_desc; - memset(&view_desc, 0, sizeof(view_desc)); - - view_desc.image = resource_impl->res.vk_image; - view_desc.view_type = base_view->info.texture.vk_view_type; - view_desc.format = uint_format; - view_desc.miplevel_idx = base_view->info.texture.miplevel_idx; - view_desc.miplevel_count = 1; - view_desc.layer_idx = base_view->info.texture.layer_idx; - view_desc.layer_count = base_view->info.texture.layer_count; - view_desc.w_offset = base_view->info.texture.w_offset; - view_desc.w_size = base_view->info.texture.w_size; - view_desc.aspect_mask = base_view->info.texture.aspect_mask; - 
view_desc.image_usage = VK_IMAGE_USAGE_STORAGE_BIT; - view_desc.allowed_swizzle = false; - - if (!vkd3d_create_texture_view(list->device, &view_desc, &args.u.view)) - { - ERR("Failed to create image view.\n"); - return; - } + /* If the clear color is 0, we can safely use the existing view to perform the + * clear since the bit pattern will not change. Otherwise, fill a scratch buffer + * with the packed clear value and perform a buffer to image copy. + * We have no SINT clear variant, and SINT is always compatible with UINT due to flexible casting rules in D3D12. */ + if (clear_format->type != VKD3D_FORMAT_TYPE_SINT && + !color.uint32[0] && !color.uint32[1] && !color.uint32[2] && !color.uint32[3]) + uint_format = clear_format; + + if (!d3d12_resource_view_format_is_compatible(resource_impl, uint_format)) + { + d3d12_command_list_clear_uav_with_copy(list, resource_impl, + &args, &color, uint_format, rect_count, rects); + return; + } + + args.clear_dxgi_format = uint_format->dxgi_format; + clear_format = uint_format; + } + else if (d3d12_resource_is_texture(resource_impl)) + { + vkd3d_mask_uint_clear_color(color.uint32, clear_format->vk_format); + } + + if (d3d12_resource_is_texture(resource_impl)) + { + struct vkd3d_texture_view_desc view_desc; + vkd3d_texture_view_desc_convert_from_metadata(resource_impl, clear_format, &view_desc, &args.u.image); - inline_view = args.u.view; + /* It's not legal D3D12 for apps to hit this, but native drivers always seem to just use the CPU descriptor + * and push the descriptor somehow. Since we have to anticipate broken games in the wild, we cannot trust + * the bound GPU heap. 
*/ + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + args.heap_index = d3d12_command_allocator_allocate_meta_image_view( + list->allocator, list->descriptor_heap.resource.heap, &view_desc, VK_IMAGE_LAYOUT_GENERAL); + args.use_heap = args.heap_index != UINT32_MAX; } - else + + if (!args.use_heap) { - /* If the clear color is 0, we can safely use the existing view to perform the - * clear since the bit pattern will not change. Otherwise, fill a scratch buffer - * with the packed clear value and perform a buffer to image copy. */ - if (color.uint32[0] || color.uint32[1] || color.uint32[2] || color.uint32[3]) + if (!vkd3d_create_texture_view(list->device, &view_desc, &args.view)) { - d3d12_command_list_clear_uav_with_copy(list, resource_impl, - &args, &color, uint_format, rect_count, rects); + ERR("Failed to create image view.\n"); return; } + + inline_view = args.view; } } - else if (args.has_view) - { - vkd3d_mask_uint_clear_color(color.uint32, clear_format->vk_format); - } d3d12_command_list_clear_uav(list, resource_impl, &args, &color, rect_count, rects); @@ -15224,7 +14982,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(d const float values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); - struct d3d12_desc_split_metadata metadata; + struct vkd3d_descriptor_metadata_view *metadata; const struct vkd3d_format *clear_format; struct vkd3d_view *inline_view = NULL; struct d3d12_resource *resource_impl; @@ -15238,20 +14996,28 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(d list->cmd.estimated_cost += VKD3D_COMMAND_COST_LOW; - metadata = d3d12_desc_decode_metadata(list->device, cpu_handle.ptr); + metadata = d3d12_desc_decode_embedded_resource_va( + cpu_handle.ptr, list->device->bindless_state.descriptor_heap_packed_metadata_offset).metadata; resource_impl = 
impl_from_ID3D12Resource(resource); - if (!resource_impl || !metadata.view) + if (!resource_impl || !metadata) return; if (!vkd3d_clear_uav_info_from_metadata(&args, metadata)) return; - if (d3d12_resource_is_texture(resource_impl) && !args.has_view) + if (!list->descriptor_heap.resource.heap) + WARN("No resource descriptor heap is bound. Using fallback.\n"); + + if (d3d12_resource_is_texture(resource_impl) && !(args.u.image.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW)) + { + WARN("Image clear mismatch.\n"); + return; + } + + if (d3d12_resource_is_buffer(resource_impl) && !(args.u.buffer.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE)) { - /* Theoretically possibly for buggy application that tries to clear a buffer view with a texture resource. - * Safeguard against crash. */ - WARN("Attempted to clear buffer with image resource.\n"); + WARN("Buffer clear mismatch.\n"); return; } @@ -15263,16 +15029,56 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(d } else { - memset(&color, 0, sizeof(color)); + WARN("Attempting to clear float UAV without format.\n"); + return; } - if (!args.has_view && args.clear_dxgi_format) + if (d3d12_resource_is_texture(resource_impl)) { - if (!vkd3d_clear_uav_synthesize_buffer_view(list, resource_impl, &args, NULL, &inline_view)) - return; + struct vkd3d_texture_view_desc view_desc; + vkd3d_texture_view_desc_convert_from_metadata(resource_impl, clear_format, &view_desc, &args.u.image); + + /* It's not legal D3D12 for apps to hit this, but native drivers always seem to just use the CPU descriptor + * and push the descriptor somehow. Since we have to anticipate broken games in the wild, we cannot trust + * the bound GPU heap. 
*/ + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + args.heap_index = d3d12_command_allocator_allocate_meta_image_view( + list->allocator, list->descriptor_heap.resource.heap, &view_desc, VK_IMAGE_LAYOUT_GENERAL); + args.use_heap = args.heap_index != UINT32_MAX; + } + + if (!args.use_heap) + { + if (!vkd3d_create_texture_view(list->device, &view_desc, &args.view)) + { + ERR("Failed to create image view.\n"); + return; + } + + inline_view = args.view; + } + } + else + { + /* It's not legal D3D12 for apps to hit this, but native drivers always seem to just use the CPU descriptor + * and push the descriptor somehow. Since we have to anticipate broken games in the wild, we cannot trust + * the bound GPU heap. */ + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + args.heap_index = d3d12_command_allocator_allocate_meta_buffer_view( + list->allocator, list->descriptor_heap.resource.heap, + args.u.buffer.va, args.u.buffer.range, clear_format->vk_format); + args.use_heap = args.heap_index != UINT32_MAX; + } - args.u.view = inline_view; - args.has_view = true; + /* We cannot allocate space in the heap, have to fallback to legacy :'( */ + if (!args.use_heap) + { + if (!vkd3d_clear_uav_synthesize_buffer_view(list, resource_impl, &args, clear_format, &inline_view)) + return; + args.view = inline_view; + } } d3d12_command_list_clear_uav(list, resource_impl, &args, &color, rect_count, rects); @@ -15751,7 +15557,6 @@ static void d3d12_command_list_resolve_binary_occlusion_queries(struct d3d12_com VkDependencyInfo dep_info; d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); memset(&dep_info, 0, sizeof(dep_info)); dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; @@ -15781,9 +15586,9 @@ static void d3d12_command_list_resolve_binary_occlusion_queries(struct d3d12_com args.src_va = src_va; 
args.query_count = count; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - query_ops->vk_resolve_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + query_ops->vk_resolve_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(args), &args); workgroup_count = vkd3d_compute_workgroup_count(count, VKD3D_QUERY_OP_WORKGROUP_SIZE); VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, workgroup_count, 1, 1)); @@ -15910,8 +15715,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li /* If we cannot hoist the predication work. */ d3d12_command_list_end_current_render_pass(list, true); d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, - &list->graphics_bindings); } resolve_args.src_va = resource->res.va + aligned_buffer_offset; @@ -15920,8 +15723,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(d3d12_command_li VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, predicate_ops->vk_resolve_pipeline)); - VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, predicate_ops->vk_resolve_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(resolve_args), &resolve_args)); + + d3d12_command_list_meta_push_data(list, vk_patch_cmd_buffer, + predicate_ops->vk_resolve_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(resolve_args), &resolve_args); + VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, 1, 1, 1)); memset(&vk_barrier, 0, sizeof(vk_barrier)); @@ -16163,107 +15969,6 @@ static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list_ext( uint32_t max_command_count, struct vkd3d_scratch_allocation *allocation, VkDeviceSize *size); -static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list_nv( - struct d3d12_command_list *list, - struct d3d12_command_signature *signature, VkPipeline 
render_pipeline, - bool explicit_preprocess, - uint32_t max_command_count, - struct vkd3d_scratch_allocation *allocation, VkDeviceSize *size); - -static void d3d12_command_list_execute_indirect_state_template_compute( - struct d3d12_command_list *list, struct d3d12_command_signature *signature, - uint32_t max_command_count, - struct d3d12_resource *arg_buffer, UINT64 arg_buffer_offset, - struct d3d12_resource *count_buffer, UINT64 count_buffer_offset) -{ - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - VkDeviceAddress arg_va = arg_buffer->res.va + arg_buffer_offset; - struct vkd3d_scratch_allocation dispatch_scratch, ubo_scratch; - VkDeviceAddress count_va = 0; - VkWriteDescriptorSet write; - VkDescriptorBufferInfo buf; - VkPipelineLayout vk_layout; - uint32_t write_set; - unsigned int i; - - d3d12_command_list_end_current_render_pass(list, false); - d3d12_command_list_end_transfer_batch(list); - - /* If this command breaks suspend, need to refresh it now. */ - d3d12_command_list_update_conditional_rendering_state(list, false); - - if (count_buffer) - count_va = count_buffer->res.va + count_buffer_offset; - - if (!d3d12_command_list_emit_multi_dispatch_indirect_count_state(list, - signature, - arg_va, signature->desc.ByteStride, max_command_count, - count_va, &dispatch_scratch, &ubo_scratch)) - return; - - if (!d3d12_command_list_update_compute_state(list)) - { - WARN("Failed to update compute state, ignoring dispatch.\n"); - return; - } - - vk_write_descriptor_set_from_scratch_push_ubo(&write, &buf, &ubo_scratch, - D3D12_MAX_ROOT_COST * sizeof(uint32_t), - list->compute_bindings.root_signature->push_constant_ubo_binding.binding); - - vk_layout = list->compute_bindings.root_signature->compute.vk_pipeline_layout; - write_set = list->compute_bindings.root_signature->push_constant_ubo_binding.set; - - /* Run indirect dispatches back to back with one push UBO per dispatch which lets us - * update root parameters per command. 
*/ - for (i = 0; i < max_command_count; i++) - { - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - vk_layout, write_set, 1, &write)); - VK_CALL(vkCmdDispatchIndirect(list->cmd.vk_command_buffer, dispatch_scratch.buffer, dispatch_scratch.offset)); - - VKD3D_BREADCRUMB_AUX32(i); - VKD3D_BREADCRUMB_COMMAND(EXECUTE_INDIRECT_UNROLL_COMPUTE); - - dispatch_scratch.offset += sizeof(VkDispatchIndirectCommand); - buf.offset += D3D12_MAX_ROOT_COST * sizeof(uint32_t); - } - - /* Need to clear state to zero if it was part of a command signature. */ - for (i = 0; i < signature->desc.NumArgumentDescs; i++) - { - const D3D12_INDIRECT_ARGUMENT_DESC *arg = &signature->desc.pArgumentDescs[i]; - switch (arg->Type) - { - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: - { - uint32_t index = arg->ConstantBufferView.RootParameterIndex; - d3d12_command_list_set_root_descriptor(list, - &list->compute_bindings, index, 0); - break; - } - - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: - { - uint32_t zeroes[D3D12_MAX_ROOT_COST]; - memset(zeroes, 0, sizeof(uint32_t) * arg->Constant.Num32BitValuesToSet); - d3d12_command_list_set_root_constants(list, - &list->compute_bindings, arg->Constant.RootParameterIndex, - arg->Constant.DestOffsetIn32BitValues, - arg->Constant.Num32BitValuesToSet, zeroes); - break; - } - - default: - break; - } - } - - /* No need to implicitly invalidate anything here, since we used the normal APIs. 
*/ -} - static void d3d12_command_list_execute_indirect_state_template_dgc( struct d3d12_command_list *list, struct d3d12_command_signature *signature, uint32_t max_command_count, @@ -16275,17 +15980,13 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( struct vkd3d_scratch_allocation predication_allocation; struct vkd3d_scratch_allocation preprocess_allocation; struct vkd3d_scratch_allocation stream_allocation; - uint32_t minIndirectCommandsBufferOffsetAlignment; struct vkd3d_scratch_allocation count_allocation; VkGeneratedCommandsPipelineInfoEXT pipeline_info; - uint32_t minSequencesCountBufferOffsetAlignment; - bool old_predication_enabled_on_command_buffer; + bool old_predication_enabled_on_command_buffer; struct vkd3d_execute_indirect_args patch_args; struct vkd3d_pipeline_bindings *bindings; VkGeneratedCommandsInfoEXT generated_ext; - VkGeneratedCommandsInfoNV generated_nv; VkCommandBuffer vk_patch_cmd_buffer; - VkIndirectCommandsStreamNV stream; bool require_custom_predication; VkDeviceSize preprocess_size; VkPipeline current_pipeline; @@ -16295,12 +15996,10 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( bool explicit_preprocess; bool require_ibo_update; bool require_patch; - bool use_ext_dgc; const char *tag; unsigned int i; HRESULT hr; - use_ext_dgc = list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands; require_custom_predication = false; restart_predication = false; explicit_preprocess = false; @@ -16403,9 +16102,7 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( /* If we have custom predication, we would need to introduce a barrier to synchronize with the * new indirect count, which is not desirable. 
*/ - if (!require_custom_predication && - (signature->state_template.dgc.layout_preprocess_nv || - signature->state_template.dgc.layout_preprocess_ext)) + if (!require_custom_predication && signature->state_template.dgc.layout_preprocess_ext) { /* If driver can take advantage of preprocess, we can consider preprocessing explicitly if we can hoist it. * If we had indirect barriers earlier in the frame, now might be a good time to split. */ @@ -16454,31 +16151,8 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( } } - /* - Stride can mismatch, i.e. we need internal alignment of arguments. - * - Min required alignment on the indirect buffer itself might be too strict. - * - Min required alignment on count buffer might be too strict. - * - We require debugging. */ - if (list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) - { - /* This is implied by specification. */ - minIndirectCommandsBufferOffsetAlignment = 4; - minSequencesCountBufferOffsetAlignment = 4; - } - else - { - const VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *props = - &list->device->device_info.device_generated_commands_properties_nv; - minIndirectCommandsBufferOffsetAlignment = props->minIndirectCommandsBufferOffsetAlignment; - minSequencesCountBufferOffsetAlignment = props->minSequencesCountBufferOffsetAlignment; - } - - if ((signature->state_template.dgc.stride != signature->desc.ByteStride && max_command_count > 1) || - (arg_buffer_offset & (minIndirectCommandsBufferOffsetAlignment - 1)) || - (count_buffer && (count_buffer_offset & (minSequencesCountBufferOffsetAlignment - 1))) || - patch_args.debug_tag) - { + if (patch_args.debug_tag) require_patch = true; - } if (require_patch) { @@ -16498,8 +16172,7 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( if (FAILED(hr = d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, sizeof(uint32_t), - 
minSequencesCountBufferOffsetAlignment, - ~0u, &count_allocation))) + 4, ~0u, &count_allocation))) { WARN("Failed to allocate count memory.\n"); return; @@ -16533,8 +16206,10 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( else list->cmd.indirect_meta->need_compute_to_indirect_barrier = true; - VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, signature->state_template.dgc.pipeline.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(patch_args), &patch_args)); + d3d12_command_list_meta_push_data(list, vk_patch_cmd_buffer, + signature->state_template.dgc.pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(patch_args), &patch_args); + VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, signature->state_template.dgc.pipeline.vk_pipeline)); @@ -16584,118 +16259,55 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( return; } - if (use_ext_dgc) + if (FAILED(hr = d3d12_command_signature_allocate_preprocess_memory_for_list_ext( + list, signature, current_pipeline, explicit_preprocess, + max_command_count, &preprocess_allocation, &preprocess_size))) { - if (FAILED(hr = d3d12_command_signature_allocate_preprocess_memory_for_list_ext( - list, signature, current_pipeline, explicit_preprocess, - max_command_count, &preprocess_allocation, &preprocess_size))) - { - WARN("Failed to allocate preprocess memory.\n"); - return; - } + WARN("Failed to allocate preprocess memory.\n"); + return; + } - pipeline_info.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_PIPELINE_INFO_EXT; - pipeline_info.pNext = NULL; - pipeline_info.pipeline = list->current_pipeline; + pipeline_info.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_PIPELINE_INFO_EXT; + pipeline_info.pNext = NULL; + pipeline_info.pipeline = list->current_pipeline; - memset(&generated_ext, 0, sizeof(generated_ext)); - generated_ext.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_EXT; - generated_ext.pNext = &pipeline_info; + 
memset(&generated_ext, 0, sizeof(generated_ext)); + generated_ext.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_EXT; + generated_ext.pNext = &pipeline_info; - if (signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) - generated_ext.shaderStages = VK_SHADER_STAGE_COMPUTE_BIT; - else - generated_ext.shaderStages = list->state->graphics.stage_flags; + if (signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) + generated_ext.shaderStages = VK_SHADER_STAGE_COMPUTE_BIT; + else + generated_ext.shaderStages = list->state->graphics.stage_flags; - generated_ext.indirectCommandsLayout = explicit_preprocess ? - signature->state_template.dgc.layout_preprocess_ext : - signature->state_template.dgc.layout_implicit_ext; - generated_ext.preprocessAddress = preprocess_allocation.va; - generated_ext.preprocessSize = preprocess_size; - generated_ext.maxSequenceCount = max_command_count; + generated_ext.indirectCommandsLayout = explicit_preprocess ? + signature->state_template.dgc.layout_preprocess_ext : + signature->state_template.dgc.layout_implicit_ext; + generated_ext.preprocessAddress = preprocess_allocation.va; + generated_ext.preprocessSize = preprocess_size; + generated_ext.maxSequenceCount = max_command_count; - if (require_custom_predication) - { - generated_ext.sequenceCountAddress = predication_allocation.va; - } - else if (count_buffer) - { - if (require_patch) - generated_ext.sequenceCountAddress = count_allocation.va; - else - generated_ext.sequenceCountAddress = count_buffer->res.va + count_buffer_offset; - } + if (require_custom_predication) + { + generated_ext.sequenceCountAddress = predication_allocation.va; } - else + else if (count_buffer) { - if (FAILED(hr = d3d12_command_signature_allocate_preprocess_memory_for_list_nv( - list, signature, current_pipeline, explicit_preprocess, - max_command_count, &preprocess_allocation, &preprocess_size))) - { - WARN("Failed to allocate preprocess memory.\n"); - return; - } - - memset(&generated_nv, 0, 
sizeof(generated_nv)); - generated_nv.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_NV; - generated_nv.pipeline = list->current_pipeline; - generated_nv.pipelineBindPoint = signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE ? - VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; - generated_nv.indirectCommandsLayout = explicit_preprocess ? - signature->state_template.dgc.layout_preprocess_nv : - signature->state_template.dgc.layout_implicit_nv; - generated_nv.streamCount = 1; - generated_nv.pStreams = &stream; - generated_nv.preprocessBuffer = preprocess_allocation.buffer; - generated_nv.preprocessOffset = preprocess_allocation.offset; - generated_nv.preprocessSize = preprocess_size; - generated_nv.sequencesCount = max_command_count; - - if (require_custom_predication) - { - generated_nv.sequencesCountBuffer = predication_allocation.buffer; - generated_nv.sequencesCountOffset = predication_allocation.offset; - } - else if (count_buffer) - { - if (require_patch) - { - generated_nv.sequencesCountBuffer = count_allocation.buffer; - generated_nv.sequencesCountOffset = count_allocation.offset; - } - else - { - generated_nv.sequencesCountBuffer = count_buffer->res.vk_buffer; - generated_nv.sequencesCountOffset = count_buffer->mem.offset + count_buffer_offset; - } - } + if (require_patch) + generated_ext.sequenceCountAddress = count_allocation.va; + else + generated_ext.sequenceCountAddress = count_buffer->res.va + count_buffer_offset; } if (require_patch) { - if (use_ext_dgc) - { - generated_ext.indirectAddress = stream_allocation.va; - generated_ext.indirectAddressSize = max_command_count * signature->state_template.dgc.stride; - } - else - { - stream.buffer = stream_allocation.buffer; - stream.offset = stream_allocation.offset; - } + generated_ext.indirectAddress = stream_allocation.va; + generated_ext.indirectAddressSize = max_command_count * signature->state_template.dgc.stride; } else { - if (use_ext_dgc) - { - generated_ext.indirectAddress = 
arg_buffer->res.va + arg_buffer_offset; - generated_ext.indirectAddressSize = max_command_count * signature->state_template.dgc.stride; - } - else - { - stream.buffer = arg_buffer->res.vk_buffer; - stream.offset = arg_buffer->mem.offset + arg_buffer_offset; - } + generated_ext.indirectAddress = arg_buffer->res.va + arg_buffer_offset; + generated_ext.indirectAddressSize = max_command_count * signature->state_template.dgc.stride; } /* If we risk breaking suspend-resume, this will be no-oped out. */ @@ -16705,50 +16317,8 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( { d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list); - if (use_ext_dgc) - { - VK_CALL(vkCmdPreprocessGeneratedCommandsEXT(list->cmd.vk_post_indirect_barrier_commands, + VK_CALL(vkCmdPreprocessGeneratedCommandsEXT(list->cmd.vk_post_indirect_barrier_commands, &generated_ext, list->cmd.vk_command_buffer)); - } - else - { - VkConditionalRenderingBeginInfoEXT conditional_begin_info; - bool explicit_cond_rendering; - - /* With graphics NV_dgc, there are no requirements on bound state, except for pipeline. */ - /* NV_dgcc however requires that state in recording command buffer matches, but EXT_dgc provides a state cmd. */ - VK_CALL(vkCmdBindPipeline(list->cmd.vk_post_indirect_barrier_commands, - signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE ? - VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, current_pipeline)); - - if (signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) - { - /* Compute is a bit more stringent, we have to bind all state. */ - d3d12_command_list_update_descriptors_post_indirect_buffer(list); - } - - explicit_cond_rendering = list->predication.enabled_on_command_buffer && - list->cmd.vk_post_indirect_barrier_commands != list->cmd.vk_command_buffer; - - /* Predication state also has to match. Also useful to nop out explicit preprocess too. 
- * Assumption is that drivers will pull predication state from state command buffer on EXT, - * since states have to match. */ - if (explicit_cond_rendering) - { - conditional_begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; - conditional_begin_info.pNext = NULL; - conditional_begin_info.buffer = list->predication.vk_buffer; - conditional_begin_info.offset = list->predication.vk_buffer_offset; - conditional_begin_info.flags = 0; - VK_CALL(vkCmdBeginConditionalRenderingEXT(list->cmd.vk_post_indirect_barrier_commands, - &conditional_begin_info)); - } - - VK_CALL(vkCmdPreprocessGeneratedCommandsNV(list->cmd.vk_post_indirect_barrier_commands, &generated_nv)); - - if (explicit_cond_rendering) - VK_CALL(vkCmdEndConditionalRenderingEXT(list->cmd.vk_post_indirect_barrier_commands)); - } list->cmd.indirect_meta->need_preprocess_barrier = true; } @@ -16773,16 +16343,8 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( } d3d12_command_list_debug_mark_begin_region(list, tag); - if (use_ext_dgc) - { - VK_CALL(vkCmdExecuteGeneratedCommandsEXT(list->cmd.vk_command_buffer, - explicit_preprocess ? VK_TRUE : VK_FALSE, &generated_ext)); - } - else - { - VK_CALL(vkCmdExecuteGeneratedCommandsNV(list->cmd.vk_command_buffer, - explicit_preprocess ? VK_TRUE : VK_FALSE, &generated_nv)); - } + VK_CALL(vkCmdExecuteGeneratedCommandsEXT(list->cmd.vk_command_buffer, + explicit_preprocess ? 
VK_TRUE : VK_FALSE, &generated_ext)); d3d12_command_list_debug_mark_end_region(list); @@ -16909,13 +16471,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l arg_impl, arg_buffer_offset, count_impl, count_buffer_offset); } - else if (sig_impl->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) - { - d3d12_command_list_execute_indirect_state_template_compute(list, sig_impl, - max_command_count, - arg_impl, arg_buffer_offset, - count_impl, count_buffer_offset); - } VKD3D_BREADCRUMB_COMMAND(EXECUTE_INDIRECT_TEMPLATE); return; @@ -17262,8 +16817,6 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list return; d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_update_descriptor_buffers(list); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); d3d12_command_list_debug_mark_begin_region(list, "SamplerFeedbackEncode"); /* Fixup subresource indices. 
*/ @@ -17325,8 +16878,8 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list if (src->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { - vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, - VKD3D_SAMPLER_FEEDBACK_RESOLVE_BUFFER_TO_MIN_MIP, &pipeline_info); + uint32_t src_index = UINT32_MAX, dst_index = UINT32_MAX; + bool use_heap; dst_view_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; @@ -17342,18 +16895,43 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list src_buffer_view_desc.offset = src->mem.offset; src_buffer_view_desc.buffer = src->res.vk_buffer; - if (!vkd3d_create_buffer_view(list->device, &src_buffer_view_desc, &src_view)) - goto cleanup; - if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view)) - goto cleanup; + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + dst_index = d3d12_command_allocator_allocate_meta_image_view(list->allocator, + list->descriptor_heap.resource.heap, &dst_view_desc, VK_IMAGE_LAYOUT_GENERAL); + + src_index = d3d12_command_allocator_allocate_meta_buffer_view(list->allocator, + list->descriptor_heap.resource.heap, + src->res.va, src_buffer_view_desc.size, VK_FORMAT_R8_UINT); + } + + use_heap = src_index != UINT32_MAX && dst_index != UINT32_MAX; + + vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, + VKD3D_SAMPLER_FEEDBACK_RESOLVE_BUFFER_TO_MIN_MIP, &pipeline_info, use_heap); + + if (use_heap) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, dst_index); + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 2, src_index); + } + else + { + if (!vkd3d_create_buffer_view(list->device, &src_buffer_view_desc, &src_view)) + goto cleanup; + if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view)) + goto cleanup; - vk_image_info[0].imageView = dst_view->vk_image_view; + 
vk_image_info[0].imageView = dst_view->vk_image_view; - vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - vk_descriptor_writes[1].dstBinding = 2; - vk_descriptor_writes[1].pTexelBufferView = &src_view->vk_buffer_view; - vk_descriptor_writes[1].descriptorCount = 1; + vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + vk_descriptor_writes[1].dstBinding = 2; + vk_descriptor_writes[1].pTexelBufferView = &src_view->vk_buffer_view; + vk_descriptor_writes[1].descriptorCount = 1; + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_info.vk_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); + } /* MinMip does not support rect semantics, so go ahead. */ @@ -17362,10 +16940,10 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_info.vk_pipeline)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_info.vk_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, pipeline_info.vk_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); + + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + pipeline_info.vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(args), &args); extent.width = vkd3d_compute_workgroup_count(extent.width, vkd3d_meta_get_sampler_feedback_workgroup_size().width); @@ -17385,10 +16963,8 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list } else { - vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, - dst->desc.Format == 
DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE ? - VKD3D_SAMPLER_FEEDBACK_RESOLVE_IMAGE_TO_MIN_MIP : - VKD3D_SAMPLER_FEEDBACK_RESOLVE_IMAGE_TO_MIP_USED, &pipeline_info); + uint32_t src_index = UINT32_MAX, dst_index = UINT32_MAX; + bool use_heap; memset(&src_image_view_desc, 0, sizeof(src_image_view_desc)); src_image_view_desc.image = src->res.vk_image; @@ -17459,21 +17035,52 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list dep_info.pImageMemoryBarriers = &vk_image_barrier; VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info)); - if (!vkd3d_create_texture_view(list->device, &src_image_view_desc, &src_view)) - goto cleanup; - if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view)) - goto cleanup; + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + dst_index = d3d12_command_allocator_allocate_meta_image_view(list->allocator, + list->descriptor_heap.resource.heap, &dst_view_desc, VK_IMAGE_LAYOUT_GENERAL); + + src_index = d3d12_command_allocator_allocate_meta_image_view(list->allocator, + list->descriptor_heap.resource.heap, &src_image_view_desc, + d3d12_resource_pick_layout(src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + } + + use_heap = src_index != UINT32_MAX && dst_index != UINT32_MAX; + + vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, + dst->desc.Format == DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE ? 
+ VKD3D_SAMPLER_FEEDBACK_RESOLVE_IMAGE_TO_MIN_MIP : + VKD3D_SAMPLER_FEEDBACK_RESOLVE_IMAGE_TO_MIP_USED, &pipeline_info, use_heap); + + if (use_heap) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, dst_index); + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 1, src_index); + } + else + { + if (!vkd3d_create_texture_view(list->device, &src_image_view_desc, &src_view)) + goto cleanup; + if (!vkd3d_create_texture_view(list->device, &dst_view_desc, &dst_view)) + goto cleanup; + + vk_image_info[0].imageView = dst_view->vk_image_view; + vk_image_info[1].imageView = src_view->vk_image_view; + vk_image_info[1].sampler = VK_NULL_HANDLE; + vk_image_info[1].imageLayout = d3d12_resource_pick_layout(src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + vk_descriptor_writes[1].dstBinding = 1; + vk_descriptor_writes[1].pImageInfo = &vk_image_info[1]; + vk_descriptor_writes[1].descriptorCount = 1; - vk_image_info[0].imageView = dst_view->vk_image_view; - vk_image_info[1].imageView = src_view->vk_image_view; - vk_image_info[1].sampler = VK_NULL_HANDLE; - vk_image_info[1].imageLayout = d3d12_resource_pick_layout(src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_info.vk_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); + } - vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - vk_descriptor_writes[1].dstBinding = 1; - vk_descriptor_writes[1].pImageInfo = &vk_image_info[1]; - vk_descriptor_writes[1].descriptorCount = 1; + VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_info.vk_pipeline)); if 
(dst->desc.Format == DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE) { @@ -17518,12 +17125,9 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list args.resolve_width = transcoded_width; args.resolve_height = transcoded_height; - VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_info.vk_pipeline)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_info.vk_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, pipeline_info.vk_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + pipeline_info.vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(args), &args); extent.width = vkd3d_compute_workgroup_count(args.resolve_width, vkd3d_meta_get_sampler_feedback_workgroup_size().width); @@ -17560,8 +17164,10 @@ static void d3d12_command_list_encode_sampler_feedback(struct d3d12_command_list VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info)); } - d3d12_command_allocator_add_view(list->allocator, src_view); - d3d12_command_allocator_add_view(list->allocator, dst_view); + if (src_view) + d3d12_command_allocator_add_view(list->allocator, src_view); + if (dst_view) + d3d12_command_allocator_add_view(list->allocator, dst_view); /* Resolve does not count as a placed initialization, * so don't try to be clever here and compute writes_full_subresource. 
@@ -17609,12 +17215,7 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list return; d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_update_descriptor_buffers(list); d3d12_command_list_debug_mark_begin_region(list, "SamplerFeedbackDecode"); - if (dst->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); - else - d3d12_command_list_invalidate_root_parameters(list, &list->graphics_bindings, true, &list->compute_bindings); /* Fixup subresource indices. */ if (src->desc.Format == DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE) @@ -17680,13 +17281,13 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list if (dst->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + uint32_t src_index = UINT32_MAX, dst_index = UINT32_MAX; + bool use_heap; + vk_image_barrier[0].dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; vk_image_barrier[0].subresourceRange.layerCount = src_view_desc.layer_count; vk_image_barrier[0].subresourceRange.levelCount = src_view_desc.miplevel_count; - vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, - VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIN_MIP_TO_BUFFER, &pipeline_info); - src_view_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; extent = d3d12_resource_desc_get_active_feedback_extent(&src->desc, 0); @@ -17701,18 +17302,44 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list dst_buffer_view_desc.offset = dst->mem.offset; dst_buffer_view_desc.buffer = dst->res.vk_buffer; - if (!vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) - goto cleanup; - if (!vkd3d_create_buffer_view(list->device, &dst_buffer_view_desc, &dst_view)) - goto cleanup; + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + src_index = d3d12_command_allocator_allocate_meta_image_view( + 
list->allocator, list->descriptor_heap.resource.heap, + &src_view_desc, d3d12_resource_pick_layout(src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + + dst_index = d3d12_command_allocator_allocate_meta_buffer_view( + list->allocator, list->descriptor_heap.resource.heap, + dst->res.va, dst_buffer_view_desc.size, VK_FORMAT_R8_UINT); + } + + use_heap = src_index != UINT32_MAX && dst_index != UINT32_MAX; + + vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, + VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIN_MIP_TO_BUFFER, &pipeline_info, use_heap); + + if (use_heap) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 0, dst_index); + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 1, src_index); + } + else + { + if (!vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + goto cleanup; + if (!vkd3d_create_buffer_view(list->device, &dst_buffer_view_desc, &dst_view)) + goto cleanup; - vk_image_info.imageView = src_view->vk_image_view; + vk_image_info.imageView = src_view->vk_image_view; - vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - vk_descriptor_writes[1].dstBinding = 0; - vk_descriptor_writes[1].pTexelBufferView = &dst_view->vk_buffer_view; - vk_descriptor_writes[1].descriptorCount = 1; + vk_descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + vk_descriptor_writes[1].dstBinding = 0; + vk_descriptor_writes[1].pTexelBufferView = &dst_view->vk_buffer_view; + vk_descriptor_writes[1].descriptorCount = 1; + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_info.vk_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); + } args.resolve_width = extent.width; args.resolve_height = extent.height; @@ -17737,10 
+17364,10 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_info.vk_pipeline)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_info.vk_layout, 0, ARRAY_SIZE(vk_descriptor_writes), vk_descriptor_writes)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, pipeline_info.vk_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); + + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + pipeline_info.vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(args), &args); extent.width = vkd3d_compute_workgroup_count(extent.width, vkd3d_meta_get_sampler_feedback_workgroup_size().width); @@ -17761,18 +17388,15 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list vk_image_barrier[0].dstAccessMask = 0; VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info)); - d3d12_command_allocator_add_view(list->allocator, src_view); - d3d12_command_allocator_add_view(list->allocator, dst_view); + if (src_view) + d3d12_command_allocator_add_view(list->allocator, src_view); + if (dst_view) + d3d12_command_allocator_add_view(list->allocator, dst_view); } else { vk_image_barrier[0].dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; - vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, - src->desc.Format == DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE ? 
- VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIN_MIP_TO_IMAGE : - VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIP_USED_TO_IMAGE, &pipeline_info); - memset(&dst_image_view_desc, 0, sizeof(dst_image_view_desc)); dst_image_view_desc.image = dst->res.vk_image; dst_image_view_desc.format = vkd3d_get_format(list->device, DXGI_FORMAT_R8_UINT, false); @@ -17887,6 +17511,9 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list for (i = 0; i < num_mip_iterations; i++) { + uint32_t src_index = UINT32_MAX; + bool use_heap; + if (dst_view) { vkd3d_view_decref(dst_view, list->device); @@ -17899,11 +17526,36 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list src_view = NULL; } - if (!vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) - goto cleanup; + if (list->descriptor_heap.resource.heap && !list->descriptor_heap.heap_dirty) + { + src_index = d3d12_command_allocator_allocate_meta_image_view(list->allocator, + list->descriptor_heap.resource.heap, &src_view_desc, + d3d12_resource_pick_layout(src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + } + + use_heap = src_index != UINT32_MAX; + + vkd3d_meta_get_sampler_feedback_resolve_pipeline(&list->device->meta_ops, + src->desc.Format == DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE ? 
+ VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIN_MIP_TO_IMAGE : + VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIP_USED_TO_IMAGE, &pipeline_info, use_heap); + if (!vkd3d_create_texture_view(list->device, &dst_image_view_desc, &dst_view)) goto cleanup; + if (use_heap) + { + d3d12_command_list_meta_push_descriptor_index(list, list->cmd.vk_command_buffer, 1, src_index); + } + else + { + if (!vkd3d_create_texture_view(list->device, &src_view_desc, &src_view)) + goto cleanup; + vk_image_info.imageView = src_view->vk_image_view; + VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_info.vk_layout, 0, 1, vk_descriptor_writes)); + } + /* Transcoded output doesn't have to cover everything. Cover minimum. */ extent = d3d12_resource_desc_get_subresource_extent(&dst->desc, dst->format, dst_image_view_desc.miplevel_idx); transcoded_width = extent.width; @@ -17939,24 +17591,26 @@ static void d3d12_command_list_decode_sampler_feedback(struct d3d12_command_list viewport.width = (float)rendering_info.renderArea.extent.width; viewport.height = (float)rendering_info.renderArea.extent.height; - vk_image_info.imageView = src_view->vk_image_view; - VK_CALL(vkCmdBeginRendering(list->cmd.vk_command_buffer, &rendering_info)); VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_info.vk_pipeline)); VK_CALL(vkCmdSetViewport(list->cmd.vk_command_buffer, 0, 1, &viewport)); VK_CALL(vkCmdSetScissor(list->cmd.vk_command_buffer, 0, 1, &rendering_info.renderArea)); - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_info.vk_layout, 0, 1, vk_descriptor_writes)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, pipeline_info.vk_layout, - VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(args), &args)); + + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + pipeline_info.vk_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(args), &args); + 
VK_CALL(vkCmdDraw(list->cmd.vk_command_buffer, 3, dst_image_view_desc.layer_count, 0, 0)); VK_CALL(vkCmdEndRendering(list->cmd.vk_command_buffer)); dst_image_view_desc.miplevel_idx++; args.mip_level++; - d3d12_command_allocator_add_view(list->allocator, src_view); - d3d12_command_allocator_add_view(list->allocator, dst_view); + + if (src_view) + d3d12_command_allocator_add_view(list->allocator, src_view); + if (dst_view) + d3d12_command_allocator_add_view(list->allocator, dst_view); } vk_image_barrier[0].srcStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; @@ -19878,7 +19532,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(d3d12_command if (list->active_pipeline_type != VKD3D_PIPELINE_TYPE_RAY_TRACING) { list->active_pipeline_type = VKD3D_PIPELINE_TYPE_RAY_TRACING; - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, NULL); + d3d12_command_list_invalidate_root_parameters(list); } #ifdef VKD3D_ENABLE_BREADCRUMBS @@ -20256,11 +19910,8 @@ static VkPipelineStageFlags2 vk_stage_flags_from_d3d12_barrier(struct d3d12_comm { stages |= VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; /* We might use explicit preprocess. */ - if (list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands || - list->device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) - { + if (list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) stages |= VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT; - } } if (sync & D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE) @@ -20317,8 +19968,7 @@ static VkAccessFlags2 vk_access_flags_from_d3d12_barrier(struct d3d12_command_li vk_access |= VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_2_SHADER_READ_BIT; /* We might use preprocessing. 
*/ - if (list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands || - list->device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) + if (list->device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) vk_access |= VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT; } @@ -21082,27 +20732,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d else #endif { - list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl_default; - - if (d3d12_device_use_embedded_mutable_descriptors(device)) - { - /* Specialize SetDescriptorTable calls since we need different code paths for those, - * and they are quite hot. */ - if (device->bindless_state.descriptor_buffer_cbv_srv_uav_size == 64 && - device->bindless_state.descriptor_buffer_sampler_size == 16) - { - list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl_embedded_64_16; - } - else if (device->bindless_state.descriptor_buffer_cbv_srv_uav_size == 32 && - device->bindless_state.descriptor_buffer_sampler_size == 16) - { - list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl_embedded_32_16; - } - else - { - list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl_embedded_default; - } - } + list->ID3D12GraphicsCommandList_iface.lpVtbl = &d3d12_command_list_vtbl_embedded_default; } list->refcount = 1; @@ -24088,8 +23718,10 @@ static void d3d12_command_queue_add_submission(struct d3d12_command_queue *queue { /* Ensure that any non-temporal writes from CopyDescriptors are ordered properly * with the submission thread that calls vkQueueSubmit. 
*/ - if (d3d12_device_use_embedded_mutable_descriptors(queue->device)) + //if (d3d12_device_use_embedded_mutable_descriptors(queue->device)) + { vkd3d_memcpy_non_temporal_barrier(); + } pthread_mutex_lock(&queue->queue_lock); d3d12_command_queue_add_submission_locked(queue, sub); @@ -24593,14 +24225,6 @@ static void d3d12_command_signature_cleanup(struct d3d12_command_signature *sign VK_CALL(vkDestroyIndirectCommandsLayoutEXT(signature->device->vk_device, signature->state_template.dgc.layout_preprocess_ext, NULL)); } - - if (signature->device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) - { - VK_CALL(vkDestroyIndirectCommandsLayoutNV(signature->device->vk_device, - signature->state_template.dgc.layout_implicit_nv, NULL)); - VK_CALL(vkDestroyIndirectCommandsLayoutNV(signature->device->vk_device, - signature->state_template.dgc.layout_preprocess_nv, NULL)); - } } d3d_destruction_notifier_free(&signature->destruction_notifier); @@ -24734,47 +24358,6 @@ static HRESULT d3d12_command_signature_init_patch_commands_buffer(struct d3d12_c return hr; } -static HRESULT d3d12_command_signature_init_indirect_commands_layout_nv( - struct d3d12_command_signature *signature, struct d3d12_device *device, - const VkIndirectCommandsLayoutTokenNV *tokens, uint32_t token_count, - uint32_t stream_stride) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkIndirectCommandsLayoutCreateInfoNV create_info; - VkResult vr; - - memset(&create_info, 0, sizeof(create_info)); - create_info.sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV; - create_info.pipelineBindPoint = signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE ? 
- VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; - create_info.streamCount = 1; - create_info.pStreamStrides = &stream_stride; - create_info.tokenCount = token_count; - create_info.pTokens = tokens; - - signature->state_template.dgc.stride = stream_stride; - - if (token_count > device->device_info.device_generated_commands_properties_nv.maxIndirectCommandsTokenCount) - { - FIXME("Token count %u is too large (max %u).\n", - token_count, device->device_info.device_generated_commands_properties_nv.maxIndirectCommandsTokenCount); - return E_NOTIMPL; - } - - /* Need two separate DGC layouts since if we set EXPLICIT_PREPROCESS, we must use preprocess if the flag is set. - * We don't always want to use explicit preprocess (especially when we cannot hoist), so pick the appropriate - * layout at ExecuteIndirect time. */ - vr = VK_CALL(vkCreateIndirectCommandsLayoutNV(device->vk_device, &create_info, NULL, - &signature->state_template.dgc.layout_implicit_nv)); - if (vr != VK_SUCCESS) - return hresult_from_vk_result(vr); - - create_info.flags = VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_NV; - vr = VK_CALL(vkCreateIndirectCommandsLayoutNV(device->vk_device, &create_info, NULL, - &signature->state_template.dgc.layout_preprocess_nv)); - return hresult_from_vk_result(vr); -} - static HRESULT d3d12_command_signature_init_indirect_commands_layout_ext( struct d3d12_command_signature *signature, struct d3d12_root_signature *root_signature, struct d3d12_device *device, @@ -24808,8 +24391,6 @@ static HRESULT d3d12_command_signature_init_indirect_commands_layout_ext( } create_info.indirectStride = stream_stride; - if (root_signature) - create_info.pipelineLayout = d3d12_root_signature_get_layout(root_signature, signature->pipeline_type)->vk_pipeline_layout; create_info.tokenCount = token_count; create_info.pTokens = tokens; @@ -24845,8 +24426,7 @@ static HRESULT d3d12_command_signature_allocate_stream_memory_for_list( if 
(!d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE, max_command_count * signature->state_template.dgc.stride, - list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, - ~0u, allocation)) + 64, ~0u, allocation)) return E_OUTOFMEMORY; return S_OK; @@ -24903,453 +24483,35 @@ static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list_ext( return S_OK; } -static HRESULT d3d12_command_signature_allocate_preprocess_memory_for_list_nv( - struct d3d12_command_list *list, - struct d3d12_command_signature *signature, VkPipeline render_pipeline, bool explicit_preprocess, - uint32_t max_command_count, - struct vkd3d_scratch_allocation *allocation, VkDeviceSize *size) -{ - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - VkGeneratedCommandsMemoryRequirementsInfoNV info; - VkMemoryRequirements2 memory_info; - uint32_t alignment; - - memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - memory_info.pNext = NULL; - - info.pipelineBindPoint = signature->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE ? - VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; - info.sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV; - info.pNext = NULL; - info.maxSequencesCount = max_command_count; - info.pipeline = render_pipeline; - info.indirectCommandsLayout = explicit_preprocess ? 
- signature->state_template.dgc.layout_preprocess_nv : - signature->state_template.dgc.layout_implicit_nv; - - if (max_command_count > list->device->device_info.device_generated_commands_properties_nv.maxIndirectSequenceCount) - { - FIXME("max_command_count %u exceeds device limit %u.\n", - max_command_count, - list->device->device_info.device_generated_commands_properties_nv.maxIndirectSequenceCount); - return E_NOTIMPL; - } - - VK_CALL(vkGetGeneratedCommandsMemoryRequirementsNV(list->device->vk_device, &info, &memory_info)); - - alignment = max(memory_info.memoryRequirements.alignment, - list->device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment); - - if (!d3d12_command_allocator_allocate_scratch_memory(list->allocator, - VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, - memory_info.memoryRequirements.size, - alignment, - memory_info.memoryRequirements.memoryTypeBits, allocation)) - return E_OUTOFMEMORY; - - /* Going to assume the memory type is okay ... It's device local after all. */ - *size = memory_info.memoryRequirements.size; - return S_OK; -} - -static HRESULT d3d12_command_signature_init_state_template_compute(struct d3d12_command_signature *signature, +static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_command_signature *signature, const D3D12_COMMAND_SIGNATURE_DESC *desc, struct d3d12_root_signature *root_signature, struct d3d12_device *device) { - /* Compute templates are simpler, since the only state that can change is - * root constants and root descriptors, so we can work around it with some heroics. - * The implementation strategy for a non-DGC path is to upload a 256 byte buffer - * with default command list root parameter state. - * The input is either copied from the buffer directly, or it's read from the indirect buffer and replaces - * the default input. This can be done in parallel with 64 threads per dispatch. 
- * Some threads per workgroup will then copy the indirect dispatch parameters - * (or clear them to 0 if indirect count needs to mask the dispatch). */ + VkIndirectCommandsVertexBufferTokenEXT vb_tokens[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + VkIndirectCommandsPushConstantTokenEXT pc_tokens[D3D12_MAX_ROOT_COST]; + const enum vkd3d_patch_command_token *generic_u32_copy_types; const struct vkd3d_shader_root_parameter *root_parameter; const struct vkd3d_shader_root_constant *root_constant; + struct vkd3d_patch_command *patch_commands = NULL; + VkIndirectCommandsLayoutTokenEXT *tokens = NULL; + VkIndirectCommandsIndexBufferTokenEXT ib_token; + uint32_t required_stride_alignment = 0; + VkIndirectCommandsLayoutTokenEXT token; + uint32_t generic_u32_copy_count; + size_t patch_commands_count = 0; + uint32_t required_alignment = 0; + size_t patch_commands_size = 0; uint32_t root_parameter_index; - uint32_t src_offset_words = 0; - uint32_t dst_offset_word; - unsigned int i, j; - - for (i = 0; i < ARRAY_SIZE(signature->state_template.compute.source_offsets); i++) - signature->state_template.compute.source_offsets[i] = -1; - - for (i = 0; i < desc->NumArgumentDescs; i++) - { - const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i]; - - switch (argument_desc->Type) - { - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: - root_parameter_index = argument_desc->Constant.RootParameterIndex; - root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); - - dst_offset_word = root_constant->constant_index + argument_desc->Constant.DestOffsetIn32BitValues; - for (j = 0; j < argument_desc->Constant.Num32BitValuesToSet; j++, src_offset_words++) - signature->state_template.compute.source_offsets[dst_offset_word + j] = (int32_t)src_offset_words; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: - 
root_parameter_index = argument_desc->ShaderResourceView.RootParameterIndex; - root_parameter = root_signature_get_parameter(root_signature, root_parameter_index); - - if (!(root_signature->root_descriptor_raw_va_mask & (1ull << root_parameter_index))) - { - ERR("Root parameter %u is not a raw VA. Cannot implement command signature which updates root descriptor.\n", - root_parameter_index); - return E_NOTIMPL; - } - - dst_offset_word = root_parameter->descriptor.raw_va_root_descriptor_index * sizeof(VkDeviceAddress) / sizeof(uint32_t); - for (j = 0; j < sizeof(VkDeviceAddress) / sizeof(uint32_t); j++, src_offset_words++) - signature->state_template.compute.source_offsets[dst_offset_word + j] = (int32_t)src_offset_words; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: - signature->state_template.compute.dispatch_offset_words = src_offset_words; - break; - - default: - FIXME("Unsupported token type %u.\n", argument_desc->Type); - return E_NOTIMPL; - } - } - - /* No need to build a specialized pipeline here, there is a generic pipeline to handle compute. 
*/ - - return S_OK; -} - -static HRESULT d3d12_command_signature_init_state_template_dgc_nv(struct d3d12_command_signature *signature, - const D3D12_COMMAND_SIGNATURE_DESC *desc, - struct d3d12_root_signature *root_signature, - struct d3d12_device *device) -{ - const enum vkd3d_patch_command_token *generic_u32_copy_types; - const struct vkd3d_shader_root_parameter *root_parameter; - const struct d3d12_bind_point_layout *bind_point_layout; - const struct vkd3d_shader_root_constant *root_constant; - struct vkd3d_patch_command *patch_commands = NULL; - VkIndirectCommandsLayoutTokenNV *tokens = NULL; - uint32_t required_stride_alignment = 0; - VkIndirectCommandsLayoutTokenNV token; - uint32_t generic_u32_copy_count; - size_t patch_commands_count = 0; - uint32_t required_alignment = 0; - size_t patch_commands_size = 0; - uint32_t root_parameter_index; - uint32_t src_word_offset = 0; - uint32_t stream_stride = 0; - uint32_t dst_word_offset; - size_t token_count = 0; - size_t token_size = 0; - HRESULT hr = S_OK; - uint32_t i, j; - - /* Mostly for debug. Lets debug ring report what it is writing easily. 
*/ - static const enum vkd3d_patch_command_token ibv_types[] = - { - VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO, - VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI, - VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE, - VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT, - }; - - static const enum vkd3d_patch_command_token vbv_types[] = - { - VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO, - VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI, - VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE, - VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE, - }; - - static const enum vkd3d_patch_command_token draw_types[] = - { - VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT, - VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT, - VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX, - VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE, - }; - - static const enum vkd3d_patch_command_token draw_indexed_types[] = - { - VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT, - VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT, - VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX, - VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET, - VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE, - }; - - static const enum vkd3d_patch_command_token draw_mesh_types[] = - { - VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X, - VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y, - VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z, - }; - - static const enum vkd3d_patch_command_token va_types[] = - { - VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO, - VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI, - }; - - static const VkIndexType vk_index_types[] = { VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_UINT16 }; - static const uint32_t d3d_index_types[] = { DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R16_UINT }; - - bind_point_layout = d3d12_root_signature_get_layout(root_signature, signature->pipeline_type); - - for (i = 0; i < desc->NumArgumentDescs; i++) - { - const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i]; - memset(&token, 0, sizeof(token)); - token.sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV; - 
generic_u32_copy_count = 0; - dst_word_offset = 0; - - switch (argument_desc->Type) - { - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: - root_parameter_index = argument_desc->Constant.RootParameterIndex; - root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); - - if (bind_point_layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - ERR("Root signature uses push UBO for root parameters, but this feature requires push constant path.\n"); - hr = E_NOTIMPL; - goto end; - } - - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV; - token.pushconstantPipelineLayout = bind_point_layout->vk_pipeline_layout; - token.pushconstantShaderStageFlags = bind_point_layout->vk_push_stages; - token.pushconstantOffset = root_constant->constant_index + argument_desc->Constant.DestOffsetIn32BitValues; - token.pushconstantSize = argument_desc->Constant.Num32BitValuesToSet; - token.pushconstantOffset *= sizeof(uint32_t); - token.pushconstantSize *= sizeof(uint32_t); - required_alignment = sizeof(uint32_t); - - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += token.pushconstantSize; - dst_word_offset = token.offset / sizeof(uint32_t); - - generic_u32_copy_count = argument_desc->Constant.Num32BitValuesToSet; - generic_u32_copy_types = NULL; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: - root_parameter_index = argument_desc->ShaderResourceView.RootParameterIndex; - root_parameter = root_signature_get_parameter(root_signature, root_parameter_index); - - if (bind_point_layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - ERR("Root signature uses push UBO for root parameters, but this feature requires push constant path.\n"); - hr = E_NOTIMPL; - goto end; - } - - if 
(!(root_signature->root_descriptor_raw_va_mask & (1ull << root_parameter_index))) - { - ERR("Root parameter %u is not a raw VA. Cannot implement command signature which updates root descriptor.\n", - root_parameter_index); - hr = E_NOTIMPL; - goto end; - } - - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV; - token.pushconstantPipelineLayout = bind_point_layout->vk_pipeline_layout; - token.pushconstantShaderStageFlags = bind_point_layout->vk_push_stages; - token.pushconstantOffset = root_parameter->descriptor.raw_va_root_descriptor_index * sizeof(VkDeviceAddress); - token.pushconstantSize = sizeof(VkDeviceAddress); - required_alignment = sizeof(uint32_t); - - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += token.pushconstantSize; - dst_word_offset = token.offset / sizeof(uint32_t); - - /* Simply patch by copying U32s. Need to handle unaligned U32s since everything is tightly packed. */ - generic_u32_copy_count = sizeof(VkDeviceAddress) / sizeof(uint32_t); - generic_u32_copy_types = va_types; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV; - token.vertexBindingUnit = argument_desc->VertexBuffer.Slot; - token.vertexDynamicStride = VK_TRUE; - - /* If device exposes 4 byte alignment of the indirect command buffer, we can - * pack VA at sub-scalar alignment. */ - required_alignment = min( - device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, - sizeof(VkDeviceAddress)); - - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += sizeof(VkBindVertexBufferIndirectCommandNV); - dst_word_offset = token.offset / sizeof(uint32_t); - - /* The VBV indirect layout is the same as DX, so just copy the U32s. 
*/ - generic_u32_copy_count = sizeof(D3D12_VERTEX_BUFFER_VIEW) / sizeof(uint32_t); - generic_u32_copy_types = vbv_types; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV; - token.indexTypeCount = ARRAY_SIZE(vk_index_types); - token.pIndexTypeValues = d3d_index_types; - token.pIndexTypes = vk_index_types; - - /* If device exposes 4 byte alignment of the indirect command buffer, we can - * pack VA at sub-scalar alignment. */ - required_alignment = min( - device->device_info.device_generated_commands_properties_nv.minIndirectCommandsBufferOffsetAlignment, - sizeof(VkDeviceAddress)); - - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += sizeof(VkBindVertexBufferIndirectCommandNV); - dst_word_offset = token.offset / sizeof(uint32_t); - - vkd3d_array_reserve((void**)&patch_commands, &patch_commands_size, - patch_commands_count + sizeof(D3D12_INDEX_BUFFER_VIEW) / sizeof(uint32_t), - sizeof(*patch_commands)); - - for (j = 0; j < 4; j++) - { - patch_commands[patch_commands_count].token = ibv_types[j]; - patch_commands[patch_commands_count].src_offset = src_word_offset++; - patch_commands[patch_commands_count].dst_offset = dst_word_offset++; - patch_commands_count++; - } - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV; - required_alignment = sizeof(uint32_t); - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += sizeof(VkDrawIndirectCommand); - dst_word_offset = token.offset / sizeof(uint32_t); - generic_u32_copy_count = sizeof(VkDrawIndirectCommand) / sizeof(uint32_t); - generic_u32_copy_types = draw_types; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV; - required_alignment = sizeof(uint32_t); - stream_stride = align(stream_stride, 
required_alignment); - token.offset = stream_stride; - stream_stride += sizeof(VkDrawIndexedIndirectCommand); - dst_word_offset = token.offset / sizeof(uint32_t); - generic_u32_copy_count = sizeof(VkDrawIndexedIndirectCommand) / sizeof(uint32_t); - generic_u32_copy_types = draw_indexed_types; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH: - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV; - required_alignment = sizeof(uint32_t); - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += sizeof(VkDrawMeshTasksIndirectCommandEXT); - dst_word_offset = token.offset / sizeof(uint32_t); - generic_u32_copy_count = sizeof(VkDrawMeshTasksIndirectCommandEXT) / sizeof(uint32_t); - generic_u32_copy_types = draw_mesh_types; - break; - - case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: - token.tokenType = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV; - required_alignment = sizeof(uint32_t); - stream_stride = align(stream_stride, required_alignment); - token.offset = stream_stride; - stream_stride += sizeof(VkDispatchIndirectCommand); - dst_word_offset = token.offset / sizeof(uint32_t); - /* TODO: Rebase on top of debug-ring-indirect. */ - generic_u32_copy_count = 0; - generic_u32_copy_types = NULL; - break; - - default: - FIXME("Unsupported token type %u.\n", argument_desc->Type); - hr = E_NOTIMPL; - goto end; - } - - vkd3d_array_reserve((void**)&tokens, &token_size, token_count + 1, sizeof(*tokens)); - tokens[token_count++] = token; - - if (generic_u32_copy_count) - { - vkd3d_array_reserve((void**)&patch_commands, &patch_commands_size, - patch_commands_count + generic_u32_copy_count, - sizeof(*patch_commands)); - - /* Simply patch by copying U32s. */ - for (j = 0; j < generic_u32_copy_count; j++, patch_commands_count++) - { - patch_commands[patch_commands_count].token = - generic_u32_copy_types ? 
generic_u32_copy_types[j] : VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32; - patch_commands[patch_commands_count].src_offset = src_word_offset++; - patch_commands[patch_commands_count].dst_offset = dst_word_offset++; - } - } - - /* Required alignment is scalar alignment rules, i.e. maximum individual alignment requirement. */ - required_stride_alignment = max(required_stride_alignment, required_alignment); - } - - stream_stride = max(stream_stride, desc->ByteStride); - stream_stride = align(stream_stride, required_stride_alignment); - - if (FAILED(hr = d3d12_command_signature_init_patch_commands_buffer(signature, device, patch_commands, patch_commands_count))) - goto end; - if (FAILED(hr = d3d12_command_signature_init_indirect_commands_layout_nv(signature, device, tokens, token_count, stream_stride))) - goto end; - if (FAILED(hr = vkd3d_meta_get_execute_indirect_pipeline(&device->meta_ops, patch_commands_count, - &signature->state_template.dgc.pipeline))) - goto end; - -end: - vkd3d_free(tokens); - vkd3d_free(patch_commands); - return hr; -} - -static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_command_signature *signature, - const D3D12_COMMAND_SIGNATURE_DESC *desc, - struct d3d12_root_signature *root_signature, - struct d3d12_device *device) -{ - VkIndirectCommandsVertexBufferTokenEXT vb_tokens[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; - VkIndirectCommandsPushConstantTokenEXT pc_tokens[D3D12_MAX_ROOT_COST]; - const enum vkd3d_patch_command_token *generic_u32_copy_types; - const struct vkd3d_shader_root_parameter *root_parameter; - const struct d3d12_bind_point_layout *bind_point_layout; - const struct vkd3d_shader_root_constant *root_constant; - struct vkd3d_patch_command *patch_commands = NULL; - VkIndirectCommandsLayoutTokenEXT *tokens = NULL; - VkIndirectCommandsIndexBufferTokenEXT ib_token; - uint32_t required_stride_alignment = 0; - VkIndirectCommandsLayoutTokenEXT token; - uint32_t generic_u32_copy_count; - size_t 
patch_commands_count = 0; - uint32_t required_alignment = 0; - size_t patch_commands_size = 0; - uint32_t root_parameter_index; - uint32_t src_word_offset = 0; - uint32_t stream_stride = 0; - size_t vb_token_count = 0; - size_t pc_token_count = 0; - uint32_t dst_word_offset; - size_t token_count = 0; - size_t token_size = 0; - HRESULT hr = S_OK; - uint32_t i, j; + uint32_t src_word_offset = 0; + uint32_t stream_stride = 0; + size_t vb_token_count = 0; + size_t pc_token_count = 0; + uint32_t dst_word_offset; + size_t token_count = 0; + size_t token_size = 0; + HRESULT hr = S_OK; + uint32_t i, j; /* Mostly for debug. Lets debug ring report what it is writing easily. */ static const enum vkd3d_patch_command_token ibv_types[] = @@ -25398,8 +24560,6 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_ VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI, }; - bind_point_layout = d3d12_root_signature_get_layout(root_signature, signature->pipeline_type); - for (i = 0; i < desc->NumArgumentDescs; i++) { const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i]; @@ -25418,25 +24578,19 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_ root_parameter_index = argument_desc->Constant.RootParameterIndex; root_constant = root_signature_get_32bit_constants(root_signature, root_parameter_index); - if (bind_point_layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - ERR("Root signature uses push UBO for root parameters, but this feature requires push constant path.\n"); - hr = E_NOTIMPL; - goto end; - } - assert(pc_token_count < ARRAY_SIZE(pc_tokens)); pc_token = &pc_tokens[pc_token_count++]; token.data.pPushConstant = pc_token; - pc_token->updateRange.stageFlags = bind_point_layout->vk_push_stages; + pc_token->updateRange.offset = root_constant->constant_index + argument_desc->Constant.DestOffsetIn32BitValues; pc_token->updateRange.size = argument_desc->Constant.Num32BitValuesToSet; 
pc_token->updateRange.offset *= sizeof(uint32_t); pc_token->updateRange.size *= sizeof(uint32_t); + pc_token->updateRange.stageFlags = VK_SHADER_STAGE_ALL; if (argument_desc->Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT) { - token.type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT; + token.type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT; required_alignment = sizeof(uint32_t); stream_stride = align(stream_stride, required_alignment); @@ -25448,7 +24602,7 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_ } else { - token.type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT; + token.type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_SEQUENCE_INDEX_EXT; token.offset = 0; /* ignored */ pc_token->updateRange.size = sizeof(uint32_t); @@ -25464,13 +24618,6 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_ root_parameter_index = argument_desc->ShaderResourceView.RootParameterIndex; root_parameter = root_signature_get_parameter(root_signature, root_parameter_index); - if (bind_point_layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - ERR("Root signature uses push UBO for root parameters, but this feature requires push constant path.\n"); - hr = E_NOTIMPL; - goto end; - } - if (!(root_signature->root_descriptor_raw_va_mask & (1ull << root_parameter_index))) { ERR("Root parameter %u is not a raw VA. 
Cannot implement command signature which updates root descriptor.\n", @@ -25479,13 +24626,13 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_ext(struct d3d12_ goto end; } - token.type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT; + token.type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_DATA_EXT; assert(pc_token_count < ARRAY_SIZE(pc_tokens)); pc_token = &pc_tokens[pc_token_count++]; token.data.pPushConstant = pc_token; - pc_token->updateRange.stageFlags = bind_point_layout->vk_push_stages; pc_token->updateRange.offset = root_parameter->descriptor.raw_va_root_descriptor_index * sizeof(VkDeviceAddress); pc_token->updateRange.size = sizeof(VkDeviceAddress); + pc_token->updateRange.stageFlags = VK_SHADER_STAGE_ALL; required_alignment = sizeof(uint32_t); stream_stride = align(stream_stride, required_alignment); @@ -25801,25 +24948,15 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12 if ((object->requires_state_template = requires_state_template)) { - if ((pipeline_type == VKD3D_PIPELINE_TYPE_GRAPHICS || pipeline_type == VKD3D_PIPELINE_TYPE_MESH_GRAPHICS) && - !device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands && + if ((pipeline_type == VKD3D_PIPELINE_TYPE_GRAPHICS || + pipeline_type == VKD3D_PIPELINE_TYPE_MESH_GRAPHICS || + pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) && !device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) { FIXME("Device generated commands is not supported by implementation.\n"); object->requires_state_template = false; goto out; } - else if (pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) - { - if (!device->device_info.device_generated_commands_compute_features_nv.deviceGeneratedCompute && - !device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands && - !(device->bindless_state.flags & VKD3D_FORCE_COMPUTE_ROOT_PARAMETERS_PUSH_UBO)) - { - FIXME("State template is required for compute, but 
VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES is not enabled.\n"); - object->requires_state_template = false; - goto out; - } - } else if (pipeline_type == VKD3D_PIPELINE_TYPE_RAY_TRACING) { /* Very similar idea as indirect compute would be. */ @@ -25834,19 +24971,6 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12 goto err; object->requires_state_template_dgc = true; } - else if (pipeline_type == VKD3D_PIPELINE_TYPE_GRAPHICS || pipeline_type == VKD3D_PIPELINE_TYPE_MESH_GRAPHICS || - (pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE && - device->device_info.device_generated_commands_compute_features_nv.deviceGeneratedCompute)) - { - if (FAILED(hr = d3d12_command_signature_init_state_template_dgc_nv(object, desc, root_signature, device))) - goto err; - object->requires_state_template_dgc = true; - } - else if (pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) - { - if (FAILED(hr = d3d12_command_signature_init_state_template_compute(object, desc, root_signature, device))) - goto err; - } /* Heuristic. If game uses fancy execute indirect we're more inclined to split command buffers * for optimal reordering. 
*/ diff --git a/libs/vkd3d/command_list_vkd3d_ext.c b/libs/vkd3d/command_list_vkd3d_ext.c index fac4082a60..cbb03f7cdc 100644 --- a/libs/vkd3d/command_list_vkd3d_ext.c +++ b/libs/vkd3d/command_list_vkd3d_ext.c @@ -88,6 +88,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_LaunchCubinShaderE if (!handle || !params || !param_size) return E_INVALIDARG; + d3d12_command_list_update_descriptor_heaps(command_list); + launchInfo.function = handle->vkCuFunction; launchInfo.gridDimX = block_x; launchInfo.gridDimY = block_y; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 3a4e0f2ed3..a9b74d2170 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -89,6 +89,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(KHR_COMPUTE_SHADER_DERIVATIVES, KHR_compute_shader_derivatives), VK_EXTENSION(KHR_CALIBRATED_TIMESTAMPS, KHR_calibrated_timestamps), VK_EXTENSION(KHR_COOPERATIVE_MATRIX, KHR_cooperative_matrix), + VK_EXTENSION(KHR_SHADER_UNTYPED_POINTERS, KHR_shader_untyped_pointers), VK_EXTENSION(KHR_UNIFIED_IMAGE_LAYOUTS, KHR_unified_image_layouts), #ifdef _WIN32 VK_EXTENSION(KHR_EXTERNAL_MEMORY_WIN32, KHR_external_memory_win32), @@ -110,10 +111,8 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_EXTERNAL_MEMORY_HOST, EXT_external_memory_host), VK_EXTENSION(EXT_SHADER_IMAGE_ATOMIC_INT64, EXT_shader_image_atomic_int64), VK_EXTENSION(EXT_MESH_SHADER, EXT_mesh_shader), - VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), VK_EXTENSION(EXT_HDR_METADATA, EXT_hdr_metadata), VK_EXTENSION(EXT_SHADER_MODULE_IDENTIFIER, EXT_shader_module_identifier), - VK_EXTENSION(EXT_DESCRIPTOR_BUFFER, EXT_descriptor_buffer), VK_EXTENSION_DISABLE_COND(EXT_PIPELINE_LIBRARY_GROUP_HANDLES, EXT_pipeline_library_group_handles, VKD3D_CONFIG_FLAG_NO_DXR), VK_EXTENSION(EXT_IMAGE_SLICED_VIEW_OF_3D, EXT_image_sliced_view_of_3d), 
VK_EXTENSION(EXT_GRAPHICS_PIPELINE_LIBRARY, EXT_graphics_pipeline_library), @@ -130,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_ZERO_INITIALIZE_DEVICE_MEMORY, EXT_zero_initialize_device_memory), VK_EXTENSION_COND(EXT_OPACITY_MICROMAP, EXT_opacity_micromap, VKD3D_CONFIG_FLAG_DXR_1_2), VK_EXTENSION(EXT_SHADER_FLOAT8, EXT_shader_float8), + VK_EXTENSION(EXT_DESCRIPTOR_HEAP, EXT_descriptor_heap), /* AMD extensions */ VK_EXTENSION(AMD_BUFFER_MARKER, AMD_buffer_marker), VK_EXTENSION(AMD_DEVICE_COHERENT_MEMORY, AMD_device_coherent_memory), @@ -144,10 +144,8 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(NV_FRAGMENT_SHADER_BARYCENTRIC, NV_fragment_shader_barycentric), VK_EXTENSION(NV_COMPUTE_SHADER_DERIVATIVES, NV_compute_shader_derivatives), VK_EXTENSION_COND(NV_DEVICE_DIAGNOSTIC_CHECKPOINTS, NV_device_diagnostic_checkpoints, VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE), - VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS, NV_device_generated_commands), VK_EXTENSION(NV_SHADER_SUBGROUP_PARTITIONED, NV_shader_subgroup_partitioned), VK_EXTENSION(NV_MEMORY_DECOMPRESSION, NV_memory_decompression), - VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS_COMPUTE, NV_device_generated_commands_compute), VK_EXTENSION_VERSION(NV_LOW_LATENCY_2, NV_low_latency2, 2), VK_EXTENSION(NV_RAW_ACCESS_CHAINS, NV_raw_access_chains), VK_EXTENSION(NV_COOPERATIVE_MATRIX_2, NV_cooperative_matrix2), @@ -602,8 +600,8 @@ static const struct vkd3d_instance_application_meta application_override[] = { */ { VKD3D_STRING_COMPARE_EXACT, "HaloInfinite.exe", VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV | - VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK | VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS | - VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES | VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV | + VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK | + VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV | VKD3D_CONFIG_FLAG_DISABLE_DGCC, 0 
}, /* (1182900) Workaround amdgpu kernel bug with host memory import and concurrent submissions. */ { VKD3D_STRING_COMPARE_EXACT, "APlagueTaleRequiem_x64.exe", @@ -649,7 +647,7 @@ static const struct vkd3d_instance_application_meta application_override[] = { { VKD3D_STRING_COMPARE_EXACT, "RDR.exe", VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV, 0 }, /* Starfield (1716740) */ { VKD3D_STRING_COMPARE_EXACT, "Starfield.exe", - VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES | VKD3D_CONFIG_FLAG_REJECT_PADDED_SMALL_RESOURCE_ALIGNMENT, 0 }, + VKD3D_CONFIG_FLAG_REJECT_PADDED_SMALL_RESOURCE_ALIGNMENT, 0 }, /* Persona 3 Reload (2161700). Enables RT by default on Deck and does not run acceptably for a verified title. */ { VKD3D_STRING_COMPARE_EXACT, "P3R.exe", 0, 0, VKD3D_APPLICATION_FEATURE_NO_DEFAULT_DXR_ON_DECK }, /* Basically never bothers doing initial transitions. @@ -1252,10 +1250,8 @@ static const struct vkd3d_debug_option vkd3d_config_options[] = {"force_raw_va_cbv", VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV}, {"allow_sbt_collection", VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION}, {"host_import_fallback", VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK}, - {"preallocate_srv_mip_clamps", VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS}, {"force_initial_transition", VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION}, {"breadcrumbs_trace", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE}, - {"requires_compute_indirect_templates", VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES}, {"skip_driver_workarounds", VKD3D_CONFIG_FLAG_SKIP_DRIVER_WORKAROUNDS}, {"enable_experimental_features", VKD3D_CONFIG_FLAG_ENABLE_EXPERIMENTAL_FEATURES}, {"reject_padded_small_resource_alignment", VKD3D_CONFIG_FLAG_REJECT_PADDED_SMALL_RESOURCE_ALIGNMENT}, @@ -1782,6 +1778,8 @@ static void vkd3d_physical_device_info_apply_workarounds(struct vkd3d_physical_d if (info->vulkan_1_2_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) info->properties2.properties.limits.minStorageBufferOffsetAlignment = 4; + 
//info->properties2.properties.limits.minStorageBufferOffsetAlignment = 64; + /* UE5 is broken and assumes that if mesh shaders are supported, barycentrics are also supported. * This happens to be the case on RDNA2+ and Turing+ on Windows, but Mesa landed barycentrics long * after mesh shaders, so Mesa 23.1 will often fail on boot for practically all UE5 content. @@ -1804,16 +1802,17 @@ static void vkd3d_physical_device_info_apply_workarounds(struct vkd3d_physical_d if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_SKIP_DRIVER_WORKAROUNDS)) { +#if 0 if (info->vulkan_1_2_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && - device->vk_info.NV_device_generated_commands_compute && + device->vk_info.EXT_device_generated_commands && (vkd3d_config_flags & VKD3D_CONFIG_FLAG_DISABLE_DGCC)) { device->vk_info.NV_device_generated_commands_compute = false; device->vk_info.EXT_device_generated_commands = false; - device->device_info.device_generated_commands_compute_features_nv.deviceGeneratedCompute = VK_FALSE; device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands = VK_FALSE; WARN("Disabling DGCC due to config flag.\n"); } +#endif /* Two known bugs in the wild: * - presentID = 0 handling when toggling present mode is broken. 
@@ -2082,12 +2081,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->properties2, &info->shader_sm_builtins_properties); } - if (vulkan_info->VALVE_mutable_descriptor_type || vulkan_info->EXT_mutable_descriptor_type) - { - info->mutable_descriptor_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; - vk_prepend_struct(&info->features2, &info->mutable_descriptor_features); - } - if (vulkan_info->EXT_image_view_min_lod) { info->image_view_min_lod_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT; @@ -2141,23 +2134,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, &info->barycentric_features_khr); } - if (vulkan_info->NV_device_generated_commands) - { - info->device_generated_commands_features_nv.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV; - info->device_generated_commands_properties_nv.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV; - vk_prepend_struct(&info->features2, &info->device_generated_commands_features_nv); - vk_prepend_struct(&info->properties2, &info->device_generated_commands_properties_nv); - } - - if (vulkan_info->NV_device_generated_commands_compute) - { - info->device_generated_commands_compute_features_nv.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_COMPUTE_FEATURES_NV; - vk_prepend_struct(&info->features2, &info->device_generated_commands_compute_features_nv); - } - if (vulkan_info->EXT_shader_image_atomic_int64) { info->shader_image_atomic_int64_features.sType = @@ -2300,16 +2276,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->properties2, &info->compute_shader_derivatives_properties_khr); } - if (vulkan_info->EXT_descriptor_buffer) - { - info->descriptor_buffer_features.sType = - 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; - info->descriptor_buffer_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT; - vk_prepend_struct(&info->features2, &info->descriptor_buffer_features); - vk_prepend_struct(&info->properties2, &info->descriptor_buffer_properties); - } - if (vulkan_info->EXT_pipeline_library_group_handles) { info->pipeline_library_group_handles_features.sType = @@ -2480,6 +2446,14 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, &info->anti_lag_amd); } + if (vulkan_info->EXT_descriptor_heap) + { + info->descriptor_heap_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_HEAP_FEATURES_EXT; + info->descriptor_heap_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_HEAP_PROPERTIES_EXT; + vk_prepend_struct(&info->features2, &info->descriptor_heap_features); + vk_prepend_struct(&info->properties2, &info->descriptor_heap_properties); + } + if (vulkan_info->KHR_unified_image_layouts) { info->unified_image_layouts_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFIED_IMAGE_LAYOUTS_FEATURES_KHR; @@ -3087,7 +3061,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *extended_dynamic_state3; VkPhysicalDeviceAccelerationStructureFeaturesKHR *acceleration_structure; VkPhysicalDeviceLineRasterizationFeaturesEXT *line_rasterization; - VkPhysicalDeviceDescriptorBufferFeaturesEXT *descriptor_buffer; VkPhysicalDevice physical_device = device->vk_physical_device; struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; VkPhysicalDeviceFeatures *features; @@ -3178,9 +3151,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, if (!physical_device_info->fragment_shading_rate_features.primitiveFragmentShadingRate) physical_device_info->mesh_shader_features.primitiveFragmentShadingRateMeshShader = VK_FALSE; - 
descriptor_buffer = &physical_device_info->descriptor_buffer_features; - descriptor_buffer->descriptorBufferCaptureReplay = VK_FALSE; - descriptor_buffer->descriptorBufferImageLayoutIgnored = VK_FALSE; + physical_device_info->descriptor_heap_features.descriptorHeapCaptureReplay = VK_FALSE; /* We only use dynamic rasterization samples. Also Keep the following enabled for 11on12: * alphaToCoverage, sampleMask, lineRasterizationMode, depthClipEnable. */ @@ -3229,8 +3200,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, physical_device_info->extended_dynamic_state2_features.extendedDynamicState2LogicOp = VK_FALSE; /* Unneeded. */ - physical_device_info->device_generated_commands_compute_features_nv.deviceGeneratedComputeCaptureReplay = VK_FALSE; - physical_device_info->device_generated_commands_compute_features_nv.deviceGeneratedComputePipelines = VK_FALSE; physical_device_info->device_generated_commands_features_ext.dynamicGeneratedPipelineLayout = VK_FALSE; { @@ -3249,12 +3218,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, } } - if (physical_device_info->device_generated_commands_features_ext.deviceGeneratedCommands) - { - physical_device_info->device_generated_commands_features_nv.deviceGeneratedCommands = VK_FALSE; - physical_device_info->device_generated_commands_compute_features_nv.deviceGeneratedCompute = VK_FALSE; - } - if (!physical_device_info->vulkan_1_2_properties.robustBufferAccessUpdateAfterBind) { /* Generally, we cannot enable robustness if this is not supported, @@ -4252,15 +4215,12 @@ uint64_t d3d12_device_get_descriptor_heap_gpu_va(struct d3d12_device *device, D3 pthread_mutex_unlock(&device->mutex); va <<= 32; - if (d3d12_device_use_embedded_mutable_descriptors(device)) - { - /* Encodes what type this heap is so that we can decode VA to offset properly later. - * When using embedded descriptors we cannot assume that the descriptor increment - * is the same for CBV_SRV_UAV and sampler anymore. 
*/ - va <<= 1; - if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - va |= VKD3D_RESOURCE_EMBEDDED_RESOURCE_HEAP_MASK; - } + /* Encodes what type this heap is so that we can decode VA to offset properly later. + * When using embedded descriptors we cannot assume that the descriptor increment + * is the same for CBV_SRV_UAV and sampler anymore. */ + va <<= 1; + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + va |= VKD3D_RESOURCE_EMBEDDED_RESOURCE_HEAP_MASK; return va; } @@ -4268,8 +4228,7 @@ uint64_t d3d12_device_get_descriptor_heap_gpu_va(struct d3d12_device *device, D3 void d3d12_device_return_descriptor_heap_gpu_va(struct d3d12_device *device, uint64_t va) { /* Fixup the magic shift we used when allocating. */ - if (d3d12_device_use_embedded_mutable_descriptors(device)) - va >>= 1; + va >>= 1; pthread_mutex_lock(&device->mutex); vkd3d_array_reserve((void **)&device->descriptor_heap_gpu_vas, &device->descriptor_heap_gpu_va_size, @@ -4664,7 +4623,6 @@ static void d3d12_device_destroy(struct d3d12_device *device) vkd3d_null_rtas_allocation_cleanup(&device->null_rtas_allocation, device); vkd3d_memory_allocator_cleanup(&device->memory_allocator, device); vkd3d_memory_transfer_queue_cleanup(&device->memory_transfers); - vkd3d_global_descriptor_buffer_cleanup(&device->global_descriptor_buffer, device); d3d12_device_free_pipeline_libraries(device); /* Tear down descriptor global info late, so we catch last minute faults after we drain the queues. 
*/ vkd3d_descriptor_debug_free_global_info(device->descriptor_qa_global_info, device); @@ -6111,16 +6069,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView_embedded(d3d d3d12_desc_create_cbv_embedded(descriptor.ptr, device, desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView_default(d3d12_device_iface *iface, - const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -{ - struct d3d12_device *device = impl_from_ID3D12Device(iface); - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); - - d3d12_desc_create_cbv(descriptor.ptr, device, desc); -} - static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView_embedded(d3d12_device_iface *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) @@ -6133,17 +6081,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView_embedded(d3d d3d12_desc_create_srv_embedded(descriptor.ptr, device, impl_from_ID3D12Resource(resource), desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView_default(d3d12_device_iface *iface, - ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -{ - struct d3d12_device *device = impl_from_ID3D12Device(iface); - - TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", - iface, resource, desc, descriptor.ptr); - - d3d12_desc_create_srv(descriptor.ptr, device, impl_from_ID3D12Resource(resource), desc); -} +VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info = NULL; static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView_embedded(d3d12_device_iface *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, @@ -6158,56 +6096,27 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView_embedded(d3 device, d3d12_resource_, impl_from_ID3D12Resource(counter_resource), desc); - /* Unknown at this time 
if we can support magic d3d12_uav_info with embedded mutable. */ -} - -VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info = NULL; - -static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView_default(d3d12_device_iface *iface, - ID3D12Resource *resource, ID3D12Resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -{ - const struct vkd3d_vk_device_procs *vk_procs; - VkResult vr; - struct d3d12_resource *d3d12_resource_ = impl_from_ID3D12Resource(resource); - struct d3d12_device *device = impl_from_ID3D12Device(iface); - TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", - iface, resource, counter_resource, desc, descriptor.ptr); - - d3d12_desc_create_uav(descriptor.ptr, - device, d3d12_resource_, - impl_from_ID3D12Resource(counter_resource), desc); - /* d3d12_uav_info stores the pointer to data from previous call to d3d12_device_vkd3d_ext_CaptureUAVInfo(). Below code will update the data. 
*/ if (d3d12_uav_info) { - struct d3d12_desc_split d = d3d12_desc_decode_va(descriptor.ptr); + struct d3d12_desc_split_embedded d = d3d12_desc_decode_embedded_resource_va(descriptor.ptr, + device->bindless_state.descriptor_heap_packed_metadata_offset); if (desc && desc->ViewDimension == D3D12_UAV_DIMENSION_BUFFER) { - d3d12_uav_info->gpuVAStart = d.view->info.buffer.va; - d3d12_uav_info->gpuVASize = d.view->info.buffer.range; + d3d12_uav_info->gpuVAStart = d.metadata->info.buffer.va; + d3d12_uav_info->gpuVASize = d.metadata->info.buffer.range; } else { - VkImageViewAddressPropertiesNVX out_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_ADDRESS_PROPERTIES_NVX }; - VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; - - imageViewHandleInfo.imageView = d.view->info.image.view->vk_image_view; - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - - vk_procs = &device->vk_procs; - d3d12_uav_info->surfaceHandle = VK_CALL(vkGetImageViewHandleNVX(device->vk_device, &imageViewHandleInfo)); + d3d12_uav_info->surfaceHandle = d3d12_device_find_shader_visible_descriptor_heap_offset(device, descriptor.ptr, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - if ((vr = VK_CALL(vkGetImageViewAddressNVX(device->vk_device, imageViewHandleInfo.imageView, &out_info))) < 0) - { - ERR("Failed to get imageview address, vr %d.\n", vr); - return; - } - - d3d12_uav_info->gpuVAStart = out_info.deviceAddress; - d3d12_uav_info->gpuVASize = out_info.size; + /* Is this even used? */ + d3d12_uav_info->gpuVAStart = 0; + d3d12_uav_info->gpuVASize = 0; } + /* Set this to null so that subsequent calls to this API wont update the previous pointer. 
*/ d3d12_uav_info = NULL; } @@ -6248,19 +6157,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler_embedded(d3d12_device_i d3d12_desc_create_sampler_embedded(descriptor.ptr, device, &desc2); } -static void STDMETHODCALLTYPE d3d12_device_CreateSampler_default(d3d12_device_iface *iface, - const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -{ - struct d3d12_device *device = impl_from_ID3D12Device(iface); - D3D12_SAMPLER_DESC2 desc2; - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); - - memcpy(&desc2, desc, sizeof(*desc)); - desc2.Flags = D3D12_SAMPLER_FLAG_NONE; - d3d12_desc_create_sampler(descriptor.ptr, device, &desc2); -} - static void STDMETHODCALLTYPE d3d12_device_CreateSampler2_embedded(d3d12_device_iface *iface, const D3D12_SAMPLER_DESC2 *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -6271,16 +6167,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler2_embedded(d3d12_device_ d3d12_desc_create_sampler_embedded(descriptor.ptr, device, desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateSampler2_default(d3d12_device_iface *iface, - const D3D12_SAMPLER_DESC2 *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -{ - struct d3d12_device *device = impl_from_ID3D12Device(iface); - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); - - d3d12_desc_create_sampler(descriptor.ptr, device, desc); -} - static inline D3D12_CPU_DESCRIPTOR_HANDLE d3d12_advance_cpu_descriptor_handle(D3D12_CPU_DESCRIPTOR_HANDLE handle, unsigned int increment, unsigned int units) { @@ -6391,365 +6277,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(d3d12_device_iface *i descriptor_heap_type); } -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_descriptor_buffer_16_16_4(d3d12_device_iface *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - 
D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -{ - /* Optimized NVIDIA path. Buffers are 16 byte, but images and samplers are just 4 byte indices, - * so we cannot use embedded mutable style copies. */ - - struct d3d12_device *device; - struct d3d12_desc_split dst; - struct d3d12_desc_split src; - size_t i, n; - - TRACE("iface %p, descriptor_count %u, dst_descriptor_range_offset %#lx, " - "src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n", - iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - descriptor_heap_type); - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || - descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) - { - dst = d3d12_desc_decode_va(dst_descriptor_range_offset.ptr); - src = d3d12_desc_decode_va(src_descriptor_range_offset.ptr); - } - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)) - { - const uint8_t *src_set0, *src_set1; - const VkDeviceAddress *src_va; - uint8_t *dst_set0, *dst_set1; - VkDeviceAddress *dst_va; - - dst_set0 = dst.heap->fast_pointer_bank[0]; - dst_set1 = dst.heap->fast_pointer_bank[1]; - dst_va = dst.heap->fast_pointer_bank[2]; - src_set0 = src.heap->fast_pointer_bank[0]; - src_set1 = src.heap->fast_pointer_bank[1]; - src_va = src.heap->fast_pointer_bank[2]; - - dst_set0 += dst.offset * 16; - dst_set1 += dst.offset * 16; - src_set0 += src.offset * 16; - src_set1 += src.offset * 16; - - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - vkd3d_memcpy_aligned_16_cached(dst_set0, src_set0); - vkd3d_memcpy_aligned_16_cached(dst_set1, src_set1); - *dst.view = *src.view; - *dst.types = *src.types; - dst_va[dst.offset] = src_va[src.offset]; - } - else - { - vkd3d_memcpy_aligned_cached(dst_set0, src_set0, 16 * descriptor_count); - vkd3d_memcpy_aligned_cached(dst_set1, src_set1, 16 * descriptor_count); - dst_va += dst.offset; - src_va += src.offset; - - /* Enforce size_t for better x86 
addressing. - * Avoid memcpy since we need to optimize for small descriptor count. */ - for (i = 0, n = descriptor_count; i < n; i++) - { - dst_va[i] = src_va[i]; - dst.view[i] = src.view[i]; - dst.types[i] = src.types[i]; - } - } - } - else if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) - { - const uint32_t *src_sampler = src.heap->fast_pointer_bank[0]; - uint32_t *dst_sampler = dst.heap->fast_pointer_bank[0]; - - src_sampler += src.offset; - dst_sampler += dst.offset; - - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - *dst_sampler = *src_sampler; - *dst.view = *src.view; - *dst.types = *src.types; - } - else - { - /* Enforce size_t for better x86 addressing. - * Avoid memcpy since we need to optimize for small descriptor count. */ - for (i = 0, n = descriptor_count; i < n; i++) - { - dst_sampler[i] = src_sampler[i]; - dst.view[i] = src.view[i]; - dst.types[i] = src.types[i]; - } - } - } - else - { - device = unsafe_impl_from_ID3D12Device(iface); - d3d12_device_copy_descriptors(device, - 1, &dst_descriptor_range_offset, &descriptor_count, - 1, &src_descriptor_range_offset, &descriptor_count, - descriptor_heap_type); - } -} - -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_descriptor_buffer_64_64_32(d3d12_device_iface *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -{ - /* Optimized Intel Arc path. 
*/ - - struct d3d12_device *device; - struct d3d12_desc_split dst; - struct d3d12_desc_split src; - size_t i, n; - - TRACE("iface %p, descriptor_count %u, dst_descriptor_range_offset %#lx, " - "src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n", - iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - descriptor_heap_type); - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || - descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) - { - dst = d3d12_desc_decode_va(dst_descriptor_range_offset.ptr); - src = d3d12_desc_decode_va(src_descriptor_range_offset.ptr); - } - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)) - { - const uint8_t *src_set0, *src_set1; - const VkDeviceAddress *src_va; - uint8_t *dst_set0, *dst_set1; - VkDeviceAddress *dst_va; - - dst_set0 = dst.heap->fast_pointer_bank[0]; - dst_set1 = dst.heap->fast_pointer_bank[1]; - dst_va = dst.heap->fast_pointer_bank[2]; - src_set0 = src.heap->fast_pointer_bank[0]; - src_set1 = src.heap->fast_pointer_bank[1]; - src_va = src.heap->fast_pointer_bank[2]; - - dst_set0 += dst.offset * 64; - dst_set1 += dst.offset * 64; - src_set0 += src.offset * 64; - src_set1 += src.offset * 64; - - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - vkd3d_memcpy_aligned_cached(dst_set0, src_set0, 64); - vkd3d_memcpy_aligned_cached(dst_set1, src_set1, 64); - *dst.view = *src.view; - *dst.types = *src.types; - dst_va[dst.offset] = src_va[src.offset]; - } - else - { - vkd3d_memcpy_aligned_cached(dst_set0, src_set0, 64 * descriptor_count); - vkd3d_memcpy_aligned_cached(dst_set1, src_set1, 64 * descriptor_count); - dst_va += dst.offset; - src_va += src.offset; - - /* Enforce size_t for better x86 addressing. - * Avoid memcpy since we need to optimize for small descriptor count. 
*/ - for (i = 0, n = descriptor_count; i < n; i++) - { - dst_va[i] = src_va[i]; - dst.view[i] = src.view[i]; - dst.types[i] = src.types[i]; - } - } - } - else if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) - { - const uint8_t *src_sampler = src.heap->fast_pointer_bank[0]; - uint8_t *dst_sampler = dst.heap->fast_pointer_bank[0]; - - src_sampler += src.offset * 32; - dst_sampler += dst.offset * 32; - - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - vkd3d_memcpy_aligned_cached(dst_sampler, src_sampler, 32); - *dst.view = *src.view; - *dst.types = *src.types; - } - else - { - vkd3d_memcpy_aligned_cached(dst_sampler, src_sampler, 32 * descriptor_count); - /* Enforce size_t for better x86 addressing. - * Avoid memcpy since we need to optimize for small descriptor count. */ - for (i = 0, n = descriptor_count; i < n; i++) - { - dst.view[i] = src.view[i]; - dst.types[i] = src.types[i]; - } - } - } - else - { - device = unsafe_impl_from_ID3D12Device(iface); - d3d12_device_copy_descriptors(device, - 1, &dst_descriptor_range_offset, &descriptor_count, - 1, &src_descriptor_range_offset, &descriptor_count, - descriptor_heap_type); - } -} - -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_embedded_64_16_packed(d3d12_device_iface *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -{ - struct d3d12_device *device; - TRACE("iface %p, descriptor_count %u, dst_descriptor_range_offset %#lx, " - "src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n", - iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - descriptor_heap_type); - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)) - { - /* If metadata is packed, this collapses to pure memcpy. 
*/ - - /* The cached mask is used to mark if a page is write-combined or not. - * For CPU -> GPU copies, we can use NT stores. This helps Deck performance, - * so it's worthwhile to go to extreme lengths. This memory is always mapped write-combined. - * For CPU -> CPU copies, we cannot use NT stores since it breaks memory ordering if - * other threads aim to read the CPU descriptors later. Fortunately, CPU -> CPU copies - * are quirky at best and never used, so the branch predictor should be able to hide all overhead. */ - - /* Using a subtract here (instead of the more idiomatic negative mask) - * is a cute way of asserting the API requirement that the - * src VA must be a non-shader visible heap. We use aligned loads and stores which will fault if - * there is a misalignment. - * It is also faster since the subtract is folded in to the constant address offsets. */ - - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - if (VKD3D_EXPECT_TRUE(!(dst_descriptor_range_offset.ptr & VKD3D_RESOURCE_EMBEDDED_CACHED_MASK))) - { - vkd3d_memcpy_aligned_64_non_temporal( - (void *)dst_descriptor_range_offset.ptr, - (const void *)(src_descriptor_range_offset.ptr - VKD3D_RESOURCE_EMBEDDED_CACHED_MASK)); - } - else - { - /* If we're copying to host visible descriptor memory, we have to be careful - * not to break memory ordering by using NT stores. - * This path is basically never taken. 
*/ - vkd3d_memcpy_aligned_64_cached( - (void *)(dst_descriptor_range_offset.ptr - VKD3D_RESOURCE_EMBEDDED_CACHED_MASK), - (const void *)(src_descriptor_range_offset.ptr - VKD3D_RESOURCE_EMBEDDED_CACHED_MASK)); - } - } - else - { - if (VKD3D_EXPECT_TRUE(!(dst_descriptor_range_offset.ptr & VKD3D_RESOURCE_EMBEDDED_CACHED_MASK))) - { - vkd3d_memcpy_aligned_non_temporal( - (void *)dst_descriptor_range_offset.ptr, - (const void *)(src_descriptor_range_offset.ptr - VKD3D_RESOURCE_EMBEDDED_CACHED_MASK), - 64 * descriptor_count); - } - else - { - /* If we're copying to host visible descriptor memory, we have to be careful - * not to break memory ordering by using NT stores. - * This path is basically never taken. */ - vkd3d_memcpy_aligned_cached( - (void *)(dst_descriptor_range_offset.ptr - VKD3D_RESOURCE_EMBEDDED_CACHED_MASK), - (const void *)(src_descriptor_range_offset.ptr - VKD3D_RESOURCE_EMBEDDED_CACHED_MASK), - 64 * descriptor_count); - } - } - } - else if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) - { - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - vkd3d_memcpy_aligned_16_cached( - (void *)dst_descriptor_range_offset.ptr, - (const void *)src_descriptor_range_offset.ptr); - } - else - { - vkd3d_memcpy_aligned_cached( - (void *)dst_descriptor_range_offset.ptr, - (const void *)src_descriptor_range_offset.ptr, - 16 * descriptor_count); - } - } - else - { - device = unsafe_impl_from_ID3D12Device(iface); - d3d12_device_copy_descriptors(device, - 1, &dst_descriptor_range_offset, &descriptor_count, - 1, &src_descriptor_range_offset, &descriptor_count, - descriptor_heap_type); - } -} - -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_embedded_32_16_planar(d3d12_device_iface *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -{ - struct d3d12_device *device; - TRACE("iface %p, 
descriptor_count %u, dst_descriptor_range_offset %#lx, " - "src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n", - iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - descriptor_heap_type); - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)) - { - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - /* Expected path. */ - d3d12_desc_copy_embedded_resource_single_32( - dst_descriptor_range_offset.ptr, - src_descriptor_range_offset.ptr); - } - else - { - /* Rare path. */ - d3d12_desc_copy_embedded_resource( - dst_descriptor_range_offset.ptr, - src_descriptor_range_offset.ptr, - 32 * descriptor_count); - } - } - else if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) - { - if (VKD3D_EXPECT_TRUE(descriptor_count == 1)) - { - /* Expected path. */ - vkd3d_memcpy_aligned_16_cached( - (void *)dst_descriptor_range_offset.ptr, - (const void *)src_descriptor_range_offset.ptr); - } - else - { - /* Rare path. 
*/ - vkd3d_memcpy_aligned_cached( - (void *)dst_descriptor_range_offset.ptr, - (const void *)src_descriptor_range_offset.ptr, - 16 * descriptor_count); - } - } - else - { - device = unsafe_impl_from_ID3D12Device(iface); - d3d12_device_copy_descriptors(device, - 1, &dst_descriptor_range_offset, &descriptor_count, - 1, &src_descriptor_range_offset, &descriptor_count, - descriptor_heap_type); - } -} - static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_embedded_generic(d3d12_device_iface *iface, UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, @@ -6768,45 +6295,14 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_embedded_generi d3d12_desc_copy_embedded_resource( dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - device->bindless_state.descriptor_buffer_cbv_srv_uav_size * descriptor_count); + device->bindless_state.descriptor_heap_cbv_srv_uav_size * descriptor_count); } else if (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { vkd3d_memcpy_aligned_cached( (void *)dst_descriptor_range_offset.ptr, (const void *)src_descriptor_range_offset.ptr, - device->bindless_state.descriptor_buffer_sampler_size * descriptor_count); - } - else - { - d3d12_device_copy_descriptors(device, - 1, &dst_descriptor_range_offset, &descriptor_count, - 1, &src_descriptor_range_offset, &descriptor_count, - descriptor_heap_type); - } -} - -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_default(d3d12_device_iface *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -{ - struct d3d12_device *device; - TRACE("iface %p, descriptor_count %u, dst_descriptor_range_offset %#lx, " - "src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n", - iface, descriptor_count, 
dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - descriptor_heap_type); - - device = unsafe_impl_from_ID3D12Device(iface); - - if (VKD3D_EXPECT_TRUE(descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || - descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) - { - /* Not quite as fast as embedded, but still not bad. */ - d3d12_device_copy_descriptors_cbv_srv_uav_sampler(device, - dst_descriptor_range_offset, src_descriptor_range_offset, - descriptor_heap_type, - descriptor_count); + device->bindless_state.descriptor_heap_sampler_size * descriptor_count); } else { @@ -8478,25 +7974,6 @@ static void d3d12_device_create_sampler_feedback_desc(D3D12_UNORDERED_ACCESS_VIE uav_desc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; } -static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView_default(d3d12_device_iface *iface, - ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -{ - struct d3d12_resource *feedback = impl_from_ID3D12Resource(feedback_resource); - struct d3d12_device *device = impl_from_ID3D12Device(iface); - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; - - TRACE("iface %p, target_resource %p, feedback_resource %p, descriptor %#lx\n", - iface, target_resource, feedback_resource, descriptor.ptr); - - /* NULL paired resource means NULL descriptor. 
- * https://microsoft.github.io/DirectX-Specs/d3d/SamplerFeedback.html#null-feedback-map-binding-is-permitted */ - if (!target_resource) - feedback = NULL; - - d3d12_device_create_sampler_feedback_desc(&uav_desc, feedback); - d3d12_desc_create_uav(descriptor.ptr, device, feedback, NULL, &uav_desc); -} - static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView_embedded(d3d12_device_iface *iface, ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -8911,12 +8388,7 @@ CONST_VTBL struct ID3D12Device12Vtbl d3d12_device_vtbl_##name = \ d3d12_device_GetResourceAllocationInfo3, \ } -VKD3D_DECLARE_D3D12_DEVICE_VARIANT(default, default, default); -VKD3D_DECLARE_D3D12_DEVICE_VARIANT(embedded_64_16_packed, embedded, embedded_64_16_packed); -VKD3D_DECLARE_D3D12_DEVICE_VARIANT(embedded_32_16_planar, embedded, embedded_32_16_planar); VKD3D_DECLARE_D3D12_DEVICE_VARIANT(embedded_generic, embedded, embedded_generic); -VKD3D_DECLARE_D3D12_DEVICE_VARIANT(descriptor_buffer_16_16_4, default, descriptor_buffer_16_16_4); -VKD3D_DECLARE_D3D12_DEVICE_VARIANT(descriptor_buffer_64_64_32, default, descriptor_buffer_64_64_32); static struct d3d12_device *impl_from_ID3D12DeviceConfiguration1(ID3D12DeviceConfiguration1 *iface) { @@ -9258,7 +8730,7 @@ static bool d3d12_device_supports_16bit_shader_ops(struct d3d12_device *device) device->device_info.vulkan_1_1_features.uniformAndStorageBuffer16BitAccess && device->device_info.vulkan_1_2_properties.shaderDenormPreserveFloat16 && device->device_info.vulkan_1_2_properties.denormBehaviorIndependence != VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE && - device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 16; + device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 4; } static void d3d12_device_caps_init_feature_options(struct d3d12_device *device) @@ -9467,7 +8939,7 @@ static void 
d3d12_device_caps_init_feature_options9(struct d3d12_device *device) /* If we cannot expose AtomicInt64OnDescriptorHeapResourceSupported, we cannot expose this one either. */ options9->AtomicInt64OnTypedResourceSupported = device->device_info.shader_image_atomic_int64_features.shaderImageInt64Atomics && - device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 16; + device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 4; options9->DerivativesInMeshAndAmplificationShadersSupported = d3d12_device_determine_mesh_shader_tier(device) && device->d3d12_caps.max_shader_model >= D3D_SHADER_MODEL_6_6 && device->device_info.compute_shader_derivatives_properties_khr.meshAndTaskShaderDerivatives; @@ -9495,7 +8967,7 @@ static void d3d12_device_caps_init_feature_options11(struct d3d12_device *device /* If we're not using raw SSBOs, we cannot support 64-bit atomics. */ options11->AtomicInt64OnDescriptorHeapResourceSupported = device->device_info.vulkan_1_2_features.shaderBufferInt64Atomics && - device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 16; + device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 4; } static void d3d12_device_caps_init_feature_options12(struct d3d12_device *device) @@ -10248,6 +9720,8 @@ static void vkd3d_compute_shader_interface_key(struct d3d12_device *device) key = hash_fnv1_iterate_u64(key, vkd3d_shader_get_revision()); key = hash_fnv1_iterate_u32(key, device->device_info.vulkan_1_3_properties.minSubgroupSize); key = hash_fnv1_iterate_u32(key, device->device_info.vulkan_1_3_properties.maxSubgroupSize); + +#if 0 key = hash_fnv1_iterate_u32(key, device->bindless_state.flags); key = hash_fnv1_iterate_u32(key, device->bindless_state.cbv_srv_uav_count); key = hash_fnv1_iterate_u32(key, device->bindless_state.set_count); @@ -10259,13 +9733,14 @@ static void vkd3d_compute_shader_interface_key(struct d3d12_device *device) key = 
hash_fnv1_iterate_u32(key, device->bindless_state.set_info[i].heap_type); key = hash_fnv1_iterate_u32(key, device->bindless_state.set_info[i].vk_descriptor_type); } +#endif - if (d3d12_device_use_embedded_mutable_descriptors(device)) + //if (d3d12_device_use_embedded_mutable_descriptors(device)) { /* Will affect shaders which use raw VA descriptors like RTAS, UAV counters and local root signatures. */ - key = hash_fnv1_iterate_u32(key, device->bindless_state.descriptor_buffer_cbv_srv_uav_size); + key = hash_fnv1_iterate_u32(key, device->bindless_state.descriptor_heap_cbv_srv_uav_size); /* Will affect shaders which use local root signatures. */ - key = hash_fnv1_iterate_u32(key, device->bindless_state.descriptor_buffer_sampler_size); + key = hash_fnv1_iterate_u32(key, device->bindless_state.descriptor_heap_sampler_size); } key = hash_fnv1_iterate_u32(key, quirk_info->global_quirks); @@ -10327,6 +9802,7 @@ static void d3d12_device_replace_vtable(struct d3d12_device *device) /* Don't bother modifying CopyDescriptors path, its main overhead is chasing other pointers anyway, * and that code path handles embedded mutable descriptors. 
*/ +#if 0 if (d3d12_device_use_embedded_mutable_descriptors(device)) { if ((device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA) && @@ -10368,6 +9844,7 @@ static void d3d12_device_replace_vtable(struct d3d12_device *device) device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl_descriptor_buffer_64_64_32; } } +#endif } extern CONST_VTBL struct ID3D12DeviceExt2Vtbl d3d12_device_vkd3d_ext_vtbl; @@ -10387,20 +9864,9 @@ static void vkd3d_scratch_pool_init(struct d3d12_device *device) if (device->device_info.vulkan_1_2_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) { - if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES) && - (device->device_info.device_generated_commands_compute_features_nv.deviceGeneratedCompute || - device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands)) - { - /* DGCC preprocess buffers are gigantic on NV. Starfield requires 27 MB for 4096 dispatches ... */ - device->scratch_pools[VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS].block_size = - VKD3D_SCRATCH_BUFFER_SIZE_DGCC_PREPROCESS_NV; - } - else - { - /* Halo Infinite can hit ~2.5 MB DGC calls on NV. Bumping to 4 MiB blocks is known to help. */ - device->scratch_pools[VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS].block_size = - VKD3D_SCRATCH_BUFFER_SIZE_PREPROCESS_NV; - } + /* Halo Infinite can hit ~2.5 MB DGC calls on NV. Bumping to 4 MiB blocks is known to help. */ + device->scratch_pools[VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS].block_size = + VKD3D_SCRATCH_BUFFER_SIZE_PREPROCESS_NV; } /* DGC tends to be pretty spammy with indirect buffers. 
@@ -10440,7 +9906,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, else device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl_default; #else - device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl_default; + device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl_embedded_generic; #endif device->refcount = 1; @@ -10500,11 +9966,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_memory_info_init(&device->memory_info, device))) goto out_cleanup_format_info; - if (FAILED(hr = vkd3d_global_descriptor_buffer_init(&device->global_descriptor_buffer, device))) - goto out_cleanup_memory_info; - if (FAILED(hr = vkd3d_bindless_state_init(&device->bindless_state, device))) - goto out_cleanup_global_descriptor_buffer; + goto out_cleanup_memory_info; if (FAILED(hr = vkd3d_view_map_init(&device->sampler_map.map))) goto out_cleanup_bindless_state; @@ -10610,8 +10073,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_view_map_destroy(&device->sampler_map.map, device); out_cleanup_bindless_state: vkd3d_bindless_state_cleanup(&device->bindless_state, device); -out_cleanup_global_descriptor_buffer: - vkd3d_global_descriptor_buffer_cleanup(&device->global_descriptor_buffer, device); out_cleanup_memory_info: vkd3d_memory_info_cleanup(&device->memory_info, device); out_cleanup_format_info: diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c index 86d1577a06..d7e9749243 100644 --- a/libs/vkd3d/device_vkd3d_ext.c +++ b/libs/vkd3d/device_vkd3d_ext.c @@ -73,7 +73,9 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(d3d12_d ret_val = device->vk_info.NVX_binary_import; break; case D3D12_VK_NVX_IMAGE_VIEW_HANDLE: - ret_val = device->vk_info.NVX_image_view_handle; + /* For descriptor heap interactions. 
*/ + ret_val = device->vk_info.NVX_image_view_handle && + device->vk_procs.vkGetDeviceCombinedImageSamplerIndexNVX; break; case D3D12_VK_NV_LOW_LATENCY_2: ret_val = device->vk_info.NV_low_latency2; @@ -180,63 +182,83 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShade return S_OK; } +uint32_t d3d12_device_find_shader_visible_descriptor_heap_offset( + struct d3d12_device *device, vkd3d_cpu_descriptor_va_t va, D3D12_DESCRIPTOR_HEAP_TYPE type) +{ + size_t offset = vkd3d_va_map_query_descriptor_heap_offset(&device->memory_allocator.va_map, va, type); + if (offset == SIZE_MAX) + return UINT32_MAX; + + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + offset += device->bindless_state.heap_redzone_size; + offset /= device->device_info.descriptor_heap_properties.imageDescriptorSize; + } + else + { + offset /= device->device_info.descriptor_heap_properties.samplerDescriptorSize; + } + + return offset; +} + static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle) { - VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_desc_split sampler_desc; - struct d3d12_desc_split srv_desc; struct d3d12_device *device; + uint32_t sampler_index; + uint32_t image_index; TRACE("iface %p, srv_handle %zu, sampler_handle %zu, cuda_texture_handle %p.\n", iface, (size_t)srv_handle.ptr, (size_t)sampler_handle.ptr, cuda_texture_handle); if (!cuda_texture_handle) - return E_INVALIDARG; + return E_INVALIDARG; + + /* Need to translate CPU_DESCRIPTOR_HANDLE to heap offsets. + * We only know this information for shader visible heaps. 
*/ device = d3d12_device_from_ID3D12DeviceExt(iface); - srv_desc = d3d12_desc_decode_va(srv_handle.ptr); - sampler_desc = d3d12_desc_decode_va(sampler_handle.ptr); + vk_procs = &device->vk_procs; - /* If image flag is not set, descriptor cannot be used as a CudaTexture */ - if (!(srv_desc.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW)) - return E_INVALIDARG; + image_index = d3d12_device_find_shader_visible_descriptor_heap_offset(device, srv_handle.ptr, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + sampler_index = d3d12_device_find_shader_visible_descriptor_heap_offset(device, sampler_handle.ptr, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - imageViewHandleInfo.imageView = srv_desc.view->info.image.view->vk_image_view; - imageViewHandleInfo.sampler = sampler_desc.view->info.image.view->vk_sampler; - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + if (image_index == UINT32_MAX || sampler_index == UINT32_MAX) + { + FIXME("Could not find heap index. CPU handles must point to SHADER_VISIBLE heaps to have any meaning.\n"); + return E_INVALIDARG; + } - vk_procs = &device->vk_procs; - *cuda_texture_handle = VK_CALL(vkGetImageViewHandleNVX(device->vk_device, &imageViewHandleInfo)); + *cuda_texture_handle = VK_CALL(vkGetDeviceCombinedImageSamplerIndexNVX(device->vk_device, image_index, sampler_index)); return S_OK; } static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, UINT32 *cuda_surface_handle) { - VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; - const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_desc_split uav_desc; - struct d3d12_device *device; + struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); + uint32_t image_index; TRACE("iface %p, uav_handle %zu, cuda_surface_handle %p.\n", iface, (size_t)uav_handle.ptr, cuda_surface_handle); + if 
(!cuda_surface_handle) - return E_INVALIDARG; + return E_INVALIDARG; - device = d3d12_device_from_ID3D12DeviceExt(iface); - uav_desc = d3d12_desc_decode_va(uav_handle.ptr); + image_index = d3d12_device_find_shader_visible_descriptor_heap_offset(device, uav_handle.ptr, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - /* If image flag is not set, descriptor cannot be used as a CudaSurface */ - if (!(uav_desc.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW)) + if (image_index == UINT32_MAX) + { + FIXME("Could not find heap index. CPU handles must point to SHADER_VISIBLE heaps to have any meaning.\n"); return E_INVALIDARG; + } - imageViewHandleInfo.imageView = uav_desc.view->info.image.view->vk_image_view; - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - - vk_procs = &device->vk_procs; - *cuda_surface_handle = VK_CALL(vkGetImageViewHandleNVX(device->vk_device, &imageViewHandleInfo)); - return S_OK; + *cuda_surface_handle = image_index; + return S_OK; } extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info; @@ -244,10 +266,10 @@ extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info; static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(d3d12_device_vkd3d_ext_iface *iface, D3D12_UAV_INFO *uav_info) { if (!uav_info) - return E_INVALIDARG; + return E_INVALIDARG; TRACE("iface %p, uav_info %p.\n", iface, uav_info); - + /* CaptureUAVInfo() supposed to capture the information from the next CreateUnorderedAccess() on the same thread. 
We use d3d12_uav_info pointer to update the information in CreateUnorderedAccess() */ d3d12_uav_info = uav_info; @@ -312,15 +334,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShader static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaMergedTextureSamplerObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS *params) { - VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct d3d12_desc_split sampler_desc, texture_desc; - - TRACE("iface %p, tex_desc %zu, smp_desc %zu.\n", - iface, (size_t)params->texDesc, (size_t)params->smpDesc); + uint32_t image_index = 0, sampler_index = 0; - if (!device->vk_info.supports_cubin_64bit || !vk_procs->vkGetImageViewHandle64NVX) + if (!device->vk_info.supports_cubin_64bit) return E_NOTIMPL; if (params->pNext) @@ -329,40 +347,39 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaMergedTextureSamp params->pNext = NULL; } - texture_desc = d3d12_desc_decode_va(params->texDesc); - - if (!(texture_desc.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW)) - return E_INVALIDARG; - - imageViewHandleInfo.imageView = texture_desc.view->info.image.view->vk_image_view; + if (params->texDesc) + { + image_index = d3d12_device_find_shader_visible_descriptor_heap_offset(device, params->texDesc, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } if (params->smpDesc) { - sampler_desc = d3d12_desc_decode_va(params->smpDesc); - imageViewHandleInfo.sampler = sampler_desc.view->info.image.view->vk_sampler; - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + sampler_index = d3d12_device_find_shader_visible_descriptor_heap_offset(device, params->smpDesc, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); } - else + + if (image_index == UINT32_MAX || 
sampler_index == UINT32_MAX) { - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + FIXME("Could not find heap index. CPU handles must point to SHADER_VISIBLE heaps to have any meaning.\n"); + return E_INVALIDARG; } - params->textureHandle = VK_CALL(vkGetImageViewHandle64NVX(device->vk_device, &imageViewHandleInfo)); + params->textureHandle = VK_CALL(vkGetDeviceCombinedImageSamplerIndexNVX(device->vk_device, image_index, sampler_index)); return S_OK; } static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaIndependentDescriptorObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS *params) { - VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct d3d12_desc_split desc; + uint32_t image_index = 0; TRACE("iface %p, desc %zu, type %d.\n", iface, (size_t)params->desc, params->type); - if (!device->vk_info.supports_cubin_64bit || !vk_procs->vkGetImageViewHandle64NVX) + if (!device->vk_info.supports_cubin_64bit) return E_NOTIMPL; if (params->pNext) @@ -371,20 +388,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaIndependentDescri params->pNext = NULL; } - desc = d3d12_desc_decode_va(params->desc); - - if (!(desc.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW)) - return E_INVALIDARG; - - imageViewHandleInfo.imageView = desc.view->info.image.view->vk_image_view; - switch (params->type) { case D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SURFACE: - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - break; case D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TEXTURE: - imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; break; case D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SAMPLER: FIXME("SAMPLER object type not supported.\n"); @@ -394,7 +401,16 @@ 
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaIndependentDescri return E_INVALIDARG; } - params->handle = VK_CALL(vkGetImageViewHandle64NVX(device->vk_device, &imageViewHandleInfo)); + image_index = d3d12_device_find_shader_visible_descriptor_heap_offset(device, params->desc, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + if (image_index == UINT32_MAX) + { + FIXME("Could not find heap index. CPU handles must point to SHADER_VISIBLE heaps to have any meaning.\n"); + return E_INVALIDARG; + } + + params->handle = VK_CALL(vkGetDeviceCombinedImageSamplerIndexNVX(device->vk_device, image_index, 0)); return S_OK; } diff --git a/libs/vkd3d/meta.c b/libs/vkd3d/meta.c index 40bad0b88e..a63b7b6841 100644 --- a/libs/vkd3d/meta.c +++ b/libs/vkd3d/meta.c @@ -50,9 +50,7 @@ static VkResult vkd3d_meta_create_descriptor_set_layout(struct d3d12_device *dev set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; set_layout_info.bindingCount = binding_count; set_layout_info.pBindings = bindings; - - if (d3d12_device_uses_descriptor_buffers(device) && descriptor_buffer_compatible) - set_layout_info.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + (void)descriptor_buffer_compatible; return VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_layout_info, NULL, set_layout)); } @@ -95,7 +93,16 @@ static VkResult vkd3d_meta_create_pipeline_layout(struct d3d12_device *device, pipeline_layout_info.pushConstantRangeCount = push_constant_range_count; pipeline_layout_info.pPushConstantRanges = push_constant_ranges; - return VK_CALL(vkCreatePipelineLayout(device->vk_device, &pipeline_layout_info, NULL, pipeline_layout)); + if (set_layout_count == 0) + { + /* Don't need a pipeline layout where we're going. 
*/ + *pipeline_layout = VK_NULL_HANDLE; + return VK_SUCCESS; + } + else + { + return VK_CALL(vkCreatePipelineLayout(device->vk_device, &pipeline_layout_info, NULL, pipeline_layout)); + } } static void vkd3d_meta_make_shader_stage(VkPipelineShaderStageCreateInfo *info, VkShaderStageFlagBits stage, @@ -118,8 +125,12 @@ static VkResult vkd3d_meta_create_compute_pipeline(struct d3d12_device *device, VkPipeline *pipeline) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkShaderDescriptorSetAndBindingMappingInfoEXT mapping_info; struct vkd3d_queue_timeline_trace_cookie cookie; + VkDescriptorSetAndBindingMappingEXT mapping[4]; VkComputePipelineCreateInfo pipeline_info; + VkPipelineCreateFlags2CreateInfo flags2; + VkSamplerCreateInfo nearest_sampler; VkShaderModule module; VkResult vr; @@ -136,14 +147,74 @@ static VkResult vkd3d_meta_create_compute_pipeline(struct d3d12_device *device, pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; - if (d3d12_device_uses_descriptor_buffers(device) && descriptor_buffer_compatible) - pipeline_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + memset(&flags2, 0, sizeof(flags2)); + flags2.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; vkd3d_meta_make_shader_stage(&pipeline_info.stage, VK_SHADER_STAGE_COMPUTE_BIT, module, "main", specialization_info); pipeline_info.stage.pNext = required_size; + if (layout == VK_NULL_HANDLE) + { + flags2.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + flags2.pNext = pipeline_info.pNext; + pipeline_info.pNext = &flags2; + + /* At most we use a trivial mapping with one meta descriptor at set = 0, binding = 0. 
*/ + memset(&mapping_info, 0, sizeof(mapping_info)); + memset(mapping, 0, sizeof(mapping)); + + mapping_info.sType = VK_STRUCTURE_TYPE_SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT; + mapping_info.pNext = pipeline_info.stage.pNext; + pipeline_info.stage.pNext = &mapping_info; + + mapping_info.pMappings = mapping; + mapping_info.mappingCount = ARRAY_SIZE(mapping); + + mapping[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping[0].source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT; + mapping[0].descriptorSet = 0; + mapping[0].firstBinding = 0; + mapping[0].bindingCount = 1; + mapping[0].resourceMask = VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT; + mapping[0].sourceData.pushIndex.heapOffset = 0; /* We account for any red-zone ourselves. */ + mapping[0].sourceData.pushIndex.pushOffset = 128; + mapping[0].sourceData.pushIndex.heapArrayStride = device->bindless_state.descriptor_heap_cbv_srv_uav_size; + mapping[0].sourceData.pushIndex.heapIndexStride = device->bindless_state.descriptor_heap_cbv_srv_uav_size; + + mapping[1] = mapping[0]; + mapping[1].resourceMask = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + mapping[1].sourceData.pushIndex.heapOffset = device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + + /* Some meta shaders use up to 3 descriptors. Just reserve more push registers + * since we cannot allocate meta descriptors linearly. 
*/ + mapping[2] = mapping[0]; + mapping[2].sourceData.pushIndex.pushOffset += sizeof(uint32_t); + mapping[2].firstBinding += 1; + + mapping[3] = mapping[2]; + mapping[3].sourceData.pushIndex.pushOffset += sizeof(uint32_t); + mapping[3].firstBinding += 1; + + memset(&nearest_sampler, 0, sizeof(nearest_sampler)); + nearest_sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + nearest_sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + nearest_sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + nearest_sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + nearest_sampler.minFilter = VK_FILTER_NEAREST; + nearest_sampler.magFilter = VK_FILTER_NEAREST; + nearest_sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + mapping[0].sourceData.pushIndex.pEmbeddedSampler = &nearest_sampler; + mapping[2].sourceData.pushIndex.pEmbeddedSampler = &nearest_sampler; + mapping[3].sourceData.pushIndex.pEmbeddedSampler = &nearest_sampler; + } + cookie = vkd3d_queue_timeline_trace_register_pso_compile(&device->queue_timeline_trace); vr = VK_CALL(vkCreateComputePipelines(device->vk_device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, pipeline)); vkd3d_queue_timeline_trace_complete_pso_compile(&device->queue_timeline_trace, cookie, 0, "META COMP"); @@ -159,12 +230,14 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ const VkSpecializationInfo *spec_info, bool descriptor_buffer_compatible, VkPipeline *vk_pipeline) { const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs; + VkShaderDescriptorSetAndBindingMappingInfoEXT mapping_info; VkPipelineColorBlendAttachmentState blend_attachment; VkPipelineShaderStageCreateInfo shader_stages[3]; struct vkd3d_queue_timeline_trace_cookie cookie; VkPipelineInputAssemblyStateCreateInfo ia_state; VkPipelineRasterizationStateCreateInfo rs_state; VkPipelineRenderingCreateInfoKHR rendering_info; + VkDescriptorSetAndBindingMappingEXT mapping[3]; 
VkPipelineVertexInputStateCreateInfo vi_state; VkPipelineMultisampleStateCreateInfo ms_state; VkPipelineColorBlendStateCreateInfo cb_state; @@ -172,6 +245,8 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ VkPipelineDynamicStateCreateInfo dyn_state; VkGraphicsPipelineCreateInfo pipeline_info; const uint32_t sample_mask = 0xFFFFFFFF; + VkPipelineCreateFlags2CreateInfo flags2; + VkSamplerCreateInfo nearest_sampler; VkResult vr; static const VkDynamicState common_dynamic_states[] = @@ -281,8 +356,11 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; - if (d3d12_device_uses_descriptor_buffers(meta_ops->device) && descriptor_buffer_compatible) - pipeline_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + memset(&flags2, 0, sizeof(flags2)); + flags2.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + + //if (d3d12_device_uses_descriptor_buffers(meta_ops->device) && descriptor_buffer_compatible) + // pipeline_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; vkd3d_meta_make_shader_stage(&shader_stages[pipeline_info.stageCount++], VK_SHADER_STAGE_VERTEX_BIT, @@ -301,6 +379,57 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ VK_SHADER_STAGE_FRAGMENT_BIT, fs_module, "main", spec_info); } + if (layout == VK_NULL_HANDLE) + { + flags2.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + flags2.pNext = pipeline_info.pNext; + pipeline_info.pNext = &flags2; + + /* At most we use a trivial mapping with one meta descriptor at set = 0, binding = 0. 
*/ + memset(&mapping_info, 0, sizeof(mapping_info)); + memset(mapping, 0, sizeof(mapping)); + + mapping_info.sType = VK_STRUCTURE_TYPE_SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT; + mapping_info.pNext = shader_stages[pipeline_info.stageCount - 1].pNext; + shader_stages[pipeline_info.stageCount - 1].pNext = &mapping_info; + + mapping_info.pMappings = mapping; + mapping_info.mappingCount = ARRAY_SIZE(mapping); + + mapping[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping[0].source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT; + mapping[0].descriptorSet = 0; + mapping[0].firstBinding = 0; + mapping[0].bindingCount = 1; + mapping[0].resourceMask = VK_SPIRV_RESOURCE_TYPE_ALL_EXT; + mapping[0].sourceData.pushIndex.heapOffset = 0; /* We account for any red-zone ourselves. */ + mapping[0].sourceData.pushIndex.pushOffset = 128; + mapping[0].sourceData.pushIndex.heapArrayStride = + meta_ops->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + mapping[0].sourceData.pushIndex.heapIndexStride = + meta_ops->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + + mapping[1] = mapping[0]; + mapping[1].sourceData.pushIndex.pushOffset += sizeof(uint32_t); + mapping[1].firstBinding += 1; + + mapping[2] = mapping[1]; + mapping[2].sourceData.pushIndex.pushOffset += sizeof(uint32_t); + mapping[2].firstBinding += 1; + + memset(&nearest_sampler, 0, sizeof(nearest_sampler)); + nearest_sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + nearest_sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + nearest_sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + nearest_sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + nearest_sampler.minFilter = VK_FILTER_NEAREST; + nearest_sampler.magFilter = VK_FILTER_NEAREST; + nearest_sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + mapping[0].sourceData.pushIndex.pEmbeddedSampler = &nearest_sampler; + mapping[1].sourceData.pushIndex.pEmbeddedSampler = 
&nearest_sampler; + mapping[2].sourceData.pushIndex.pEmbeddedSampler = &nearest_sampler; + } + cookie = vkd3d_queue_timeline_trace_register_pso_compile(&meta_ops->device->queue_timeline_trace); if ((vr = VK_CALL(vkCreateGraphicsPipelines(meta_ops->device->vk_device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, vk_pipeline)))) @@ -343,12 +472,12 @@ static void vkd3d_clear_uav_ops_cleanup(struct vkd3d_clear_uav_ops *meta_clear_u } static HRESULT vkd3d_clear_uav_ops_init(struct vkd3d_clear_uav_ops *meta_clear_uav_ops, - struct d3d12_device *device) + struct d3d12_device *device, bool use_heap) { VkDescriptorSetLayoutBinding set_binding; VkPushConstantRange push_constant_range; + VkResult vr = VK_SUCCESS; unsigned int i; - VkResult vr; struct { VkDescriptorSetLayout *set_layout; @@ -426,7 +555,8 @@ static HRESULT vkd3d_clear_uav_ops_init(struct vkd3d_clear_uav_ops *meta_clear_u { set_binding.descriptorType = set_layouts[i].descriptor_type; - vr = vkd3d_meta_create_descriptor_set_layout(device, 1, &set_binding, true, set_layouts[i].set_layout); + if (!use_heap) + vr = vkd3d_meta_create_descriptor_set_layout(device, 1, &set_binding, true, set_layouts[i].set_layout); if (vr < 0) { @@ -434,7 +564,7 @@ static HRESULT vkd3d_clear_uav_ops_init(struct vkd3d_clear_uav_ops *meta_clear_u goto fail; } - vr = vkd3d_meta_create_pipeline_layout(device, 1, set_layouts[i].set_layout, + vr = vkd3d_meta_create_pipeline_layout(device, !use_heap ? 1 : 0, set_layouts[i].set_layout, 1, &push_constant_range, set_layouts[i].pipeline_layout); if (vr < 0) @@ -461,9 +591,9 @@ static HRESULT vkd3d_clear_uav_ops_init(struct vkd3d_clear_uav_ops *meta_clear_u } struct vkd3d_clear_uav_pipeline vkd3d_meta_get_clear_buffer_uav_pipeline(struct vkd3d_meta_ops *meta_ops, - bool as_uint, bool raw) + bool as_uint, bool raw, bool heap) { - struct vkd3d_clear_uav_ops *meta_clear_uav_ops = &meta_ops->clear_uav; + struct vkd3d_clear_uav_ops *meta_clear_uav_ops = heap ? 
&meta_ops->clear_uav_heap : &meta_ops->clear_uav_legacy; struct vkd3d_clear_uav_pipeline info; const struct vkd3d_clear_uav_pipelines *pipelines = (as_uint || raw) @@ -477,9 +607,9 @@ struct vkd3d_clear_uav_pipeline vkd3d_meta_get_clear_buffer_uav_pipeline(struct } struct vkd3d_clear_uav_pipeline vkd3d_meta_get_clear_image_uav_pipeline(struct vkd3d_meta_ops *meta_ops, - VkImageViewType image_view_type, bool as_uint) + VkImageViewType image_view_type, bool as_uint, bool heap) { - struct vkd3d_clear_uav_ops *meta_clear_uav_ops = &meta_ops->clear_uav; + struct vkd3d_clear_uav_ops *meta_clear_uav_ops = heap ? &meta_ops->clear_uav_heap : &meta_ops->clear_uav_legacy; struct vkd3d_clear_uav_pipeline info; const struct vkd3d_clear_uav_pipelines *pipelines = as_uint @@ -569,7 +699,7 @@ static void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_ } static HRESULT vkd3d_copy_image_ops_init(struct vkd3d_copy_image_ops *meta_copy_image_ops, - struct d3d12_device *device) + struct d3d12_device *device, bool use_heap) { VkDescriptorSetLayoutBinding set_binding; VkPushConstantRange push_constant_range; @@ -584,27 +714,31 @@ static HRESULT vkd3d_copy_image_ops_init(struct vkd3d_copy_image_ops *meta_copy_ return hresult_from_errno(rc); } - set_binding.binding = 0; - set_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - set_binding.descriptorCount = 1; - set_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - set_binding.pImmutableSamplers = NULL; - - if ((vr = vkd3d_meta_create_descriptor_set_layout(device, 1, &set_binding, true, &meta_copy_image_ops->vk_set_layout)) < 0) + if (!use_heap) { - ERR("Failed to create descriptor set layout, vr %d.\n", vr); - goto fail; - } + set_binding.binding = 0; + set_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + set_binding.descriptorCount = 1; + set_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + set_binding.pImmutableSamplers = NULL; - push_constant_range.stageFlags = 
VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_range.offset = 0; - push_constant_range.size = sizeof(struct vkd3d_copy_image_args); + if ((vr = vkd3d_meta_create_descriptor_set_layout(device, 1, &set_binding, true, + &meta_copy_image_ops->vk_set_layout)) < 0) + { + ERR("Failed to create descriptor set layout, vr %d.\n", vr); + goto fail; + } - if ((vr = vkd3d_meta_create_pipeline_layout(device, 1, &meta_copy_image_ops->vk_set_layout, - 1, &push_constant_range, &meta_copy_image_ops->vk_pipeline_layout))) - { - ERR("Failed to create pipeline layout, vr %d.\n", vr); - goto fail; + push_constant_range.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + push_constant_range.offset = 0; + push_constant_range.size = sizeof(struct vkd3d_copy_image_args); + + if ((vr = vkd3d_meta_create_pipeline_layout(device, 1, &meta_copy_image_ops->vk_set_layout, + 1, &push_constant_range, &meta_copy_image_ops->vk_pipeline_layout))) + { + ERR("Failed to create pipeline layout, vr %d.\n", vr); + goto fail; + } } if ((vr = vkd3d_meta_create_shader_module(device, SPIRV_CODE(fs_copy_image_float), @@ -664,9 +798,10 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_ } static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_pipeline *pipeline) + const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_pipeline *pipeline, bool use_heap) { - struct vkd3d_copy_image_ops *meta_copy_image_ops = &meta_ops->copy_image; + struct vkd3d_copy_image_ops *meta_copy_image_ops = + use_heap ? 
&meta_ops->copy_image_heap : &meta_ops->copy_image_legacy; VkPipelineDepthStencilStateCreateInfo ds_state; unsigned int dynamic_state_count; VkSpecializationInfo spec_info; @@ -782,9 +917,10 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta } HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_info *info) + const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_info *info, bool use_heap) { - struct vkd3d_copy_image_ops *meta_copy_image_ops = &meta_ops->copy_image; + struct vkd3d_copy_image_ops *meta_copy_image_ops = + use_heap ? &meta_ops->copy_image_heap : &meta_ops->copy_image_legacy; struct vkd3d_copy_image_pipeline *pipeline; HRESULT hr; size_t i; @@ -822,7 +958,7 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops, pipeline = &meta_copy_image_ops->pipelines[meta_copy_image_ops->pipeline_count++]; - if (FAILED(hr = vkd3d_meta_create_copy_image_pipeline(meta_ops, key, pipeline))) + if (FAILED(hr = vkd3d_meta_create_copy_image_pipeline(meta_ops, key, pipeline, use_heap))) { pthread_mutex_unlock(&meta_copy_image_ops->mutex); return hr; @@ -906,7 +1042,8 @@ static void vkd3d_resolve_image_ops_cleanup(struct vkd3d_resolve_image_ops *meta vkd3d_free(meta_resolve_image_ops->pipelines); } -static HRESULT vkd3d_resolve_image_ops_init(struct vkd3d_resolve_image_ops *meta_resolve_image_ops, struct d3d12_device *device) +static HRESULT vkd3d_resolve_image_ops_init( + struct vkd3d_resolve_image_ops *meta_resolve_image_ops, struct d3d12_device *device, bool use_heap) { VkDescriptorSetLayoutBinding set_bindings[2]; VkPushConstantRange push_constant_range; @@ -924,52 +1061,57 @@ static HRESULT vkd3d_resolve_image_ops_init(struct vkd3d_resolve_image_ops *meta memset(set_bindings, 0, sizeof(set_bindings)); - for (i = 0; i < ARRAY_SIZE(set_bindings); i++) + if (!use_heap) { - set_bindings[i].binding = i; - 
set_bindings[i].descriptorCount = 1; - } + for (i = 0; i < ARRAY_SIZE(set_bindings); i++) + { + set_bindings[i].binding = i; + set_bindings[i].descriptorCount = 1; + } - set_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - set_bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + set_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + set_bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - if ((vr = vkd3d_meta_create_descriptor_set_layout(device, 1, set_bindings, true, &meta_resolve_image_ops->vk_graphics_set_layout)) < 0) - { - ERR("Failed to create descriptor set layout, vr %d.\n", vr); - goto fail; - } + if ((vr = vkd3d_meta_create_descriptor_set_layout(device, 1, set_bindings, true, + &meta_resolve_image_ops->vk_graphics_set_layout)) < 0) + { + ERR("Failed to create descriptor set layout, vr %d.\n", vr); + goto fail; + } - set_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - set_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - set_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - set_bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + set_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + set_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + set_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + set_bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - if ((vr = vkd3d_meta_create_descriptor_set_layout(device, ARRAY_SIZE(set_bindings), set_bindings, true, &meta_resolve_image_ops->vk_compute_set_layout)) < 0) - { - ERR("Failed to create descriptor set layout, vr %d.\n", vr); - goto fail; - } + if ((vr = vkd3d_meta_create_descriptor_set_layout( + device, ARRAY_SIZE(set_bindings), set_bindings, true, &meta_resolve_image_ops->vk_compute_set_layout)) < 0) + { + ERR("Failed to create descriptor set layout, vr %d.\n", vr); + goto fail; + } - push_constant_range.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_range.offset = 0; - 
push_constant_range.size = sizeof(struct vkd3d_resolve_image_args); + push_constant_range.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + push_constant_range.offset = 0; + push_constant_range.size = sizeof(struct vkd3d_resolve_image_args); - if ((vr = vkd3d_meta_create_pipeline_layout(device, 1, &meta_resolve_image_ops->vk_graphics_set_layout, - 1, &push_constant_range, &meta_resolve_image_ops->vk_graphics_pipeline_layout))) - { - ERR("Failed to create pipeline layout, vr %d.\n", vr); - goto fail; - } + if ((vr = vkd3d_meta_create_pipeline_layout(device, 1, &meta_resolve_image_ops->vk_graphics_set_layout, + 1, &push_constant_range, &meta_resolve_image_ops->vk_graphics_pipeline_layout))) + { + ERR("Failed to create pipeline layout, vr %d.\n", vr); + goto fail; + } - push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - push_constant_range.offset = 0; - push_constant_range.size = sizeof(struct vkd3d_resolve_image_compute_args); + push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + push_constant_range.offset = 0; + push_constant_range.size = sizeof(struct vkd3d_resolve_image_compute_args); - if ((vr = vkd3d_meta_create_pipeline_layout(device, 1, &meta_resolve_image_ops->vk_compute_set_layout, - 1, &push_constant_range, &meta_resolve_image_ops->vk_compute_pipeline_layout))) - { - ERR("Failed to create pipeline layout, vr %d.\n", vr); - goto fail; + if ((vr = vkd3d_meta_create_pipeline_layout(device, 1, &meta_resolve_image_ops->vk_compute_set_layout, + 1, &push_constant_range, &meta_resolve_image_ops->vk_compute_pipeline_layout))) + { + ERR("Failed to create pipeline layout, vr %d.\n", vr); + goto fail; + } } if ((vr = vkd3d_meta_create_shader_module(device, SPIRV_CODE(fs_resolve_color_float), @@ -1027,9 +1169,11 @@ static HRESULT vkd3d_resolve_image_ops_init(struct vkd3d_resolve_image_ops *meta } static HRESULT vkd3d_meta_create_resolve_image_graphics_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_resolve_image_graphics_pipeline_key 
*key, struct vkd3d_resolve_image_pipeline *pipeline) + const struct vkd3d_resolve_image_graphics_pipeline_key *key, struct vkd3d_resolve_image_pipeline *pipeline, + bool use_heap) { - struct vkd3d_resolve_image_ops *meta_resolve_image_ops = &meta_ops->resolve_image; + struct vkd3d_resolve_image_ops *meta_resolve_image_ops = + use_heap ? &meta_ops->resolve_image_heap : &meta_ops->resolve_image_legacy; VkPipelineDepthStencilStateCreateInfo ds_state; unsigned int dynamic_state_count; VkSpecializationInfo spec_info; @@ -1135,9 +1279,11 @@ static HRESULT vkd3d_meta_create_resolve_image_graphics_pipeline(struct vkd3d_me } static HRESULT vkd3d_meta_create_resolve_image_compute_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_resolve_image_compute_pipeline_key *key, struct vkd3d_resolve_image_pipeline *pipeline) + const struct vkd3d_resolve_image_compute_pipeline_key *key, struct vkd3d_resolve_image_pipeline *pipeline, + bool use_heap) { - struct vkd3d_resolve_image_ops *meta_resolve_image_ops = &meta_ops->resolve_image; + struct vkd3d_resolve_image_ops *meta_resolve_image_ops = + use_heap ? &meta_ops->resolve_image_heap : &meta_ops->resolve_image_legacy; VkSpecializationInfo spec_info; const void *cs_code; size_t cs_size; @@ -1197,9 +1343,10 @@ static HRESULT vkd3d_meta_create_resolve_image_compute_pipeline(struct vkd3d_met } HRESULT vkd3d_meta_get_resolve_image_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_resolve_image_pipeline_key *key, struct vkd3d_resolve_image_info *info) + const struct vkd3d_resolve_image_pipeline_key *key, struct vkd3d_resolve_image_info *info, bool use_heap) { - struct vkd3d_resolve_image_ops *meta_resolve_image_ops = &meta_ops->resolve_image; + struct vkd3d_resolve_image_ops *meta_resolve_image_ops = + use_heap ? 
&meta_ops->resolve_image_heap : &meta_ops->resolve_image_legacy; struct vkd3d_resolve_image_pipeline *pipeline; HRESULT hr; size_t i; @@ -1247,9 +1394,9 @@ HRESULT vkd3d_meta_get_resolve_image_pipeline(struct vkd3d_meta_ops *meta_ops, pipeline = &meta_resolve_image_ops->pipelines[meta_resolve_image_ops->pipeline_count]; if (key->path == VKD3D_RESOLVE_IMAGE_PATH_RENDER_PASS_PIPELINE) - hr = vkd3d_meta_create_resolve_image_graphics_pipeline(meta_ops, &key->graphics, pipeline); + hr = vkd3d_meta_create_resolve_image_graphics_pipeline(meta_ops, &key->graphics, pipeline, use_heap); else - hr = vkd3d_meta_create_resolve_image_compute_pipeline(meta_ops, &key->compute, pipeline); + hr = vkd3d_meta_create_resolve_image_compute_pipeline(meta_ops, &key->compute, pipeline, use_heap); if (FAILED(hr)) { @@ -1652,8 +1799,7 @@ static HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predica meta_predicate_ops->data_sizes[i] = spec_data[i].arg_count * sizeof(uint32_t); } - if (device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands || - device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) + if (device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) { uint32_t num_active_words; spec_info.mapEntryCount = 1; @@ -1934,7 +2080,7 @@ static void vkd3d_dstorage_ops_cleanup(struct vkd3d_dstorage_ops *dstorage_ops, } static HRESULT vkd3d_sampler_feedback_ops_init(struct vkd3d_sampler_feedback_resolve_ops *sampler_feedback_ops, - struct d3d12_device *device) + struct d3d12_device *device, bool use_heap) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkDescriptorSetLayoutBinding decode_bindings[2]; @@ -1992,74 +2138,77 @@ static HRESULT vkd3d_sampler_feedback_ops_init(struct vkd3d_sampler_feedback_res }, }; - memset(decode_bindings, 0, sizeof(decode_bindings)); - memset(encode_bindings, 0, sizeof(encode_bindings)); + if (!use_heap) + { + memset(decode_bindings, 0, 
sizeof(decode_bindings)); + memset(encode_bindings, 0, sizeof(encode_bindings)); - if ((vr = vkd3d_meta_create_sampler(device, VK_FILTER_NEAREST, &vk_sampler))) - return hresult_from_vk_result(vr); + if ((vr = vkd3d_meta_create_sampler(device, VK_FILTER_NEAREST, &vk_sampler))) + return hresult_from_vk_result(vr); - decode_bindings[0].binding = 0; - decode_bindings[0].descriptorCount = 1; - decode_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - decode_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - - decode_bindings[1].binding = 1; - decode_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; - decode_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - decode_bindings[1].descriptorCount = 1; - decode_bindings[1].pImmutableSamplers = &vk_sampler; - - encode_bindings[0].binding = 0; - encode_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - encode_bindings[0].descriptorCount = 1; - encode_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - - encode_bindings[1].binding = 1; - encode_bindings[1].descriptorCount = 1; - encode_bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - encode_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - encode_bindings[1].pImmutableSamplers = &vk_sampler; - - encode_bindings[2].binding = 2; - encode_bindings[2].descriptorCount = 1; - encode_bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - encode_bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - - if ((vr = vkd3d_meta_create_descriptor_set_layout(device, - ARRAY_SIZE(decode_bindings), decode_bindings, - true, &sampler_feedback_ops->vk_decode_set_layout))) - return hresult_from_vk_result(vr); + decode_bindings[0].binding = 0; + decode_bindings[0].descriptorCount = 1; + decode_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + decode_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + decode_bindings[1].binding = 1; + 
decode_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; + decode_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + decode_bindings[1].descriptorCount = 1; + decode_bindings[1].pImmutableSamplers = &vk_sampler; + + encode_bindings[0].binding = 0; + encode_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + encode_bindings[0].descriptorCount = 1; + encode_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + + encode_bindings[1].binding = 1; + encode_bindings[1].descriptorCount = 1; + encode_bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + encode_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + encode_bindings[1].pImmutableSamplers = &vk_sampler; + + encode_bindings[2].binding = 2; + encode_bindings[2].descriptorCount = 1; + encode_bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + encode_bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + + if ((vr = vkd3d_meta_create_descriptor_set_layout(device, + ARRAY_SIZE(decode_bindings), decode_bindings, + true, &sampler_feedback_ops->vk_decode_set_layout))) + return hresult_from_vk_result(vr); - if ((vr = vkd3d_meta_create_descriptor_set_layout(device, - ARRAY_SIZE(encode_bindings), encode_bindings, - true, &sampler_feedback_ops->vk_encode_set_layout))) - return hresult_from_vk_result(vr); + if ((vr = vkd3d_meta_create_descriptor_set_layout(device, + ARRAY_SIZE(encode_bindings), encode_bindings, + true, &sampler_feedback_ops->vk_encode_set_layout))) + return hresult_from_vk_result(vr); - push_range.offset = 0; + push_range.offset = 0; - push_range.size = sizeof(struct vkd3d_sampler_feedback_resolve_encode_args); - push_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - if ((vr = vkd3d_meta_create_pipeline_layout(device, - 1, &sampler_feedback_ops->vk_encode_set_layout, - 1, &push_range, - &sampler_feedback_ops->vk_compute_encode_layout))) - return hresult_from_vk_result(vr); + push_range.size = 
sizeof(struct vkd3d_sampler_feedback_resolve_encode_args); + push_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + if ((vr = vkd3d_meta_create_pipeline_layout(device, + 1, &sampler_feedback_ops->vk_encode_set_layout, + 1, &push_range, + &sampler_feedback_ops->vk_compute_encode_layout))) + return hresult_from_vk_result(vr); - push_range.size = sizeof(struct vkd3d_sampler_feedback_resolve_decode_args); - push_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - if ((vr = vkd3d_meta_create_pipeline_layout(device, - 1, &sampler_feedback_ops->vk_decode_set_layout, - 1, &push_range, - &sampler_feedback_ops->vk_compute_decode_layout))) - return hresult_from_vk_result(vr); + push_range.size = sizeof(struct vkd3d_sampler_feedback_resolve_decode_args); + push_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + if ((vr = vkd3d_meta_create_pipeline_layout(device, + 1, &sampler_feedback_ops->vk_decode_set_layout, + 1, &push_range, + &sampler_feedback_ops->vk_compute_decode_layout))) + return hresult_from_vk_result(vr); - push_range.size = sizeof(struct vkd3d_sampler_feedback_resolve_decode_args); - push_range.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - if ((vr = vkd3d_meta_create_pipeline_layout(device, - 1, &sampler_feedback_ops->vk_decode_set_layout, - 1, &push_range, - &sampler_feedback_ops->vk_graphics_decode_layout))) - return hresult_from_vk_result(vr); + push_range.size = sizeof(struct vkd3d_sampler_feedback_resolve_decode_args); + push_range.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + if ((vr = vkd3d_meta_create_pipeline_layout(device, + 1, &sampler_feedback_ops->vk_decode_set_layout, + 1, &push_range, + &sampler_feedback_ops->vk_graphics_decode_layout))) + return hresult_from_vk_result(vr); + } for (i = 0; i < ARRAY_SIZE(pipelines); i++) { @@ -2191,23 +2340,26 @@ static HRESULT vkd3d_workgraph_ops_init(struct vkd3d_workgraph_indirect_ops *wor } void vkd3d_meta_get_sampler_feedback_resolve_pipeline(struct vkd3d_meta_ops *meta_ops, - enum 
vkd3d_sampler_feedback_resolve_type type, struct vkd3d_sampler_feedback_resolve_info *info) + enum vkd3d_sampler_feedback_resolve_type type, struct vkd3d_sampler_feedback_resolve_info *info, + bool use_heap) { - info->vk_pipeline = meta_ops->sampler_feedback.vk_pipelines[type]; + struct vkd3d_sampler_feedback_resolve_ops *ops = use_heap ? + &meta_ops->sampler_feedback_heap : &meta_ops->sampler_feedback_legacy; + info->vk_pipeline = ops->vk_pipelines[type]; switch (type) { case VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIP_USED_TO_IMAGE: case VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIN_MIP_TO_IMAGE: - info->vk_layout = meta_ops->sampler_feedback.vk_graphics_decode_layout; + info->vk_layout = ops->vk_graphics_decode_layout; break; case VKD3D_SAMPLER_FEEDBACK_RESOLVE_MIN_MIP_TO_BUFFER: - info->vk_layout = meta_ops->sampler_feedback.vk_compute_decode_layout; + info->vk_layout = ops->vk_compute_decode_layout; break; default: - info->vk_layout = meta_ops->sampler_feedback.vk_compute_encode_layout; + info->vk_layout = ops->vk_compute_encode_layout; break; } } @@ -2284,14 +2436,23 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device if (FAILED(hr = vkd3d_meta_ops_common_init(&meta_ops->common, device))) goto fail_common; - if (FAILED(hr = vkd3d_clear_uav_ops_init(&meta_ops->clear_uav, device))) - goto fail_clear_uav_ops; + if (FAILED(hr = vkd3d_clear_uav_ops_init(&meta_ops->clear_uav_heap, device, true))) + goto fail_clear_uav_ops_heap; + + if (FAILED(hr = vkd3d_clear_uav_ops_init(&meta_ops->clear_uav_legacy, device, false))) + goto fail_clear_uav_ops_legacy; + + if (FAILED(hr = vkd3d_copy_image_ops_init(&meta_ops->copy_image_heap, device, true))) + goto fail_copy_image_heap_ops; - if (FAILED(hr = vkd3d_copy_image_ops_init(&meta_ops->copy_image, device))) - goto fail_copy_image_ops; + if (FAILED(hr = vkd3d_copy_image_ops_init(&meta_ops->copy_image_legacy, device, false))) + goto fail_copy_image_legacy_ops; - if (FAILED(hr = 
vkd3d_resolve_image_ops_init(&meta_ops->resolve_image, device))) - goto fail_resolve_image_ops; + if (FAILED(hr = vkd3d_resolve_image_ops_init(&meta_ops->resolve_image_heap, device, true))) + goto fail_resolve_image_heap_ops; + + if (FAILED(hr = vkd3d_resolve_image_ops_init(&meta_ops->resolve_image_legacy, device, false))) + goto fail_resolve_image_legacy_ops; if (FAILED(hr = vkd3d_swapchain_ops_init(&meta_ops->swapchain, device))) goto fail_swapchain_ops; @@ -2311,8 +2472,11 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device if (FAILED(hr = vkd3d_dstorage_ops_init(&meta_ops->dstorage, device))) goto fail_dstorage_ops; - if (FAILED(hr = vkd3d_sampler_feedback_ops_init(&meta_ops->sampler_feedback, device))) - goto fail_sampler_feedback; + if (FAILED(hr = vkd3d_sampler_feedback_ops_init(&meta_ops->sampler_feedback_heap, device, true))) + goto fail_sampler_feedback_heap; + + if (FAILED(hr = vkd3d_sampler_feedback_ops_init(&meta_ops->sampler_feedback_legacy, device, false))) + goto fail_sampler_feedback_legacy; if (FAILED(hr = vkd3d_workgraph_ops_init(&meta_ops->workgraph, device))) goto fail_workgraphs; @@ -2320,8 +2484,10 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device return S_OK; fail_workgraphs: - vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback, device); -fail_sampler_feedback: + vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback_legacy, device); +fail_sampler_feedback_legacy: + vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback_heap, device); +fail_sampler_feedback_heap: vkd3d_dstorage_ops_cleanup(&meta_ops->dstorage, device); fail_dstorage_ops: vkd3d_multi_dispatch_indirect_ops_cleanup(&meta_ops->multi_dispatch_indirect, device); @@ -2334,12 +2500,18 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device fail_query_ops: vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device); fail_swapchain_ops: - 
vkd3d_resolve_image_ops_cleanup(&meta_ops->resolve_image, device); -fail_resolve_image_ops: - vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device); -fail_copy_image_ops: - vkd3d_clear_uav_ops_cleanup(&meta_ops->clear_uav, device); -fail_clear_uav_ops: + vkd3d_resolve_image_ops_cleanup(&meta_ops->resolve_image_legacy, device); +fail_resolve_image_legacy_ops: + vkd3d_resolve_image_ops_cleanup(&meta_ops->resolve_image_heap, device); +fail_resolve_image_heap_ops: + vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image_legacy, device); +fail_copy_image_legacy_ops: + vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image_heap, device); +fail_copy_image_heap_ops: + vkd3d_clear_uav_ops_cleanup(&meta_ops->clear_uav_legacy, device); +fail_clear_uav_ops_legacy: + vkd3d_clear_uav_ops_cleanup(&meta_ops->clear_uav_heap, device); +fail_clear_uav_ops_heap: vkd3d_meta_ops_common_cleanup(&meta_ops->common, device); fail_common: return hr; @@ -2348,16 +2520,20 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device) { vkd3d_workgraph_ops_cleanup(&meta_ops->workgraph, device); - vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback, device); + vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback_heap, device); + vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback_legacy, device); vkd3d_dstorage_ops_cleanup(&meta_ops->dstorage, device); vkd3d_multi_dispatch_indirect_ops_cleanup(&meta_ops->multi_dispatch_indirect, device); vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device); vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device); vkd3d_query_ops_cleanup(&meta_ops->query, device); vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device); - vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image, device); - vkd3d_resolve_image_ops_cleanup(&meta_ops->resolve_image, device); - 
vkd3d_clear_uav_ops_cleanup(&meta_ops->clear_uav, device); + vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image_heap, device); + vkd3d_copy_image_ops_cleanup(&meta_ops->copy_image_legacy, device); + vkd3d_resolve_image_ops_cleanup(&meta_ops->resolve_image_heap, device); + vkd3d_resolve_image_ops_cleanup(&meta_ops->resolve_image_legacy, device); + vkd3d_clear_uav_ops_cleanup(&meta_ops->clear_uav_heap, device); + vkd3d_clear_uav_ops_cleanup(&meta_ops->clear_uav_legacy, device); vkd3d_meta_ops_common_cleanup(&meta_ops->common, device); return S_OK; } diff --git a/libs/vkd3d/meta_commands.c b/libs/vkd3d/meta_commands.c index 38b7e5e86c..fc1e513e29 100644 --- a/libs/vkd3d/meta_commands.c +++ b/libs/vkd3d/meta_commands.c @@ -449,8 +449,8 @@ static void d3d12_meta_command_exec_dstorage(struct d3d12_meta_command *meta_com VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, meta_ops->dstorage.vk_emit_nv_memory_decompression_workgroups_pipeline)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, meta_ops->dstorage.vk_dstorage_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_args), &push_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + meta_ops->dstorage.vk_dstorage_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(push_args), &push_args); workgroup_count = vkd3d_compute_workgroup_count(parameters->stream_count, 32); VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, workgroup_count, 1, 1)); @@ -472,8 +472,8 @@ static void d3d12_meta_command_exec_dstorage(struct d3d12_meta_command *meta_com { push_args.stream_index = i; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, meta_ops->dstorage.vk_dstorage_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_args), &push_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + meta_ops->dstorage.vk_dstorage_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(push_args), &push_args); 
VK_CALL(vkCmdDispatchIndirect(list->cmd.vk_command_buffer, scratch_buffer->vk_buffer, scratch_offset + workgroup_data_offset + i * sizeof(VkDispatchIndirectCommand))); @@ -496,8 +496,8 @@ static void d3d12_meta_command_exec_dstorage(struct d3d12_meta_command *meta_com VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, meta_ops->dstorage.vk_gdeflate_prepare_pipeline)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, meta_ops->dstorage.vk_dstorage_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_args), &push_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + meta_ops->dstorage.vk_dstorage_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(push_args), &push_args); workgroup_count = vkd3d_compute_workgroup_count(parameters->stream_count, 32); VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, workgroup_count, 1, 1)); @@ -516,8 +516,8 @@ static void d3d12_meta_command_exec_dstorage(struct d3d12_meta_command *meta_com { push_args.stream_index = i; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, meta_ops->dstorage.vk_dstorage_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_args), &push_args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + meta_ops->dstorage.vk_dstorage_layout, VK_SHADER_STAGE_COMPUTE_BIT, sizeof(push_args), &push_args); VK_CALL(vkCmdDispatchIndirect(list->cmd.vk_command_buffer, scratch_buffer->vk_buffer, scratch_offset + i * sizeof(VkDispatchIndirectCommand))); diff --git a/libs/vkd3d/raytracing_pipeline.c b/libs/vkd3d/raytracing_pipeline.c index b465446024..39df970e07 100644 --- a/libs/vkd3d/raytracing_pipeline.c +++ b/libs/vkd3d/raytracing_pipeline.c @@ -159,18 +159,15 @@ static void d3d12_state_object_cleanup(struct d3d12_rt_state_object *object) if (variant->global_root_signature) d3d12_root_signature_dec_ref(variant->global_root_signature); +#if 0 if (variant->local_static_sampler.owned_handles) { 
VK_CALL(vkDestroyPipelineLayout(object->device->vk_device, variant->local_static_sampler.pipeline_layout, NULL)); VK_CALL(vkDestroyDescriptorSetLayout(object->device->vk_device, variant->local_static_sampler.set_layout, NULL)); - if (variant->local_static_sampler.desc_set) - { - vkd3d_sampler_state_free_descriptor_set(&object->device->sampler_state, object->device, - variant->local_static_sampler.desc_set, variant->local_static_sampler.desc_pool); - } } +#endif } vkd3d_free(object->pipelines); object->pipelines = NULL; @@ -1744,6 +1741,7 @@ static void d3d12_state_object_build_group_create_info( group_create->pShaderGroupCaptureReplayHandle = NULL; } +#if 0 static void d3d12_state_object_append_local_static_samplers( struct d3d12_rt_state_object_variant *variant, VkDescriptorSetLayoutBinding **out_vk_bindings, size_t *out_vk_bindings_size, size_t *out_vk_bindings_count, @@ -1790,6 +1788,7 @@ static void d3d12_state_object_append_local_static_samplers( *out_vk_bindings_size = vk_bindings_size; *out_vk_bindings_count = vk_bindings_count; } +#endif static bool d3d12_state_object_pipeline_data_find_global_state_object( struct d3d12_rt_state_object_pipeline_data *data, @@ -1883,7 +1882,7 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state const struct vkd3d_vk_device_procs *vk_procs = &object->device->vk_procs; struct vkd3d_shader_interface_local_info shader_interface_local_info; VkRayTracingPipelineInterfaceCreateInfoKHR interface_create_info; - VkDescriptorSetLayoutBinding *local_static_sampler_bindings; + //VkDescriptorSetLayoutBinding *local_static_sampler_bindings; struct d3d12_root_signature *default_global_root_signature; struct vkd3d_shader_interface_info shader_interface_info; VkRayTracingPipelineCreateInfoKHR pipeline_create_info; @@ -1896,14 +1895,18 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state struct vkd3d_shader_library_entry_point *entry; struct d3d12_rt_state_object_variant *variant; 
const struct D3D12_HIT_GROUP_DESC *hit_group; + VkPipelineCreateFlags2CreateInfo flags2_info; VkPipelineLibraryCreateInfoKHR library_info; - size_t local_static_sampler_bindings_count; - size_t local_static_sampler_bindings_size; + //size_t local_static_sampler_bindings_count; + //size_t local_static_sampler_bindings_size; VkPipelineShaderStageCreateInfo *stage; uint32_t pgroup_offset, pstage_offset; unsigned int num_groups_to_export; + size_t scratch_allocs_count = 0; + size_t scratch_allocs_size = 0; struct vkd3d_shader_code spirv; struct vkd3d_shader_code dxil; + void **scratch_allocs = NULL; size_t i, j; VkResult vr; HRESULT hr; @@ -1928,7 +1931,7 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state } memset(&shader_interface_info, 0, sizeof(shader_interface_info)); - shader_interface_info.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(object->device); + shader_interface_info.min_ssbo_alignment = object->device->bindless_state.min_ssbo_alignment; /* Effectively ignored. */ shader_interface_info.stage = VK_SHADER_STAGE_ALL; @@ -1958,11 +1961,9 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state compat_global_signature->pso_compatibility_hash, compat_global_signature->layout_compatibility_hash); - local_static_sampler_bindings = NULL; - local_static_sampler_bindings_count = 0; - local_static_sampler_bindings_size = 0; - variant->local_static_sampler.set_index = compat_global_signature ? 
- compat_global_signature->raygen.num_set_layouts : 0; + //local_static_sampler_bindings = NULL; + //local_static_sampler_bindings_count = 0; + //local_static_sampler_bindings_size = 0; if (object->device->debug_ring.active) data->spec_info_buffer = vkd3d_calloc(data->entry_points_count, sizeof(*data->spec_info_buffer)); @@ -1970,7 +1971,7 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state for (i = 0; i < data->entry_points_count; i++) { struct d3d12_root_signature *per_entry_global_signature; - struct d3d12_root_signature *local_signature; + VKD3D_UNUSED struct d3d12_root_signature *local_signature; entry = &data->entry_points[i]; @@ -1995,15 +1996,17 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state /* We might have different bindings per PSO, even if they are considered pipeline layout compatible. * Register/space declaration could differ, but those don't change the Vulkan pipeline layout. */ shader_interface_info.flags = d3d12_root_signature_get_shader_interface_flags( - per_entry_global_signature, VKD3D_PIPELINE_TYPE_RAY_TRACING); - shader_interface_info.descriptor_tables.offset = per_entry_global_signature->descriptor_table_offset; - shader_interface_info.descriptor_tables.count = per_entry_global_signature->descriptor_table_count; + per_entry_global_signature); shader_interface_info.bindings = per_entry_global_signature->bindings; shader_interface_info.binding_count = per_entry_global_signature->binding_count; shader_interface_info.push_constant_buffers = per_entry_global_signature->root_constants; shader_interface_info.push_constant_buffer_count = per_entry_global_signature->root_constant_count; + shader_interface_info.num_root_descriptors = per_entry_global_signature->root_parameters_raw_va_count; + shader_interface_info.num_root_constants = per_entry_global_signature->root_parameters_constant_dwords; + shader_interface_info.descriptor_table_offset_words = + 
per_entry_global_signature->descriptor_table_offset / sizeof(uint32_t); + shader_interface_info.descriptor_raw_va_offset = object->device->bindless_state.uav_counter_embedded_offset; /* TODO: EXTENDED_DEBUG_UTILS mapping. */ - shader_interface_info.push_constant_ubo_binding = &per_entry_global_signature->push_constant_ubo_binding; shader_interface_info.offset_buffer_binding = &per_entry_global_signature->offset_buffer_binding; #ifdef VKD3D_ENABLE_DESCRIPTOR_QA shader_interface_info.descriptor_qa_payload_binding = &per_entry_global_signature->descriptor_qa_payload_binding; @@ -2013,10 +2016,41 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state else { shader_interface_info.flags = 0; - shader_interface_info.push_constant_buffer_count = 0; shader_interface_info.binding_count = 0; } +#if 1 + if (local_signature) + { + RT_TRACE(" Local root signature: (hash %016"PRIx64") (compat hash %016"PRIx64").\n", + local_signature->pso_compatibility_hash, + local_signature->layout_compatibility_hash); + + shader_interface_local_info.bindings = local_signature->bindings; + shader_interface_local_info.binding_count = local_signature->binding_count; + shader_interface_local_info.local_root_parameters = local_signature->parameters; + shader_interface_local_info.local_root_parameter_count = local_signature->parameter_count; + shader_interface_local_info.shader_record_constant_buffers = local_signature->root_constants; + shader_interface_local_info.shader_record_buffer_count = local_signature->root_constant_count; + + /* Promote state which might only be active in local root signature. */ + shader_interface_info.flags |= d3d12_root_signature_get_shader_interface_flags(local_signature); + + if (!per_entry_global_signature) + { + /* We won't have any root signature with push descriptors. + * This is a potential hole, but ray tracing shaders without a global root + * signature is questionable at best. 
+ * The outer raygen shader will usually be the one with true side effects. */ + shader_interface_info.flags &= ~VKD3D_SHADER_INTERFACE_INSTRUCTION_QA_BUFFER; + } + } + else + { + RT_TRACE(" Local root signature: N/A\n"); + memset(&shader_interface_local_info, 0, sizeof(shader_interface_local_info)); + } +#else if (local_signature) { RT_TRACE(" Local root signature: (hash %016"PRIx64") (compat hash %016"PRIx64").\n", @@ -2035,12 +2069,14 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state local_bindings = vkd3d_malloc(sizeof(*local_bindings) * shader_interface_local_info.binding_count); shader_interface_local_info.bindings = local_bindings; +#if 0 d3d12_state_object_append_local_static_samplers(variant, &local_static_sampler_bindings, &local_static_sampler_bindings_size, &local_static_sampler_bindings_count, local_bindings, local_signature->static_samplers_desc, local_signature->static_samplers, local_signature->static_sampler_count); +#endif memcpy(local_bindings + local_signature->static_sampler_count, local_signature->bindings, sizeof(*local_bindings) * local_signature->binding_count); @@ -2062,15 +2098,13 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state * The outer raygen shader will usually be the one with true side effects. 
*/ shader_interface_info.flags &= ~VKD3D_SHADER_INTERFACE_INSTRUCTION_QA_BUFFER; } - - if (local_signature->raygen.flags & (VKD3D_ROOT_SIGNATURE_USE_SSBO_OFFSET_BUFFER | VKD3D_ROOT_SIGNATURE_USE_TYPED_OFFSET_BUFFER)) - shader_interface_info.offset_buffer_binding = &local_signature->offset_buffer_binding; } else { RT_TRACE(" Local root signature: N/A\n"); memset(&shader_interface_local_info, 0, sizeof(shader_interface_local_info)); } +#endif if (vkd3d_stage_is_global_group(entry->stage)) { @@ -2130,6 +2164,25 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state stage->pName = "main"; stage->pSpecializationInfo = NULL; + if (per_entry_global_signature && local_signature) + { + /* Need a fused mapping table. */ + struct vkd3d_fused_root_signature_mappings *fused = + d3d12_state_object_fuse_root_signature_mappings(per_entry_global_signature, local_signature); + vk_prepend_struct(stage, &fused->mapping_info); + vkd3d_array_reserve((void **)&scratch_allocs, &scratch_allocs_size, + scratch_allocs_count + 1, sizeof(*scratch_allocs)); + scratch_allocs[scratch_allocs_count++] = fused; + } + else if (per_entry_global_signature) + { + stage->pNext = &per_entry_global_signature->mapping_info; + } + else if (local_signature) + { + stage->pNext = &local_signature->mapping_info; + } + memset(&dxil, 0, sizeof(dxil)); memset(&spirv, 0, sizeof(spirv)); @@ -2372,6 +2425,7 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state pstage_offset += collection_variant->stages_count; } +#if 0 if (local_static_sampler_bindings_count) { uint64_t hash = hash_fnv1_init(); @@ -2411,15 +2465,6 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state 0, NULL, &variant->local_static_sampler.pipeline_layout))) return hr; } - - /* Implicitly allocated and bound if we have descriptor buffer support. 
*/ - if (!d3d12_device_uses_descriptor_buffers(object->device)) - { - if (FAILED(hr = vkd3d_sampler_state_allocate_descriptor_set(&object->device->sampler_state, - object->device, variant->local_static_sampler.set_layout, - &variant->local_static_sampler.desc_set, &variant->local_static_sampler.desc_pool))) - return hr; - } } /* If we have collections, we need to make sure that every pipeline layout is compatible. @@ -2443,7 +2488,6 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state { /* Borrow these handles. */ variant->local_static_sampler.pipeline_layout = collection_variant->local_static_sampler.pipeline_layout; - variant->local_static_sampler.desc_set = collection_variant->local_static_sampler.desc_set; variant->local_static_sampler.set_layout = collection_variant->local_static_sampler.set_layout; variant->local_static_sampler.compatibility_hash = collection_variant->local_static_sampler.compatibility_hash; } @@ -2459,9 +2503,10 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state } } } +#endif + memset(&pipeline_create_info, 0, sizeof(pipeline_create_info)); pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR; - pipeline_create_info.pNext = NULL; /* If we allow state object additions, we must first lower this pipeline to a library, and * then link it to itself so we can use it a library in subsequent PSO creations, but we @@ -2470,11 +2515,6 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state (object->flags & D3D12_STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS)) ? 
VK_PIPELINE_CREATE_LIBRARY_BIT_KHR : 0; - if (variant->local_static_sampler.pipeline_layout) - pipeline_create_info.layout = variant->local_static_sampler.pipeline_layout; - else - pipeline_create_info.layout = compat_global_signature->raygen.vk_pipeline_layout; - pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_create_info.basePipelineIndex = -1; pipeline_create_info.pGroups = data->groups; @@ -2527,11 +2567,14 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state dynamic_state.dynamicStateCount = 1; dynamic_state.pDynamicStates = dynamic_states; - if (d3d12_device_uses_descriptor_buffers(object->device)) - pipeline_create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; - TRACE("Calling vkCreateRayTracingPipelinesKHR.\n"); + memset(&flags2_info, 0, sizeof(flags2_info)); + flags2_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2_info.flags = pipeline_create_info.flags; + flags2_info.flags |= VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + vk_prepend_struct(&pipeline_create_info, &flags2_info); + vr = VK_CALL(vkCreateRayTracingPipelinesKHR(object->device->vk_device, VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &pipeline_create_info, NULL, (pipeline_create_info.flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) ? @@ -2541,7 +2584,7 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state object->type == D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE) { /* It is valid to inherit pipeline libraries into other pipeline libraries. 
*/ - pipeline_create_info.flags &= ~VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; + flags2_info.flags &= ~VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; pipeline_create_info.pStages = NULL; pipeline_create_info.pGroups = NULL; pipeline_create_info.stageCount = 0; @@ -2554,6 +2597,10 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_rt_state VK_NULL_HANDLE, 1, &pipeline_create_info, NULL, &variant->pipeline)); } + for (i = 0; i < scratch_allocs_count; i++) + vkd3d_free(scratch_allocs[i]); + vkd3d_free(scratch_allocs); + TRACE("Completed vkCreateRayTracingPipelinesKHR.\n"); if (vr) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 32b53b0a78..ca7c94908b 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -215,6 +215,8 @@ HRESULT vkd3d_create_buffer(struct d3d12_device *device, | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT + | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT + | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; @@ -257,12 +259,6 @@ HRESULT vkd3d_create_buffer(struct d3d12_device *device, buffer_info.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; - if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) - buffer_info.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; - - if (!(desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) - buffer_info.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; - /* Buffers always have properties of D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS. 
*/ if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) { @@ -1628,55 +1624,6 @@ struct vkd3d_view *vkd3d_view_map_create_view2(struct vkd3d_view_map *view_map, return view; } -struct vkd3d_sampler_key -{ - D3D12_STATIC_SAMPLER_DESC1 desc; -}; - -struct vkd3d_sampler_entry -{ - struct hash_map_entry entry; - D3D12_STATIC_SAMPLER_DESC1 desc; - VkSampler vk_sampler; -}; - -static uint32_t vkd3d_sampler_entry_hash(const void *key) -{ - const struct vkd3d_sampler_key *k = key; - uint32_t hash; - - hash = (uint32_t)k->desc.Filter; - hash = hash_combine(hash, (uint32_t)k->desc.AddressU); - hash = hash_combine(hash, (uint32_t)k->desc.AddressV); - hash = hash_combine(hash, (uint32_t)k->desc.AddressW); - hash = hash_combine(hash, float_bits_to_uint32(k->desc.MipLODBias)); - hash = hash_combine(hash, k->desc.MaxAnisotropy); - hash = hash_combine(hash, (uint32_t)k->desc.ComparisonFunc); - hash = hash_combine(hash, (uint32_t)k->desc.BorderColor); - hash = hash_combine(hash, float_bits_to_uint32(k->desc.MinLOD)); - hash = hash_combine(hash, float_bits_to_uint32(k->desc.MaxLOD)); - hash = hash_combine(hash, k->desc.Flags); - return hash; -} - -static bool vkd3d_sampler_entry_compare(const void *key, const struct hash_map_entry *entry) -{ - const struct vkd3d_sampler_entry *e = (const struct vkd3d_sampler_entry*) entry; - const struct vkd3d_sampler_key *k = key; - - return k->desc.Filter == e->desc.Filter && - k->desc.AddressU == e->desc.AddressU && - k->desc.AddressV == e->desc.AddressV && - k->desc.AddressW == e->desc.AddressW && - k->desc.MipLODBias == e->desc.MipLODBias && - k->desc.MaxAnisotropy == e->desc.MaxAnisotropy && - k->desc.ComparisonFunc == e->desc.ComparisonFunc && - k->desc.BorderColor == e->desc.BorderColor && - k->desc.MinLOD == e->desc.MinLOD && - k->desc.MaxLOD == e->desc.MaxLOD && - k->desc.Flags == e->desc.Flags; -} - HRESULT vkd3d_sampler_state_init(struct vkd3d_sampler_state *state, struct d3d12_device *device) { @@ -1687,164 +1634,213 @@ 
HRESULT vkd3d_sampler_state_init(struct vkd3d_sampler_state *state, if ((rc = pthread_mutex_init(&state->mutex, NULL))) return hresult_from_errno(rc); - hash_map_init(&state->map, &vkd3d_sampler_entry_hash, &vkd3d_sampler_entry_compare, sizeof(struct vkd3d_sampler_entry)); + state->border_color_bank_size = min(4096, device->device_info.custom_border_color_properties.maxCustomBorderColorSamplers); + state->border_colors = vkd3d_calloc(state->border_color_bank_size, sizeof(*state->border_colors)); return S_OK; } void vkd3d_sampler_state_cleanup(struct vkd3d_sampler_state *state, struct d3d12_device *device) +{ + vkd3d_free(state->border_colors); + pthread_mutex_destroy(&state->mutex); +} + +uint32_t vkd3d_sampler_state_register_custom_border_color( + struct d3d12_device *device, + struct vkd3d_sampler_state *state, VkBorderColor border_color, + const VkSamplerCustomBorderColorCreateInfoEXT *info) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; uint32_t i; - for (i = 0; i < state->vk_descriptor_pool_count; i++) - VK_CALL(vkDestroyDescriptorPool(device->vk_device, state->vk_descriptor_pools[i], NULL)); - - vkd3d_free(state->vk_descriptor_pools); + pthread_mutex_lock(&state->mutex); - for (i = 0; i < state->map.entry_count; i++) + if (state->noop_registration) { - struct vkd3d_sampler_entry *e = (struct vkd3d_sampler_entry *)hash_map_get_entry(&state->map, i); - - if (e->entry.flags & HASH_MAP_ENTRY_OCCUPIED) - VK_CALL(vkDestroySampler(device->vk_device, e->vk_sampler, NULL)); + i = state->noop_registration_index; + goto unlock; } - hash_map_free(&state->map); - - pthread_mutex_destroy(&state->mutex); -} - -HRESULT d3d12_create_static_sampler(struct d3d12_device *device, - const D3D12_STATIC_SAMPLER_DESC1 *desc, VkSampler *vk_sampler); - -HRESULT vkd3d_sampler_state_create_static_sampler(struct vkd3d_sampler_state *state, - struct d3d12_device *device, const D3D12_STATIC_SAMPLER_DESC1 *desc, VkSampler *vk_sampler) -{ - struct vkd3d_sampler_entry 
entry, *e; - HRESULT hr; - int rc; - - if ((rc = pthread_mutex_lock(&state->mutex))) + for (i = 0; i < state->border_color_count; i++) { - ERR("Failed to lock mutex, rc %d.\n", rc); - return hresult_from_errno(rc); + if (state->border_colors[i].border_color == border_color && + memcmp(&state->border_colors[i].color, &info->customBorderColor, sizeof(VkClearColorValue)) == 0) + { + i = state->border_colors[i].index; + goto unlock; + } } - if ((e = (struct vkd3d_sampler_entry*)hash_map_find(&state->map, desc))) + if (state->border_color_count == state->border_color_bank_size) { - *vk_sampler = e->vk_sampler; - pthread_mutex_unlock(&state->mutex); - return S_OK; + i = UINT32_MAX; + goto unlock; } - if (FAILED(hr = d3d12_create_static_sampler(device, desc, vk_sampler))) + if (VK_CALL(vkRegisterCustomBorderColorEXT(device->vk_device, info, VK_FALSE, &i)) != VK_SUCCESS) { - pthread_mutex_unlock(&state->mutex); - return hr; + ERR("Failed to allocate custom border color index.\n"); + i = UINT32_MAX; } - entry.desc = *desc; - entry.vk_sampler = *vk_sampler; + /* Some drivers simply do not care about custom border colors and will just return the same value indefinitely. + * If we detect that drivers don't care, just skip the registration in the future. 
*/ + if (state->border_color_count == 1 && i == state->border_colors[0].index) + { + state->noop_registration = true; + state->noop_registration_index = i; + goto unlock; + } - if (!hash_map_insert(&state->map, desc, &entry.entry)) - ERR("Failed to insert sampler into hash map.\n"); + state->border_colors[state->border_color_count].border_color = border_color; + state->border_colors[state->border_color_count].color = info->customBorderColor; + state->border_colors[state->border_color_count].index = i; + state->border_color_count++; +unlock: pthread_mutex_unlock(&state->mutex); - return S_OK; + return i; } -static VkResult vkd3d_sampler_state_create_descriptor_pool(struct d3d12_device *device, VkDescriptorPool *vk_pool) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorPoolCreateInfo pool_info; - VkDescriptorPoolSize pool_size; - - pool_size.type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_size.descriptorCount = 16384; - - pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - pool_info.pNext = NULL; - pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - pool_info.maxSets = 4096; - pool_info.poolSizeCount = 1; - pool_info.pPoolSizes = &pool_size; +HRESULT d3d12_create_static_sampler(struct d3d12_device *device, + const D3D12_STATIC_SAMPLER_DESC1 *desc, VkSampler *vk_sampler); - return VK_CALL(vkCreateDescriptorPool(device->vk_device, &pool_info, NULL, vk_pool)); +static VkSamplerReductionModeEXT vk_reduction_mode_from_d3d12(D3D12_FILTER_REDUCTION_TYPE mode) +{ + switch (mode) + { + case D3D12_FILTER_REDUCTION_TYPE_STANDARD: + case D3D12_FILTER_REDUCTION_TYPE_COMPARISON: + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; + case D3D12_FILTER_REDUCTION_TYPE_MINIMUM: + return VK_SAMPLER_REDUCTION_MODE_MIN; + case D3D12_FILTER_REDUCTION_TYPE_MAXIMUM: + return VK_SAMPLER_REDUCTION_MODE_MAX; + default: + FIXME("Unhandled reduction mode %#x.\n", mode); + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; + } } 
-HRESULT vkd3d_sampler_state_allocate_descriptor_set(struct vkd3d_sampler_state *state, - struct d3d12_device *device, VkDescriptorSetLayout vk_layout, VkDescriptorSet *vk_set, - VkDescriptorPool *vk_pool) +/* samplers */ +static VkFilter vk_filter_from_d3d12(D3D12_FILTER_TYPE type) { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkResult vr = VK_ERROR_OUT_OF_POOL_MEMORY; - VkDescriptorSetAllocateInfo alloc_info; - size_t i; - int rc; - - if ((rc = pthread_mutex_lock(&state->mutex))) + switch (type) { - ERR("Failed to lock mutex, rc %d.\n", rc); - return hresult_from_errno(rc); + case D3D12_FILTER_TYPE_POINT: + return VK_FILTER_NEAREST; + case D3D12_FILTER_TYPE_LINEAR: + return VK_FILTER_LINEAR; + default: + FIXME("Unhandled filter type %#x.\n", type); + return VK_FILTER_NEAREST; } +} - alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - alloc_info.pNext = NULL; - alloc_info.descriptorSetCount = 1; - alloc_info.pSetLayouts = &vk_layout; - - for (i = 0; i < state->vk_descriptor_pool_count; i++) +static VkSamplerMipmapMode vk_mipmap_mode_from_d3d12(D3D12_FILTER_TYPE type) +{ + switch (type) { - alloc_info.descriptorPool = state->vk_descriptor_pools[i]; - vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &alloc_info, vk_set)); - - if (vr == VK_SUCCESS) - { - *vk_pool = alloc_info.descriptorPool; - break; - } + case D3D12_FILTER_TYPE_POINT: + return VK_SAMPLER_MIPMAP_MODE_NEAREST; + case D3D12_FILTER_TYPE_LINEAR: + return VK_SAMPLER_MIPMAP_MODE_LINEAR; + default: + FIXME("Unhandled filter type %#x.\n", type); + return VK_SAMPLER_MIPMAP_MODE_NEAREST; } +} - if (vr == VK_ERROR_OUT_OF_POOL_MEMORY || vr == VK_ERROR_FRAGMENTED_POOL) +static VkSamplerAddressMode vk_address_mode_from_d3d12(D3D12_TEXTURE_ADDRESS_MODE mode) +{ + switch (mode) { - vr = vkd3d_sampler_state_create_descriptor_pool(device, &alloc_info.descriptorPool); + case D3D12_TEXTURE_ADDRESS_MODE_WRAP: + return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case 
D3D12_TEXTURE_ADDRESS_MODE_MIRROR: + return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case D3D12_TEXTURE_ADDRESS_MODE_CLAMP: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case D3D12_TEXTURE_ADDRESS_MODE_BORDER: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE: + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + default: + FIXME("Unhandled address mode %#x.\n", mode); + return VK_SAMPLER_ADDRESS_MODE_REPEAT; + } +} - if (vr != VK_SUCCESS) - { - pthread_mutex_unlock(&state->mutex); - return hresult_from_vk_result(vr); - } - if (!vkd3d_array_reserve((void **)&state->vk_descriptor_pools, &state->vk_descriptor_pools_size, - state->vk_descriptor_pool_count + 1, sizeof(*state->vk_descriptor_pools))) - { - VK_CALL(vkDestroyDescriptorPool(device->vk_device, alloc_info.descriptorPool, NULL)); - pthread_mutex_unlock(&state->mutex); - return E_OUTOFMEMORY; - } +static bool d3d12_sampler_needs_border_color(D3D12_TEXTURE_ADDRESS_MODE u, + D3D12_TEXTURE_ADDRESS_MODE v, D3D12_TEXTURE_ADDRESS_MODE w) +{ + return u == D3D12_TEXTURE_ADDRESS_MODE_BORDER || + v == D3D12_TEXTURE_ADDRESS_MODE_BORDER || + w == D3D12_TEXTURE_ADDRESS_MODE_BORDER; +} - state->vk_descriptor_pools[state->vk_descriptor_pool_count++] = alloc_info.descriptorPool; - vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &alloc_info, vk_set)); - *vk_pool = alloc_info.descriptorPool; +static VkBorderColor vk_static_border_color_from_d3d12(D3D12_STATIC_BORDER_COLOR border_color) +{ + switch (border_color) + { + case D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK: + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + case D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK: + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + case D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE: + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + case D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT: + return VK_BORDER_COLOR_INT_OPAQUE_BLACK; + case D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT: + return 
VK_BORDER_COLOR_INT_OPAQUE_WHITE; + default: + WARN("Unhandled static border color %u.\n", border_color); + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } - - pthread_mutex_unlock(&state->mutex); - return hresult_from_vk_result(vr); } -void vkd3d_sampler_state_free_descriptor_set(struct vkd3d_sampler_state *state, - struct d3d12_device *device, VkDescriptorSet vk_set, VkDescriptorPool vk_pool) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - int rc; +void vkd3d_sampler_state_init_static_sampler(struct vkd3d_sampler_state *state, + struct d3d12_device *device, const D3D12_STATIC_SAMPLER_DESC1 *desc, + VkSamplerCreateInfo *vk_sampler_desc, + VkSamplerReductionModeCreateInfoEXT *vk_reduction_desc) +{ + vk_reduction_desc->sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT; + vk_reduction_desc->pNext = NULL; + vk_reduction_desc->reductionMode = vk_reduction_mode_from_d3d12(D3D12_DECODE_FILTER_REDUCTION(desc->Filter)); + + vk_sampler_desc->sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + vk_sampler_desc->pNext = NULL; + vk_sampler_desc->flags = 0; + vk_sampler_desc->magFilter = vk_filter_from_d3d12(D3D12_DECODE_MAG_FILTER(desc->Filter)); + vk_sampler_desc->minFilter = vk_filter_from_d3d12(D3D12_DECODE_MIN_FILTER(desc->Filter)); + vk_sampler_desc->mipmapMode = vk_mipmap_mode_from_d3d12(D3D12_DECODE_MIP_FILTER(desc->Filter)); + vk_sampler_desc->addressModeU = vk_address_mode_from_d3d12(desc->AddressU); + vk_sampler_desc->addressModeV = vk_address_mode_from_d3d12(desc->AddressV); + vk_sampler_desc->addressModeW = vk_address_mode_from_d3d12(desc->AddressW); + vk_sampler_desc->mipLodBias = desc->MipLODBias; + vk_sampler_desc->anisotropyEnable = D3D12_DECODE_IS_ANISOTROPIC_FILTER(desc->Filter); + vk_sampler_desc->maxAnisotropy = desc->MaxAnisotropy; + vk_sampler_desc->compareEnable = D3D12_DECODE_IS_COMPARISON_FILTER(desc->Filter); + vk_sampler_desc->compareOp = vk_sampler_desc->compareEnable ? 
vk_compare_op_from_d3d12(desc->ComparisonFunc) : 0; + vk_sampler_desc->minLod = desc->MinLOD; + vk_sampler_desc->maxLod = desc->MaxLOD; + vk_sampler_desc->borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + vk_sampler_desc->unnormalizedCoordinates = !!(desc->Flags & D3D12_SAMPLER_FLAG_NON_NORMALIZED_COORDINATES); + + if (vk_sampler_desc->maxAnisotropy < 1.0f) + vk_sampler_desc->anisotropyEnable = VK_FALSE; + + if (vk_sampler_desc->anisotropyEnable) + vk_sampler_desc->maxAnisotropy = min(16.0f, vk_sampler_desc->maxAnisotropy); - if ((rc = pthread_mutex_lock(&state->mutex))) - ERR("Failed to lock mutex, rc %d.\n", rc); + if (d3d12_sampler_needs_border_color(desc->AddressU, desc->AddressV, desc->AddressW)) + vk_sampler_desc->borderColor = vk_static_border_color_from_d3d12(desc->BorderColor); - if (vk_pool && vk_set) - VK_CALL(vkFreeDescriptorSets(device->vk_device, vk_pool, 1, &vk_set)); - pthread_mutex_unlock(&state->mutex); + if (vk_reduction_desc->reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE && + device->device_info.vulkan_1_2_features.samplerFilterMinmax) + vk_prepend_struct(vk_sampler_desc, vk_reduction_desc); } static void d3d12_resource_get_tiling(struct d3d12_device *device, struct d3d12_resource *resource, @@ -4382,7 +4378,7 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12 object->res.va = object->mem.resource.va; } - object->priority.allows_dynamic_residency = + object->priority.allows_dynamic_residency = device->device_info.pageable_device_memory_features.pageableDeviceLocalMemory && object->mem.chunk == NULL /* not suballocated */ && (device->memory_properties.memoryTypes[object->mem.device_allocation.vk_memory_type].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); @@ -4805,181 +4801,10 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); } -void d3d12_desc_copy_single(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descriptor_va_t src_va, - 
struct d3d12_device *device) -{ - VkCopyDescriptorSet vk_copies[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - const struct vkd3d_bindless_set_info *set_info; - struct vkd3d_descriptor_binding binding; - uint32_t set_mask, set_info_index; - struct d3d12_desc_split src, dst; - VkCopyDescriptorSet *vk_copy; - uint32_t copy_count = 0; - uint32_t flags; - - src = d3d12_desc_decode_va(src_va); - dst = d3d12_desc_decode_va(dst_va); - - flags = src.view->info.flags; - set_mask = src.types->set_info_mask; - - if (flags & VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR) - { - /* Faster path, there is no need to fetch set info deep into the guts of d3d12_device, - * and also, we don't get a dependency chain on the CTZ loop -> index, which causes OoO bubbles - * it seems. */ - binding = src.types->single_binding; - - if (src.heap->sets[binding.set].copy_template_single) - { - src.heap->sets[binding.set].copy_template_single( - dst.heap->sets[binding.set].mapped_set, - src.heap->sets[binding.set].mapped_set, - dst.offset, src.offset); - } - else - { - vk_copy = &vk_copies[copy_count++]; - vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; - vk_copy->pNext = NULL; - vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set; - vk_copy->srcBinding = binding.binding; - vk_copy->srcArrayElement = src.offset; - vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set; - vk_copy->dstBinding = binding.binding; - vk_copy->dstArrayElement = dst.offset; - vk_copy->descriptorCount = 1; - } - } - else - { - /* Need to copy multiple descriptors. CTZ loop. 
*/ - while (set_mask) - { - set_info_index = vkd3d_bitmask_iter32(&set_mask); - set_info = &device->bindless_state.set_info[set_info_index]; - - if (set_info->host_copy_template_single) - { - set_info->host_copy_template_single( - dst.heap->sets[set_info->set_index].mapped_set, - src.heap->sets[set_info->set_index].mapped_set, - dst.offset, src.offset); - } - else - { - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, set_info_index); - - vk_copy = &vk_copies[copy_count++]; - vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; - vk_copy->pNext = NULL; - vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set; - vk_copy->srcBinding = binding.binding; - vk_copy->srcArrayElement = src.offset; - vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set; - vk_copy->dstBinding = binding.binding; - vk_copy->dstArrayElement = dst.offset; - vk_copy->descriptorCount = 1; - } - } - } - - if (flags & VKD3D_DESCRIPTOR_FLAG_RAW_VA_AUX_BUFFER) - { - const VkDeviceAddress *src_vas = src.heap->raw_va_aux_buffer.host_ptr; - VkDeviceAddress *dst_vas = dst.heap->raw_va_aux_buffer.host_ptr; - dst_vas[dst.offset] = src_vas[src.offset]; - } - - if (copy_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 0, NULL, copy_count, vk_copies)); - - if (flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET) - { - const struct vkd3d_bound_buffer_range *src_buffer_ranges = src.heap->buffer_ranges.host_ptr; - struct vkd3d_bound_buffer_range *dst_buffer_ranges = dst.heap->buffer_ranges.host_ptr; - dst_buffer_ranges[dst.offset] = src_buffer_ranges[src.offset]; - } - - *dst.types = *src.types; - *dst.view = *src.view; -} - -void d3d12_desc_copy_range(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descriptor_va_t src_va, - unsigned int count, D3D12_DESCRIPTOR_HEAP_TYPE heap_type, struct d3d12_device *device) -{ - VkCopyDescriptorSet vk_copies[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - const struct 
vkd3d_bindless_set_info *set_info; - struct vkd3d_descriptor_binding binding; - struct d3d12_desc_split src, dst; - VkCopyDescriptorSet *vk_copy; - uint32_t set_info_mask = 0; - uint32_t copy_count = 0; - uint32_t set_info_index; - unsigned int i; - - src = d3d12_desc_decode_va(src_va); - dst = d3d12_desc_decode_va(dst_va); - - for (i = 0; i < count; i++) - set_info_mask |= src.types[i].set_info_mask; - - memcpy(dst.view, src.view, sizeof(*dst.view) * count); - memcpy(dst.types, src.types, sizeof(*dst.types) * count); - - while (set_info_mask) - { - set_info_index = vkd3d_bitmask_iter32(&set_info_mask); - set_info = &device->bindless_state.set_info[set_info_index]; - - if (set_info->host_copy_template) - { - set_info->host_copy_template( - dst.heap->sets[set_info->set_index].mapped_set, - src.heap->sets[set_info->set_index].mapped_set, - dst.offset, src.offset, count); - } - else - { - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, set_info_index); - - vk_copy = &vk_copies[copy_count++]; - vk_copy->sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; - vk_copy->pNext = NULL; - vk_copy->srcSet = src.heap->sets[binding.set].vk_descriptor_set; - vk_copy->srcBinding = binding.binding; - vk_copy->srcArrayElement = src.offset; - vk_copy->dstSet = dst.heap->sets[binding.set].vk_descriptor_set; - vk_copy->dstBinding = binding.binding; - vk_copy->dstArrayElement = dst.offset; - vk_copy->descriptorCount = count; - } - } - - if (heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - const VkDeviceAddress *src_vas = src.heap->raw_va_aux_buffer.host_ptr; - VkDeviceAddress *dst_vas = dst.heap->raw_va_aux_buffer.host_ptr; - memcpy(dst_vas + dst.offset, src_vas + src.offset, sizeof(*dst_vas) * count); - - if (device->bindless_state.flags & (VKD3D_TYPED_OFFSET_BUFFER | VKD3D_SSBO_OFFSET_BUFFER)) - { - const struct vkd3d_bound_buffer_range *src_ranges = src.heap->buffer_ranges.host_ptr; - struct vkd3d_bound_buffer_range *dst_ranges = 
dst.heap->buffer_ranges.host_ptr; - memcpy(dst_ranges + dst.offset, src_ranges + src.offset, sizeof(*dst_ranges) * count); - } - } - - if (copy_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 0, NULL, copy_count, vk_copies)); -} - void d3d12_desc_copy(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descriptor_va_t src_va, unsigned int count, D3D12_DESCRIPTOR_HEAP_TYPE heap_type, struct d3d12_device *device) { -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA +#if defined(VKD3D_ENABLE_DESCRIPTOR_QA) && 0 if (!d3d12_device_use_embedded_mutable_descriptors(device)) { struct d3d12_desc_split dst, src; @@ -4997,22 +4822,17 @@ void d3d12_desc_copy(vkd3d_cpu_descriptor_va_t dst_va, vkd3d_cpu_descriptor_va_t } #endif - if (d3d12_device_use_embedded_mutable_descriptors(device)) + /* Rare path. */ + if (heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { - /* Rare path. */ - if (heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - d3d12_desc_copy_embedded_resource(dst_va, src_va, - device->bindless_state.descriptor_buffer_cbv_srv_uav_size * count); - } - else - { - vkd3d_memcpy_aligned_cached((void *)dst_va, (const void *)src_va, - device->bindless_state.descriptor_buffer_sampler_size * count); - } + d3d12_desc_copy_embedded_resource(dst_va, src_va, + device->bindless_state.descriptor_heap_cbv_srv_uav_size * count); } else - d3d12_desc_copy_range(dst_va, src_va, count, heap_type, device); + { + vkd3d_memcpy_aligned_cached((void *)dst_va, (const void *)src_va, + device->bindless_state.descriptor_heap_sampler_size * count); + } } bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device, @@ -5176,13 +4996,10 @@ bool vkd3d_create_opacity_micromap_view(struct d3d12_device *device, const struc return true; } -#define VKD3D_VIEW_RAW_BUFFER 0x1 -#define VKD3D_VIEW_BUFFER_SRV 0x2 - static void vkd3d_get_metadata_buffer_view_for_resource(struct d3d12_device *device, struct d3d12_resource *resource, DXGI_FORMAT view_format, VkDeviceSize offset, VkDeviceSize size, 
VkDeviceSize structure_stride, - struct vkd3d_descriptor_metadata_buffer_view *view) + bool raw, struct vkd3d_descriptor_metadata_buffer_view *view) { VkDeviceSize element_size; @@ -5191,13 +5008,8 @@ static void vkd3d_get_metadata_buffer_view_for_resource(struct d3d12_device *dev view->va = resource->res.va + offset * element_size; view->range = size * element_size; - view->dxgi_format = view_format; - view->flags = VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE | VKD3D_DESCRIPTOR_FLAG_NON_NULL; - - /* If we would need an SSBO offset buffer for whatever reason, just fallback to a typed view instead. */ - if (view_format == DXGI_FORMAT_UNKNOWN) - if (view->va & (device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment - 1)) - view->dxgi_format = DXGI_FORMAT_R32_UINT; + view->dxgi_format = raw ? DXGI_FORMAT_UNKNOWN : view_format; + view->flags = VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE; } static DXGI_FORMAT vkd3d_structured_srv_to_texel_buffer_dxgi_format(unsigned int stride) @@ -5234,53 +5046,6 @@ static DXGI_FORMAT vkd3d_structured_uav_to_texel_buffer_dxgi_format(unsigned int return DXGI_FORMAT_R16_UINT; } -static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - struct d3d12_resource *resource, DXGI_FORMAT view_format, - VkDeviceSize offset, VkDeviceSize size, VkDeviceSize structure_stride, - unsigned int flags, struct vkd3d_view **view) -{ - const struct vkd3d_format *format; - struct vkd3d_view_key key; - VkDeviceSize element_size; - - if (view_format == DXGI_FORMAT_R32_TYPELESS && (flags & VKD3D_VIEW_RAW_BUFFER)) - { - format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); - element_size = format->byte_count; - } - else if (view_format == DXGI_FORMAT_UNKNOWN && structure_stride) - { - DXGI_FORMAT dxgi_format; - - if (flags & VKD3D_VIEW_BUFFER_SRV) - dxgi_format = vkd3d_structured_srv_to_texel_buffer_dxgi_format(structure_stride); - else - dxgi_format = 
vkd3d_structured_uav_to_texel_buffer_dxgi_format(structure_stride); - - format = vkd3d_get_format(device, dxgi_format, false); - element_size = structure_stride; - } - else if ((format = vkd3d_format_from_d3d12_resource_desc(device, &resource->desc, view_format))) - { - element_size = format->byte_count; - } - else - { - WARN("Failed to find format for %#x.\n", resource->desc.Format); - return false; - } - - assert(d3d12_resource_is_buffer(resource)); - - key.view_type = VKD3D_VIEW_TYPE_BUFFER; - key.u.buffer.buffer = resource->res.vk_buffer; - key.u.buffer.format = format; - key.u.buffer.offset = resource->mem.offset + offset * element_size; - key.u.buffer.size = size * element_size; - - return !!(*view = vkd3d_view_map_create_view(&resource->view_map, device, &key)); -} - static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components, const struct vkd3d_format *format, bool allowed_swizzle) { @@ -5473,35 +5238,29 @@ static bool init_default_texture_view_desc(struct vkd3d_texture_view_desc *desc, return true; } -bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) +bool vkd3d_setup_texture_view(struct d3d12_device *device, + const struct vkd3d_texture_view_desc *desc, + struct vkd3d_texture_view_create_info *info) { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkImageViewUsageCreateInfo image_usage_create_info; const struct vkd3d_format *format = desc->format; - VkImageViewMinLodCreateInfoEXT min_lod_desc; - VkImageViewSlicedCreateInfoEXT sliced_desc; - VkImageView vk_view = VK_NULL_HANDLE; - VkImageViewCreateInfo view_desc; int32_t miplevel_clamp_fixed; - struct vkd3d_view *object; uint32_t clamp_base_level; uint32_t end_level; - VkResult vr; - view_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_desc.pNext = NULL; - view_desc.flags = 0; - view_desc.image = desc->image; - view_desc.viewType = desc->view_type; - view_desc.format = 
format->vk_format; - vkd3d_set_view_swizzle_for_format(&view_desc.components, format, desc->allowed_swizzle); + memset(info, 0, sizeof(*info)); + info->view_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info->view_desc.flags = 0; + info->view_desc.image = desc->image; + info->view_desc.viewType = desc->view_type; + info->view_desc.format = format->vk_format; + vkd3d_set_view_swizzle_for_format(&info->view_desc.components, format, desc->allowed_swizzle); if (desc->allowed_swizzle) - vk_component_mapping_compose(&view_desc.components, &desc->components); - view_desc.subresourceRange.aspectMask = desc->aspect_mask; - view_desc.subresourceRange.baseMipLevel = desc->miplevel_idx; - view_desc.subresourceRange.levelCount = desc->miplevel_count; - view_desc.subresourceRange.baseArrayLayer = desc->layer_idx; - view_desc.subresourceRange.layerCount = desc->layer_count; + vk_component_mapping_compose(&info->view_desc.components, &desc->components); + info->view_desc.subresourceRange.aspectMask = desc->aspect_mask; + info->view_desc.subresourceRange.baseMipLevel = desc->miplevel_idx; + info->view_desc.subresourceRange.levelCount = desc->miplevel_count; + info->view_desc.subresourceRange.baseArrayLayer = desc->layer_idx; + info->view_desc.subresourceRange.layerCount = desc->layer_count; /* If the clamp is defined such that it would only access mip levels * outside the view range, don't make a view and use a NULL descriptor. 
@@ -5515,10 +5274,9 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_t if (device->device_info.image_view_min_lod_features.minLod) { /* Clamp minLod the highest accessed mip level to stay within spec */ - min_lod_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT; - min_lod_desc.pNext = NULL; - min_lod_desc.minLod = vkd3d_fixed_24_8_to_float(miplevel_clamp_fixed); - vk_prepend_struct(&view_desc, &min_lod_desc); + info->min_lod_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT; + info->min_lod_desc.minLod = vkd3d_fixed_24_8_to_float(miplevel_clamp_fixed); + vk_prepend_struct(&info->view_desc, &info->min_lod_desc); } else { @@ -5526,29 +5284,64 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_t /* This is not correct, but it's the best we can do without VK_EXT_image_view_min_lod. * It should at least avoid a scenario where implicit LOD fetches from invalid levels. */ clamp_base_level = (uint32_t)desc->miplevel_clamp; - end_level = view_desc.subresourceRange.baseMipLevel + view_desc.subresourceRange.levelCount; - view_desc.subresourceRange.levelCount = end_level - clamp_base_level; - view_desc.subresourceRange.baseMipLevel = clamp_base_level; + end_level = info->view_desc.subresourceRange.baseMipLevel + info->view_desc.subresourceRange.levelCount; + info->view_desc.subresourceRange.levelCount = end_level - clamp_base_level; + info->view_desc.subresourceRange.baseMipLevel = clamp_base_level; } } - image_usage_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; - image_usage_create_info.pNext = NULL; - image_usage_create_info.usage = desc->image_usage; - vk_prepend_struct(&view_desc, &image_usage_create_info); + info->image_usage_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; + info->image_usage_create_info.usage = desc->image_usage; + vk_prepend_struct(&info->view_desc, &info->image_usage_create_info); if (desc->view_type == 
VK_IMAGE_VIEW_TYPE_3D && (desc->w_offset != 0 || desc->w_size != VK_REMAINING_3D_SLICES_EXT) && device->device_info.image_sliced_view_of_3d_features.imageSlicedViewOf3D) { - sliced_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_SLICED_CREATE_INFO_EXT; - sliced_desc.pNext = NULL; - sliced_desc.sliceOffset = desc->w_offset; - sliced_desc.sliceCount = desc->w_size; - vk_prepend_struct(&view_desc, &sliced_desc); + info->sliced_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_SLICED_CREATE_INFO_EXT; + info->sliced_desc.pNext = NULL; + info->sliced_desc.sliceOffset = desc->w_offset; + info->sliced_desc.sliceCount = desc->w_size; + vk_prepend_struct(&info->view_desc, &info->sliced_desc); } - if ((vr = VK_CALL(vkCreateImageView(device->vk_device, &view_desc, NULL, &vk_view))) < 0) + /* Hacky workaround. */ + if (device->device_info.properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA) + { + switch (info->view_desc.viewType) + { + case VK_IMAGE_VIEW_TYPE_2D: + info->view_desc.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + break; + + case VK_IMAGE_VIEW_TYPE_1D: + info->view_desc.viewType = VK_IMAGE_VIEW_TYPE_1D_ARRAY; + break; + + default: + break; + } + } + + return true; + } + else + { + return false; + } +} + +bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_texture_view_create_info info; + VkImageView vk_view = VK_NULL_HANDLE; + struct vkd3d_view *object; + VkResult vr; + + if (vkd3d_setup_texture_view(device, desc, &info)) + { + if ((vr = VK_CALL(vkCreateImageView(device->vk_device, &info.view_desc, NULL, &vk_view))) < 0) { WARN("Failed to create Vulkan image view, vr %d.\n", vr); return false; @@ -5562,7 +5355,7 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_t } object->vk_image_view = vk_view; - object->format = format; + object->format = desc->format; 
object->info.texture.vk_view_type = desc->view_type; object->info.texture.aspect_mask = desc->aspect_mask; object->info.texture.miplevel_idx = desc->miplevel_idx; @@ -5574,171 +5367,14 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_t return true; } -static inline void vkd3d_init_write_descriptor_set(VkWriteDescriptorSet *vk_write, const struct d3d12_desc_split *split, - struct vkd3d_descriptor_binding binding, - VkDescriptorType vk_descriptor_type, const union vkd3d_descriptor_info *info) -{ - vk_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_write->pNext = NULL; - vk_write->dstSet = split->heap->sets[binding.set].vk_descriptor_set; - vk_write->dstBinding = binding.binding; - vk_write->dstArrayElement = split->offset; - vk_write->descriptorCount = 1; - vk_write->descriptorType = vk_descriptor_type; - vk_write->pImageInfo = &info->image; - vk_write->pBufferInfo = &info->buffer; - vk_write->pTexelBufferView = &info->buffer_view; -} - -static void d3d12_descriptor_heap_write_null_descriptor_template_embedded_partial(struct d3d12_device *device, - vkd3d_cpu_descriptor_va_t desc_va, - VkDescriptorType vk_descriptor_type, size_t payload_offset, size_t size) -{ - struct d3d12_desc_split_embedded desc; - const uint8_t *src; - - desc = d3d12_desc_decode_embedded_resource_va(desc_va); - src = vkd3d_bindless_state_get_null_descriptor_payload(&device->bindless_state, vk_descriptor_type); - desc.payload += payload_offset; - src += payload_offset; - - if (VKD3D_EXPECT_TRUE(desc.metadata == NULL)) - vkd3d_memcpy_aligned_non_temporal(desc.payload, src, size); - else - vkd3d_memcpy_aligned_cached(desc.payload, src, size); -} - -static void d3d12_descriptor_heap_write_null_descriptor_template_embedded(struct d3d12_device *device, - vkd3d_cpu_descriptor_va_t desc_va, - VkDescriptorType vk_descriptor_type) -{ - struct d3d12_desc_split_embedded desc; - const uint8_t *src; - - desc = d3d12_desc_decode_embedded_resource_va(desc_va); - - src 
= vkd3d_bindless_state_get_null_descriptor_payload(&device->bindless_state, vk_descriptor_type); - - if (VKD3D_EXPECT_TRUE(desc.metadata == NULL)) - { - /* If metadata is packed into the descriptor, it gets cleared to zero here in this copy. */ - vkd3d_memcpy_aligned_non_temporal(desc.payload, src, - device->bindless_state.descriptor_buffer_cbv_srv_uav_size); - } - else - { - vkd3d_memcpy_aligned_cached(desc.payload, src, - device->bindless_state.descriptor_buffer_cbv_srv_uav_size); - - /* If metadata is not packed, need to clear that separately. */ - memset(desc.metadata, 0, sizeof(*desc.metadata)); - } -} - -static void d3d12_descriptor_heap_write_null_descriptor_template(vkd3d_cpu_descriptor_va_t desc_va, - VkDescriptorType vk_mutable_descriptor_type) -{ - /* For null descriptors, some games don't write the correct type (usually an image SRV), - * so we will need to splat null descriptors over all descriptor sets. - * For MUTABLE, this would normally just be one descriptor set, but - * we need MUTABLE + STORAGE_BUFFER, or 6 sets for non-mutable :\ */ - const struct d3d12_null_descriptor_template *null_descriptor_template; - VkWriteDescriptorSet writes[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - const struct vkd3d_vk_device_procs *vk_procs; - struct vkd3d_bindless_state *bindless_state; - struct d3d12_desc_split desc; - unsigned int num_writes, i; - unsigned int offset; - VkDeviceAddress *va; - const uint8_t *src; - uint8_t *dst; - - desc = d3d12_desc_decode_va(desc_va); - - null_descriptor_template = &desc.heap->null_descriptor_template; - - /* When mutable descriptors are not supported, set a dummy type. - This will make those drivers not care about the null type being different between - null writes. */ - if (!null_descriptor_template->has_mutable_descriptors) - vk_mutable_descriptor_type = 0; - - /* Skip writes with the same null type that are already null. 
*/ - if (!(desc.view->info.flags & VKD3D_DESCRIPTOR_FLAG_NON_NULL) - && desc.types->current_null_type == vk_mutable_descriptor_type) - return; - - num_writes = null_descriptor_template->num_writes; - offset = desc.offset; - - if (null_descriptor_template->has_descriptor_buffer) - { - bindless_state = &desc.heap->device->bindless_state; - - for (i = 0; i < num_writes; i++) - { - dst = desc.heap->sets[i].mapped_set; - dst += offset * null_descriptor_template->writes.payloads[i].desc_size; - src = null_descriptor_template->writes.payloads[i].src_payload; - if (!src) - src = vkd3d_bindless_state_get_null_descriptor_payload(bindless_state, vk_mutable_descriptor_type); - memcpy(dst, src, null_descriptor_template->writes.payloads[i].desc_size); - } - } - else - { - vk_procs = &desc.heap->device->vk_procs; - - for (i = 0; i < num_writes; i++) - { - writes[i] = null_descriptor_template->writes.descriptors.writes[i]; - if (writes[i].descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) - writes[i].descriptorType = vk_mutable_descriptor_type; - writes[i].dstArrayElement = offset; - } - - if (num_writes) - VK_CALL(vkUpdateDescriptorSets(desc.heap->device->vk_device, num_writes, writes, 0, NULL)); - } - - desc.view->info.flags = 0; - desc.types->set_info_mask = null_descriptor_template->set_info_mask; - desc.types->current_null_type = vk_mutable_descriptor_type; - memset(desc.view, 0, sizeof(*desc.view)); - - if (num_writes == 1) - { - desc.view->info.flags |= VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; - /* If the template has one descriptor write, this is a single set descriptor heap. */ - desc.types->single_binding.set = 0; - /* For descriptor buffer path, the binding is ignored. */ - desc.types->single_binding.binding = null_descriptor_template->writes.descriptors.writes[0].dstBinding; - } - - va = desc.heap->raw_va_aux_buffer.host_ptr; - if (va) - va[offset] = 0; - - /* Notify descriptor QA that we have a universal null descriptor. 
*/ - vkd3d_descriptor_debug_write_descriptor(desc.heap->descriptor_heap_info.host_ptr, - desc.heap->cookie, offset, - VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_SAMPLED_IMAGE_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_IMAGE_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT | - VKD3D_DESCRIPTOR_QA_TYPE_RT_ACCELERATION_STRUCTURE_BIT, vkd3d_null_cookie()); -} - void d3d12_desc_create_cbv_embedded(vkd3d_cpu_descriptor_va_t desc_va, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorAddressInfoEXT addr_info; + VkResourceDescriptorInfoEXT desc_info; struct d3d12_desc_split_embedded d; - VkDescriptorGetInfoEXT get_info; + VkDeviceAddressRangeEXT addr_range; + VkHostAddressRangeEXT desc_range; if (!desc) { @@ -5752,305 +5388,60 @@ void d3d12_desc_create_cbv_embedded(vkd3d_cpu_descriptor_va_t desc_va, return; } - if (!desc->BufferLocation) - { - d3d12_descriptor_heap_write_null_descriptor_template_embedded(device, desc_va, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); - return; - } - - d = d3d12_desc_decode_embedded_resource_va(desc_va); - - /* Metadata is irrelevant for CBV since we don't enable table hoisting. */ - - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - get_info.data.pUniformBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.format = VK_FORMAT_UNDEFINED; - addr_info.address = desc->BufferLocation; - addr_info.range = desc->SizeInBytes; - - /* For robustness purposes. If someone tries to access a UBO as an image, - * it should translate to a NULL descriptor. 
*/ - - /* See vkd3d_bindless_state_init_null_descriptor_payloads for details. - * Use UNIFORM_BUFFER template here, since we've already prepared the desired NULL payload - * at the typed offset. */ - d3d12_descriptor_heap_write_null_descriptor_template_embedded_partial(device, desc_va, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, device->bindless_state.descriptor_buffer_packed_raw_buffer_offset); + d = d3d12_desc_decode_embedded_resource_va(desc_va, + device->bindless_state.descriptor_heap_packed_metadata_offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustUniformBufferDescriptorSize, - d.payload + device->bindless_state.descriptor_buffer_packed_raw_buffer_offset)); -} - -void d3d12_desc_create_cbv(vkd3d_cpu_descriptor_va_t desc_va, - struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - const struct vkd3d_unique_resource *resource = NULL; - union vkd3d_descriptor_info null_descriptor_info; - union vkd3d_descriptor_info descriptor_info; - struct vkd3d_descriptor_binding binding; - VkDescriptorAddressInfoEXT addr_info; - VkDescriptorType vk_descriptor_type; - VkWriteDescriptorSet vk_writes[2]; - VkDescriptorGetInfoEXT get_info; - uint32_t vk_write_count = 0; - struct d3d12_desc_split d; - void *payload; - - uint32_t info_index; + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; + desc_info.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - if (!desc) + if (desc->BufferLocation) { - WARN("Constant buffer desc is NULL.\n"); - return; + desc_info.data.pAddressRange = &addr_range; + addr_range.address = desc->BufferLocation; + addr_range.size = desc->SizeInBytes; } - if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) - { - WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); - return; - } + /* 
Clear out lower half which should ideally be a null descriptor. */ + if (device->bindless_state.descriptor_heap_packed_raw_buffer_offset) + memset(d.payload, 0, device->bindless_state.descriptor_heap_packed_raw_buffer_offset); - vk_descriptor_type = vkd3d_bindless_state_get_cbv_descriptor_type(&device->bindless_state); + desc_range.address = d.payload + device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + desc_range.size = device->bindless_state.ubo_size; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); - if (!desc->BufferLocation) + if (device->bindless_state.ubo_size < device->device_info.descriptor_heap_properties.bufferDescriptorSize) { - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, vk_descriptor_type); - return; + uint8_t *padding = d.payload + device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + padding += device->bindless_state.ubo_size; + memset(padding, 0, device->device_info.descriptor_heap_properties.bufferDescriptorSize - device->bindless_state.ubo_size); } - - d = d3d12_desc_decode_va(desc_va); - - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_RAW, - VKD3D_BINDLESS_SET_CBV); - - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - - d.types->set_info_mask = 1u << info_index; - d.types->single_binding = binding; - d.view->info.buffer.va = desc->BufferLocation; - d.view->info.buffer.range = desc->SizeInBytes; - d.view->info.buffer.padding = 0; - d.view->info.buffer.dxgi_format = DXGI_FORMAT_UNKNOWN; - d.view->info.buffer.flags = VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE | VKD3D_DESCRIPTOR_FLAG_NON_NULL; - - /* De-reffing resource in descriptor buffer path is kinda redundant but there are some scenarios where - * it's required: - * - Hoisted CBV descriptors. These translate to push descriptors and must be resolved to plain buffers. 
- * Possible to write the VA instead and de-ref late in theory. - * - Descriptor QA. Can be ifdeffed. */ - - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = vk_descriptor_type; - /* This is a union of pointers and UBOs are always supported here. */ - get_info.data.pUniformBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.format = VK_FORMAT_UNDEFINED; - addr_info.address = desc->BufferLocation; - addr_info.range = desc->SizeInBytes; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustUniformBufferDescriptorSize, - payload)); -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - /* Only used for descriptor QA in this path. */ - resource = vkd3d_va_map_deref(&device->memory_allocator.va_map, desc->BufferLocation); -#endif - } - else - { - resource = vkd3d_va_map_deref(&device->memory_allocator.va_map, desc->BufferLocation); - descriptor_info.buffer.buffer = resource->vk_buffer; - descriptor_info.buffer.offset = desc->BufferLocation - resource->va; - descriptor_info.buffer.range = min(desc->SizeInBytes, resource->size - descriptor_info.buffer.offset); - - vkd3d_init_write_descriptor_set(&vk_writes[vk_write_count++], &d, binding, vk_descriptor_type, &descriptor_info); - } - - /* Clear out sibling typed descriptor if appropriate. - * Native drivers are robust against scenarios like these, and so we need to be too. */ - if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED) - { - /* The NULL descriptor template for non-embedded is too messy to invoke here. 
*/ - info_index = VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_TYPED; - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - d.types->set_info_mask |= 1u << info_index; - - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - get_info.data.pUniformTexelBuffer = NULL; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustUniformTexelBufferDescriptorSize, - payload)); - } - else - { - null_descriptor_info.buffer.buffer = VK_NULL_HANDLE; - null_descriptor_info.buffer.offset = 0; - null_descriptor_info.buffer.range = VK_WHOLE_SIZE; - vkd3d_init_write_descriptor_set(&vk_writes[vk_write_count++], &d, binding, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, &null_descriptor_info); - } - } - else - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; - -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_INSTRUCTION_QA_CHECKS) - { - /* We may want to peek at the buffer's raw VA when doing instrumentation. */ - VkDeviceAddress *counter_addresses = d.heap->raw_va_aux_buffer.host_ptr; - counter_addresses[d.offset] = - vkd3d_descriptor_debug_encode_buffer_va(d.view->info.buffer.va, sizeof(uint32_t)); - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT; - } -#endif - - if (vk_write_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, vk_write_count, vk_writes, 0, NULL)); - - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, resource ? resource->cookie : vkd3d_null_cookie()); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, - d.offset, - vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? 
- VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT : - VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT, - d.view->qa_cookie); } -static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) +static bool d3d12_resource_desc_supports_raw_ssbo(struct d3d12_device *device, + uint32_t stride, bool raw) { - if (flags == D3D12_BUFFER_SRV_FLAG_RAW) - return VKD3D_VIEW_RAW_BUFFER | VKD3D_VIEW_BUFFER_SRV; - if (flags) - FIXME("Unhandled buffer SRV flags %#x.\n", flags); - return VKD3D_VIEW_BUFFER_SRV; + assert(stride || raw); + if (stride) + return (stride & (device->bindless_state.min_ssbo_alignment - 1)) == 0; + else + return device->bindless_state.supports_universal_byte_address_ssbo; } -static void vkd3d_buffer_view_get_bound_range_ssbo( - struct d3d12_device *device, struct d3d12_resource *resource, - VkDeviceSize offset, VkDeviceSize range, VkDescriptorBufferInfo *vk_buffer, - struct vkd3d_bound_buffer_range *bound_range) +static bool d3d12_resource_desc_supports_raw_srv_ssbo( + struct d3d12_device *device, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { - if (resource) - { - VkDeviceSize alignment = d3d12_device_get_ssbo_alignment(device); - VkDeviceSize aligned_begin = offset & ~(alignment - 1); - VkDeviceSize aligned_end = min((offset + range + alignment - 1) & ~(alignment - 1), resource->desc.Width); - - /* heap_offset is guaranteed to have 64KiB alignment */ - vk_buffer->buffer = resource->res.vk_buffer; - vk_buffer->offset = resource->mem.offset + aligned_begin; - vk_buffer->range = aligned_end - aligned_begin; - - bound_range->byte_offset = offset - aligned_begin; - bound_range->byte_count = range; - } - else - { - vk_buffer->buffer = VK_NULL_HANDLE; - vk_buffer->offset = 0; - vk_buffer->range = VK_WHOLE_SIZE; - - bound_range->byte_offset = 0; - bound_range->byte_count = 0; - } + return d3d12_resource_desc_supports_raw_ssbo(device, + desc->Buffer.StructureByteStride, + (desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW) != 0); } -static bool 
vkd3d_buffer_view_get_aligned_view( - struct d3d12_device *device, struct d3d12_resource *resource, - DXGI_FORMAT format, unsigned int vk_flags, - VkDeviceSize first_element, VkDeviceSize num_elements, - VkDeviceSize structured_stride, struct vkd3d_bound_buffer_range *bound_range, - struct vkd3d_view **view) +static bool d3d12_resource_desc_supports_raw_uav_ssbo( + struct d3d12_device *device, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { - const struct vkd3d_format *vkd3d_format; - VkDeviceSize max_resource_elements; - VkDeviceSize max_element_headroom; - VkDeviceSize element_align; - VkDeviceSize max_elements; - VkDeviceSize begin_range; - VkDeviceSize end_range; - - if (device->bindless_state.flags & VKD3D_TYPED_OFFSET_BUFFER) - { - /* For typed buffers, we will try to remove two cases of extreme hashmap contention, i.e. - * first_element and num_elements. By quantizing these two and relying on offset buffers, - * we should achieve a bounded value for number of possible views we can create for a given resource. */ - max_elements = device->device_info.properties2.properties.limits.maxTexelBufferElements; - - if (format) - { - vkd3d_format = vkd3d_get_format(device, format, false); - max_resource_elements = resource->desc.Width / vkd3d_format->byte_count; - } - else - { - /* For structured buffers, we need to rescale input parameters to - * be in terms of u32 since the offset buffer must be in terms of words. - * When using typed buffers, the offset buffer is in format of u32 - * (element offset, element size). */ - first_element = (first_element * structured_stride) / sizeof(uint32_t); - num_elements = (num_elements * structured_stride) / sizeof(uint32_t); - structured_stride = sizeof(uint32_t); - max_resource_elements = resource->desc.Width / sizeof(uint32_t); - } - - /* Requantizing the typed offset is shaky business if we overflow max_elements when doing so. - * We can always fall back to 0 offset for the difficult and rare cases. 
*/ - - if (num_elements > max_elements) - { - FIXME("Application is attempting to use more elements in a typed buffer (%llu) than supported by device (%llu).\n", - (unsigned long long)num_elements, (unsigned long long)max_elements); - bound_range->element_offset = 0; - bound_range->element_count = num_elements; - } - else if (num_elements >= max_resource_elements) - { - bound_range->element_offset = 0; - bound_range->element_count = num_elements; - } - else - { - /* Quantizing to alignment of N will at most increment number of elements in the view by N - 1. */ - max_element_headroom = max_elements - num_elements + 1; - - /* Based on headroom, align offset to the largest POT factor of N. */ - element_align = 1u << vkd3d_log2i(max_element_headroom); - - begin_range = first_element & ~(element_align - 1); - end_range = (first_element + num_elements + element_align - 1) & ~(element_align - 1); - end_range = min(end_range, max_resource_elements); - - bound_range->element_offset = first_element - begin_range; - bound_range->element_count = num_elements; - - first_element = begin_range; - num_elements = end_range - begin_range; - } - } - - if (!vkd3d_create_buffer_view_for_resource(device, resource, format, - first_element, num_elements, - structured_stride, vk_flags, view)) - return false; - - return true; + return d3d12_resource_desc_supports_raw_ssbo(device, + desc->Buffer.StructureByteStride, + (desc->Buffer.Flags & D3D12_BUFFER_UAV_FLAG_RAW) != 0); } static void vkd3d_create_buffer_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, @@ -6059,9 +5450,12 @@ static void vkd3d_create_buffer_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_descriptor_metadata_buffer_view view; - VkDescriptorAddressInfoEXT addr_info; + VkTexelBufferDescriptorInfoEXT texel_buffer_info; + VkResourceDescriptorInfoEXT desc_info; struct d3d12_desc_split_embedded d; - VkDescriptorGetInfoEXT get_info; + 
VkDeviceAddressRangeEXT ssbo_range; + VkHostAddressRangeEXT desc_range; + uint8_t stack_payload[256]; if (!desc) { @@ -6069,345 +5463,136 @@ static void vkd3d_create_buffer_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, return; } - d = d3d12_desc_decode_embedded_resource_va(desc_va); + d = d3d12_desc_decode_embedded_resource_va(desc_va, + device->bindless_state.descriptor_heap_packed_metadata_offset); - if (desc->ViewDimension == D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE) - { - D3D12_GPU_VIRTUAL_ADDRESS va = desc->RaytracingAccelerationStructure.Location; - /* NULL RTAS in D3D12 (and Vulkan for that matter) is supposed to force a trigger on miss shader. - * What implementation can do here is to build a dummy empty RTAS instead. - * When we move to proper RTAS descriptors, this fallback will go away, but for now we - * make this work by swapping out the null descriptor ourselves. */ - if (!va) - va = vkd3d_get_null_rtas_va(device); - memcpy(d.payload, &va, sizeof(va)); - return; - } - - if (desc->ViewDimension != D3D12_SRV_DIMENSION_BUFFER) - { - WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); - return; - } - - if (!resource) - { - /* We prepare a packed NULL descriptor that contains both texel buffer and SSBO. */ - d3d12_descriptor_heap_write_null_descriptor_template_embedded(device, desc_va, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); - return; - } - - /* Ignore metadata for SRV. */ - vkd3d_get_metadata_buffer_view_for_resource(device, resource, - desc->Format, desc->Buffer.FirstElement, desc->Buffer.NumElements, - desc->Buffer.StructureByteStride, &view); - - /* Emit SSBO. 
*/ - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - get_info.data.pStorageBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.address = view.va; - addr_info.range = view.range; - addr_info.format = VK_FORMAT_UNDEFINED; - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - d.payload + device->bindless_state.descriptor_buffer_packed_raw_buffer_offset)); - - /* Emit texel buffer alias. */ - get_info.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - get_info.data.pUniformTexelBuffer = &addr_info; - addr_info.format = vkd3d_internal_get_vk_format(device, view.dxgi_format); - /* If we really intended to emit raw buffers, the fallback will be inferred as R32_UINT. */ - if (addr_info.format == VK_FORMAT_UNDEFINED) - { - /* Raw buffer is always emitted as R32_UINT on native. - * Try to match behavior observed on native drivers as close as possible here. 
*/ - if (desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW) - { - addr_info.format = VK_FORMAT_R32_UINT; - } - else - { - addr_info.format = vkd3d_internal_get_vk_format(device, - vkd3d_structured_srv_to_texel_buffer_dxgi_format(desc->Buffer.StructureByteStride)); - } - } - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustUniformTexelBufferDescriptorSize, - d.payload)); -} - -static void vkd3d_create_buffer_srv(vkd3d_cpu_descriptor_va_t desc_va, - struct d3d12_device *device, struct d3d12_resource *resource, - const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VKD3D_UNUSED vkd3d_descriptor_qa_flags descriptor_qa_flags = 0; - struct vkd3d_bound_buffer_range bound_range = { 0, 0, 0, 0 }; - union vkd3d_descriptor_info descriptor_info[2]; - struct vkd3d_descriptor_binding binding; - VkDescriptorAddressInfoEXT addr_info; - VkDescriptorType vk_descriptor_type; - bool mutable_uses_single_descriptor; - VkWriteDescriptorSet vk_write[2]; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view *view = NULL; - uint32_t vk_write_count = 0; - struct d3d12_desc_split d; - unsigned int vk_flags; - uint32_t info_index; - bool desc_is_raw; - bool emit_typed; - bool emit_ssbo; - void *payload; - - if (!desc) - { - FIXME("Default buffer SRV not supported.\n"); - return; - } - - d = d3d12_desc_decode_va(desc_va); + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; if (desc->ViewDimension == D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE) { - D3D12_GPU_VIRTUAL_ADDRESS va = desc->RaytracingAccelerationStructure.Location; - - /* NULL RTAS in D3D12 (and Vulkan for that matter) is supposed to force a trigger on miss shader. - * What implementation can do here is to build a dummy empty RTAS instead. 
- * When we move to proper RTAS descriptors, this fallback will go away, but for now we - * make this work by swapping out the null descriptor ourselves. */ - if (!va) - va = vkd3d_get_null_rtas_va(device); - - if (d3d12_device_supports_ray_tracing_tier_1_0(device)) + desc_info.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + if (desc->RaytracingAccelerationStructure.Location) { - /* We implement this as a raw VA in the aux buffer. */ - VkDeviceAddress *raw_addresses = d.heap->raw_va_aux_buffer.host_ptr; - uint32_t descriptor_index = d.offset; - raw_addresses[descriptor_index] = desc->RaytracingAccelerationStructure.Location; - d.view->info.buffer.flags = VKD3D_DESCRIPTOR_FLAG_RAW_VA_AUX_BUFFER | - VKD3D_DESCRIPTOR_FLAG_NON_NULL; - d.types->set_info_mask = 0; - /* There is no resource tied to this descriptor, just a naked pointer. */ - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, vkd3d_null_cookie()); + ssbo_range.address = desc->RaytracingAccelerationStructure.Location; + ssbo_range.size = 0; /* FIXME: Is this meaningful? 
*/ + desc_info.data.pAddressRange = &ssbo_range; } - else - WARN("Using CreateSRV for RTAS without RT support?\n"); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, d.offset, - VKD3D_DESCRIPTOR_QA_TYPE_RT_ACCELERATION_STRUCTURE_BIT | VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT, - d.view->qa_cookie); + if (device->bindless_state.descriptor_heap_packed_raw_buffer_offset) + memset(d.payload, 0, device->bindless_state.descriptor_heap_packed_raw_buffer_offset); - return; + desc_range.address = d.payload + device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + desc_range.size = device->device_info.descriptor_heap_properties.bufferDescriptorSize; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); } - - if (desc->ViewDimension != D3D12_SRV_DIMENSION_BUFFER) + else if (desc->ViewDimension == D3D12_SRV_DIMENSION_BUFFER) { - WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); - return; - } - - mutable_uses_single_descriptor = !!(device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO); - desc_is_raw = (desc->Format == DXGI_FORMAT_UNKNOWN && desc->Buffer.StructureByteStride) || - (desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW); - emit_ssbo = (!mutable_uses_single_descriptor || desc_is_raw) && d3d12_device_use_ssbo_raw_buffer(device); - emit_typed = !mutable_uses_single_descriptor || !desc_is_raw || !emit_ssbo; + const struct vkd3d_bindless_state *bindless = &device->bindless_state; + bool can_emit_sibling_typed; + bool can_emit_sibling_raw; + bool emit_typed; + bool is_typed; + bool emit_raw; - if (!resource) - { - if (mutable_uses_single_descriptor) + /* Ignore metadata for SRV. */ + if (resource) { - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, - desc_is_raw ? 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + vkd3d_get_metadata_buffer_view_for_resource(device, resource, + desc->Format, desc->Buffer.FirstElement, desc->Buffer.NumElements, + desc->Buffer.StructureByteStride, (desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW) != 0, + &view); } else { - /* In the mutable set, always write texel buffer. The STORAGE_BUFFER set is also written to. */ - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + memset(&view, 0, sizeof(view)); } - return; - } - - d.types->set_info_mask = 0; - vkd3d_get_metadata_buffer_view_for_resource(device, resource, - desc->Format, desc->Buffer.FirstElement, desc->Buffer.NumElements, - desc->Buffer.StructureByteStride, &d.view->info.buffer); - - if (emit_ssbo) - { - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_RAW, - VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_RAW_SSBO); - - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - d.types->set_info_mask |= 1u << info_index; + is_typed = desc->Format && !(desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW); - if (device->bindless_state.flags & VKD3D_SSBO_OFFSET_BUFFER) - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET; - d.types->single_binding = binding; + can_emit_sibling_typed = bindless->descriptor_heap_packed_raw_buffer_offset >= bindless->uniform_texel_buffer_size; + can_emit_sibling_raw = bindless->descriptor_heap_packed_raw_buffer_offset >= bindless->uniform_texel_buffer_size; + memset(stack_payload, 0, bindless->descriptor_heap_cbv_srv_uav_size); - vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - descriptor_qa_flags |= VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT; - - if (d3d12_device_uses_descriptor_buffers(device)) + if (!is_typed && !d3d12_resource_desc_supports_raw_srv_ssbo(device, desc)) { - get_info.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = vk_descriptor_type; - get_info.data.pStorageBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.address = d.view->info.buffer.va; - addr_info.range = d.view->info.buffer.range; - addr_info.format = VK_FORMAT_UNDEFINED; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - payload)); - } - else - { - VkDeviceSize stride = desc->Format == DXGI_FORMAT_UNKNOWN - ? desc->Buffer.StructureByteStride : - vkd3d_get_format(device, desc->Format, false)->byte_count; - - vkd3d_buffer_view_get_bound_range_ssbo(device, resource, - desc->Buffer.FirstElement * stride, desc->Buffer.NumElements * stride, - &descriptor_info[vk_write_count].buffer, &bound_range); - - vkd3d_init_write_descriptor_set(&vk_write[vk_write_count], &d, binding, - vk_descriptor_type, &descriptor_info[vk_write_count]); - vk_write_count++; + is_typed = true; + view.dxgi_format = DXGI_FORMAT_R32_UINT; } - } - - if (emit_typed) - { - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_TYPED, - VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_BUFFER); - - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - - d.types->set_info_mask |= 1u << info_index; - if (device->bindless_state.flags & VKD3D_TYPED_OFFSET_BUFFER) - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET; - d.types->single_binding = binding; + /* TODO: Check max range for typed? 
*/ + emit_typed = is_typed || can_emit_sibling_typed; + emit_raw = !is_typed || can_emit_sibling_raw; - vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor_qa_flags |= VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT; - - if (d3d12_device_uses_descriptor_buffers(device)) + if (emit_typed) { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = vk_descriptor_type; - get_info.data.pUniformTexelBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.address = d.view->info.buffer.va; - addr_info.range = d.view->info.buffer.range; - addr_info.format = vkd3d_internal_get_vk_format(device, d.view->info.buffer.dxgi_format); - /* If we really intended to emit raw buffers, the fallback will be inferred as R32_UINT. */ - if (addr_info.format == VK_FORMAT_UNDEFINED) + desc_info.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + if (resource) { - if (desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW) - { - addr_info.format = VK_FORMAT_R32_UINT; - } - else + memset(&texel_buffer_info, 0, sizeof(texel_buffer_info)); + texel_buffer_info.sType = VK_STRUCTURE_TYPE_TEXEL_BUFFER_DESCRIPTOR_INFO_EXT; + texel_buffer_info.addressRange.address = view.va; + texel_buffer_info.addressRange.size = view.range; + texel_buffer_info.format = vkd3d_internal_get_vk_format(device, view.dxgi_format); + if (texel_buffer_info.format == VK_FORMAT_UNDEFINED) { - addr_info.format = vkd3d_internal_get_vk_format(device, + if (desc->Buffer.Flags & D3D12_BUFFER_SRV_FLAG_RAW) + { + texel_buffer_info.format = VK_FORMAT_R32_UINT; + } + else + { + texel_buffer_info.format = vkd3d_internal_get_vk_format(device, vkd3d_structured_srv_to_texel_buffer_dxgi_format(desc->Buffer.StructureByteStride)); + } } + desc_info.data.pTexelBuffer = &texel_buffer_info; } - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - 
VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustUniformTexelBufferDescriptorSize, - payload)); - } - else - { - vk_flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->Buffer.Flags); - if (!vkd3d_buffer_view_get_aligned_view(device, resource, desc->Format, vk_flags, - desc->Buffer.FirstElement, desc->Buffer.NumElements, desc->Buffer.StructureByteStride, - &bound_range, &view)) - return; - descriptor_info[vk_write_count].buffer_view = view ? view->vk_buffer_view : VK_NULL_HANDLE; - vkd3d_init_write_descriptor_set(&vk_write[vk_write_count], &d, binding, - vk_descriptor_type, &descriptor_info[vk_write_count]); - vk_write_count++; + desc_range.address = stack_payload; + desc_range.size = device->bindless_state.uniform_texel_buffer_size; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); } - } - if (d.view->info.buffer.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET) - { - struct vkd3d_bound_buffer_range *buffer_ranges = d.heap->buffer_ranges.host_ptr; - buffer_ranges[d.offset] = bound_range; - } + if (emit_raw) + { + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + if (resource) + { + ssbo_range.address = view.va; + ssbo_range.size = view.range; + desc_info.data.pAddressRange = &ssbo_range; + } + else + { + desc_info.data.pAddressRange = NULL; + } - if (mutable_uses_single_descriptor) - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; + desc_range.address = stack_payload + bindless->descriptor_heap_packed_raw_buffer_offset; + desc_range.size = device->device_info.descriptor_heap_properties.bufferDescriptorSize; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); + } -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_INSTRUCTION_QA_CHECKS) - { - /* We may want to peek at the buffer's raw VA when doing instrumentation. 
*/ - VkDeviceAddress *counter_addresses = d.heap->raw_va_aux_buffer.host_ptr; - uint32_t elem_size; - elem_size = desc->Format ? vkd3d_get_format(device, desc->Format, false)->byte_count : sizeof(uint32_t); - counter_addresses[d.offset] = vkd3d_descriptor_debug_encode_buffer_va(d.view->info.buffer.va, elem_size); - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT; + memcpy(d.payload, stack_payload, bindless->descriptor_heap_cbv_srv_uav_size); } -#endif - - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, resource ? resource->res.cookie : vkd3d_null_cookie()); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, d.offset, descriptor_qa_flags, d.view->qa_cookie); - - if (vk_write_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, vk_write_count, vk_write, 0, NULL)); -} - -static void vkd3d_texture_view_desc_fixup(struct d3d12_device *device, struct vkd3d_texture_view_desc *desc) -{ - if (device->device_info.properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA) + else { - FIXME_ONCE("Remapping 2D to 2D_ARRAY. Needs Vulkan spec tightening to match D3D12 properly.\n"); - /* D3D allows some reinterpretation between Texture2D and Texture2DArray. - * Texture2D in shader can read a resource with 1 array layer, - * and Texture2DArray can read a Texture2D descriptor. - * NVIDIA does not correctly deal with Texture2DArray unless we always emit 2D_ARRAY views. - * Other implementations don't seem to care, so just emit the natural 2D view. 
*/ - if (desc->view_type == VK_IMAGE_VIEW_TYPE_1D) - desc->view_type = VK_IMAGE_VIEW_TYPE_1D_ARRAY; - if (desc->view_type == VK_IMAGE_VIEW_TYPE_2D) - desc->view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); } } -static struct vkd3d_view *vkd3d_create_texture_uav_view(struct d3d12_device *device, - struct d3d12_resource *resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) +static bool vkd3d_setup_texture_uav_view(struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, + struct vkd3d_texture_view_create_info *info) { - struct vkd3d_view_key key; - key.view_type = VKD3D_VIEW_TYPE_IMAGE; + struct vkd3d_texture_view_desc texture; - if (!init_default_texture_view_desc(&key.u.texture, resource, desc ? desc->Format : 0)) + if (!init_default_texture_view_desc(&texture, resource, desc ? desc->Format : 0)) return NULL; - key.u.texture.image_usage = VK_IMAGE_USAGE_STORAGE_BIT; + texture.image_usage = VK_IMAGE_USAGE_STORAGE_BIT; - if (vkd3d_format_is_compressed(key.u.texture.format)) + if (vkd3d_format_is_compressed(texture.format)) { WARN("UAVs cannot be created for compressed formats.\n"); return NULL; @@ -6418,47 +5603,47 @@ static struct vkd3d_view *vkd3d_create_texture_uav_view(struct d3d12_device *dev switch (desc->ViewDimension) { case D3D12_UAV_DIMENSION_TEXTURE1D: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_1D; - key.u.texture.miplevel_idx = desc->Texture1D.MipSlice; - key.u.texture.layer_count = 1; + texture.view_type = VK_IMAGE_VIEW_TYPE_1D; + texture.miplevel_idx = desc->Texture1D.MipSlice; + texture.layer_count = 1; break; case D3D12_UAV_DIMENSION_TEXTURE1DARRAY: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_1D_ARRAY; - key.u.texture.miplevel_idx = desc->Texture1DArray.MipSlice; - key.u.texture.layer_idx = desc->Texture1DArray.FirstArraySlice; - key.u.texture.layer_count = desc->Texture1DArray.ArraySize; + texture.view_type = 
VK_IMAGE_VIEW_TYPE_1D_ARRAY; + texture.miplevel_idx = desc->Texture1DArray.MipSlice; + texture.layer_idx = desc->Texture1DArray.FirstArraySlice; + texture.layer_count = desc->Texture1DArray.ArraySize; break; case D3D12_UAV_DIMENSION_TEXTURE2D: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D; - key.u.texture.miplevel_idx = desc->Texture2D.MipSlice; - key.u.texture.layer_count = 1; - key.u.texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2D.PlaneSlice); + texture.view_type = VK_IMAGE_VIEW_TYPE_2D; + texture.miplevel_idx = desc->Texture2D.MipSlice; + texture.layer_count = 1; + texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2D.PlaneSlice); break; case D3D12_UAV_DIMENSION_TEXTURE2DMS: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D; - key.u.texture.miplevel_idx = 0; - key.u.texture.layer_count = 1; - key.u.texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2D.PlaneSlice); + texture.view_type = VK_IMAGE_VIEW_TYPE_2D; + texture.miplevel_idx = 0; + texture.layer_count = 1; + texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2D.PlaneSlice); break; case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - key.u.texture.miplevel_idx = desc->Texture2DArray.MipSlice; - key.u.texture.layer_idx = desc->Texture2DArray.FirstArraySlice; - key.u.texture.layer_count = desc->Texture2DArray.ArraySize; - key.u.texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2DArray.PlaneSlice); + texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + texture.miplevel_idx = desc->Texture2DArray.MipSlice; + texture.layer_idx = desc->Texture2DArray.FirstArraySlice; + texture.layer_count = desc->Texture2DArray.ArraySize; + texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2DArray.PlaneSlice); break; case D3D12_UAV_DIMENSION_TEXTURE2DMSARRAY: - 
key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - key.u.texture.miplevel_idx = 0; - key.u.texture.layer_idx = desc->Texture2DMSArray.FirstArraySlice; - key.u.texture.layer_count = desc->Texture2DMSArray.ArraySize; - key.u.texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, 0); + texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + texture.miplevel_idx = 0; + texture.layer_idx = desc->Texture2DMSArray.FirstArraySlice; + texture.layer_count = desc->Texture2DMSArray.ArraySize; + texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, 0); break; case D3D12_UAV_DIMENSION_TEXTURE3D: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_3D; - key.u.texture.miplevel_idx = desc->Texture3D.MipSlice; - key.u.texture.w_offset = desc->Texture3D.FirstWSlice; - key.u.texture.w_size = desc->Texture3D.WSize; + texture.view_type = VK_IMAGE_VIEW_TYPE_3D; + texture.miplevel_idx = desc->Texture3D.MipSlice; + texture.w_offset = desc->Texture3D.FirstWSlice; + texture.w_size = desc->Texture3D.WSize; if (!device->device_info.image_sliced_view_of_3d_features.imageSlicedViewOf3D) { if (desc->Texture3D.FirstWSlice || @@ -6475,23 +5660,21 @@ static struct vkd3d_view *vkd3d_create_texture_uav_view(struct d3d12_device *dev } } - vkd3d_texture_view_desc_fixup(device, &key.u.texture); - - return vkd3d_view_map_create_view(&resource->view_map, device, &key); + return vkd3d_setup_texture_view(device, &texture, info); } -static struct vkd3d_view *vkd3d_create_texture_srv_view(struct d3d12_device *device, - struct d3d12_resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) +static bool vkd3d_setup_texture_srv_view(struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, + struct vkd3d_texture_view_create_info *info) { - struct vkd3d_view_key key; + struct vkd3d_texture_view_desc texture; - if (!init_default_texture_view_desc(&key.u.texture, resource, desc ? 
desc->Format : 0)) + if (!init_default_texture_view_desc(&texture, resource, desc ? desc->Format : 0)) return NULL; - key.view_type = VKD3D_VIEW_TYPE_IMAGE; - key.u.texture.miplevel_count = VK_REMAINING_MIP_LEVELS; - key.u.texture.allowed_swizzle = true; - key.u.texture.image_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + texture.miplevel_count = VK_REMAINING_MIP_LEVELS; + texture.allowed_swizzle = true; + texture.image_usage = VK_IMAGE_USAGE_SAMPLED_BIT; if (desc) { @@ -6500,116 +5683,84 @@ static struct vkd3d_view *vkd3d_create_texture_srv_view(struct d3d12_device *dev TRACE("Component mapping %s for format %#x.\n", debug_d3d12_shader_component_mapping(desc->Shader4ComponentMapping), desc->Format); - vk_component_mapping_from_d3d12(&key.u.texture.components, desc->Shader4ComponentMapping); + vk_component_mapping_from_d3d12(&texture.components, desc->Shader4ComponentMapping); } switch (desc->ViewDimension) { case D3D12_SRV_DIMENSION_TEXTURE1D: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_1D; - key.u.texture.miplevel_idx = desc->Texture1D.MostDetailedMip; - key.u.texture.miplevel_count = desc->Texture1D.MipLevels; - key.u.texture.miplevel_clamp = desc->Texture1D.ResourceMinLODClamp; - key.u.texture.layer_count = 1; + texture.view_type = VK_IMAGE_VIEW_TYPE_1D; + texture.miplevel_idx = desc->Texture1D.MostDetailedMip; + texture.miplevel_count = desc->Texture1D.MipLevels; + texture.miplevel_clamp = desc->Texture1D.ResourceMinLODClamp; + texture.layer_count = 1; break; case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_1D_ARRAY; - key.u.texture.miplevel_idx = desc->Texture1DArray.MostDetailedMip; - key.u.texture.miplevel_count = desc->Texture1DArray.MipLevels; - key.u.texture.miplevel_clamp = desc->Texture1DArray.ResourceMinLODClamp; - key.u.texture.layer_idx = desc->Texture1DArray.FirstArraySlice; - key.u.texture.layer_count = desc->Texture1DArray.ArraySize; + texture.view_type = VK_IMAGE_VIEW_TYPE_1D_ARRAY; + texture.miplevel_idx = 
desc->Texture1DArray.MostDetailedMip; + texture.miplevel_count = desc->Texture1DArray.MipLevels; + texture.miplevel_clamp = desc->Texture1DArray.ResourceMinLODClamp; + texture.layer_idx = desc->Texture1DArray.FirstArraySlice; + texture.layer_count = desc->Texture1DArray.ArraySize; break; case D3D12_SRV_DIMENSION_TEXTURE2D: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D; - key.u.texture.miplevel_idx = desc->Texture2D.MostDetailedMip; - key.u.texture.miplevel_count = desc->Texture2D.MipLevels; - key.u.texture.miplevel_clamp = desc->Texture2D.ResourceMinLODClamp; - key.u.texture.layer_count = 1; - key.u.texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2D.PlaneSlice); + texture.view_type = VK_IMAGE_VIEW_TYPE_2D; + texture.miplevel_idx = desc->Texture2D.MostDetailedMip; + texture.miplevel_count = desc->Texture2D.MipLevels; + texture.miplevel_clamp = desc->Texture2D.ResourceMinLODClamp; + texture.layer_count = 1; + texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2D.PlaneSlice); break; case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - key.u.texture.miplevel_idx = desc->Texture2DArray.MostDetailedMip; - key.u.texture.miplevel_count = desc->Texture2DArray.MipLevels; - key.u.texture.miplevel_clamp = desc->Texture2DArray.ResourceMinLODClamp; - key.u.texture.layer_idx = desc->Texture2DArray.FirstArraySlice; - key.u.texture.layer_count = desc->Texture2DArray.ArraySize; - key.u.texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2DArray.PlaneSlice); + texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + texture.miplevel_idx = desc->Texture2DArray.MostDetailedMip; + texture.miplevel_count = desc->Texture2DArray.MipLevels; + texture.miplevel_clamp = desc->Texture2DArray.ResourceMinLODClamp; + texture.layer_idx = desc->Texture2DArray.FirstArraySlice; + texture.layer_count = desc->Texture2DArray.ArraySize; + 
texture.aspect_mask = vk_image_aspect_flags_from_d3d12(resource->format, desc->Texture2DArray.PlaneSlice); break; case D3D12_SRV_DIMENSION_TEXTURE2DMS: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D; - key.u.texture.layer_count = 1; + texture.view_type = VK_IMAGE_VIEW_TYPE_2D; + texture.layer_count = 1; break; case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - key.u.texture.layer_idx = desc->Texture2DMSArray.FirstArraySlice; - key.u.texture.layer_count = desc->Texture2DMSArray.ArraySize; + texture.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + texture.layer_idx = desc->Texture2DMSArray.FirstArraySlice; + texture.layer_count = desc->Texture2DMSArray.ArraySize; break; case D3D12_SRV_DIMENSION_TEXTURE3D: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_3D; - key.u.texture.miplevel_idx = desc->Texture3D.MostDetailedMip; - key.u.texture.miplevel_count = desc->Texture3D.MipLevels; - key.u.texture.miplevel_clamp = desc->Texture3D.ResourceMinLODClamp; + texture.view_type = VK_IMAGE_VIEW_TYPE_3D; + texture.miplevel_idx = desc->Texture3D.MostDetailedMip; + texture.miplevel_count = desc->Texture3D.MipLevels; + texture.miplevel_clamp = desc->Texture3D.ResourceMinLODClamp; break; case D3D12_SRV_DIMENSION_TEXTURECUBE: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_CUBE; - key.u.texture.miplevel_idx = desc->TextureCube.MostDetailedMip; - key.u.texture.miplevel_count = desc->TextureCube.MipLevels; - key.u.texture.miplevel_clamp = desc->TextureCube.ResourceMinLODClamp; - key.u.texture.layer_count = 6; + texture.view_type = VK_IMAGE_VIEW_TYPE_CUBE; + texture.miplevel_idx = desc->TextureCube.MostDetailedMip; + texture.miplevel_count = desc->TextureCube.MipLevels; + texture.miplevel_clamp = desc->TextureCube.ResourceMinLODClamp; + texture.layer_count = 6; break; case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: - key.u.texture.view_type = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; - key.u.texture.miplevel_idx = desc->TextureCubeArray.MostDetailedMip; - 
key.u.texture.miplevel_count = desc->TextureCubeArray.MipLevels; - key.u.texture.miplevel_clamp = desc->TextureCubeArray.ResourceMinLODClamp; - key.u.texture.layer_idx = desc->TextureCubeArray.First2DArrayFace; - key.u.texture.layer_count = desc->TextureCubeArray.NumCubes; - if (key.u.texture.layer_count != VK_REMAINING_ARRAY_LAYERS) - key.u.texture.layer_count *= 6; + texture.view_type = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + texture.miplevel_idx = desc->TextureCubeArray.MostDetailedMip; + texture.miplevel_count = desc->TextureCubeArray.MipLevels; + texture.miplevel_clamp = desc->TextureCubeArray.ResourceMinLODClamp; + texture.layer_idx = desc->TextureCubeArray.First2DArrayFace; + texture.layer_count = desc->TextureCubeArray.NumCubes; + if (texture.layer_count != VK_REMAINING_ARRAY_LAYERS) + texture.layer_count *= 6; break; default: FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); } } - if (key.u.texture.miplevel_count == VK_REMAINING_MIP_LEVELS) - key.u.texture.miplevel_count = resource->desc.MipLevels - key.u.texture.miplevel_idx; - - vkd3d_texture_view_desc_fixup(device, &key.u.texture); - - if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS) && - desc->ViewDimension != D3D12_SRV_DIMENSION_TEXTURE2DMS && - desc->ViewDimension != D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY) - { - bool found; - - key.u.texture.miplevel_clamp = floor(key.u.texture.miplevel_clamp); - - rw_spinlock_acquire_read(&resource->view_map.spinlock); - found = !!hash_map_find(&resource->view_map.map, &key); - rw_spinlock_release_read(&resource->view_map.spinlock); - - if (!found) - { - uint32_t starting_mip = key.u.texture.miplevel_idx; - uint32_t mip_count = key.u.texture.miplevel_count != UINT32_MAX ? 
- key.u.texture.miplevel_count : - resource->desc.MipLevels - starting_mip; - uint32_t i; - - struct vkd3d_view_key preallocate_key = key; - - for (i = starting_mip; i < mip_count; i++) - { - preallocate_key.u.texture.miplevel_clamp = (float)(i); - vkd3d_view_map_create_view(&resource->view_map, device, &preallocate_key); - } - } - } + if (texture.miplevel_count == VK_REMAINING_MIP_LEVELS) + texture.miplevel_count = resource->desc.MipLevels - texture.miplevel_idx; - return vkd3d_view_map_create_view(&resource->view_map, device, &key); + return vkd3d_setup_texture_view(device, &texture, info); } static void vkd3d_create_texture_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, @@ -6617,130 +5768,38 @@ static void vkd3d_create_texture_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_texture_view_create_info info; + VkResourceDescriptorInfoEXT desc_info; + VkImageDescriptorInfoEXT image_info; struct d3d12_desc_split_embedded d; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view *view = NULL; - VkDescriptorImageInfo image; + VkHostAddressRangeEXT desc_range; - if (!resource) - { - d3d12_descriptor_heap_write_null_descriptor_template_embedded(device, desc_va, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE); - return; - } + d = d3d12_desc_decode_embedded_resource_va(desc_va, + device->bindless_state.descriptor_heap_packed_metadata_offset); - d = d3d12_desc_decode_embedded_resource_va(desc_va); + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; + desc_info.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; /* Ignore metadata. */ - view = vkd3d_create_texture_srv_view(device, resource, desc); - - image.sampler = VK_NULL_HANDLE; - image.imageView = view ? view->vk_image_view : VK_NULL_HANDLE; - image.imageLayout = view ? 
resource->common_layout : VK_IMAGE_LAYOUT_UNDEFINED; - - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - get_info.data.pSampledImage = ℑ - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.sampledImageDescriptorSize, - d.payload)); -} - -static void vkd3d_create_texture_srv(vkd3d_cpu_descriptor_va_t desc_va, - struct d3d12_device *device, struct d3d12_resource *resource, - const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - union vkd3d_descriptor_info null_descriptor_info; - union vkd3d_descriptor_info descriptor_info; - struct vkd3d_descriptor_binding binding; - VkWriteDescriptorSet vk_writes[2]; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view *view = NULL; - uint32_t vk_write_count = 0; - struct d3d12_desc_split d; - uint32_t info_index; - void *payload; - - if (!resource) + if (resource && vkd3d_setup_texture_srv_view(device, resource, desc, &info)) { - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE); - return; + desc_info.data.pImage = &image_info; + memset(&image_info, 0, sizeof(image_info)); + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_DESCRIPTOR_INFO_EXT; + image_info.layout = d3d12_resource_pick_layout(resource, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + image_info.pView = &info.view_desc; } - d = d3d12_desc_decode_va(desc_va); - - view = vkd3d_create_texture_srv_view(device, resource, desc); - - descriptor_info.image.sampler = VK_NULL_HANDLE; - descriptor_info.image.imageView = view ? view->vk_image_view : VK_NULL_HANDLE; - descriptor_info.image.imageLayout = view ? 
resource->common_layout : VK_IMAGE_LAYOUT_UNDEFINED; - - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_TYPED, - VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_IMAGE); - - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - - d.view->info.image.view = view; - d.view->info.image.flags = VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW | VKD3D_DESCRIPTOR_FLAG_NON_NULL; - d.types->set_info_mask = 1u << info_index; - d.types->single_binding = binding; + desc_range.address = d.payload; + desc_range.size = device->bindless_state.sampled_image_size; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); - if (d3d12_device_uses_descriptor_buffers(device)) + if (device->bindless_state.sampled_image_size < device->bindless_state.descriptor_heap_cbv_srv_uav_size) { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - get_info.data.pSampledImage = &descriptor_info.image; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.sampledImageDescriptorSize, - payload)); + memset(d.payload + device->bindless_state.sampled_image_size, 0, + device->bindless_state.descriptor_heap_cbv_srv_uav_size - device->bindless_state.sampled_image_size); } - else - { - vkd3d_init_write_descriptor_set(&vk_writes[vk_write_count++], &d, binding, - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &descriptor_info); - } - - /* Clear out sibling typed descriptor if appropriate. - * Native drivers are robust against scenarios like these, and so we need to be too. */ - if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED) - { - /* The NULL descriptor template for non-embedded is too messy to invoke here. 
*/ - info_index = VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_RAW; - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - d.types->set_info_mask |= 1u << info_index; - - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - get_info.data.pStorageBuffer = NULL; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - payload)); - } - else - { - null_descriptor_info.buffer.buffer = VK_NULL_HANDLE; - null_descriptor_info.buffer.offset = 0; - null_descriptor_info.buffer.range = VK_WHOLE_SIZE; - vkd3d_init_write_descriptor_set(&vk_writes[vk_write_count++], &d, binding, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &null_descriptor_info); - } - } - else - d.view->info.image.flags |= VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; - - if (vk_write_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, vk_write_count, vk_writes, 0, NULL)); - - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, view ? 
view->cookie : vkd3d_null_cookie()); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, d.offset, - VKD3D_DESCRIPTOR_QA_TYPE_SAMPLED_IMAGE_BIT, d.view->qa_cookie); } void d3d12_desc_create_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, @@ -6770,42 +5829,6 @@ void d3d12_desc_create_srv_embedded(vkd3d_cpu_descriptor_va_t desc_va, vkd3d_create_texture_srv_embedded(desc_va, device, resource, desc); } -void d3d12_desc_create_srv(vkd3d_cpu_descriptor_va_t desc_va, - struct d3d12_device *device, struct d3d12_resource *resource, - const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) -{ - bool is_buffer; - - if (resource) - { - is_buffer = d3d12_resource_is_buffer(resource); - } - else if (desc) - { - is_buffer = desc->ViewDimension == D3D12_SRV_DIMENSION_BUFFER || - desc->ViewDimension == D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; - } - else - { - WARN("Description required for NULL SRV.\n"); - return; - } - - if (is_buffer) - vkd3d_create_buffer_srv(desc_va, device, resource, desc); - else - vkd3d_create_texture_srv(desc_va, device, resource, desc); -} - -static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) -{ - if (flags == D3D12_BUFFER_UAV_FLAG_RAW) - return VKD3D_VIEW_RAW_BUFFER; - if (flags) - FIXME("Unhandled buffer UAV flags %#x.\n", flags); - return 0; -} - VkDeviceAddress vkd3d_get_buffer_device_address(struct d3d12_device *device, VkBuffer vk_buffer) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -6835,122 +5858,20 @@ static void vkd3d_create_buffer_uav_embedded(vkd3d_cpu_descriptor_va_t desc_va, struct d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { + const struct vkd3d_bindless_state *bindless = &device->bindless_state; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_descriptor_metadata_buffer_view view; - VkDescriptorAddressInfoEXT addr_info; + 
VkTexelBufferDescriptorInfoEXT texel_buffer_info; + VkResourceDescriptorInfoEXT desc_info; struct d3d12_desc_split_embedded d; - struct d3d12_desc_split_metadata m; - VkDescriptorGetInfoEXT get_info; - - if (!desc) - { - FIXME("Default buffer UAV not supported.\n"); - return; - } - - if (desc->ViewDimension != D3D12_UAV_DIMENSION_BUFFER) - { - WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); - return; - } - - if (!resource) - { - /* We prepare a packed NULL descriptor that contains both texel buffer and SSBO. */ - d3d12_descriptor_heap_write_null_descriptor_template_embedded(device, desc_va, - VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); - return; - } - - d = d3d12_desc_decode_embedded_resource_va(desc_va); - m = d3d12_desc_decode_metadata(device, desc_va); - - vkd3d_get_metadata_buffer_view_for_resource(device, resource, - desc->Format, desc->Buffer.FirstElement, desc->Buffer.NumElements, - desc->Buffer.StructureByteStride, &view); - - if (m.view) - m.view->info.buffer = view; - - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - get_info.data.pStorageBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.address = view.va; - addr_info.range = view.range; - addr_info.format = VK_FORMAT_UNDEFINED; - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - d.payload + device->bindless_state.descriptor_buffer_packed_raw_buffer_offset)); - - /* UAV counter and texel buffers alias. This is fine. We don't expect having to work around - * scenarios where this happens. - * Reuse the texel buffer system here directly. - * We might as well use it when it's practical. 
*/ - - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - get_info.data.pStorageTexelBuffer = &addr_info; - - if (resource && counter_resource) - { - assert(d3d12_resource_is_buffer(counter_resource)); - assert(desc->Buffer.StructureByteStride); - addr_info.address = counter_resource->res.va + desc->Buffer.CounterOffsetInBytes; - addr_info.range = 4; - addr_info.format = VK_FORMAT_R32_UINT; - } - else - { - addr_info.format = vkd3d_internal_get_vk_format(device, view.dxgi_format); - - if (addr_info.format == VK_FORMAT_UNDEFINED) - { - /* Raw buffer is always emitted as R32_UINT on native. - * Try to match behavior observed on native drivers as close as possible here. */ - if (desc->Buffer.Flags & D3D12_BUFFER_UAV_FLAG_RAW) - { - addr_info.format = VK_FORMAT_R32_UINT; - } - else - { - addr_info.format = vkd3d_internal_get_vk_format(device, - vkd3d_structured_uav_to_texel_buffer_dxgi_format(desc->Buffer.StructureByteStride)); - } - } - } - - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageTexelBufferDescriptorSize, - d.payload)); -} - -static void vkd3d_create_buffer_uav(vkd3d_cpu_descriptor_va_t desc_va, struct d3d12_device *device, - struct d3d12_resource *resource, struct d3d12_resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VKD3D_UNUSED vkd3d_descriptor_qa_flags descriptor_qa_flags = 0; - struct vkd3d_bound_buffer_range bound_range = { 0, 0, 0, 0 }; - union vkd3d_descriptor_info descriptor_info[3]; - struct vkd3d_descriptor_binding binding; - unsigned int flags, vk_write_count = 0; - VkDescriptorAddressInfoEXT addr_info; - bool mutable_uses_single_descriptor; - VkDescriptorType vk_descriptor_type; - VkDeviceAddress uav_counter_address; - VkDeviceAddress *counter_addresses; - VkWriteDescriptorSet vk_write[3]; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view *view = NULL; - struct 
d3d12_desc_split d; - uint32_t descriptor_index; - uint32_t info_index; - bool desc_is_raw; + VkDeviceAddressRangeEXT ssbo_range; + VkHostAddressRangeEXT desc_range; + bool can_emit_sibling_typed; + uint8_t stack_payload[256]; + bool can_emit_sibling_raw; bool emit_typed; - bool emit_ssbo; - void *payload; + bool is_typed; + bool emit_raw; if (!desc) { @@ -6962,199 +5883,119 @@ static void vkd3d_create_buffer_uav(vkd3d_cpu_descriptor_va_t desc_va, struct d3 { WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); return; - } - - mutable_uses_single_descriptor = !!(device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO); - desc_is_raw = (desc->Format == DXGI_FORMAT_UNKNOWN && desc->Buffer.StructureByteStride) || - (desc->Buffer.Flags & D3D12_BUFFER_UAV_FLAG_RAW); - emit_ssbo = (!mutable_uses_single_descriptor || desc_is_raw) && d3d12_device_use_ssbo_raw_buffer(device); - emit_typed = !mutable_uses_single_descriptor || !desc_is_raw || !emit_ssbo; - - if (!resource) - { - if (mutable_uses_single_descriptor) - { - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, - desc_is_raw ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); - } - else - { - /* In the mutable set, always write texel buffer. The STORAGE_BUFFER set is also written to. 
*/ - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, - VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); - } - return; - } - - d = d3d12_desc_decode_va(desc_va); - - /* Handle UAV itself */ - d.types->set_info_mask = 0; - - vkd3d_get_metadata_buffer_view_for_resource(device, resource, - desc->Format, desc->Buffer.FirstElement, desc->Buffer.NumElements, - desc->Buffer.StructureByteStride, &d.view->info.buffer); - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_RAW_VA_AUX_BUFFER; - - if (emit_ssbo) - { - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_RAW, - VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_RAW_SSBO); - - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - - d.types->set_info_mask |= 1u << info_index; - - if (device->bindless_state.flags & VKD3D_SSBO_OFFSET_BUFFER) - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET; - d.types->single_binding = binding; - - vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - descriptor_qa_flags |= VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT; - - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = vk_descriptor_type; - get_info.data.pStorageBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.address = d.view->info.buffer.va; - addr_info.range = d.view->info.buffer.range; - addr_info.format = VK_FORMAT_UNDEFINED; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - payload)); - } - else - { - VkDescriptorBufferInfo *buffer_info = &descriptor_info[vk_write_count].buffer; - VkDeviceSize stride = desc->Format == DXGI_FORMAT_UNKNOWN - ? 
desc->Buffer.StructureByteStride : - vkd3d_get_format(device, desc->Format, false)->byte_count; + } - vkd3d_buffer_view_get_bound_range_ssbo(device, resource, - desc->Buffer.FirstElement * stride, desc->Buffer.NumElements * stride, - buffer_info, &bound_range); + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; - vkd3d_init_write_descriptor_set(&vk_write[vk_write_count], &d, binding, - vk_descriptor_type, &descriptor_info[vk_write_count]); - vk_write_count++; - } - } + d = d3d12_desc_decode_embedded_resource_va(desc_va, + device->bindless_state.descriptor_heap_packed_metadata_offset); - if (emit_typed) + if (resource) + { + vkd3d_get_metadata_buffer_view_for_resource(device, resource, + desc->Format, desc->Buffer.FirstElement, desc->Buffer.NumElements, + desc->Buffer.StructureByteStride, (desc->Buffer.Flags & D3D12_BUFFER_UAV_FLAG_RAW) != 0, + &view); + } + else { - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_TYPED, - VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_BUFFER); + memset(&view, 0, sizeof(view)); + } - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); + is_typed = desc->Format && !(desc->Buffer.Flags & D3D12_BUFFER_UAV_FLAG_RAW); - d.types->set_info_mask |= 1u << info_index; + can_emit_sibling_typed = bindless->descriptor_heap_packed_raw_buffer_offset >= bindless->storage_texel_buffer_size && + !counter_resource; + can_emit_sibling_raw = bindless->descriptor_heap_packed_raw_buffer_offset >= bindless->storage_texel_buffer_size; - if (device->bindless_state.flags & VKD3D_TYPED_OFFSET_BUFFER) - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET; - d.types->single_binding = binding; + if (!is_typed && !d3d12_resource_desc_supports_raw_uav_ssbo(device, desc)) + { + is_typed = true; + view.dxgi_format = DXGI_FORMAT_R32_UINT; + } - vk_descriptor_type = 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor_qa_flags |= VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT; + /* TODO: Check max range for typed? */ + emit_typed = is_typed || can_emit_sibling_typed; + emit_raw = !is_typed || can_emit_sibling_raw; + memset(stack_payload, 0, bindless->descriptor_heap_cbv_srv_uav_size); - if (d3d12_device_uses_descriptor_buffers(device)) + if (emit_typed) + { + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + if (resource) { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = vk_descriptor_type; - get_info.data.pStorageTexelBuffer = &addr_info; - addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - addr_info.pNext = NULL; - addr_info.address = d.view->info.buffer.va; - addr_info.range = d.view->info.buffer.range; - addr_info.format = vkd3d_internal_get_vk_format(device, d.view->info.buffer.dxgi_format); - - if (addr_info.format == VK_FORMAT_UNDEFINED) + memset(&texel_buffer_info, 0, sizeof(texel_buffer_info)); + texel_buffer_info.sType = VK_STRUCTURE_TYPE_TEXEL_BUFFER_DESCRIPTOR_INFO_EXT; + texel_buffer_info.addressRange.address = view.va; + texel_buffer_info.addressRange.size = view.range; + texel_buffer_info.format = vkd3d_internal_get_vk_format(device, view.dxgi_format); + if (texel_buffer_info.format == VK_FORMAT_UNDEFINED) { - /* If we really intended to emit raw buffers, the fallback will be inferred as R32_UINT. 
*/ if (desc->Buffer.Flags & D3D12_BUFFER_UAV_FLAG_RAW) { - addr_info.format = VK_FORMAT_R32_UINT; + texel_buffer_info.format = VK_FORMAT_R32_UINT; } else { - addr_info.format = vkd3d_internal_get_vk_format(device, - vkd3d_structured_uav_to_texel_buffer_dxgi_format(desc->Buffer.StructureByteStride)); + texel_buffer_info.format = vkd3d_internal_get_vk_format(device, + vkd3d_structured_uav_to_texel_buffer_dxgi_format(desc->Buffer.StructureByteStride)); } } - - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageTexelBufferDescriptorSize, - payload)); + desc_info.data.pTexelBuffer = &texel_buffer_info; } - else - { - flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->Buffer.Flags); - - if (!vkd3d_buffer_view_get_aligned_view(device, resource, desc->Format, flags, - desc->Buffer.FirstElement, desc->Buffer.NumElements, - desc->Buffer.StructureByteStride, &bound_range, &view)) - return; - descriptor_info[vk_write_count].buffer_view = view ? 
view->vk_buffer_view : VK_NULL_HANDLE; - - vkd3d_init_write_descriptor_set(&vk_write[vk_write_count], &d, binding, - vk_descriptor_type, &descriptor_info[vk_write_count]); - vk_write_count++; - } + desc_range.address = stack_payload; + desc_range.size = device->bindless_state.storage_texel_buffer_size; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); } - if (d.view->info.buffer.flags & VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET) + if (emit_raw) { - struct vkd3d_bound_buffer_range *buffer_ranges = d.heap->buffer_ranges.host_ptr; - buffer_ranges[d.offset] = bound_range; - } + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - if (mutable_uses_single_descriptor) - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; + if (resource) + { + ssbo_range.address = view.va; + ssbo_range.size = view.range; + desc_info.data.pAddressRange = &ssbo_range; + } + else + { + desc_info.data.pAddressRange = NULL; + } - /* Handle UAV counter */ - uav_counter_address = 0; + desc_range.address = stack_payload + bindless->descriptor_heap_packed_raw_buffer_offset; + desc_range.size = device->device_info.descriptor_heap_properties.bufferDescriptorSize; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); + } - if (resource && counter_resource) + if (counter_resource && desc->Buffer.StructureByteStride != 0) { - assert(d3d12_resource_is_buffer(counter_resource)); - assert(desc->Buffer.StructureByteStride); + ssbo_range.address = counter_resource->res.va + desc->Buffer.CounterOffsetInBytes; + ssbo_range.size = sizeof(uint32_t); + desc_info.data.pAddressRange = &ssbo_range; + desc_range.address = stack_payload; + desc_range.size = device->device_info.descriptor_heap_properties.bufferDescriptorSize; - uav_counter_address = counter_resource->res.va + desc->Buffer.CounterOffsetInBytes; - - /* This is used to denote that a counter descriptor is present, irrespective of underlying descriptor type. 
*/ - descriptor_qa_flags |= VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT; - } -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_INSTRUCTION_QA_CHECKS) - { - /* We may want to peek at the buffer's raw VA when doing instrumentation. - * UAV counted resources do not get instrumentation, so the aliasing should not be a problem in practice. */ - uint32_t elem_size; - elem_size = desc->Format ? vkd3d_get_format(device, desc->Format, false)->byte_count : sizeof(uint32_t); - uav_counter_address = vkd3d_descriptor_debug_encode_buffer_va(d.view->info.buffer.va, elem_size); - d.view->info.buffer.flags |= VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT; + if (!VKD3D_FORCE_RAW_UAV_COUNTER && bindless->descriptor_heap_packed_raw_buffer_offset >= device->device_info.descriptor_heap_properties.bufferDescriptorSize) + { + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); + } + else + { + /* Deep YOLO, place a raw pointer inside the descriptor payload. Pray that it just werks :v */ + memcpy(stack_payload + bindless->uav_counter_embedded_offset, + &ssbo_range.address, sizeof(VkDeviceAddress)); + } } -#endif - counter_addresses = d.heap->raw_va_aux_buffer.host_ptr; - descriptor_index = d.offset; - counter_addresses[descriptor_index] = uav_counter_address; + /* We're doing a lot of small writes all over the place, optimize for WC throughput. + * TODO: Consider partial copy for RDNA2? */ + memcpy(d.payload, stack_payload, bindless->descriptor_heap_cbv_srv_uav_size); - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, resource ? 
resource->res.cookie : vkd3d_null_cookie()); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, d.offset, - descriptor_qa_flags, d.view->qa_cookie); - - VK_CALL(vkUpdateDescriptorSets(device->vk_device, vk_write_count, vk_write, 0, NULL)); + if (d.metadata) + d.metadata->info.buffer = view; } static void vkd3d_create_texture_uav_embedded(vkd3d_cpu_descriptor_va_t desc_va, @@ -7162,149 +6003,81 @@ static void vkd3d_create_texture_uav_embedded(vkd3d_cpu_descriptor_va_t desc_va, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_texture_view_create_info info; + VkResourceDescriptorInfoEXT desc_info; + VkImageDescriptorInfoEXT image_info; struct d3d12_desc_split_embedded d; - struct d3d12_desc_split_metadata m; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view *view = NULL; - VkDescriptorImageInfo image; - - d = d3d12_desc_decode_embedded_resource_va(desc_va); - m = d3d12_desc_decode_metadata(device, desc_va); - - if (!resource) - { - d3d12_descriptor_heap_write_null_descriptor_template_embedded(device, desc_va, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); - return; - } + VkHostAddressRangeEXT desc_range; - view = vkd3d_create_texture_uav_view(device, resource, desc); + d = d3d12_desc_decode_embedded_resource_va(desc_va, + device->bindless_state.descriptor_heap_packed_metadata_offset); - image.sampler = VK_NULL_HANDLE; - image.imageView = view ? view->vk_image_view : VK_NULL_HANDLE; - image.imageLayout = view ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED; + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - if (m.view) + if (resource && vkd3d_setup_texture_uav_view(device, resource, desc, &info)) { - m.view->info.image.view = view; - m.view->info.image.flags = VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW; - } - - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - get_info.data.pStorageImage = ℑ - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.storageImageDescriptorSize, - d.payload)); - - /* We should clear out the sibling raw resource that is packed in the higher bits. - * If we have planar metadata there isn't much we can do since the storage image will take up the entire - * 32 bytes, but reading an image as SSBO is far less common than reading buffers as images. */ - if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA) - { - /* See vkd3d_bindless_state_init_null_descriptor_payloads for details. - * Use STORAGE_IMAGE template here, since we've already prepared the desired NULL payload - * at the raw offset. 
*/ - d3d12_descriptor_heap_write_null_descriptor_template_embedded_partial(device, desc_va, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, device->bindless_state.descriptor_buffer_packed_raw_buffer_offset, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize); - } -} - -static void vkd3d_create_texture_uav(vkd3d_cpu_descriptor_va_t desc_va, - struct d3d12_device *device, struct d3d12_resource *resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - union vkd3d_descriptor_info null_descriptor_info; - union vkd3d_descriptor_info descriptor_info; - struct vkd3d_descriptor_binding binding; - VkWriteDescriptorSet vk_writes[2]; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view *view = NULL; - uint32_t vk_write_count = 0; - struct d3d12_desc_split d; - uint32_t info_index; - void *payload; - - if (!resource) - { - d3d12_descriptor_heap_write_null_descriptor_template(desc_va, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); - return; - } - - d = d3d12_desc_decode_va(desc_va); + desc_info.data.pImage = &image_info; + memset(&image_info, 0, sizeof(image_info)); + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_DESCRIPTOR_INFO_EXT; + image_info.layout = VK_IMAGE_LAYOUT_GENERAL; + image_info.pView = &info.view_desc; - view = vkd3d_create_texture_uav_view(device, resource, desc); + /* Setup metadata if the resource is used as clear UAV, and we have to do fallback views. */ + if (d.metadata) + { + struct vkd3d_descriptor_metadata_image_view *image = &d.metadata->info.image; + image->flags = VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW; - descriptor_info.image.sampler = VK_NULL_HANDLE; - descriptor_info.image.imageView = view ? view->vk_image_view : VK_NULL_HANDLE; - descriptor_info.image.imageLayout = view ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED; + if (desc && desc->Format) + image->dxgi_format = desc->Format; + else + image->dxgi_format = resource->format->dxgi_format; - info_index = vkd3d_bindless_state_find_set_info_index_fast(device, - VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_TYPED, - VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_IMAGE); - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); + image->plane_slice = 0; + image->mip_slice = info.view_desc.subresourceRange.baseMipLevel; - d.view->info.image.view = view; - d.view->info.image.flags = VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW | VKD3D_DESCRIPTOR_FLAG_NON_NULL; - d.types->set_info_mask = 1u << info_index; - d.types->single_binding = binding; + if (info.view_desc.viewType == VK_IMAGE_VIEW_TYPE_3D) + { + if (desc) + { + image->first_array_slice = desc->Texture3D.FirstWSlice; + image->array_size = desc->Texture3D.WSize; + } + else + { + image->first_array_slice = 0; + image->array_size = UINT16_MAX; + } + } + else + { + image->first_array_slice = info.view_desc.subresourceRange.baseArrayLayer; + image->array_size = info.view_desc.subresourceRange.layerCount; + } - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - get_info.data.pSampledImage = &descriptor_info.image; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.storageImageDescriptorSize, - payload)); + image->vk_dimension = info.view_desc.viewType; + if (desc && desc->ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2D) + image->plane_slice = desc->Texture2D.PlaneSlice; + } } - else + else if (d.metadata) { - vkd3d_init_write_descriptor_set(&vk_writes[vk_write_count++], &d, binding, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 
&descriptor_info); + memset(&d.metadata->info.image, 0, sizeof(d.metadata->info.image)); } - /* Clear out sibling typed descriptor if appropriate. - * Native drivers are robust against scenarios like these, and so we need to be too. */ - if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED) - { - /* The NULL descriptor template for non-embedded is too messy to invoke here. */ - info_index = VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SPLIT_RAW; - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); - d.types->set_info_mask |= 1u << info_index; + desc_range.address = d.payload; + desc_range.size = device->bindless_state.storage_image_size; + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - get_info.data.pStorageBuffer = NULL; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - payload)); - } - else - { - null_descriptor_info.buffer.buffer = VK_NULL_HANDLE; - null_descriptor_info.buffer.offset = 0; - null_descriptor_info.buffer.range = VK_WHOLE_SIZE; - vkd3d_init_write_descriptor_set(&vk_writes[vk_write_count++], &d, binding, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &null_descriptor_info); - } + /* Clear out any sibling buffer descriptor. 
*/ + if (device->bindless_state.descriptor_heap_packed_raw_buffer_offset >= device->bindless_state.storage_image_size && + device->bindless_state.descriptor_heap_packed_raw_buffer_offset < device->bindless_state.descriptor_heap_packed_metadata_offset) + { + memset(d.payload + device->bindless_state.descriptor_heap_packed_raw_buffer_offset, 0, + device->device_info.descriptor_heap_properties.bufferDescriptorSize); } - else - d.view->info.image.flags |= VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; - - if (vk_write_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, vk_write_count, vk_writes, 0, NULL)); - - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, view ? view->cookie : vkd3d_null_cookie()); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, d.offset, - VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_IMAGE_BIT, d.view->qa_cookie); } void d3d12_desc_create_uav_embedded(vkd3d_cpu_descriptor_va_t desc_va, struct d3d12_device *device, @@ -7336,146 +6109,6 @@ void d3d12_desc_create_uav_embedded(vkd3d_cpu_descriptor_va_t desc_va, struct d3 vkd3d_create_texture_uav_embedded(desc_va, device, resource, desc); } -void d3d12_desc_create_uav(vkd3d_cpu_descriptor_va_t desc_va, struct d3d12_device *device, - struct d3d12_resource *resource, struct d3d12_resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) -{ - bool is_buffer; - - if (resource) - { - is_buffer = d3d12_resource_is_buffer(resource); - } - else if (desc) - { - is_buffer = desc->ViewDimension == D3D12_UAV_DIMENSION_BUFFER; - } - else - { - WARN("Description required for NULL UAV.\n"); - return; - } - - if (counter_resource && (!resource || !is_buffer)) - FIXME("Ignoring counter resource %p.\n", counter_resource); - - if (is_buffer) - vkd3d_create_buffer_uav(desc_va, device, resource, counter_resource, desc); - else - vkd3d_create_texture_uav(desc_va, device, resource, desc); -} - -bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, - 
D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) -{ - const struct vkd3d_unique_resource *resource; - uint64_t range; - uint64_t offset; - - resource = vkd3d_va_map_deref(&device->memory_allocator.va_map, gpu_address); - assert(resource && resource->va && resource->size); - - offset = gpu_address - resource->va; - range = min(resource->size - offset, device->vk_info.device_limits.maxStorageBufferRange); - - return vkd3d_create_raw_r32ui_vk_buffer_view(device, resource->vk_buffer, - offset, range, vk_buffer_view); -} - -/* samplers */ -static VkFilter vk_filter_from_d3d12(D3D12_FILTER_TYPE type) -{ - switch (type) - { - case D3D12_FILTER_TYPE_POINT: - return VK_FILTER_NEAREST; - case D3D12_FILTER_TYPE_LINEAR: - return VK_FILTER_LINEAR; - default: - FIXME("Unhandled filter type %#x.\n", type); - return VK_FILTER_NEAREST; - } -} - -static VkSamplerMipmapMode vk_mipmap_mode_from_d3d12(D3D12_FILTER_TYPE type) -{ - switch (type) - { - case D3D12_FILTER_TYPE_POINT: - return VK_SAMPLER_MIPMAP_MODE_NEAREST; - case D3D12_FILTER_TYPE_LINEAR: - return VK_SAMPLER_MIPMAP_MODE_LINEAR; - default: - FIXME("Unhandled filter type %#x.\n", type); - return VK_SAMPLER_MIPMAP_MODE_NEAREST; - } -} - -static VkSamplerAddressMode vk_address_mode_from_d3d12(D3D12_TEXTURE_ADDRESS_MODE mode) -{ - switch (mode) - { - case D3D12_TEXTURE_ADDRESS_MODE_WRAP: - return VK_SAMPLER_ADDRESS_MODE_REPEAT; - case D3D12_TEXTURE_ADDRESS_MODE_MIRROR: - return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - case D3D12_TEXTURE_ADDRESS_MODE_CLAMP: - return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - case D3D12_TEXTURE_ADDRESS_MODE_BORDER: - return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - case D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE: - return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - default: - FIXME("Unhandled address mode %#x.\n", mode); - return VK_SAMPLER_ADDRESS_MODE_REPEAT; - } -} - -static VkSamplerReductionModeEXT vk_reduction_mode_from_d3d12(D3D12_FILTER_REDUCTION_TYPE mode) -{ - 
switch (mode) - { - case D3D12_FILTER_REDUCTION_TYPE_STANDARD: - case D3D12_FILTER_REDUCTION_TYPE_COMPARISON: - return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; - case D3D12_FILTER_REDUCTION_TYPE_MINIMUM: - return VK_SAMPLER_REDUCTION_MODE_MIN; - case D3D12_FILTER_REDUCTION_TYPE_MAXIMUM: - return VK_SAMPLER_REDUCTION_MODE_MAX; - default: - FIXME("Unhandled reduction mode %#x.\n", mode); - return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; - } -} - -static bool d3d12_sampler_needs_border_color(D3D12_TEXTURE_ADDRESS_MODE u, - D3D12_TEXTURE_ADDRESS_MODE v, D3D12_TEXTURE_ADDRESS_MODE w) -{ - return u == D3D12_TEXTURE_ADDRESS_MODE_BORDER || - v == D3D12_TEXTURE_ADDRESS_MODE_BORDER || - w == D3D12_TEXTURE_ADDRESS_MODE_BORDER; -} - -static VkBorderColor vk_static_border_color_from_d3d12(D3D12_STATIC_BORDER_COLOR border_color) -{ - switch (border_color) - { - case D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK: - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - case D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK: - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - case D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE: - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - case D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT: - return VK_BORDER_COLOR_INT_OPAQUE_BLACK; - case D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT: - return VK_BORDER_COLOR_INT_OPAQUE_WHITE; - default: - WARN("Unhandled static border color %u.\n", border_color); - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } -} - static VkBorderColor vk_border_color_from_d3d12(struct d3d12_device *device, const uint32_t *border_color, D3D12_SAMPLER_FLAGS flags) { @@ -7584,6 +6217,7 @@ HRESULT d3d12_create_static_sampler(struct d3d12_device *device, return hresult_from_vk_result(vr); } +#if 1 static HRESULT d3d12_create_sampler(struct d3d12_device *device, const D3D12_SAMPLER_DESC2 *desc, VkSampler *vk_sampler) { @@ -7691,101 +6325,84 @@ static HRESULT d3d12_create_sampler(struct d3d12_device *device, return hresult_from_vk_result(vr); } +#endif 
void d3d12_desc_create_sampler_embedded(vkd3d_cpu_descriptor_va_t desc_va, struct d3d12_device *device, const D3D12_SAMPLER_DESC2 *desc) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorGetInfoEXT get_info; - struct vkd3d_view_key key; - struct vkd3d_view *view; - - if (!desc) - { - WARN("NULL sampler desc.\n"); - return; - } - - key.view_type = VKD3D_VIEW_TYPE_SAMPLER; - key.u.sampler = *desc; - - if (!(view = vkd3d_view_map_create_view(&device->sampler_map.map, device, &key))) - return; - - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_SAMPLER; - get_info.data.pSampler = &view->vk_sampler; - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.samplerDescriptorSize, - (void *)desc_va)); -} - -void d3d12_desc_create_sampler(vkd3d_cpu_descriptor_va_t desc_va, - struct d3d12_device *device, const D3D12_SAMPLER_DESC2 *desc) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - union vkd3d_descriptor_info descriptor_info; - struct vkd3d_descriptor_binding binding; - VkDescriptorGetInfoEXT get_info; - VkWriteDescriptorSet vk_write; - struct d3d12_desc_split d; - struct vkd3d_view_key key; - struct vkd3d_view *view; - uint32_t info_index; - void *payload; - - if (!desc) - { - WARN("NULL sampler desc.\n"); - return; - } + VkSamplerCustomBorderColorIndexCreateInfoEXT index_create_info; + VkSamplerCustomBorderColorCreateInfoEXT border_color_info; + VkSamplerReductionModeCreateInfoEXT reduction_desc; + VkSamplerCreateInfo sampler_desc; + VkHostAddressRangeEXT desc_range; - d = d3d12_desc_decode_va(desc_va); + border_color_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; + border_color_info.pNext = NULL; + memcpy(border_color_info.customBorderColor.uint32, desc->UintBorderColor, + sizeof(border_color_info.customBorderColor.uint32)); + border_color_info.format = 
VK_FORMAT_UNDEFINED; - key.view_type = VKD3D_VIEW_TYPE_SAMPLER; - key.u.sampler = *desc; + reduction_desc.sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT; + reduction_desc.pNext = NULL; + reduction_desc.reductionMode = vk_reduction_mode_from_d3d12(D3D12_DECODE_FILTER_REDUCTION(desc->Filter)); - if (!(view = vkd3d_view_map_create_view(&device->sampler_map.map, device, &key))) - return; + sampler_desc.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_desc.pNext = NULL; + sampler_desc.flags = 0; + sampler_desc.magFilter = vk_filter_from_d3d12(D3D12_DECODE_MAG_FILTER(desc->Filter)); + sampler_desc.minFilter = vk_filter_from_d3d12(D3D12_DECODE_MIN_FILTER(desc->Filter)); + sampler_desc.mipmapMode = vk_mipmap_mode_from_d3d12(D3D12_DECODE_MIP_FILTER(desc->Filter)); + sampler_desc.addressModeU = vk_address_mode_from_d3d12(desc->AddressU); + sampler_desc.addressModeV = vk_address_mode_from_d3d12(desc->AddressV); + sampler_desc.addressModeW = vk_address_mode_from_d3d12(desc->AddressW); + sampler_desc.mipLodBias = desc->MipLODBias; + sampler_desc.anisotropyEnable = D3D12_DECODE_IS_ANISOTROPIC_FILTER(desc->Filter); + sampler_desc.maxAnisotropy = desc->MaxAnisotropy; + sampler_desc.compareEnable = D3D12_DECODE_IS_COMPARISON_FILTER(desc->Filter); + sampler_desc.compareOp = sampler_desc.compareEnable ? 
vk_compare_op_from_d3d12(desc->ComparisonFunc) : 0; + sampler_desc.minLod = desc->MinLOD; + sampler_desc.maxLod = desc->MaxLOD; + sampler_desc.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + sampler_desc.unnormalizedCoordinates = !!(desc->Flags & D3D12_SAMPLER_FLAG_NON_NORMALIZED_COORDINATES); - vkd3d_descriptor_debug_register_view_cookie(device->descriptor_qa_global_info, view->cookie, vkd3d_null_cookie()); + if (sampler_desc.maxAnisotropy < 1.0f) + sampler_desc.anisotropyEnable = VK_FALSE; - info_index = VKD3D_BINDLESS_STATE_INFO_INDEX_SAMPLER; - binding = vkd3d_bindless_state_binding_from_info_index(&device->bindless_state, info_index); + if (sampler_desc.anisotropyEnable) + sampler_desc.maxAnisotropy = min(16.0f, sampler_desc.maxAnisotropy); - d.view->info.image.view = view; - d.view->info.image.flags = VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW | - VKD3D_DESCRIPTOR_FLAG_NON_NULL | - VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR; - d.types->set_info_mask = 1u << info_index; - d.types->single_binding = binding; + if (d3d12_sampler_needs_border_color(desc->AddressU, desc->AddressV, desc->AddressW)) + sampler_desc.borderColor = vk_border_color_from_d3d12(device, desc->UintBorderColor, desc->Flags); - if (d3d12_device_uses_descriptor_buffers(device)) - { - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_SAMPLER; - get_info.data.pSampler = &view->vk_sampler; - payload = d3d12_descriptor_heap_get_mapped_payload(d.heap, binding.set, d.offset); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.samplerDescriptorSize, - payload)); - } - else + if (sampler_desc.borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || + sampler_desc.borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) { - descriptor_info.image.sampler = view->vk_sampler; - descriptor_info.image.imageView = VK_NULL_HANDLE; - descriptor_info.image.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + 
memset(&index_create_info, 0, sizeof(index_create_info)); + index_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_INDEX_CREATE_INFO_EXT; + index_create_info.index = vkd3d_sampler_state_register_custom_border_color( + device, &device->sampler_state, sampler_desc.borderColor, &border_color_info); - vkd3d_init_write_descriptor_set(&vk_write, &d, binding, VK_DESCRIPTOR_TYPE_SAMPLER, &descriptor_info); - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &vk_write, 0, NULL)); + if (index_create_info.index == UINT32_MAX) + { + FIXME_ONCE("Border color heap exhausted, falling back to transparent black border color.\n"); + sampler_desc.borderColor = sampler_desc.borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ? + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK : VK_BORDER_COLOR_INT_TRANSPARENT_BLACK; + } + else + { + vk_prepend_struct(&sampler_desc, &border_color_info); + vk_prepend_struct(&sampler_desc, &index_create_info); + } } - vkd3d_descriptor_metadata_view_set_qa_cookie(d.view, view->cookie); - vkd3d_descriptor_debug_write_descriptor(d.heap->descriptor_heap_info.host_ptr, - d.heap->cookie, d.offset, - VKD3D_DESCRIPTOR_QA_TYPE_SAMPLER_BIT, d.view->qa_cookie); + if (reduction_desc.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE && + device->device_info.vulkan_1_2_features.samplerFilterMinmax) + vk_prepend_struct(&sampler_desc, &reduction_desc); + + desc_range.address = (void *)desc_va; + desc_range.size = device->bindless_state.descriptor_heap_sampler_size; + VK_CALL(vkWriteSamplerDescriptorsEXT(device->vk_device, 1, &sampler_desc, &desc_range)); } /* RTVs */ @@ -8039,6 +6656,50 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_AddRef(ID3D12DescriptorHeap return refcount; } +static void d3d12_descriptor_heap_inc_ref(struct d3d12_descriptor_heap *heap) +{ + InterlockedIncrement(&heap->internal_refcount); +} + +static void d3d12_descriptor_heap_dec_ref(struct d3d12_descriptor_heap *heap) +{ + ULONG refcount = 
InterlockedDecrement(&heap->internal_refcount); + + if (!refcount) + { + d3d12_descriptor_heap_cleanup(heap); + vkd3d_private_store_destroy(&heap->private_store); + vkd3d_free_aligned(heap); + } +} + +uint32_t d3d12_descriptor_heap_allocate_meta_index(struct d3d12_descriptor_heap *heap) +{ + uint32_t index = UINT32_MAX; + pthread_mutex_lock(&heap->meta_descriptor_lock); + + if (heap->meta_descriptor_index_count == 0) + goto unlock; + + index = heap->meta_descriptor_indices[--heap->meta_descriptor_index_count]; + d3d12_descriptor_heap_inc_ref(heap); + +unlock: + pthread_mutex_unlock(&heap->meta_descriptor_lock); + return index; +} + +void d3d12_descriptor_heap_free_meta_index(struct d3d12_descriptor_heap *heap, uint32_t index) +{ + assert(heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && + (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + pthread_mutex_lock(&heap->meta_descriptor_lock); + assert(heap->meta_descriptor_index_count < VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT); + heap->meta_descriptor_indices[heap->meta_descriptor_index_count++] = index; + pthread_mutex_unlock(&heap->meta_descriptor_lock); + d3d12_descriptor_heap_dec_ref(heap); +} + static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHeap *iface) { struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); @@ -8049,13 +6710,8 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea if (!refcount) { struct d3d12_device *device = heap->device; - d3d_destruction_notifier_free(&heap->destruction_notifier); - - d3d12_descriptor_heap_cleanup(heap); - vkd3d_private_store_destroy(&heap->private_store); - vkd3d_free_aligned(heap); - + d3d12_descriptor_heap_dec_ref(heap); d3d12_device_release(device); } @@ -8151,798 +6807,198 @@ CONST_VTBL struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl = (void *)d3d12_object_SetName, /* ID3D12DeviceChild methods */ d3d12_descriptor_heap_GetDevice, - /* 
ID3D12DescriptorHeap methods */ - d3d12_descriptor_heap_GetDesc, - d3d12_descriptor_heap_GetCPUDescriptorHandleForHeapStart, - d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, -}; - -static HRESULT d3d12_descriptor_heap_create_descriptor_buffer(struct d3d12_descriptor_heap *descriptor_heap) -{ - const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; - const uint8_t *src_null_payloads[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - size_t src_null_payload_offsets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - size_t src_null_payload_sizes[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - struct d3d12_device *device = descriptor_heap->device; - VkMemoryPropertyFlags property_flags; - VkDeviceSize total_alloc_size = 0; - VkDeviceSize descriptor_count; - unsigned int i, j, set_count; - VkBufferUsageFlags2KHR usage; - VkDeviceSize alloc_size; - VkResult vr; - HRESULT hr; - - if (descriptor_heap->desc.Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && - descriptor_heap->desc.Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) - return S_OK; - - descriptor_count = descriptor_heap->desc.NumDescriptors; - if (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && - d3d12_descriptor_heap_require_padding_descriptors(device)) - { - descriptor_count += VKD3D_DESCRIPTOR_DEBUG_NUM_PAD_DESCRIPTORS; - } - - /* For embedded mutable we view descriptors in a sliced sense, so make sure to allocate memory for that - * partially OOB descriptor. This is mostly relevant for tooling, but technically speaking we need to do this - * to be spec legal. 
*/ - if (d3d12_device_use_embedded_mutable_descriptors(device) && - descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && - (descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) - { - descriptor_count++; - } - - for (i = 0, set_count = 0; i < device->bindless_state.set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &device->bindless_state.set_info[i]; - bool aliased_descriptor_set; - - if (set_info->heap_type == descriptor_heap->desc.Type) - { - /* This set does not allocate memory on its own, it just offsets into the first buffer. */ - aliased_descriptor_set = set_count != 0 && d3d12_device_use_embedded_mutable_descriptors(device); - - if (!aliased_descriptor_set) - { - /* For VARIABLE_COUNT, the required size needs to be computed based on offset of binding, desired count - * and number of descriptors. */ - alloc_size = set_info->host_mapping_offset; - alloc_size += set_info->host_mapping_descriptor_size * descriptor_count; - - /* If we're using embedded descriptors, and we need CPU visibility, we will need to store metadata - * as well. The CPU VA encodes an offset from descriptor payload to its associated metadata. */ - if (d3d12_device_use_embedded_mutable_descriptors(device) && - !(device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA) && - !(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) && - set_info->heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - alloc_size += set_info->host_mapping_descriptor_size * (1u << vkd3d_log2i_ceil(max(1u, descriptor_count))); - } - - /* Align to cache lines to avoid false sharing. 
*/ - total_alloc_size = align64(total_alloc_size, - max(device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment, - device->device_info.properties2.properties.limits.nonCoherentAtomSize)); - - descriptor_heap->descriptor_buffer.offsets[set_count] = total_alloc_size; - } - else - { - /* Deduce offset based on the first set. - * set_info->host_mapping_offset *really* should be 0, - * but if it's not we can shuffle things around so that the offsets cancel - * each other out ... - * For this to work we need device to support low alignment, - * but we check this when enabling embedded mutable descriptors. */ - descriptor_heap->descriptor_buffer.offsets[set_count] = - src_null_payload_offsets[0] + - device->bindless_state.descriptor_buffer_packed_raw_buffer_offset - set_info->host_mapping_offset; - assert(descriptor_heap->descriptor_buffer.offsets[set_count] < UINT_MAX); - assert(!(descriptor_heap->descriptor_buffer.offsets[set_count] & - (device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment - 1))); - - alloc_size = 0; - } - - if (aliased_descriptor_set || set_info->vk_descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER) - src_null_payloads[set_count] = NULL; - else - { - /* Similar rationale as descriptor set implementation, need to pick some descriptor type, - * there is no generic null descriptor. */ - src_null_payloads[set_count] = vkd3d_bindless_state_get_null_descriptor_payload( - &device->bindless_state, - set_info->vk_descriptor_type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ? 
- VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE : set_info->vk_descriptor_type); - } - - src_null_payload_sizes[set_count] = set_info->host_mapping_descriptor_size; - src_null_payload_offsets[set_count] = set_info->host_mapping_offset + total_alloc_size; - total_alloc_size += alloc_size; - set_count++; - } - } - - if (descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) - { - usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; - if (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - usage |= VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - if (!device->device_info.descriptor_buffer_properties.bufferlessPushDescriptors) - usage |= VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT; - } - else - usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - - if (FAILED(hr = vkd3d_create_buffer_explicit_usage(device, usage, total_alloc_size, - "descriptor-buffer", &descriptor_heap->descriptor_buffer.vk_buffer))) - return hr; - - property_flags = device->memory_info.descriptor_heap_memory_properties; - - if (FAILED(hr = vkd3d_allocate_internal_buffer_memory(device, descriptor_heap->descriptor_buffer.vk_buffer, - property_flags, - &descriptor_heap->descriptor_buffer.device_allocation))) - { - VK_CALL(vkDestroyBuffer(device->vk_device, descriptor_heap->descriptor_buffer.vk_buffer, NULL)); - descriptor_heap->descriptor_buffer.vk_buffer = VK_NULL_HANDLE; - return hr; - } - - descriptor_heap->descriptor_buffer.va = - vkd3d_get_buffer_device_address(device, descriptor_heap->descriptor_buffer.vk_buffer); - - if ((vr = VK_CALL(vkMapMemory(device->vk_device, - descriptor_heap->descriptor_buffer.device_allocation.vk_memory, - 0, VK_WHOLE_SIZE, 0, (void**)&descriptor_heap->descriptor_buffer.host_allocation)))) - { - ERR("Failed to map descriptor set memory.\n"); - vkd3d_free_device_memory(device, &descriptor_heap->descriptor_buffer.device_allocation); - VK_CALL(vkDestroyBuffer(device->vk_device, 
descriptor_heap->descriptor_buffer.vk_buffer, NULL)); - return hresult_from_vk_result(vr); - } - } - else - { - descriptor_heap->descriptor_buffer.host_allocation = vkd3d_malloc_aligned(total_alloc_size, - device->device_info.properties2.properties.limits.nonCoherentAtomSize); - - if (!descriptor_heap->descriptor_buffer.host_allocation) - { - ERR("Failed to allocate host descriptor buffer.\n"); - return E_OUTOFMEMORY; - } - } - - /* Clear all descriptors with NULL descriptors. Ideally we'd just use memset(), - * but NULL descriptors might not be all zero in memory sadly. */ - for (i = 0; i < set_count; i++) - { - const uint8_t *src; - uint8_t *dst; - size_t size; - - src = src_null_payloads[i]; - if (!src) - continue; - - dst = descriptor_heap->descriptor_buffer.host_allocation + src_null_payload_offsets[i]; - size = src_null_payload_sizes[i]; - - for (j = 0; j < descriptor_count; j++) - { - memcpy(dst, src, size); - dst += size; - } - } - - return S_OK; -} - -static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descriptor_heap *descriptor_heap, - VkDescriptorPool *vk_descriptor_pool) -{ - const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; - VkDescriptorPoolSize vk_pool_sizes[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - const struct d3d12_device *device = descriptor_heap->device; - unsigned int i, pool_count = 0, ssbo_count = 0; - VkDescriptorPoolCreateInfo vk_pool_info; - VkDescriptorPoolSize *ssbo_pool = NULL; - VkResult vr; - - for (i = 0; i < device->bindless_state.set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &device->bindless_state.set_info[i]; - - if (set_info->heap_type == descriptor_heap->desc.Type) - { - VkDescriptorPoolSize *vk_pool_size = &vk_pool_sizes[pool_count++]; - vk_pool_size->type = set_info->vk_descriptor_type; - vk_pool_size->descriptorCount = descriptor_heap->desc.NumDescriptors; - - if (vkd3d_descriptor_debug_active_descriptor_qa_checks() && - descriptor_heap->desc.Type 
== D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - vk_pool_size->descriptorCount += VKD3D_DESCRIPTOR_DEBUG_NUM_PAD_DESCRIPTORS; - } - - if (set_info->vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) - ssbo_pool = vk_pool_size; - } - - ssbo_count += vkd3d_popcount(set_info->flags & VKD3D_BINDLESS_SET_EXTRA_MASK); - } - - if (ssbo_count && !ssbo_pool) - { - ssbo_pool = &vk_pool_sizes[pool_count++]; - ssbo_pool->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - ssbo_pool->descriptorCount = 0; /* see below */ - } - - if (ssbo_pool) - ssbo_pool->descriptorCount += ssbo_count; - - if (!pool_count) - return S_OK; - - /* If using mutable type, we will allocate the most conservative size. - * This is fine since we're attempting to allocate a completely generic descriptor set. */ - - vk_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - vk_pool_info.pNext = NULL; - - vk_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT; - if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) && - (descriptor_heap->device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE)) - vk_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT; - - vk_pool_info.maxSets = pool_count; - vk_pool_info.poolSizeCount = pool_count; - vk_pool_info.pPoolSizes = vk_pool_sizes; - - if ((vr = VK_CALL(vkCreateDescriptorPool(device->vk_device, - &vk_pool_info, NULL, vk_descriptor_pool))) < 0) - { - ERR("Failed to create descriptor pool, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - - return S_OK; -} - -static void d3d12_descriptor_heap_zero_initialize(struct d3d12_descriptor_heap *descriptor_heap, - VkDescriptorType vk_descriptor_type, VkDescriptorSet vk_descriptor_set, - uint32_t binding_index, uint32_t descriptor_count) -{ - const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; - const struct d3d12_device *device = descriptor_heap->device; - VkDescriptorBufferInfo *buffer_infos = NULL; - VkDescriptorImageInfo 
*image_infos = NULL; - VkBufferView *buffer_view_infos = NULL; - VkWriteDescriptorSet write; - uint32_t i; - - /* Clear out descriptor heap with the largest possible descriptor type we know of when using mutable descriptor type. - * Purely for defensive purposes. */ - assert(vk_descriptor_type != VK_DESCRIPTOR_TYPE_MUTABLE_EXT); - - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.pNext = NULL; - write.descriptorType = vk_descriptor_type; - write.dstSet = vk_descriptor_set; - write.dstBinding = binding_index; - write.dstArrayElement = 0; - write.descriptorCount = descriptor_count; - write.pTexelBufferView = NULL; - write.pImageInfo = NULL; - write.pBufferInfo = NULL; - - switch (vk_descriptor_type) - { - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - image_infos = vkd3d_calloc(descriptor_count, sizeof(*image_infos)); - write.pImageInfo = image_infos; - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - buffer_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_infos)); - write.pBufferInfo = buffer_infos; - for (i = 0; i < descriptor_count; i++) - buffer_infos[i].range = VK_WHOLE_SIZE; - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - buffer_view_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_view_infos)); - write.pTexelBufferView = buffer_view_infos; - break; - - default: - break; - } - - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &write, 0, NULL)); - vkd3d_free(image_infos); - vkd3d_free(buffer_view_infos); - vkd3d_free(buffer_infos); -} - -static void d3d12_descriptor_heap_get_host_mapping(struct d3d12_descriptor_heap *descriptor_heap, - const struct vkd3d_bindless_set_info *binding, uint32_t set_index) -{ - const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; - uint8_t *mapped_set = NULL; - - descriptor_heap->sets[set_index].mapped_set = NULL; - 
descriptor_heap->sets[set_index].copy_template = NULL; - descriptor_heap->sets[set_index].copy_template_single = NULL; - - if (binding->host_copy_template && binding->host_copy_template_single) - { - if (d3d12_device_uses_descriptor_buffers(descriptor_heap->device)) - { - mapped_set = descriptor_heap->descriptor_buffer.host_allocation + - descriptor_heap->descriptor_buffer.offsets[set_index]; - } - else - { - VK_CALL(vkGetDescriptorSetHostMappingVALVE(descriptor_heap->device->vk_device, - descriptor_heap->sets[set_index].vk_descriptor_set, (void**)&mapped_set)); - } - - if (mapped_set) - { - mapped_set += binding->host_mapping_offset; - descriptor_heap->sets[set_index].stride = binding->host_mapping_descriptor_size; - descriptor_heap->sets[set_index].mapped_set = mapped_set; - /* Keep a local copy close so we can fetch stuff from same cache line easily. */ - descriptor_heap->sets[set_index].copy_template = binding->host_copy_template; - descriptor_heap->sets[set_index].copy_template_single = binding->host_copy_template_single; - } - } -} - -static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap, - const struct vkd3d_bindless_set_info *binding, VkDescriptorSet *vk_descriptor_set) -{ - const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; - VkDescriptorSetVariableDescriptorCountAllocateInfoEXT vk_variable_count_info; - uint32_t descriptor_count = descriptor_heap->desc.NumDescriptors; - const struct d3d12_device *device = descriptor_heap->device; - VkDescriptorSetAllocateInfo vk_set_info; - VkResult vr; - - if (vkd3d_descriptor_debug_active_descriptor_qa_checks() && - descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - descriptor_count += VKD3D_DESCRIPTOR_DEBUG_NUM_PAD_DESCRIPTORS; - - vk_variable_count_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; - vk_variable_count_info.pNext = NULL; - 
vk_variable_count_info.descriptorSetCount = 1; - vk_variable_count_info.pDescriptorCounts = &descriptor_count; - - vk_set_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - vk_set_info.pNext = &vk_variable_count_info; - vk_set_info.descriptorPool = descriptor_heap->vk_descriptor_pool; - vk_set_info.descriptorSetCount = 1; - vk_set_info.pSetLayouts = &binding->vk_host_set_layout; - - if (descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) - vk_set_info.pSetLayouts = &binding->vk_set_layout; - - if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &vk_set_info, vk_descriptor_set))) < 0) - { - ERR("Failed to allocate descriptor set, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - - if (binding->vk_init_null_descriptor_type != VK_DESCRIPTOR_TYPE_SAMPLER) - { - d3d12_descriptor_heap_zero_initialize(descriptor_heap, - binding->vk_init_null_descriptor_type, *vk_descriptor_set, - binding->binding_index, descriptor_count); - } - - return S_OK; -} - -static void d3d12_descriptor_heap_get_buffer_range(struct d3d12_descriptor_heap *descriptor_heap, - VkDeviceSize *offset, VkDeviceSize size, struct vkd3d_host_visible_buffer_range *range) -{ - if (size) - { - range->descriptor.buffer = descriptor_heap->vk_buffer; - range->descriptor.offset = *offset; - range->descriptor.range = size; - range->host_ptr = void_ptr_offset(descriptor_heap->host_memory, *offset); - - *offset += size; - } - else - { - range->descriptor.buffer = VK_NULL_HANDLE; - range->descriptor.offset = 0; - range->descriptor.range = VK_WHOLE_SIZE; - range->host_ptr = NULL; - } -} + /* ID3D12DescriptorHeap methods */ + d3d12_descriptor_heap_GetDesc, + d3d12_descriptor_heap_GetCPUDescriptorHandleForHeapStart, + d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, +}; -static HRESULT d3d12_descriptor_heap_init_data_buffer(struct d3d12_descriptor_heap *descriptor_heap, - struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +static HRESULT 
d3d12_descriptor_heap_create_descriptor_buffer(struct d3d12_descriptor_heap *descriptor_heap) { const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; - VkDeviceSize alignment = max(device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment, - device->device_info.properties2.properties.limits.nonCoherentAtomSize); - VkDeviceSize raw_va_buffer_size = 0, offset_buffer_size = 0; - VKD3D_UNUSED VkDeviceSize descriptor_heap_info_size = 0; + struct d3d12_device *device = descriptor_heap->device; VkMemoryPropertyFlags property_flags; - D3D12_RESOURCE_DESC1 buffer_desc; - VkDeviceSize buffer_size, offset; - D3D12_HEAP_PROPERTIES heap_info; - D3D12_HEAP_FLAGS heap_flags; + VkDeviceSize descriptor_count; + VkBufferUsageFlags2KHR usage; + VkDeviceSize alloc_size; VkResult vr; HRESULT hr; + size_t i; - /* Embedded mutable descriptors alias the raw va buffer on top of the regular descriptor buffer. */ - if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && !d3d12_device_use_embedded_mutable_descriptors(device)) - { - raw_va_buffer_size = align(desc->NumDescriptors * sizeof(VkDeviceAddress), alignment); - if (vkd3d_descriptor_debug_active_descriptor_qa_checks()) - raw_va_buffer_size += align(VKD3D_DESCRIPTOR_DEBUG_NUM_PAD_DESCRIPTORS * sizeof(VkDeviceAddress), alignment); - - if (device->bindless_state.flags & (VKD3D_SSBO_OFFSET_BUFFER | VKD3D_TYPED_OFFSET_BUFFER)) - offset_buffer_size = align(desc->NumDescriptors * sizeof(struct vkd3d_bound_buffer_range), alignment); - - if (vkd3d_descriptor_debug_active_descriptor_qa_checks()) - descriptor_heap_info_size = align(vkd3d_descriptor_debug_heap_info_size(desc->NumDescriptors), alignment); - } - - buffer_size = raw_va_buffer_size + offset_buffer_size; - buffer_size += descriptor_heap_info_size; - - if (!buffer_size) + if (descriptor_heap->desc.Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && + descriptor_heap->desc.Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) return S_OK; - if 
(desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + descriptor_count = descriptor_heap->desc.NumDescriptors; + + if (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { - memset(&buffer_desc, 0, sizeof(buffer_desc)); - buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - buffer_desc.Width = buffer_size; - buffer_desc.Height = 1; - buffer_desc.DepthOrArraySize = 1; - buffer_desc.MipLevels = 1; - buffer_desc.SampleDesc.Count = 1; - buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - buffer_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + alloc_size = device->bindless_state.descriptor_heap_cbv_srv_uav_size * descriptor_count; - /* host-visible device memory */ - memset(&heap_info, 0, sizeof(heap_info)); - heap_info.Type = D3D12_HEAP_TYPE_UPLOAD; + if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) + { + if (device->bindless_state.descriptor_heap_packed_metadata_offset == 0) + { + alloc_size += device->bindless_state.descriptor_heap_cbv_srv_uav_size * + (1u << vkd3d_log2i_ceil(max(1u, descriptor_count))); + } + } + else + { + /* At the beginning of the heap, store some magic. */ + alloc_size += device->bindless_state.heap_redzone_size; - heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + alloc_size = align64(alloc_size, device->bindless_state.descriptor_heap_cbv_srv_uav_size); - if (FAILED(hr = vkd3d_create_buffer(device, &heap_info, heap_flags, &buffer_desc, - "descriptor-buffer", &descriptor_heap->vk_buffer))) - return hr; + /* Allocate some space for clamped NULL descriptor + * (only relevant if we're doing some form of workaround or QA checks). */ + alloc_size += device->bindless_state.descriptor_heap_cbv_srv_uav_size; - property_flags = device->memory_info.descriptor_heap_memory_properties; + /* Allocate space for meta descriptors. 
*/ + pthread_mutex_init(&descriptor_heap->meta_descriptor_lock, NULL); + descriptor_heap->meta_descriptor_indices = vkd3d_malloc(VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT * sizeof(uint32_t)); + for (i = 0; i < VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT; i++) + { + /* All meta shaders use a simple stride from base. */ + descriptor_heap->meta_descriptor_indices[i] = + alloc_size >> device->bindless_state.descriptor_heap_cbv_srv_uav_size_log2; + alloc_size += device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + descriptor_heap->meta_descriptor_index_count = VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT; - if (FAILED(hr = vkd3d_allocate_internal_buffer_memory(device, descriptor_heap->vk_buffer, - property_flags, &descriptor_heap->device_allocation))) - return hr; + /* Unclear what alignment to use for reserved region. */ + alloc_size = align64(alloc_size, device->device_info.descriptor_heap_properties.resourceHeapAlignment); + descriptor_heap->descriptor_buffer.reserved_offset = alloc_size; + alloc_size += device->device_info.descriptor_heap_properties.minResourceHeapReservedRange; - if ((vr = VK_CALL(vkMapMemory(device->vk_device, descriptor_heap->device_allocation.vk_memory, - 0, VK_WHOLE_SIZE, 0, &descriptor_heap->host_memory)))) - { - ERR("Failed to map buffer, vr %d.\n", vr); - return hresult_from_vk_result(vr); + if (alloc_size > device->device_info.descriptor_heap_properties.maxResourceHeapSize) + { + ERR("Resource heap is allocated with too large size, %"PRIu64" > %"PRIu64".\n", + alloc_size, device->device_info.descriptor_heap_properties.maxResourceHeapSize); + return E_OUTOFMEMORY; + } } } else { - memset(&descriptor_heap->device_allocation, 0, sizeof(descriptor_heap->device_allocation)); - descriptor_heap->vk_buffer = VK_NULL_HANDLE; - descriptor_heap->host_memory = vkd3d_calloc(1, buffer_size); - } - - offset = 0; - - d3d12_descriptor_heap_get_buffer_range(descriptor_heap, &offset, raw_va_buffer_size, &descriptor_heap->raw_va_aux_buffer); - 
d3d12_descriptor_heap_get_buffer_range(descriptor_heap, &offset, offset_buffer_size, &descriptor_heap->buffer_ranges); -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - d3d12_descriptor_heap_get_buffer_range(descriptor_heap, &offset, - descriptor_heap_info_size, - &descriptor_heap->descriptor_heap_info); -#endif - return S_OK; -} - -static void d3d12_descriptor_heap_update_extra_bindings(struct d3d12_descriptor_heap *descriptor_heap, - struct d3d12_device *device) -{ - VkDescriptorBufferInfo vk_buffer_info[VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS]; - VkWriteDescriptorSet vk_writes[VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, binding_index, set_index = 0, write_count = 0; - VkDescriptorAddressInfoEXT desc_addr_info; - VkDescriptorGetInfoEXT get_info; - VkDeviceSize binding_offset; - uint32_t flags; - - get_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - get_info.data.pStorageBuffer = &desc_addr_info; - - desc_addr_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT; - desc_addr_info.pNext = NULL; - desc_addr_info.format = VK_FORMAT_UNDEFINED; - - for (i = 0; i < device->bindless_state.set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &device->bindless_state.set_info[i]; - - if (set_info->heap_type != descriptor_heap->desc.Type) - continue; + alloc_size = device->bindless_state.descriptor_heap_sampler_size; + alloc_size *= descriptor_count; - flags = set_info->flags & VKD3D_BINDLESS_SET_EXTRA_MASK; - binding_index = 0; - - while (flags) + if (descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) { - enum vkd3d_bindless_set_flag flag = (enum vkd3d_bindless_set_flag)(flags & -flags); - VkDescriptorBufferInfo *vk_buffer = &vk_buffer_info[write_count]; - VkWriteDescriptorSet *vk_write = &vk_writes[write_count]; - uint8_t *host_ptr; + alloc_size = align64(alloc_size, 
device->device_info.descriptor_heap_properties.samplerHeapAlignment); + descriptor_heap->descriptor_buffer.reserved_offset = alloc_size; + alloc_size += device->device_info.descriptor_heap_properties.minSamplerHeapReservedRangeWithEmbedded; - if (descriptor_heap->descriptor_buffer.host_allocation) + if (alloc_size > device->device_info.descriptor_heap_properties.maxSamplerHeapSize) { - host_ptr = descriptor_heap->descriptor_buffer.host_allocation + - descriptor_heap->descriptor_buffer.offsets[set_index]; - VK_CALL(vkGetDescriptorSetLayoutBindingOffsetEXT(device->vk_device, - set_info->vk_set_layout, binding_index, &binding_offset)); - host_ptr += binding_offset; + ERR("Sampler heap is allocated with too large size, %"PRIu64" > %"PRIu64".\n", + alloc_size, device->device_info.descriptor_heap_properties.maxSamplerHeapSize); + return E_OUTOFMEMORY; } - else - host_ptr = NULL; - - vk_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_write->pNext = NULL; - vk_write->dstSet = descriptor_heap->sets[set_index].vk_descriptor_set; - vk_write->dstBinding = binding_index++; - vk_write->dstArrayElement = 0; - vk_write->descriptorCount = 1; - vk_write->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - vk_write->pImageInfo = NULL; - vk_write->pBufferInfo = vk_buffer; - vk_write->pTexelBufferView = NULL; - - switch (flag) - { - case VKD3D_BINDLESS_SET_EXTRA_RAW_VA_AUX_BUFFER: - if (d3d12_device_use_embedded_mutable_descriptors(device)) - { - /* Bind the descriptor buffer itself as an SSBO. 
*/ - vk_buffer->buffer = descriptor_heap->descriptor_buffer.vk_buffer; - vk_buffer->offset = (uintptr_t)descriptor_heap->sets[0].mapped_set - - (uintptr_t)descriptor_heap->descriptor_buffer.host_allocation; - vk_buffer->range = descriptor_heap->desc.NumDescriptors * - d3d12_device_get_descriptor_handle_increment_size(device, descriptor_heap->desc.Type); - } - else - { - *vk_buffer = descriptor_heap->raw_va_aux_buffer.descriptor; - } - break; + } + } - case VKD3D_BINDLESS_SET_EXTRA_OFFSET_BUFFER: - *vk_buffer = descriptor_heap->buffer_ranges.descriptor; - assert(!d3d12_device_use_embedded_mutable_descriptors(device)); - break; + if (descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + { + usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_DESCRIPTOR_HEAP_BIT_EXT; + if (device->bindless_state.heap_redzone_size) + usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - case VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_PAYLOAD_INFO_BUFFER: - *vk_buffer = *vkd3d_descriptor_debug_get_payload_info_descriptor(device->descriptor_qa_global_info); - assert(!d3d12_device_use_embedded_mutable_descriptors(device)); - break; + if (FAILED(hr = vkd3d_create_buffer_explicit_usage(device, usage, alloc_size, + "descriptor-buffer", &descriptor_heap->descriptor_buffer.vk_buffer))) + return hr; - case VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_CONTROL_INFO_BUFFER: - *vk_buffer = descriptor_heap->descriptor_heap_info.descriptor; - assert(!d3d12_device_use_embedded_mutable_descriptors(device)); - break; -#endif + property_flags = device->memory_info.descriptor_heap_memory_properties; - default: - ERR("Unsupported extra flags %#x.\n", flag); - continue; - } + if (FAILED(hr = vkd3d_allocate_internal_buffer_memory(device, descriptor_heap->descriptor_buffer.vk_buffer, + property_flags, + &descriptor_heap->descriptor_buffer.device_allocation))) + { + VK_CALL(vkDestroyBuffer(device->vk_device, descriptor_heap->descriptor_buffer.vk_buffer, NULL)); + 
descriptor_heap->descriptor_buffer.vk_buffer = VK_NULL_HANDLE; + return hr; + } - if (host_ptr) - { - desc_addr_info.address = - vkd3d_get_buffer_device_address(device, vk_buffer->buffer) + vk_buffer->offset; - desc_addr_info.range = vk_buffer->range; - assert(vk_buffer->range != VK_WHOLE_SIZE); - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - host_ptr)); - } - else - write_count += 1; + descriptor_heap->descriptor_buffer.va = + vkd3d_get_buffer_device_address(device, descriptor_heap->descriptor_buffer.vk_buffer); - flags -= flag; + if ((vr = VK_CALL(vkMapMemory(device->vk_device, + descriptor_heap->descriptor_buffer.device_allocation.vk_memory, + 0, VK_WHOLE_SIZE, 0, (void**)&descriptor_heap->descriptor_buffer.host_allocation)))) + { + ERR("Failed to map descriptor set memory.\n"); + vkd3d_free_device_memory(device, &descriptor_heap->descriptor_buffer.device_allocation); + VK_CALL(vkDestroyBuffer(device->vk_device, descriptor_heap->descriptor_buffer.vk_buffer, NULL)); + return hresult_from_vk_result(vr); } - - set_index += 1; } - - if (write_count) - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, vk_writes, 0, NULL)); -} - -static void d3d12_descriptor_heap_add_null_descriptor_template_buffers( - struct d3d12_descriptor_heap *descriptor_heap, - const struct vkd3d_bindless_set_info *set_info, - unsigned int set_info_index) -{ - struct d3d12_null_descriptor_template *null_descriptor_template; - unsigned int index; - - null_descriptor_template = &descriptor_heap->null_descriptor_template; - index = descriptor_heap->null_descriptor_template.num_writes; - - if (set_info->vk_descriptor_type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) - null_descriptor_template->writes.payloads[index].src_payload = NULL; else { - null_descriptor_template->writes.payloads[index].src_payload = - vkd3d_bindless_state_get_null_descriptor_payload( - 
&descriptor_heap->device->bindless_state, - set_info->vk_descriptor_type); - } - - null_descriptor_template->writes.payloads[index].dst_base = - descriptor_heap->sets[set_info->set_index].mapped_set; - null_descriptor_template->writes.payloads[index].desc_size = - descriptor_heap->device->bindless_state.set_info[set_info_index].host_mapping_descriptor_size; + descriptor_heap->descriptor_buffer.host_allocation = vkd3d_malloc_aligned(alloc_size, + device->device_info.properties2.properties.limits.nonCoherentAtomSize); - if (index == 0) - { - null_descriptor_template->has_mutable_descriptors = - descriptor_heap->device->device_info.mutable_descriptor_features.mutableDescriptorType; - null_descriptor_template->has_descriptor_buffer = true; + if (!descriptor_heap->descriptor_buffer.host_allocation) + { + ERR("Failed to allocate host descriptor buffer.\n"); + return E_OUTOFMEMORY; + } } - descriptor_heap->null_descriptor_template.num_writes++; - descriptor_heap->null_descriptor_template.set_info_mask |= 1u << set_info_index; + descriptor_heap->descriptor_buffer.size = alloc_size; + + return S_OK; } -static void d3d12_descriptor_heap_add_null_descriptor_template_descriptors( - struct d3d12_descriptor_heap *descriptor_heap, - const struct vkd3d_bindless_set_info *set_info, - unsigned int set_info_index) +static void d3d12_descriptor_heap_write_redzone_descriptors( + struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) { - struct VkWriteDescriptorSet *write; - unsigned int index; - - index = descriptor_heap->null_descriptor_template.num_writes; - - write = &descriptor_heap->null_descriptor_template.writes.descriptors.writes[index]; - write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write->pNext = NULL; - write->descriptorCount = 1; - write->dstSet = descriptor_heap->sets[set_info->set_index].vk_descriptor_set; - write->dstBinding = set_info->binding_index; + /* TODO: Can write QA descriptors here as well. 
*/ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + uint8_t *host_memory = descriptor_heap->descriptor_buffer.host_allocation; + VkResourceDescriptorInfoEXT desc_info; + VkDeviceAddressRangeEXT ssbo_range; + VkHostAddressRangeEXT desc_range; - /* Replaced when instantiating template. */ - write->dstArrayElement = 0; + /* If we don't need redzone descriptors, just skip it. */ + if (!device->bindless_state.heap_redzone_size) + return; - /* For mutable, will be replaced when instantiating template. */ - write->descriptorType = set_info->vk_descriptor_type; + memset(&desc_info, 0, sizeof(desc_info)); + desc_info.sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT; + desc_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + desc_info.data.pAddressRange = &ssbo_range; - write->pBufferInfo = &descriptor_heap->null_descriptor_template.writes.descriptors.buffer; - write->pImageInfo = &descriptor_heap->null_descriptor_template.writes.descriptors.image; - write->pTexelBufferView = &descriptor_heap->null_descriptor_template.writes.descriptors.buffer_view; + ssbo_range.address = descriptor_heap->descriptor_buffer.va + device->bindless_state.heap_redzone_size; + ssbo_range.size = descriptor_heap->desc.NumDescriptors * device->bindless_state.descriptor_heap_cbv_srv_uav_size; + desc_range.address = host_memory; + desc_range.size = device->device_info.descriptor_heap_properties.bufferDescriptorSize; - if (index == 0) - { - descriptor_heap->null_descriptor_template.writes.descriptors.buffer.offset = 0; - descriptor_heap->null_descriptor_template.writes.descriptors.buffer.range = VK_WHOLE_SIZE; - descriptor_heap->null_descriptor_template.writes.descriptors.buffer.buffer = VK_NULL_HANDLE; - descriptor_heap->null_descriptor_template.writes.descriptors.image.sampler = VK_NULL_HANDLE; - descriptor_heap->null_descriptor_template.writes.descriptors.image.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - 
descriptor_heap->null_descriptor_template.writes.descriptors.image.imageView = VK_NULL_HANDLE; - descriptor_heap->null_descriptor_template.writes.descriptors.buffer_view = VK_NULL_HANDLE; - descriptor_heap->null_descriptor_template.has_mutable_descriptors = - descriptor_heap->device->device_info.mutable_descriptor_features.mutableDescriptorType; - descriptor_heap->null_descriptor_template.has_descriptor_buffer = false; - } + VK_CALL(vkWriteResourceDescriptorsEXT(device->vk_device, 1, &desc_info, &desc_range)); - descriptor_heap->null_descriptor_template.num_writes++; - descriptor_heap->null_descriptor_template.set_info_mask |= 1u << set_info_index; + /* TODO: Can write QA descriptors here as well. */ } static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) { - uint32_t fast_bank_pointer_index = 0; - unsigned int i; HRESULT hr; memset(descriptor_heap, 0, sizeof(*descriptor_heap)); descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; descriptor_heap->refcount = 1; + descriptor_heap->internal_refcount = 1; descriptor_heap->device = device; descriptor_heap->desc = *desc; if (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) descriptor_heap->gpu_va = d3d12_device_get_descriptor_heap_gpu_va(device, desc->Type); - if (d3d12_device_uses_descriptor_buffers(device)) - { - if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_buffer(descriptor_heap))) - goto fail; - } - else - { - if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, - &descriptor_heap->vk_descriptor_pool))) - goto fail; - } - - if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || - desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) - { - for (i = 0; i < device->bindless_state.set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &device->bindless_state.set_info[i]; - - if (set_info->heap_type == desc->Type) - { - if 
(!d3d12_device_uses_descriptor_buffers(device)) - { - if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, - set_info, &descriptor_heap->sets[set_info->set_index].vk_descriptor_set))) - goto fail; - } - - d3d12_descriptor_heap_get_host_mapping(descriptor_heap, set_info, set_info->set_index); - - /* For special fast paths of descriptor copies - * (e.g. d3d12_device_CopyDescriptorsSimple_descriptor_buffer_16_16_4), - * we can store the mapped pointers in a convenient location. */ - if (fast_bank_pointer_index < ARRAY_SIZE(descriptor_heap->fast_pointer_bank) - 1) - { - descriptor_heap->fast_pointer_bank[fast_bank_pointer_index++] = - descriptor_heap->sets[set_info->set_index].mapped_set; - } - - if (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && - !d3d12_device_use_embedded_mutable_descriptors(device)) - { - if (d3d12_device_uses_descriptor_buffers(device)) - d3d12_descriptor_heap_add_null_descriptor_template_buffers(descriptor_heap, set_info, i); - else - d3d12_descriptor_heap_add_null_descriptor_template_descriptors(descriptor_heap, set_info, i); - } - } - } - } - - if (FAILED(hr = d3d12_descriptor_heap_init_data_buffer(descriptor_heap, device, desc))) + if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_buffer(descriptor_heap))) goto fail; - if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - descriptor_heap->fast_pointer_bank[fast_bank_pointer_index++] = descriptor_heap->raw_va_aux_buffer.host_ptr; - - if (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) - d3d12_descriptor_heap_update_extra_bindings(descriptor_heap, device); + if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) + d3d12_descriptor_heap_write_redzone_descriptors(descriptor_heap, device); if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) goto fail; @@ -8957,32 +7013,6 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap 
*descript return hr; } -static void d3d12_descriptor_heap_init_descriptors(struct d3d12_descriptor_heap *descriptor_heap) -{ - struct vkd3d_descriptor_metadata_types *meta; - unsigned int i; - - switch (descriptor_heap->desc.Type) - { - case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: - case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER: - if (!d3d12_device_use_embedded_mutable_descriptors(descriptor_heap->device)) - { - meta = (struct vkd3d_descriptor_metadata_types *) descriptor_heap->descriptors; - for (i = 0; i < descriptor_heap->desc.NumDescriptors; i++) - meta[i].set_info_mask = descriptor_heap->null_descriptor_template.set_info_mask; - } - break; - - case D3D12_DESCRIPTOR_HEAP_TYPE_RTV: - case D3D12_DESCRIPTOR_HEAP_TYPE_DSV: - break; - - default: - WARN("Unhandled descriptor heap type: %d.\n", descriptor_heap->desc.Type); - } -} - #ifndef VKD3D_NO_TRACE_MESSAGES static void d3d12_descriptor_heap_report_allocation(const D3D12_DESCRIPTOR_HEAP_DESC *desc) { @@ -9008,8 +7038,6 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, { size_t max_descriptor_count, descriptor_size; struct d3d12_descriptor_heap *object; - unsigned int num_descriptor_bits = 0; - unsigned int num_descriptors_pot = 0; size_t required_size; size_t alignment; HRESULT hr; @@ -9062,40 +7090,10 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { - if (d3d12_device_use_embedded_mutable_descriptors(device)) - { - /* Ultra-fast path. Here, all required metadata is embedded inside the descriptor buffer itself, - * so the heap is never accessed when writing descriptors and copying them. */ - required_size = sizeof(struct d3d12_descriptor_heap); - alignment = D3D12_DESC_ALIGNMENT; - } - else - { - /* Always allocate a POT number of descriptors. We want the CPU VA layout to be very specific. 
- * This allows us to synthesize pointers to the heap and metadata without - * performing any de-references, which is great, because it avoids cache misses, - * and reduces our dependency chain of memory accesses required to perform a descriptor copy. - * Missing caches in descriptor copies is a large chunk of our CPU overhead, so we should make sure to avoid it - * when we can. - * We encode descriptors as: - * <--- MSBs ------------------------------------------------------- LSBs ---> - * | descriptor_heap VA | heap_offset | log2i_ceil(num_descriptors) (5 bits) | - * The base VA of the heap therefore becomes descriptor_heap CPU VA + log2i_ceil(num_descriptors). - * The increment is set to 1 << 5. */ - - num_descriptor_bits = vkd3d_log2i_ceil(max(1, desc->NumDescriptors)); - num_descriptors_pot = 1u << num_descriptor_bits; - - required_size = sizeof(struct d3d12_descriptor_heap); - /* From base descriptor heap, we can offset directly to get metadata_bindings. - * Metadata view data is placed at an offset we can deduce from num descriptors. */ - required_size += num_descriptors_pot * sizeof(struct vkd3d_descriptor_metadata_types); - required_size += desc->NumDescriptors * sizeof(struct vkd3d_descriptor_metadata_view); - - /* The alignment should scale roughly with size of the heap, - * so any wasted space shouldn't really be that bad. */ - alignment = max(D3D12_DESC_ALIGNMENT, num_descriptors_pot * VKD3D_RESOURCE_DESC_INCREMENT); - } + /* Ultra-fast path. Here, all required metadata is embedded inside the descriptor buffer itself, + * so the heap is never accessed when writing descriptors and copying them. 
*/ + required_size = sizeof(struct d3d12_descriptor_heap); + alignment = D3D12_DESC_ALIGNMENT; } else { @@ -9118,43 +7116,43 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { - if (d3d12_device_use_embedded_mutable_descriptors(device)) + /* Need to guarantee that this offset is aligned to 32 byte. + * We're guaranteed the base allocation is aligned, but to align the mutable descriptor binding itself, + * we might need to get creative. + * We can tweak the descriptor set layout such that we get an aligned offset, however. */ + object->cpu_va.ptr = (SIZE_T)object->descriptor_buffer.host_allocation; + if (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) + object->cpu_va.ptr += device->bindless_state.heap_redzone_size; + + if (device->vk_info.NVX_image_view_handle) { - /* Need to guarantee that this offset is aligned to 32 byte. - * We're guaranteed the base allocation is aligned, but to align the mutable descriptor binding itself, - * we might need to get creative. - * We can tweak the descriptor set layout such that we get an aligned offset, however. 
*/ - object->cpu_va.ptr = (SIZE_T)object->sets[0].mapped_set; - assert(!(object->cpu_va.ptr & VKD3D_RESOURCE_EMBEDDED_METADATA_OFFSET_LOG2_MASK)); + vkd3d_va_map_insert_descriptor_heap(&device->memory_allocator.va_map, object->cpu_va.ptr, + descriptor_size * desc->NumDescriptors, desc->Type); + } - if (!(desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) && - desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + assert(!(object->cpu_va.ptr & VKD3D_RESOURCE_EMBEDDED_METADATA_OFFSET_LOG2_MASK)); + + if (!(desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) && desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (device->bindless_state.descriptor_heap_packed_metadata_offset == 0) { - if (!(device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA)) - { - /* Need to encode offset to metadata from any given CPU VA. - * Samplers don't require metadata structs, only non-shader visible resource heap does. */ - object->cpu_va.ptr |= vkd3d_log2i_ceil(descriptor_size * max(1u, desc->NumDescriptors)); - } - else - { - /* Use this bit only to mark if this is a shader visible heap or not. - * If we're copying to shader visible heap, - * we can use non-temporal copies for more perf on Deck. - * For the more generic functions which decode VAs, the log2 offset must be greater - * than this value for it to detect planar metadata. - * Specialized functions can make use of this bit to enter more optimal code paths. */ - - /* Ignore all of this for sampler heaps since they are irrelevant - * from a performance standpoint. */ - object->cpu_va.ptr += VKD3D_RESOURCE_EMBEDDED_CACHED_MASK; - } + /* Need to encode offset to metadata from any given CPU VA. + * Samplers don't require metadata structs, only non-shader visible resource heap does. */ + object->cpu_va.ptr |= vkd3d_log2i_ceil(descriptor_size * max(1u, desc->NumDescriptors)); + } + else + { + /* Use this bit only to mark if this is a shader visible heap or not. 
+ * If we're copying to shader visible heap, + * we can use non-temporal copies for more perf on Deck. + * For the more generic functions which decode VAs, the log2 offset must be greater + * than this value for it to detect planar metadata. + * Specialized functions can make use of this bit to enter more optimal code paths. */ + + /* Ignore all of this for sampler heaps since they are irrelevant + * from a performance standpoint. */ + object->cpu_va.ptr += VKD3D_RESOURCE_EMBEDDED_CACHED_MASK; } - } - else - { - /* See comments above on how this is supposed to work */ - object->cpu_va.ptr = (SIZE_T)object + num_descriptor_bits; } } else @@ -9162,8 +7160,6 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, object->cpu_va.ptr = (SIZE_T)object->descriptors; } - d3d12_descriptor_heap_init_descriptors(object); - TRACE("Created descriptor heap %p.\n", object); #ifdef VKD3D_ENABLE_DESCRIPTOR_QA @@ -9181,6 +7177,14 @@ void d3d12_descriptor_heap_cleanup(struct d3d12_descriptor_heap *descriptor_heap const struct vkd3d_vk_device_procs *vk_procs = &descriptor_heap->device->vk_procs; struct d3d12_device *device = descriptor_heap->device; + if (device->vk_info.NVX_image_view_handle && + (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || + descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) + { + vkd3d_va_map_remove_descriptor_heap(&device->memory_allocator.va_map, + descriptor_heap->cpu_va.ptr, descriptor_heap->desc.Type); + } + #ifndef VKD3D_NO_TRACE_MESSAGES if (descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || descriptor_heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) @@ -9206,6 +7210,18 @@ void d3d12_descriptor_heap_cleanup(struct d3d12_descriptor_heap *descriptor_heap vkd3d_free_device_memory(device, &descriptor_heap->descriptor_buffer.device_allocation); VK_CALL(vkDestroyBuffer(device->vk_device, descriptor_heap->descriptor_buffer.vk_buffer, NULL)); + if (descriptor_heap->desc.Type == 
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && + (descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) + { + if (descriptor_heap->meta_descriptor_index_count != VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT) + { + FIXME("Mismatch in meta descriptors. Expected VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT, got %zu.\n", + descriptor_heap->meta_descriptor_index_count); + } + pthread_mutex_destroy(&descriptor_heap->meta_descriptor_lock); + vkd3d_free(descriptor_heap->meta_descriptor_indices); + } + vkd3d_descriptor_debug_unregister_heap(descriptor_heap->cookie); } @@ -9907,113 +7923,3 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info, info->cpu_accessible_domain.rt_ds_type_mask); return S_OK; } - -HRESULT vkd3d_global_descriptor_buffer_init(struct vkd3d_global_descriptor_buffer *global_descriptor_buffer, - struct d3d12_device *device) -{ - VkBufferUsageFlags2KHR vk_usage_flags; - HRESULT hr; - - bool requires_offset_buffer = device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment > 4; - bool uses_ssbo = device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 16; - if (!uses_ssbo) - requires_offset_buffer = false; - - /* Don't bother with descriptor buffers if we need to keep offset buffer around. - * Also, ignore descriptor buffers if implementation does not support non-uniform UBO indexing. - * We want to keep the descriptor buffer path as lean as possible. */ - if (!device->device_info.descriptor_buffer_features.descriptorBuffer || - !device->device_info.descriptor_buffer_features.descriptorBufferPushDescriptors || - !device->device_info.vulkan_1_2_features.shaderUniformBufferArrayNonUniformIndexing || - requires_offset_buffer) - return S_OK; - - if (device->device_info.mutable_descriptor_features.mutableDescriptorType) - { - /* If we are forced to use MUTABLE_SINGLE_SET due to small address space for resources, - * we ignore descriptor buffers as well. 
Similar rationale to non-uniform UBO indexing. - * We will not add even more code paths to deal with that. - * Non-mutable + descriptor buffer is only relevant on AMD Windows driver for the time being, - * and eventually we will make mutable a hard requirement, so don't bother checking that case. */ - VkDeviceSize required_resource_descriptors = VKD3D_MIN_VIEW_DESCRIPTOR_COUNT + 1; /* One magic SSBO for internal VA buffer. */ - uint32_t flags = VKD3D_BINDLESS_MUTABLE_TYPE; - VkDeviceSize mutable_desc_size; - - if (uses_ssbo) - flags |= VKD3D_BINDLESS_RAW_SSBO; - - /* If we cannot interleave SSBO / texel buffers, we'll have to do them side by side. - * Implementation needs to support 2M descriptors in that case. */ - if (!vkd3d_bindless_supports_embedded_mutable_type(device, flags)) - required_resource_descriptors *= 2; - - mutable_desc_size = vkd3d_bindless_get_mutable_descriptor_type_size(device); - if (device->device_info.descriptor_buffer_properties.maxResourceDescriptorBufferRange < - required_resource_descriptors * mutable_desc_size) - { - INFO("Small descriptor heap detected, falling back to MUTABLE_SINGLE_SET.\n"); - return S_OK; - } - } - - vk_usage_flags = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - - /* If push descriptors require extra backing storage, we need to let the driver reserve magic push space for it. */ - if (!device->device_info.descriptor_buffer_properties.bufferlessPushDescriptors) - vk_usage_flags |= VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT; - - /* Creates a default descriptor buffer we can use if the application does not bind anything. - * This might happen if a meta shader is used without any prior descriptor heap bound, - * and we need to use push descriptors with bufferlessPushDescriptors == VK_FALSE. 
*/ - if (FAILED(hr = vkd3d_create_buffer_explicit_usage(device, vk_usage_flags, - 4 * 1024, "descriptor-buffer", &global_descriptor_buffer->resource.vk_buffer))) - { - return hr; - } - - if (FAILED(hr = vkd3d_allocate_internal_buffer_memory(device, global_descriptor_buffer->resource.vk_buffer, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &global_descriptor_buffer->resource.device_allocation))) - { - vkd3d_global_descriptor_buffer_cleanup(global_descriptor_buffer, device); - return hr; - } - - global_descriptor_buffer->resource.va = - vkd3d_get_buffer_device_address(device, global_descriptor_buffer->resource.vk_buffer); - global_descriptor_buffer->resource.usage = vk_usage_flags; - - vk_usage_flags = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; - if (FAILED(hr = vkd3d_create_buffer_explicit_usage(device, vk_usage_flags, - 4 * 1024, "descriptor-buffer", &global_descriptor_buffer->sampler.vk_buffer))) - { - return hr; - } - - if (FAILED(hr = vkd3d_allocate_internal_buffer_memory(device, global_descriptor_buffer->sampler.vk_buffer, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &global_descriptor_buffer->sampler.device_allocation))) - { - vkd3d_global_descriptor_buffer_cleanup(global_descriptor_buffer, device); - return hr; - } - - global_descriptor_buffer->sampler.va = - vkd3d_get_buffer_device_address(device, global_descriptor_buffer->sampler.vk_buffer); - global_descriptor_buffer->sampler.usage = vk_usage_flags; - - return S_OK; -} - -void vkd3d_global_descriptor_buffer_cleanup(struct vkd3d_global_descriptor_buffer *global_descriptor_buffer, - struct d3d12_device *device) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VK_CALL(vkDestroyBuffer(device->vk_device, global_descriptor_buffer->resource.vk_buffer, NULL)); - VK_CALL(vkDestroyBuffer(device->vk_device, global_descriptor_buffer->sampler.vk_buffer, NULL)); - vkd3d_free_device_memory(device, &global_descriptor_buffer->resource.device_allocation); 
- vkd3d_free_device_memory(device, &global_descriptor_buffer->sampler.device_allocation); -} diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 323b7f8cc5..f27c6929f7 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -75,23 +75,11 @@ static ULONG STDMETHODCALLTYPE d3d12_root_signature_AddRef(ID3D12RootSignature * static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signature, struct d3d12_device *device) { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - vkd3d_sampler_state_free_descriptor_set(&device->sampler_state, device, - root_signature->vk_sampler_set, root_signature->vk_sampler_pool); - - VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->graphics.vk_pipeline_layout, NULL)); - VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->mesh.vk_pipeline_layout, NULL)); - VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->compute.vk_pipeline_layout, NULL)); - VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->raygen.vk_pipeline_layout, NULL)); - VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, root_signature->vk_sampler_descriptor_layout, NULL)); - VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, root_signature->vk_root_descriptor_layout, NULL)); - vkd3d_free(root_signature->parameters); vkd3d_free(root_signature->bindings); vkd3d_free(root_signature->root_constants); - vkd3d_free(root_signature->static_samplers); vkd3d_free(root_signature->static_samplers_desc); + vkd3d_free(root_signature->vk_static_samplers_desc); vkd3d_free(root_signature->root_parameter_mappings); vkd3d_free(root_signature->root_signature_blob); } @@ -242,25 +230,6 @@ enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VIS } } -static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(struct d3d12_device *device, D3D12_ROOT_PARAMETER_TYPE type) -{ - bool use_ssbo = d3d12_device_use_ssbo_root_descriptors(device); - - switch 
(type) - { - /* SRV and UAV root parameters are buffer views. */ - case D3D12_ROOT_PARAMETER_TYPE_SRV: - return use_ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - case D3D12_ROOT_PARAMETER_TYPE_UAV: - return use_ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - case D3D12_ROOT_PARAMETER_TYPE_CBV: - return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - default: - FIXME("Unhandled descriptor root parameter type %#x.\n", type); - return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - } -} - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( D3D12_DESCRIPTOR_RANGE_TYPE type) { @@ -297,209 +266,27 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p } } -HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, - VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, - const VkDescriptorSetLayoutBinding *bindings, - VkDescriptorSetLayoutCreateFlags descriptor_buffer_flags, - VkDescriptorSetLayout *set_layout) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorSetLayoutCreateInfo set_desc; - VkResult vr; - - set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - set_desc.pNext = NULL; - set_desc.flags = flags; - set_desc.bindingCount = binding_count; - set_desc.pBindings = bindings; - - if (d3d12_device_uses_descriptor_buffers(device)) - set_desc.flags |= descriptor_buffer_flags; - - if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, set_layout))) < 0) - { - WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - - return S_OK; -} - -HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, - unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts, - unsigned int push_constant_count, const VkPushConstantRange *push_constants, - VkPipelineLayout *pipeline_layout) -{ - 
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct VkPipelineLayoutCreateInfo pipeline_layout_info; - VkResult vr; - - if (set_layout_count > device->vk_info.device_limits.maxBoundDescriptorSets) - { - ERR("Root signature requires %u descriptor sets, but device only supports %u.\n", - set_layout_count, device->vk_info.device_limits.maxBoundDescriptorSets); - return E_INVALIDARG; - } - - pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_info.pNext = NULL; - pipeline_layout_info.flags = 0; - pipeline_layout_info.setLayoutCount = set_layout_count; - pipeline_layout_info.pSetLayouts = set_layouts; - pipeline_layout_info.pushConstantRangeCount = push_constant_count; - pipeline_layout_info.pPushConstantRanges = push_constants; - if ((vr = VK_CALL(vkCreatePipelineLayout(device->vk_device, - &pipeline_layout_info, NULL, pipeline_layout))) < 0) - { - WARN("Failed to create Vulkan pipeline layout, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - - return S_OK; -} - -static HRESULT vkd3d_create_pipeline_layout_for_stage_mask(struct d3d12_device *device, - unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts, - const VkPushConstantRange *push_constants, - VkShaderStageFlags stages, - struct d3d12_bind_point_layout *bind_point_layout) -{ - VkPushConstantRange range; - /* Can just mask directly since STAGE_ALL and ALL_GRAPHICS are OR masks. */ - range.stageFlags = push_constants->stageFlags & stages; - range.offset = push_constants->offset; - range.size = push_constants->size; - - bind_point_layout->vk_push_stages = range.stageFlags; - return vkd3d_create_pipeline_layout(device, set_layout_count, set_layouts, - range.stageFlags ? 
1 : 0, &range, - &bind_point_layout->vk_pipeline_layout); -} - struct d3d12_root_signature_info { uint32_t binding_count; uint32_t descriptor_count; uint32_t parameter_count; - uint32_t push_descriptor_count; uint32_t root_constant_count; - uint32_t hoist_descriptor_count; - bool has_ssbo_offset_buffer; - bool has_typed_offset_buffer; uint32_t cost; }; -static bool d3d12_descriptor_range_can_hoist_cbv_descriptor( - struct d3d12_device *device, const D3D12_DESCRIPTOR_RANGE1 *range) -{ - /* Cannot/should not hoist arrays. - * We only care about CBVs. SRVs and UAVs are too fiddly - * since they don't necessary map to buffers at all. */ - if (!(device->bindless_state.flags & VKD3D_HOIST_STATIC_TABLE_CBV) || - range->RangeType != D3D12_DESCRIPTOR_RANGE_TYPE_CBV || - range->NumDescriptors != 1) - { - return false; - } - - /* If descriptors are not marked volatile, we are guaranteed that the descriptors are - * set before updating the root table parameter in the command list. - * We can latch the descriptor at draw time. - * As a speed hack, we can pretend that all CBVs have this flag set. - * Basically no applications set this flag, even though they really could. */ - return !(range->Flags & D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE) || - (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV); -} - -static void d3d12_root_signature_info_count_srv_uav_table(struct d3d12_root_signature_info *info, - struct d3d12_device *device) -{ - /* separate image + buffer descriptors + aux buffer descriptor. 
*/ - info->binding_count += 3; - - if (device->bindless_state.flags & VKD3D_BINDLESS_RAW_SSBO) - info->binding_count += 1; - - if (device->bindless_state.flags & VKD3D_SSBO_OFFSET_BUFFER) - info->has_ssbo_offset_buffer = true; - if (device->bindless_state.flags & VKD3D_TYPED_OFFSET_BUFFER) - info->has_typed_offset_buffer = true; -} - -static void d3d12_root_signature_info_count_cbv_table(struct d3d12_root_signature_info *info) -{ - info->binding_count += 1; -} - -static void d3d12_root_signature_info_count_sampler_table(struct d3d12_root_signature_info *info) -{ - info->binding_count += 1; -} - -static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, - struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC2 *desc, const D3D12_DESCRIPTOR_RANGE1 *range) -{ - switch (range->RangeType) - { - case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: - case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: - d3d12_root_signature_info_count_srv_uav_table(info, device); - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: - if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) && - d3d12_descriptor_range_can_hoist_cbv_descriptor(device, range)) - { - info->hoist_descriptor_count += 1; - } - d3d12_root_signature_info_count_cbv_table(info); - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: - d3d12_root_signature_info_count_sampler_table(info); - break; - default: - FIXME("Unhandled descriptor type %#x.\n", range->RangeType); - return E_NOTIMPL; - } - - return S_OK; -} - -static bool d3d12_root_signature_may_require_global_heap_binding(struct d3d12_device *device) -{ -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - /* Expect-assume path always wants to see global heap binding for size query purposes. */ - return true; -#else - /* Robustness purposes, we may access the global heap out of band of the root signature. 
*/ - return d3d12_descriptor_heap_require_padding_descriptors(device); -#endif -} - static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_info *info, struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC2 *desc) { bool local_root_signature; - unsigned int i, j; - HRESULT hr; + unsigned int i; memset(info, 0, sizeof(*info)); local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); - /* Need to emit bindings for the magic internal table binding. */ - if (d3d12_root_signature_may_require_global_heap_binding(device) || - (desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED)) - { - d3d12_root_signature_info_count_srv_uav_table(info, device); - d3d12_root_signature_info_count_srv_uav_table(info, device); - d3d12_root_signature_info_count_cbv_table(info); - } - - if (desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED) - d3d12_root_signature_info_count_sampler_table(info); - for (i = 0; i < desc->NumParameters; ++i) { const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; @@ -507,10 +294,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i switch (p->ParameterType) { case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: - for (j = 0; j < p->DescriptorTable.NumDescriptorRanges; ++j) - if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info, - device, desc, &p->DescriptorTable.pDescriptorRanges[j]))) - return hr; + info->binding_count += p->DescriptorTable.NumDescriptorRanges; /* Local root signature directly affects memory layout. */ if (local_root_signature) @@ -519,24 +303,11 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i break; case D3D12_ROOT_PARAMETER_TYPE_CBV: - - /* Local root signature directly affects memory layout. 
*/ - if (local_root_signature) - info->cost = (info->cost + 1u) & ~1u; - else if (!(device->bindless_state.flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV)) - info->push_descriptor_count += 1; - - info->binding_count += 1; - info->cost += 2; - break; - case D3D12_ROOT_PARAMETER_TYPE_SRV: case D3D12_ROOT_PARAMETER_TYPE_UAV: /* Local root signature directly affects memory layout. */ if (local_root_signature) info->cost = (info->cost + 1u) & ~1u; - else if (!(device->bindless_state.flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV)) - info->push_descriptor_count += 1; info->binding_count += 1; info->cost += 2; @@ -553,470 +324,472 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i } } - if (!local_root_signature) - { - /* Make sure that we won't exceed device limits. - * Minimum spec for push descriptors is 32 descriptors, which fits exactly what we need for D3D12. - * Worst case scenarios: - * - 32 root CBVs -> all 32 push descriptors are used. No push constants. - * - Root constants > 128 bytes, 15 root CBVs. 1 push descriptor for push UBO. Can hoist 16 other descriptors. - * Just base the amount of descriptors we can hoist on the root signature cost. This is simple and is trivially correct. 
*/ - info->hoist_descriptor_count = min(info->hoist_descriptor_count, VKD3D_MAX_HOISTED_DESCRIPTORS); - info->hoist_descriptor_count = min(info->hoist_descriptor_count, (D3D12_MAX_ROOT_COST - info->cost) / 2); - - info->push_descriptor_count += info->hoist_descriptor_count; - info->binding_count += info->hoist_descriptor_count; - info->binding_count += desc->NumStaticSamplers; - - if (vkd3d_descriptor_debug_active_instruction_qa_checks()) - info->push_descriptor_count += 2; - } - - info->parameter_count = desc->NumParameters + info->hoist_descriptor_count; + info->binding_count += desc->NumStaticSamplers; + info->parameter_count = desc->NumParameters; // + info->hoist_descriptor_count; return S_OK; } -static bool d3d12_root_signature_parameter_is_raw_va(struct d3d12_root_signature *root_signature, - D3D12_ROOT_PARAMETER_TYPE type) +struct vkd3d_descriptor_set_context { - if (type == D3D12_ROOT_PARAMETER_TYPE_CBV) - return !!(root_signature->device->bindless_state.flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV); - else if (type == D3D12_ROOT_PARAMETER_TYPE_SRV || type == D3D12_ROOT_PARAMETER_TYPE_UAV) - return !!(root_signature->device->bindless_state.flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV); - else - return false; -} + uint32_t binding_index; +}; -static HRESULT d3d12_root_signature_init_shader_record_constants( - struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC2 *desc, const struct d3d12_root_signature_info *info) +static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC2 *desc, const struct d3d12_root_signature_info *info, + struct vkd3d_descriptor_set_context *context) { + bool local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); unsigned int i, j; + size_t size = 0; + + if (!local_root_signature) + { + /* Put root descriptor VAs at the start to avoid alignment issues */ + for (i = 0; i < desc->NumParameters; ++i) + { + 
const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; + switch (p->ParameterType) + { + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + size += sizeof(VkDeviceSize); + break; + + default: + break; + } + } + } + + /* Append actual root constants */ for (i = 0, j = 0; i < desc->NumParameters; ++i) { const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) + { + /* Root descriptor and table are both 8 bytes for local root signatures. */ + if (local_root_signature) + { + size = align(size, sizeof(VkDeviceAddress)); + size += sizeof(VkDeviceAddress); + } continue; + } root_signature->parameters[i].parameter_type = p->ParameterType; - root_signature->parameters[i].constant.constant_index = j; root_signature->parameters[i].constant.constant_count = p->Constants.Num32BitValues; + if (!local_root_signature) + { + root_signature->root_constant_mask |= 1ull << i; + root_signature->root_parameters_constant_dwords += p->Constants.Num32BitValues; + root_signature->parameters[i].constant.constant_index = size / sizeof(uint32_t); + } + else + { + root_signature->parameters[i].constant.constant_index = j; + } + + /* It is not quite safe to use plain CBVs for root parameters since we have to handle robustness + * if a root CBV straddles the 256 byte boundary with vec4. + * This is also the only reasonable way to spill push data when doing workgraphs + * without rewriting a ton of stuff in dxil-spirv. 
*/ + root_signature->root_constants[j].register_space = p->Constants.RegisterSpace; root_signature->root_constants[j].register_index = p->Constants.ShaderRegister; - root_signature->root_constants[j].shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); - root_signature->root_constants[j].offset = 0; + root_signature->root_constants[j].shader_visibility = vkd3d_shader_visibility_from_d3d12( + p->ShaderVisibility); + root_signature->root_constants[j].offset = size; root_signature->root_constants[j].size = p->Constants.Num32BitValues * sizeof(uint32_t); + size += p->Constants.Num32BitValues * sizeof(uint32_t); ++j; } + root_signature->descriptor_table_offset = size; return S_OK; } -static void d3d12_root_signature_add_root_parameter_mapping(struct d3d12_root_signature *root_signature, - uint32_t index, uint32_t offset) +static HRESULT d3d12_root_signature_init_global_heaps(struct d3d12_root_signature *root_signature, + const struct d3d12_root_signature_info *info) { - if (root_signature->root_parameter_mappings) - { - struct vkd3d_shader_root_parameter_mapping *mapping; - assert(root_signature->root_parameter_mappings_count < root_signature->parameter_count); - mapping = &root_signature->root_parameter_mappings[root_signature->root_parameter_mappings_count++]; - mapping->root_parameter = index; - mapping->offset = offset; - mapping->descriptor = false; - } -} + VkDescriptorSetAndBindingMappingEXT *mapping; -static void d3d12_root_signature_add_root_descriptor_mapping(struct d3d12_root_signature *root_signature, - uint32_t index, uint32_t vk_set, uint32_t vk_binding) -{ - if (root_signature->root_parameter_mappings) - { - struct vkd3d_shader_root_parameter_mapping *mapping; - assert(root_signature->root_parameter_mappings_count < root_signature->parameter_count); - mapping = &root_signature->root_parameter_mappings[root_signature->root_parameter_mappings_count++]; - mapping->root_parameter = index; - mapping->vk_set = vk_set; - 
mapping->vk_binding = vk_binding; - mapping->descriptor = true; + if (!vkd3d_array_reserve((void**)&root_signature->mappings, &root_signature->mappings_size, + root_signature->mappings_count + 9, sizeof(*root_signature->mappings))) + return E_OUTOFMEMORY; + + /* SM 6.6 image heap mapping. */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = 0; + mapping->bindingCount = 1; + mapping->sourceData.constantOffset.heapOffset = root_signature->device->bindless_state.heap_redzone_size; + mapping->sourceData.constantOffset.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + + /* SM 6.6 buffer heap mapping */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = 0; + mapping->bindingCount = 1; + mapping->sourceData.constantOffset.heapOffset = + root_signature->device->bindless_state.heap_redzone_size + + 
root_signature->device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + mapping->sourceData.constantOffset.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + + /* SM 6.6 UAV counter heap mapping. Relevant if not using raw VA style. */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = VKD3D_SHADER_UAV_COUNTER_GLOBAL_HEAP_BINDING; + mapping->bindingCount = 1; + mapping->sourceData.constantOffset.heapOffset = root_signature->device->bindless_state.heap_redzone_size; + mapping->sourceData.constantOffset.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + + /* SM 6.6 sampler heap mapping */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = 0; + mapping->bindingCount = 1; + mapping->sourceData.constantOffset.heapOffset = 0; + mapping->sourceData.constantOffset.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_sampler_size; + } + + /* Global root parameter mapping when read as a UBO. 
*/ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT; + mapping->descriptorSet = VKD3D_SHADER_ROOT_CONSTANTS_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = 0; + mapping->bindingCount = 1; + mapping->sourceData.pushDataOffset = 0; + } + + /* Global mapping when reading heap meta descriptors. */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = + VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = VKD3D_SHADER_GLOBAL_HEAP_BINDING_AUX_BINDINGS; + mapping->bindingCount = VKD3D_SHADER_GLOBAL_HEAP_BINDING_AUX_BINDINGS_COUNT; + mapping->sourceData.constantOffset.heapOffset = 0; + mapping->sourceData.constantOffset.heapArrayStride = + align(root_signature->device->device_info.descriptor_heap_properties.bufferDescriptorSize, + root_signature->device->device_info.descriptor_heap_properties.bufferDescriptorAlignment); + } + + root_signature->redzone_style = VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_NONE; + + if (root_signature->device->bindless_state.heap_redzone_size) + { + bool require_heap_va = + !!(d3d12_root_signature_get_shader_interface_flags(root_signature) & + VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER); + + /* We need to access the heap somehow in shaders. This gets more annoying than it should be ... */ + if (info->cost < (require_heap_va ? 
61 : 63)) + { + root_signature->redzone_style = VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_INLINE; + + /* We can store the data inline. */ + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = VKD3D_SHADER_GLOBAL_HEAP_SIZE_BINDING; + mapping->bindingCount = 1; + mapping->sourceData.pushDataOffset = info->cost * sizeof(uint32_t); + + root_signature->heap_redzone_inline_heap_count_offset = info->cost * sizeof(uint32_t); + + if (require_heap_va) + { + root_signature->heap_redzone_inline_heap_va_offset = + align(root_signature->heap_redzone_inline_heap_count_offset + sizeof(uint32_t), + sizeof(VkDeviceAddress)); + + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT; + mapping->descriptorSet = VKD3D_SHADER_GLOBAL_HEAP_VIRTUAL_DESCRIPTOR_SET; + mapping->firstBinding = VKD3D_SHADER_RAW_VIEW_GLOBAL_HEAP_BINDING; + mapping->bindingCount = 1; + mapping->sourceData.pushDataOffset = root_signature->heap_redzone_inline_heap_va_offset; + } + else + { + root_signature->heap_redzone_inline_heap_va_offset = UINT32_MAX; + } + } + else + { + root_signature->redzone_style = VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_DESCRIPTOR; + /* Refer to AUX descriptors. Can use a single SSBO here which covers both size + payloads. + * Slower, but it will always work. 
*/ + } } + + return S_OK; } -static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signature *root_signature, +static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC2 *desc, const struct d3d12_root_signature_info *info, - struct VkPushConstantRange *push_constant_range) + struct vkd3d_descriptor_set_context *context) { - unsigned int i, j; + struct vkd3d_shader_resource_binding binding; + struct vkd3d_shader_descriptor_table *table; + unsigned int i, j, t, range_count; + uint32_t range_descriptor_offset; + uint32_t local_root_size = 0; + bool local_root_signature; - /* Stages set later. */ - push_constant_range->stageFlags = 0; - push_constant_range->offset = 0; - push_constant_range->size = 0; + local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); - /* Put root descriptor VAs at the start to avoid alignment issues */ - for (i = 0; i < desc->NumParameters; ++i) + for (i = 0, t = (local_root_signature ? 
VKD3D_SHADER_LOCAL_TABLES_VIRTUAL_DESCRIPTOR_SET_BASE : 0); + i < desc->NumParameters; ++i) { const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; + VkDescriptorSetAndBindingMappingEXT *mapping; - if (d3d12_root_signature_parameter_is_raw_va(root_signature, p->ParameterType)) + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { - push_constant_range->stageFlags |= vkd3d_vk_stage_flags_from_visibility(p->ShaderVisibility); - d3d12_root_signature_add_root_parameter_mapping(root_signature, i, push_constant_range->size); - push_constant_range->size += sizeof(VkDeviceSize); + if (p->ParameterType == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) + local_root_size += p->Constants.Num32BitValues * sizeof(uint32_t); + else + local_root_size = align(local_root_size + sizeof(VkDeviceAddress), sizeof(VkDeviceAddress)); + continue; } - } - /* Append actual root constants */ - for (i = 0, j = 0; i < desc->NumParameters; ++i) - { - const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; + local_root_size = align(local_root_size, sizeof(VkDeviceAddress)); - if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) - continue; + if (!local_root_signature) + { + root_signature->descriptor_table_mask |= 1ull << i; + root_signature->descriptor_table_count++; + root_signature->root_parameters_constant_dwords += 1; + } - d3d12_root_signature_add_root_parameter_mapping(root_signature, i, push_constant_range->size); - root_signature->root_constant_mask |= 1ull << i; + table = &root_signature->parameters[i].descriptor_table; + range_count = p->DescriptorTable.NumDescriptorRanges; + range_descriptor_offset = 0; root_signature->parameters[i].parameter_type = p->ParameterType; - root_signature->parameters[i].constant.constant_index = push_constant_range->size / sizeof(uint32_t); - root_signature->parameters[i].constant.constant_count = p->Constants.Num32BitValues; - - root_signature->root_constants[j].register_space = p->Constants.RegisterSpace; - 
root_signature->root_constants[j].register_index = p->Constants.ShaderRegister; - root_signature->root_constants[j].shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); - root_signature->root_constants[j].offset = push_constant_range->size; - root_signature->root_constants[j].size = p->Constants.Num32BitValues * sizeof(uint32_t); - - push_constant_range->stageFlags |= vkd3d_vk_stage_flags_from_visibility(p->ShaderVisibility); - push_constant_range->size += p->Constants.Num32BitValues * sizeof(uint32_t); - ++j; - } + table->binding_count = 0; + table->first_binding = &root_signature->bindings[context->binding_index]; - /* Append one 32-bit push constant for each descriptor table offset */ - if (root_signature->device->bindless_state.flags) - { - root_signature->descriptor_table_offset = push_constant_range->size; + if (!vkd3d_array_reserve((void**)&root_signature->mappings, &root_signature->mappings_size, + root_signature->mappings_count + 4, sizeof(*root_signature->mappings))) + return E_OUTOFMEMORY; - for (i = 0; i < desc->NumParameters; ++i) + /* Image heap mapping. 
*/ { - const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT; + mapping->descriptorSet = t; + mapping->firstBinding = 0; + mapping->bindingCount = UINT32_MAX; + + if (local_root_signature) + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT; + mapping->sourceData.shaderRecordIndex.heapOffset = root_signature->device->bindless_state.heap_redzone_size; + mapping->sourceData.shaderRecordIndex.shaderRecordOffset = local_root_size; + mapping->sourceData.shaderRecordIndex.heapIndexStride = 1; + mapping->sourceData.shaderRecordIndex.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + else + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT; + mapping->sourceData.pushIndex.heapOffset = root_signature->device->bindless_state.heap_redzone_size; + mapping->sourceData.shaderRecordIndex.shaderRecordOffset = local_root_size; + mapping->sourceData.pushIndex.pushOffset = + root_signature->descriptor_table_offset + t * sizeof(uint32_t); + mapping->sourceData.pushIndex.heapIndexStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + mapping->sourceData.pushIndex.heapArrayStride = mapping->sourceData.pushIndex.heapIndexStride; + } + } - if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) - continue; + /* Buffer heap mapping */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = 
VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT | + VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + mapping->descriptorSet = t; + mapping->firstBinding = 0; + mapping->bindingCount = UINT32_MAX; + + if (local_root_signature) + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT; + mapping->sourceData.shaderRecordIndex.heapOffset = + root_signature->device->bindless_state.heap_redzone_size + + root_signature->device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + mapping->sourceData.shaderRecordIndex.shaderRecordOffset = local_root_size; + mapping->sourceData.shaderRecordIndex.heapIndexStride = 1; + mapping->sourceData.shaderRecordIndex.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + else + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT; + mapping->sourceData.pushIndex.heapOffset = + root_signature->device->bindless_state.heap_redzone_size + + root_signature->device->bindless_state.descriptor_heap_packed_raw_buffer_offset; + mapping->sourceData.pushIndex.pushOffset = + root_signature->descriptor_table_offset + t * sizeof(uint32_t); + mapping->sourceData.pushIndex.heapIndexStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + mapping->sourceData.pushIndex.heapArrayStride = mapping->sourceData.pushIndex.heapIndexStride; + } + } - d3d12_root_signature_add_root_parameter_mapping(root_signature, i, push_constant_range->size); - root_signature->descriptor_table_count += 1; + /* Buffer UAV counter heap mapping */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT | + 
VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + mapping->descriptorSet = t + VKD3D_SHADER_UAV_COUNTER_SET_OFFSET; + mapping->firstBinding = 0; + mapping->bindingCount = UINT32_MAX; + + if (local_root_signature) + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT; + mapping->sourceData.shaderRecordIndex.heapOffset = root_signature->device->bindless_state.heap_redzone_size; + mapping->sourceData.shaderRecordIndex.shaderRecordOffset = local_root_size; + mapping->sourceData.shaderRecordIndex.heapIndexStride = 1; + mapping->sourceData.shaderRecordIndex.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + } + else + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT; + mapping->sourceData.pushIndex.heapOffset = root_signature->device->bindless_state.heap_redzone_size; + mapping->sourceData.pushIndex.pushOffset = + root_signature->descriptor_table_offset + t * sizeof(uint32_t); + mapping->sourceData.pushIndex.heapIndexStride = + root_signature->device->bindless_state.descriptor_heap_cbv_srv_uav_size; + mapping->sourceData.pushIndex.heapArrayStride = mapping->sourceData.pushIndex.heapIndexStride; + } + } - push_constant_range->stageFlags |= vkd3d_vk_stage_flags_from_visibility(p->ShaderVisibility); - push_constant_range->size += sizeof(uint32_t); + /* Sampler heap mapping */ + { + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT; + mapping->descriptorSet = t; + mapping->firstBinding = 0; + mapping->bindingCount = UINT32_MAX; + + if (local_root_signature) + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT; + mapping->sourceData.shaderRecordIndex.heapOffset = 0; + mapping->sourceData.shaderRecordIndex.shaderRecordOffset = 
local_root_size; + mapping->sourceData.shaderRecordIndex.heapIndexStride = 1; + mapping->sourceData.shaderRecordIndex.heapArrayStride = + root_signature->device->bindless_state.descriptor_heap_sampler_size; + } + else + { + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT; + mapping->sourceData.pushIndex.heapOffset = 0; + mapping->sourceData.pushIndex.pushOffset = + root_signature->descriptor_table_offset + t * sizeof(uint32_t); + mapping->sourceData.pushIndex.heapIndexStride = + root_signature->device->bindless_state.descriptor_heap_sampler_size; + mapping->sourceData.pushIndex.heapArrayStride = mapping->sourceData.pushIndex.heapIndexStride; + } } - } - return S_OK; -} + for (j = 0; j < range_count; ++j) + { + const D3D12_DESCRIPTOR_RANGE1 *range = &p->DescriptorTable.pDescriptorRanges[j]; -struct vkd3d_descriptor_set_context -{ - uint32_t binding_index; - uint32_t vk_set; - uint32_t vk_binding; -}; - -static enum vkd3d_bindless_set_flag vkd3d_bindless_set_flag_from_descriptor_range_type(D3D12_DESCRIPTOR_RANGE_TYPE range_type) -{ - switch (range_type) - { - case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: - return VKD3D_BINDLESS_SET_SAMPLER; - case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: - return VKD3D_BINDLESS_SET_CBV; - case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: - return VKD3D_BINDLESS_SET_SRV; - case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: - return VKD3D_BINDLESS_SET_UAV; - default: - ERR("Unhandled descriptor range type %u.\n", range_type); - return VKD3D_BINDLESS_SET_SRV; - } -} - -static void vkd3d_shader_resource_binding_init_global_heap(struct vkd3d_shader_resource_binding *binding, - D3D12_DESCRIPTOR_RANGE_TYPE range_type) -{ - binding->type = vkd3d_descriptor_type_from_d3d12_range_type(range_type); - binding->register_space = UINT32_MAX; - binding->register_index = UINT32_MAX; - binding->register_count = UINT32_MAX; - binding->shader_visibility = VKD3D_SHADER_VISIBILITY_ALL; - binding->descriptor_table = 0; /* Ignored. 
*/ - binding->descriptor_offset = 0; /* Ignored. */ -} - -static void d3d12_root_signature_init_srv_uav_binding(struct d3d12_root_signature *root_signature, - struct vkd3d_descriptor_set_context *context, D3D12_DESCRIPTOR_RANGE_TYPE range_type, - struct vkd3d_shader_resource_binding *binding, - struct vkd3d_shader_resource_binding *out_bindings_base, uint32_t *out_index) -{ - struct vkd3d_bindless_state *bindless_state = &root_signature->device->bindless_state; - enum vkd3d_bindless_set_flag range_flag; - - range_flag = vkd3d_bindless_set_flag_from_descriptor_range_type(range_type); - binding->flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER; - - if (d3d12_device_use_embedded_mutable_descriptors(root_signature->device) && - range_type == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) - { - /* If we're relying on embedded mutable descriptors we have to be a bit careful with aliasing raw VAs. - * With application bugs in play, it's somewhat easy to end up aliasing a true texel buffer - * descriptor and raw VA descriptor. Avoid this scenario by pretending the AUX_BUFFER texel buffers - * and normal texel buffers are one and the same. This is robust against many kinds of hypothetical app bugs: - * - App creates RWStructuredBuffer without counter: Counter will point to base address of the RWStructuredBuffer. - * - App creates texel buffer: Counter will point to the texel buffer itself. - * - NULL resource: Implicitly handled without shader magic. - * - App creates RWStructuredBuffer with counter, app reads as typed buffer: Typed buffer will read from counter. */ - if (vkd3d_bindless_state_find_binding(bindless_state, range_flag | VKD3D_BINDLESS_SET_BUFFER, &binding->binding)) - out_bindings_base[(*out_index)++] = *binding; - } - else - { - /* Use raw VA for both RTAS and UAV counters. 
*/ - binding->flags |= VKD3D_SHADER_BINDING_FLAG_RAW_VA; - binding->binding = root_signature->raw_va_aux_buffer_binding; - out_bindings_base[(*out_index)++] = *binding; - } - - if (vkd3d_bindless_state_find_binding(bindless_state, range_flag | VKD3D_BINDLESS_SET_BUFFER, &binding->binding)) - { - binding->flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_BUFFER; - out_bindings_base[(*out_index)++] = *binding; - } - - if (vkd3d_bindless_state_find_binding(bindless_state, range_flag | VKD3D_BINDLESS_SET_RAW_SSBO, &binding->binding)) - { - binding->flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_BUFFER | VKD3D_SHADER_BINDING_FLAG_RAW_SSBO; - out_bindings_base[(*out_index)++] = *binding; - } - - if (vkd3d_bindless_state_find_binding(bindless_state, range_flag | VKD3D_BINDLESS_SET_IMAGE, &binding->binding)) - { - binding->flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_IMAGE; - out_bindings_base[(*out_index)++] = *binding; - } -} - -static void d3d12_root_signature_init_srv_uav_heap_bindings(struct d3d12_root_signature *root_signature, - struct vkd3d_descriptor_set_context *context, D3D12_DESCRIPTOR_RANGE_TYPE range_type) -{ - struct vkd3d_shader_resource_binding binding; - vkd3d_shader_resource_binding_init_global_heap(&binding, range_type); - d3d12_root_signature_init_srv_uav_binding(root_signature, context, range_type, &binding, - root_signature->bindings, &context->binding_index); -} - -static void d3d12_root_signature_init_cbv_srv_uav_heap_bindings(struct d3d12_root_signature *root_signature, - struct vkd3d_descriptor_set_context *context) -{ - struct vkd3d_bindless_state *bindless_state = &root_signature->device->bindless_state; - struct vkd3d_shader_resource_binding binding; - - d3d12_root_signature_init_srv_uav_heap_bindings(root_signature, context, D3D12_DESCRIPTOR_RANGE_TYPE_SRV); - d3d12_root_signature_init_srv_uav_heap_bindings(root_signature, context, D3D12_DESCRIPTOR_RANGE_TYPE_UAV); - - 
vkd3d_shader_resource_binding_init_global_heap(&binding, D3D12_DESCRIPTOR_RANGE_TYPE_CBV); - if (vkd3d_bindless_state_find_binding(bindless_state, VKD3D_BINDLESS_SET_CBV, &binding.binding)) - { - binding.flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_BUFFER; - root_signature->bindings[context->binding_index++] = binding; - } -} - -static void d3d12_root_signature_init_sampler_heap_bindings(struct d3d12_root_signature *root_signature, - struct vkd3d_descriptor_set_context *context) -{ - struct vkd3d_bindless_state *bindless_state = &root_signature->device->bindless_state; - struct vkd3d_shader_resource_binding binding; - - vkd3d_shader_resource_binding_init_global_heap(&binding, D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER); - if (vkd3d_bindless_state_find_binding(bindless_state, VKD3D_BINDLESS_SET_SAMPLER, &binding.binding)) - { - binding.flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_IMAGE; - root_signature->bindings[context->binding_index++] = binding; - } -} - -static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC2 *desc, const struct d3d12_root_signature_info *info, - struct vkd3d_descriptor_set_context *context) -{ - struct vkd3d_bindless_state *bindless_state = &root_signature->device->bindless_state; - struct vkd3d_shader_resource_binding binding; - struct vkd3d_shader_descriptor_table *table; - unsigned int i, j, t, range_count; - uint32_t range_descriptor_offset; - bool local_root_signature; - - local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); - - if (d3d12_root_signature_may_require_global_heap_binding(root_signature->device) || - (desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED)) - d3d12_root_signature_init_cbv_srv_uav_heap_bindings(root_signature, context); - if (desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED) - 
d3d12_root_signature_init_sampler_heap_bindings(root_signature, context); - - for (i = 0, t = 0; i < desc->NumParameters; ++i) - { - const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; - if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) - continue; - - if (!local_root_signature) - root_signature->descriptor_table_mask |= 1ull << i; - - table = &root_signature->parameters[i].descriptor_table; - range_count = p->DescriptorTable.NumDescriptorRanges; - range_descriptor_offset = 0; - - root_signature->parameters[i].parameter_type = p->ParameterType; - - if (local_root_signature) - table->table_index = i; - else - table->table_index = t++; - - table->binding_count = 0; - table->first_binding = &root_signature->bindings[context->binding_index]; - - for (j = 0; j < range_count; ++j) - { - const D3D12_DESCRIPTOR_RANGE1 *range = &p->DescriptorTable.pDescriptorRanges[j]; - enum vkd3d_bindless_set_flag range_flag = vkd3d_bindless_set_flag_from_descriptor_range_type(range->RangeType); - - if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) - range_descriptor_offset = range->OffsetInDescriptorsFromTableStart; + if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + range_descriptor_offset = range->OffsetInDescriptorsFromTableStart; binding.type = vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType); binding.register_space = range->RegisterSpace; binding.register_index = range->BaseShaderRegister; binding.register_count = range->NumDescriptors; - binding.descriptor_table = table->table_index; - binding.descriptor_offset = range_descriptor_offset; binding.shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); + binding.binding.set = t; + binding.binding.binding = range_descriptor_offset; + table->first_binding[table->binding_count++] = binding; - switch (range->RangeType) - { - case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: - if 
(vkd3d_bindless_state_find_binding(bindless_state, range_flag, &binding.binding)) - { - binding.flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_IMAGE; - table->first_binding[table->binding_count++] = binding; - } - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: - if (vkd3d_bindless_state_find_binding(bindless_state, range_flag, &binding.binding)) - { - binding.flags = VKD3D_SHADER_BINDING_FLAG_BINDLESS | VKD3D_SHADER_BINDING_FLAG_BUFFER; - table->first_binding[table->binding_count++] = binding; - } - break; - case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: - case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: - d3d12_root_signature_init_srv_uav_binding(root_signature, context, range->RangeType, - &binding, table->first_binding, &table->binding_count); - break; - default: - FIXME("Unhandled descriptor range type %u.\n", range->RangeType); - } - - range_descriptor_offset = binding.descriptor_offset + binding.register_count; + range_descriptor_offset += binding.register_count; } - context->binding_index += table->binding_count; - } - - return S_OK; -} - -static void d3d12_root_signature_add_common_flags(struct d3d12_root_signature *root_signature, - uint32_t common_flags) -{ - root_signature->graphics.flags |= common_flags; - root_signature->mesh.flags |= common_flags; - root_signature->compute.flags |= common_flags; - root_signature->raygen.flags |= common_flags; -} - -static void d3d12_root_signature_init_extra_bindings(struct d3d12_root_signature *root_signature, - const struct d3d12_root_signature_info *info) -{ - vkd3d_bindless_state_find_binding(&root_signature->device->bindless_state, - VKD3D_BINDLESS_SET_EXTRA_RAW_VA_AUX_BUFFER, - &root_signature->raw_va_aux_buffer_binding); - - if (info->has_ssbo_offset_buffer || info->has_typed_offset_buffer) - { - if (info->has_ssbo_offset_buffer) - d3d12_root_signature_add_common_flags(root_signature, VKD3D_ROOT_SIGNATURE_USE_SSBO_OFFSET_BUFFER); - if (info->has_typed_offset_buffer) - 
d3d12_root_signature_add_common_flags(root_signature, VKD3D_ROOT_SIGNATURE_USE_TYPED_OFFSET_BUFFER); - - vkd3d_bindless_state_find_binding(&root_signature->device->bindless_state, - VKD3D_BINDLESS_SET_EXTRA_OFFSET_BUFFER, - &root_signature->offset_buffer_binding); - } - -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA - if (vkd3d_descriptor_debug_active_descriptor_qa_checks()) - { - vkd3d_bindless_state_find_binding(&root_signature->device->bindless_state, - VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_CONTROL_INFO_BUFFER, - &root_signature->descriptor_qa_control_binding); - vkd3d_bindless_state_find_binding(&root_signature->device->bindless_state, - VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_PAYLOAD_INFO_BUFFER, - &root_signature->descriptor_qa_payload_binding); - } -#endif -} - -static HRESULT d3d12_root_signature_init_shader_record_descriptors( - struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC2 *desc, const struct d3d12_root_signature_info *info, - struct vkd3d_descriptor_set_context *context) -{ - struct vkd3d_shader_resource_binding *binding; - struct vkd3d_shader_root_parameter *param; - unsigned int i; - - for (i = 0; i < desc->NumParameters; ++i) - { - const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; - - if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_CBV - && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_SRV - && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_UAV) - continue; - - binding = &root_signature->bindings[context->binding_index]; - binding->type = vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType); - binding->register_space = p->Descriptor.RegisterSpace; - binding->register_index = p->Descriptor.ShaderRegister; - binding->register_count = 1; - binding->descriptor_table = 0; /* ignored */ - binding->descriptor_offset = 0; /* ignored */ - binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); - binding->flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; - binding->binding.binding = 0; /* ignored */ - 
binding->binding.set = 0; /* ignored */ - binding->flags |= VKD3D_SHADER_BINDING_FLAG_RAW_VA; - - param = &root_signature->parameters[i]; - param->parameter_type = p->ParameterType; - param->descriptor.binding = binding; + if (local_root_signature) + table->table_index = i; + else + table->table_index = t++; - context->binding_index++; + context->binding_index += table->binding_count; + local_root_size += sizeof(VkDeviceAddress); } return S_OK; @@ -1024,164 +797,80 @@ static HRESULT d3d12_root_signature_init_shader_record_descriptors( static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC2 *desc, struct d3d12_root_signature_info *info, - const VkPushConstantRange *push_constant_range, struct vkd3d_descriptor_set_context *context, - VkDescriptorSetLayout *vk_set_layout) + struct vkd3d_descriptor_set_context *context) { - VkDescriptorSetLayoutBinding *vk_binding, *vk_binding_info = NULL; - struct vkd3d_descriptor_hoist_desc *hoist_desc; + bool local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); struct vkd3d_shader_resource_binding *binding; struct vkd3d_shader_root_parameter *param; uint32_t raw_va_root_descriptor_count = 0; - unsigned int hoisted_parameter_index; - const D3D12_DESCRIPTOR_RANGE1 *range; - unsigned int i, j, k; + uint32_t local_root_size = 0; HRESULT hr = S_OK; - uint32_t or_flags; - - or_flags = root_signature->graphics.flags | - root_signature->compute.flags | - root_signature->raygen.flags | - root_signature->mesh.flags; - - if (info->push_descriptor_count || (or_flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK)) - { - if (!(vk_binding_info = vkd3d_malloc(sizeof(*vk_binding_info) * (info->push_descriptor_count + 1)))) - return E_OUTOFMEMORY; - } - else if (!(root_signature->device->bindless_state.flags & - (VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV | VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV))) - { - return S_OK; - } - - 
hoisted_parameter_index = desc->NumParameters; + unsigned int i; - for (i = 0, j = 0; i < desc->NumParameters; ++i) + for (i = 0; i < desc->NumParameters; ++i) { const D3D12_ROOT_PARAMETER1 *p = &desc->pParameters[i]; - bool raw_va; - - if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) && - p->ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) - { - unsigned int range_descriptor_offset = 0; - for (k = 0; k < p->DescriptorTable.NumDescriptorRanges && info->hoist_descriptor_count; k++) - { - range = &p->DescriptorTable.pDescriptorRanges[k]; - if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) - range_descriptor_offset = range->OffsetInDescriptorsFromTableStart; - - if (d3d12_descriptor_range_can_hoist_cbv_descriptor(root_signature->device, range)) - { - vk_binding = &vk_binding_info[j++]; - vk_binding->binding = context->vk_binding; - - vk_binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - vk_binding->descriptorCount = 1; - vk_binding->stageFlags = vkd3d_vk_stage_flags_from_visibility(p->ShaderVisibility); - vk_binding->pImmutableSamplers = NULL; - - root_signature->root_descriptor_push_mask |= 1ull << hoisted_parameter_index; - hoist_desc = &root_signature->hoist_info.desc[root_signature->hoist_info.num_desc]; - hoist_desc->table_index = i; - hoist_desc->parameter_index = hoisted_parameter_index; - hoist_desc->table_offset = range_descriptor_offset; - root_signature->hoist_info.num_desc++; - - binding = &root_signature->bindings[context->binding_index]; - binding->type = vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType); - binding->register_space = range->RegisterSpace; - binding->register_index = range->BaseShaderRegister; - binding->register_count = 1; - binding->descriptor_table = 0; /* ignored */ - binding->descriptor_offset = 0; /* ignored */ - binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); - binding->flags = 
VKD3D_SHADER_BINDING_FLAG_BUFFER; - binding->binding.binding = context->vk_binding; - binding->binding.set = context->vk_set; - - param = &root_signature->parameters[hoisted_parameter_index]; - param->parameter_type = D3D12_ROOT_PARAMETER_TYPE_CBV; - param->descriptor.binding = binding; - - context->binding_index += 1; - context->vk_binding += 1; - hoisted_parameter_index += 1; - info->hoist_descriptor_count -= 1; - } - - range_descriptor_offset += range->NumDescriptors; - } - } + VkDescriptorSetAndBindingMappingEXT *mapping; if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_CBV && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_SRV && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_UAV) - continue; - - raw_va = d3d12_root_signature_parameter_is_raw_va(root_signature, p->ParameterType); - - if (!raw_va) { - vk_binding = &vk_binding_info[j++]; - vk_binding->binding = context->vk_binding; - vk_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(root_signature->device, p->ParameterType); - vk_binding->descriptorCount = 1; - vk_binding->stageFlags = vkd3d_vk_stage_flags_from_visibility(p->ShaderVisibility); - vk_binding->pImmutableSamplers = NULL; - root_signature->root_descriptor_push_mask |= 1ull << i; - - d3d12_root_signature_add_root_descriptor_mapping(root_signature, i, context->vk_set, context->vk_binding); + if (p->ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + local_root_size = align(local_root_size + sizeof(VkDeviceAddress), sizeof(VkDeviceAddress)); + else + local_root_size += p->Constants.Num32BitValues * sizeof(uint32_t); + continue; } - else - root_signature->root_descriptor_raw_va_mask |= 1ull << i; + + local_root_size = align(local_root_size, sizeof(VkDeviceAddress)); + root_signature->root_descriptor_raw_va_mask |= 1ull << i; + root_signature->root_parameters_raw_va_count += 1; binding = &root_signature->bindings[context->binding_index]; binding->type = 
vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType); binding->register_space = p->Descriptor.RegisterSpace; binding->register_index = p->Descriptor.ShaderRegister; binding->register_count = 1; - binding->descriptor_table = 0; /* ignored */ - binding->descriptor_offset = 0; /* ignored */ binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); - binding->flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; - binding->binding.binding = context->vk_binding; - binding->binding.set = context->vk_set; - - if (raw_va) - binding->flags |= VKD3D_SHADER_BINDING_FLAG_RAW_VA; - else if (vk_binding->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) - binding->flags |= VKD3D_SHADER_BINDING_FLAG_RAW_SSBO; + binding->binding.binding = raw_va_root_descriptor_count; param = &root_signature->parameters[i]; param->parameter_type = p->ParameterType; param->descriptor.binding = binding; param->descriptor.raw_va_root_descriptor_index = raw_va_root_descriptor_count; - context->binding_index += 1; + if (!vkd3d_array_reserve((void**)&root_signature->mappings, &root_signature->mappings_size, + root_signature->mappings_count + 1, sizeof(*root_signature->mappings))) + return E_OUTOFMEMORY; - if (raw_va) - raw_va_root_descriptor_count += 1; + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_ALL_EXT; + if (local_root_signature) + { + binding->binding.set = VKD3D_SHADER_LOCAL_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET; + mapping->descriptorSet = VKD3D_SHADER_LOCAL_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT; + mapping->sourceData.shaderRecordAddressOffset = local_root_size; + } else - context->vk_binding += 1; - } - - if (or_flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - { - vk_binding = 
&vk_binding_info[j++]; - vk_binding->binding = context->vk_binding; - vk_binding->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - vk_binding->descriptorCount = 1; - vk_binding->stageFlags = VK_SHADER_STAGE_ALL; - vk_binding->pImmutableSamplers = NULL; - - root_signature->push_constant_ubo_binding.set = context->vk_set; - root_signature->push_constant_ubo_binding.binding = context->vk_binding; + { + binding->binding.set = VKD3D_SHADER_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET; + mapping->descriptorSet = VKD3D_SHADER_ROOT_DESCRIPTORS_VIRTUAL_DESCRIPTOR_SET; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT; + mapping->sourceData.pushAddressOffset = raw_va_root_descriptor_count * sizeof(VkDeviceAddress); + } + mapping->firstBinding = raw_va_root_descriptor_count; + mapping->bindingCount = 1; - context->vk_binding += 1; + context->binding_index += 1; + raw_va_root_descriptor_count += 1; + local_root_size += sizeof(VkDeviceAddress); } -#ifdef VKD3D_ENABLE_DESCRIPTOR_QA +#if defined(VKD3D_ENABLE_DESCRIPTOR_QA) && 0 if (vkd3d_descriptor_debug_active_instruction_qa_checks()) { vk_binding = &vk_binding_info[j++]; @@ -1208,229 +897,104 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign } #endif - /* This should never happen. Min requirement for push descriptors is 32 and we can always fit into that limit. 
*/ - if (j > root_signature->device->device_info.push_descriptor_properties.maxPushDescriptors) - { - ERR("Number of descriptors %u exceeds push descriptor limit of %u.\n", - j, root_signature->device->device_info.push_descriptor_properties.maxPushDescriptors); - vkd3d_free(vk_binding_info); - return E_OUTOFMEMORY; - } - - if (j) - { - hr = vkd3d_create_descriptor_set_layout(root_signature->device, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - j, vk_binding_info, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, - vk_set_layout); - } - - vkd3d_free(vk_binding_info); return hr; } -static HRESULT d3d12_root_signature_init_local_static_samplers(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC2 *desc) -{ - unsigned int i; - HRESULT hr; - - if (!desc->NumStaticSamplers) - return S_OK; - - for (i = 0; i < desc->NumStaticSamplers; i++) - { - const D3D12_STATIC_SAMPLER_DESC1 *s = &desc->pStaticSamplers[i]; - if (FAILED(hr = vkd3d_sampler_state_create_static_sampler(&root_signature->device->sampler_state, - root_signature->device, s, &root_signature->static_samplers[i]))) - return hr; - } - - /* Cannot assign bindings until we've seen all local root signatures which go into an RTPSO. - * For now, just copy the static samplers. RTPSO creation will build appropriate bindings. 
*/ - memcpy(root_signature->static_samplers_desc, desc->pStaticSamplers, - sizeof(*root_signature->static_samplers_desc) * desc->NumStaticSamplers); - - return S_OK; -} - static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC2 *desc, struct vkd3d_descriptor_set_context *context, - VkDescriptorSetLayout *vk_set_layout) + const D3D12_ROOT_SIGNATURE_DESC2 *desc, struct vkd3d_descriptor_set_context *context) { - VkDescriptorSetLayoutBinding *vk_binding_info, *vk_binding; struct vkd3d_shader_resource_binding *binding; + bool local_root_signature; unsigned int i; - HRESULT hr; if (!desc->NumStaticSamplers) return S_OK; - if (!(vk_binding_info = vkd3d_malloc(desc->NumStaticSamplers * sizeof(*vk_binding_info)))) + local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); + + if (!vkd3d_array_reserve((void**)&root_signature->mappings, &root_signature->mappings_size, + root_signature->mappings_count + desc->NumStaticSamplers, + sizeof(*root_signature->mappings))) + return E_OUTOFMEMORY; + + root_signature->vk_static_samplers_desc = vkd3d_calloc( + desc->NumStaticSamplers, sizeof(*root_signature->vk_static_samplers_desc)); + if (!root_signature->vk_static_samplers_desc) return E_OUTOFMEMORY; for (i = 0; i < desc->NumStaticSamplers; ++i) { + VkSamplerReductionModeCreateInfoEXT *vk_reduction_info = &root_signature->vk_static_samplers_desc[i].reduction; + VkSamplerCreateInfo *vk_info = &root_signature->vk_static_samplers_desc[i].desc; const D3D12_STATIC_SAMPLER_DESC1 *s = &desc->pStaticSamplers[i]; + VkDescriptorSetAndBindingMappingEXT *mapping; - if (FAILED(hr = vkd3d_sampler_state_create_static_sampler(&root_signature->device->sampler_state, - root_signature->device, s, &root_signature->static_samplers[i]))) - goto cleanup; + vkd3d_sampler_state_init_static_sampler(&root_signature->device->sampler_state, + root_signature->device, s, vk_info, vk_reduction_info); - 
vk_binding = &vk_binding_info[i]; - vk_binding->binding = context->vk_binding; - vk_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - vk_binding->descriptorCount = 1; - vk_binding->stageFlags = vkd3d_vk_stage_flags_from_visibility(s->ShaderVisibility); - vk_binding->pImmutableSamplers = &root_signature->static_samplers[i]; + mapping = &root_signature->mappings[root_signature->mappings_count++]; + memset(mapping, 0, sizeof(*mapping)); + mapping->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_AND_BINDING_MAPPING_EXT; + if (local_root_signature) + mapping->descriptorSet = VKD3D_SHADER_STATIC_LOCAL_SAMPLERS_VIRTUAL_DESCRIPTOR_SET; + else + mapping->descriptorSet = VKD3D_SHADER_STATIC_SAMPLERS_VIRTUAL_DESCRIPTOR_SET; + mapping->bindingCount = 1; + mapping->firstBinding = i; + mapping->resourceMask = VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT; + mapping->source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT; + mapping->sourceData.constantOffset.pEmbeddedSampler = vk_info; binding = &root_signature->bindings[context->binding_index]; binding->type = VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; binding->register_space = s->RegisterSpace; binding->register_index = s->ShaderRegister; binding->register_count = 1; - binding->descriptor_table = 0; /* ignored */ - binding->descriptor_offset = 0; /* ignored */ + //binding->descriptor_table = 0; /* ignored */ + //binding->descriptor_offset = 0; /* ignored */ binding->shader_visibility = vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility); - binding->flags = VKD3D_SHADER_BINDING_FLAG_IMAGE; - binding->binding.binding = context->vk_binding; - binding->binding.set = context->vk_set; + binding->binding.binding = i; + if (local_root_signature) + binding->binding.set = VKD3D_SHADER_STATIC_LOCAL_SAMPLERS_VIRTUAL_DESCRIPTOR_SET; + else + binding->binding.set = VKD3D_SHADER_STATIC_SAMPLERS_VIRTUAL_DESCRIPTOR_SET; context->binding_index += 1; - context->vk_binding += 1; - } - - if (FAILED(hr = 
vkd3d_create_descriptor_set_layout(root_signature->device, 0, - desc->NumStaticSamplers, vk_binding_info, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT | - VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT, - &root_signature->vk_sampler_descriptor_layout))) - goto cleanup; - - /* With descriptor buffers we can implicitly bind immutable samplers, and no descriptors are necessary. */ - if (!d3d12_device_uses_descriptor_buffers(root_signature->device)) - { - hr = vkd3d_sampler_state_allocate_descriptor_set(&root_signature->device->sampler_state, - root_signature->device, root_signature->vk_sampler_descriptor_layout, - &root_signature->vk_sampler_set, &root_signature->vk_sampler_pool); } - else - hr = S_OK; -cleanup: - vkd3d_free(vk_binding_info); - return hr; + return S_OK; } -static HRESULT d3d12_root_signature_init_local(struct d3d12_root_signature *root_signature, +static HRESULT d3d12_root_signature_init_mappings(struct d3d12_root_signature *root_signature, struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC2 *desc) { - /* Local root signatures map to the ShaderRecordBufferKHR. 
*/ struct vkd3d_descriptor_set_context context; struct d3d12_root_signature_info info; + bool local_root_signature; HRESULT hr; memset(&context, 0, sizeof(context)); + if (desc->Flags & ~(D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT + | D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT + | D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS + | D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED + | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED + | D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE)) + FIXME("Ignoring root signature flags %#x.\n", desc->Flags); + if (FAILED(hr = d3d12_root_signature_info_from_desc(&info, device, desc))) return hr; -#define D3D12_MAX_SHADER_RECORD_SIZE 4096 - if (info.cost * 4 + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES > D3D12_MAX_SHADER_RECORD_SIZE) - { - ERR("Local root signature is too large.\n"); - hr = E_INVALIDARG; - goto fail; - } - - root_signature->binding_count = info.binding_count; - root_signature->parameter_count = info.parameter_count; - root_signature->static_sampler_count = desc->NumStaticSamplers; - - hr = E_OUTOFMEMORY; - if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count, - sizeof(*root_signature->parameters)))) - return hr; - if (!(root_signature->bindings = vkd3d_calloc(root_signature->binding_count, - sizeof(*root_signature->bindings)))) - return hr; - root_signature->root_constant_count = info.root_constant_count; - if (!(root_signature->root_constants = vkd3d_calloc(root_signature->root_constant_count, - sizeof(*root_signature->root_constants)))) - return hr; - if 
(!(root_signature->static_samplers = vkd3d_calloc(root_signature->static_sampler_count, - sizeof(*root_signature->static_samplers)))) - return hr; - if (!(root_signature->static_samplers_desc = vkd3d_calloc(root_signature->static_sampler_count, - sizeof(*root_signature->static_samplers_desc)))) - return hr; - - if (FAILED(hr = d3d12_root_signature_init_local_static_samplers(root_signature, desc))) - return hr; - - d3d12_root_signature_init_extra_bindings(root_signature, &info); - - if (FAILED(hr = d3d12_root_signature_init_shader_record_constants(root_signature, desc, &info))) - return hr; - if (FAILED(hr = d3d12_root_signature_init_shader_record_descriptors(root_signature, desc, &info, &context))) - return hr; - if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) - return hr; - - if (FAILED(hr = vkd3d_private_store_init(&root_signature->private_store))) - goto fail; - - return S_OK; - -fail: - return hr; -} - -static void d3d12_root_signature_update_bind_point_layout(struct d3d12_bind_point_layout *layout, - const VkPushConstantRange *push_range, const struct vkd3d_descriptor_set_context *context, - const struct d3d12_root_signature_info *info) -{ - /* Select push UBO style or push constants on a per-pipeline type basis. 
*/ - if ((layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) || info->push_descriptor_count) - layout->num_set_layouts = context->vk_set; - - if (!(layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK)) - layout->push_constant_range = *push_range; -} - -static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *root_signature, - struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC2 *desc) -{ - const VkPhysicalDeviceProperties *vk_device_properties = &device->device_info.properties2.properties; - const struct vkd3d_bindless_state *bindless_state = &device->bindless_state; - struct vkd3d_descriptor_set_context context; - VkShaderStageFlagBits mesh_shader_stages; - VkPushConstantRange push_constant_range; - struct d3d12_root_signature_info info; - unsigned int i; - HRESULT hr; - - memset(&context, 0, sizeof(context)); - - if (desc->Flags & ~(D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT - | D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT - | D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS - | D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED - | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED - | D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE)) - FIXME("Ignoring root signature flags %#x.\n", desc->Flags); - - if (FAILED(hr = d3d12_root_signature_info_from_desc(&info, device, desc))) - return hr; + local_root_signature = !!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); - if (info.cost > D3D12_MAX_ROOT_COST) + if (!local_root_signature && info.cost > D3D12_MAX_ROOT_COST) { WARN("Root 
signature cost %u exceeds maximum allowed cost.\n", info.cost); return E_INVALIDARG; @@ -1444,231 +1008,31 @@ static HRESULT d3d12_root_signature_init_global(struct d3d12_root_signature *roo if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count, sizeof(*root_signature->parameters)))) return hr; + if (!(root_signature->bindings = vkd3d_calloc(root_signature->binding_count, sizeof(*root_signature->bindings)))) return hr; + root_signature->root_constant_count = info.root_constant_count; if (!(root_signature->root_constants = vkd3d_calloc(root_signature->root_constant_count, sizeof(*root_signature->root_constants)))) return hr; - if (!(root_signature->static_samplers = vkd3d_calloc(root_signature->static_sampler_count, - sizeof(*root_signature->static_samplers)))) - return hr; - - if (!(desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) && - (vkd3d_config_flags & VKD3D_CONFIG_FLAG_EXTENDED_DEBUG_UTILS)) - { - if (!(root_signature->root_parameter_mappings = vkd3d_calloc(root_signature->parameter_count, - sizeof(*root_signature->root_parameter_mappings)))) - return hr; - } - for (i = 0; i < bindless_state->set_count; i++) - root_signature->set_layouts[context.vk_set++] = bindless_state->set_info[i].vk_set_layout; - - if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, desc, - &context, &root_signature->vk_sampler_descriptor_layout))) + if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, desc, &context))) return hr; - - if (root_signature->vk_sampler_descriptor_layout) - { - assert(context.vk_set < VKD3D_MAX_DESCRIPTOR_SETS); - root_signature->set_layouts[context.vk_set] = root_signature->vk_sampler_descriptor_layout; - root_signature->sampler_descriptor_set = context.vk_set; - - context.vk_binding = 0; - context.vk_set += 1; - } - - if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, &info, - &push_constant_range))) + if (FAILED(hr = 
d3d12_root_signature_init_push_constants(root_signature, desc, &info, &context))) return hr; - - /* If we cannot contain the push constants, fall back to push UBO everywhere. */ - if (push_constant_range.size > vk_device_properties->limits.maxPushConstantsSize) - d3d12_root_signature_add_common_flags(root_signature, VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK); - else if (push_constant_range.size && (device->bindless_state.flags & VKD3D_FORCE_COMPUTE_ROOT_PARAMETERS_PUSH_UBO)) - root_signature->compute.flags |= VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK; - - d3d12_root_signature_init_extra_bindings(root_signature, &info); - - /* Individual pipeline types may opt-in or out-of using the push UBO descriptor set. */ - root_signature->graphics.num_set_layouts = context.vk_set; - root_signature->mesh.num_set_layouts = context.vk_set; - root_signature->compute.num_set_layouts = context.vk_set; - root_signature->raygen.num_set_layouts = context.vk_set; - - if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, - &info, &push_constant_range, &context, - &root_signature->vk_root_descriptor_layout))) + if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &info, &context))) return hr; - - if (root_signature->vk_root_descriptor_layout) - { - assert(context.vk_set < VKD3D_MAX_DESCRIPTOR_SETS); - root_signature->set_layouts[context.vk_set] = root_signature->vk_root_descriptor_layout; - root_signature->root_descriptor_set = context.vk_set; - - context.vk_binding = 0; - context.vk_set += 1; - } - if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) return hr; - /* Select push UBO style or push constants on a per-pipeline type basis. 
*/ - d3d12_root_signature_update_bind_point_layout(&root_signature->graphics, - &push_constant_range, &context, &info); - d3d12_root_signature_update_bind_point_layout(&root_signature->mesh, - &push_constant_range, &context, &info); - d3d12_root_signature_update_bind_point_layout(&root_signature->compute, - &push_constant_range, &context, &info); - d3d12_root_signature_update_bind_point_layout(&root_signature->raygen, - &push_constant_range, &context, &info); - - /* If we need to use restricted entry_points in vkCmdPushConstants, - * we are unfortunately required to do it like this - * since stageFlags in vkCmdPushConstants must cover at least all entry_points in the layout. - * - * We can pick the appropriate layout to use in PSO creation. - * In set_root_signature we can bind the appropriate layout as well. - * - * For graphics we can generally rely on visibility mask, but not so for compute and raygen, - * since they use ALL visibility. */ - - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - device, root_signature->graphics.num_set_layouts, root_signature->set_layouts, - &root_signature->graphics.push_constant_range, - VK_SHADER_STAGE_ALL_GRAPHICS, &root_signature->graphics))) - return hr; - - if (device->device_info.mesh_shader_features.meshShader && device->device_info.mesh_shader_features.taskShader) - { - mesh_shader_stages = VK_SHADER_STAGE_MESH_BIT_EXT | - VK_SHADER_STAGE_TASK_BIT_EXT | - VK_SHADER_STAGE_FRAGMENT_BIT; - - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - device, root_signature->mesh.num_set_layouts, root_signature->set_layouts, - &root_signature->mesh.push_constant_range, - mesh_shader_stages, &root_signature->mesh))) - return hr; - } - - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - device, root_signature->compute.num_set_layouts, root_signature->set_layouts, - &root_signature->compute.push_constant_range, - VK_SHADER_STAGE_COMPUTE_BIT, &root_signature->compute))) - return hr; - - if 
(d3d12_device_supports_ray_tracing_tier_1_0(device)) - { - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - device, root_signature->raygen.num_set_layouts, root_signature->set_layouts, - &root_signature->raygen.push_constant_range, - VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR, &root_signature->raygen))) - return hr; - } - - return S_OK; -} - -HRESULT d3d12_root_signature_create_local_static_samplers_layout(struct d3d12_root_signature *root_signature, - VkDescriptorSetLayout vk_set_layout, VkPipelineLayout *vk_pipeline_layout) -{ - /* For RTPSOs we might have to bind a secondary static sampler set. To stay compatible with the base global RS, - * just add the descriptor set layout after the other ones. - * With this scheme, it's valid to bind resources with global RS layout, - * and then add a final vkCmdBindDescriptorSets with vk_pipeline_layout which is tied to the RTPSO. 
*/ - VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; - struct d3d12_bind_point_layout bind_point_layout; - HRESULT hr; - - if (!d3d12_device_supports_ray_tracing_tier_1_0(root_signature->device)) - return E_INVALIDARG; - - memcpy(set_layouts, root_signature->set_layouts, root_signature->raygen.num_set_layouts * sizeof(VkDescriptorSetLayout)); - set_layouts[root_signature->raygen.num_set_layouts] = vk_set_layout; - - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - root_signature->device, root_signature->raygen.num_set_layouts + 1, set_layouts, - &root_signature->raygen.push_constant_range, - VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR, &bind_point_layout))) + if (!local_root_signature && FAILED(hr = d3d12_root_signature_init_global_heaps(root_signature, &info))) return hr; - *vk_pipeline_layout = bind_point_layout.vk_pipeline_layout; - return S_OK; -} - -HRESULT d3d12_root_signature_create_work_graph_layout(struct d3d12_root_signature *root_signature, - VkDescriptorSetLayout *vk_push_set_layout, VkPipelineLayout *vk_pipeline_layout) -{ - VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; - struct d3d12_bind_point_layout bind_point_layout; - VkDescriptorSetLayoutBinding binding; - VkPushConstantRange range; - bool uses_push_ubo; - HRESULT hr; - - /* If we're already using push UBO block, we just need to modify the push range. */ - /* TODO: Local sampler set. 
*/ - uses_push_ubo = !!(root_signature->compute.flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK); - range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - range.offset = 0; - range.size = sizeof(struct vkd3d_shader_node_input_push_signature); - - if (root_signature->root_descriptor_push_mask) - { - FIXME("The root signature is already using push descriptors, cannot add another push descriptor set. Make sure to use VKD3D_CONFIG=force_raw_va_cbv on NVIDIA.\n"); - return E_INVALIDARG; - } - - if (uses_push_ubo || root_signature->compute.push_constant_range.size == 0) - { - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - root_signature->device, root_signature->compute.num_set_layouts, root_signature->set_layouts, - &range, VK_SHADER_STAGE_COMPUTE_BIT, &bind_point_layout))) - return hr; - - *vk_push_set_layout = VK_NULL_HANDLE; - *vk_pipeline_layout = bind_point_layout.vk_pipeline_layout; - } - else - { - binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.binding = 0; - binding.pImmutableSamplers = NULL; - - memcpy(set_layouts, root_signature->set_layouts, - root_signature->compute.num_set_layouts * sizeof(VkDescriptorSetLayout)); - - if (FAILED(hr = vkd3d_create_descriptor_set_layout( - root_signature->device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - 1, &binding, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, vk_push_set_layout))) - return hr; - - set_layouts[root_signature->compute.num_set_layouts] = *vk_push_set_layout; - - if (FAILED(hr = vkd3d_create_pipeline_layout_for_stage_mask( - root_signature->device, root_signature->compute.num_set_layouts + 1, set_layouts, - &range, VK_SHADER_STAGE_COMPUTE_BIT, &bind_point_layout))) - return hr; - - *vk_pipeline_layout = bind_point_layout.vk_pipeline_layout; - } + root_signature->mapping_info.sType = VK_STRUCTURE_TYPE_SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT; 
+ root_signature->mapping_info.mappingCount = root_signature->mappings_count; + root_signature->mapping_info.pMappings = root_signature->mappings; return S_OK; } @@ -1687,10 +1051,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa /* needed by some methods, increment ref count later */ root_signature->device = device; - if (desc->Flags & D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE) - hr = d3d12_root_signature_init_local(root_signature, device, desc); - else - hr = d3d12_root_signature_init_global(root_signature, device, desc); + hr = d3d12_root_signature_init_mappings(root_signature, device, desc); if (FAILED(hr)) goto fail; @@ -1823,26 +1184,18 @@ HRESULT d3d12_root_signature_create_raw(struct d3d12_device *device, return d3d12_root_signature_create_from_blob(device, payload, payload_length, true, root_signature); } -unsigned int d3d12_root_signature_get_shader_interface_flags(const struct d3d12_root_signature *root_signature, - enum vkd3d_pipeline_type pipeline_type) +unsigned int d3d12_root_signature_get_shader_interface_flags(const struct d3d12_root_signature *root_signature) { - const struct d3d12_bind_point_layout *layout; unsigned int flags = 0; - - layout = d3d12_root_signature_get_layout(root_signature, pipeline_type); - - if (layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) - flags |= VKD3D_SHADER_INTERFACE_PUSH_CONSTANTS_AS_UNIFORM_BUFFER; - - if (layout->flags & VKD3D_ROOT_SIGNATURE_USE_SSBO_OFFSET_BUFFER) - flags |= VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER; - if (layout->flags & VKD3D_ROOT_SIGNATURE_USE_TYPED_OFFSET_BUFFER) - flags |= VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER; - - if (root_signature->device->bindless_state.flags & VKD3D_BINDLESS_CBV_AS_SSBO) - flags |= VKD3D_SHADER_INTERFACE_BINDLESS_CBV_AS_STORAGE_BUFFER; - if (d3d12_device_use_embedded_mutable_descriptors(root_signature->device)) + if (VKD3D_FORCE_RAW_UAV_COUNTER || + 
root_signature->device->bindless_state.descriptor_heap_packed_raw_buffer_offset < + root_signature->device->device_info.descriptor_heap_properties.bufferDescriptorSize) + { flags |= VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER; + } + + if (root_signature->redzone_style == VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_INLINE) + flags |= VKD3D_SHADER_INTERFACE_INLINE_REDZONE_CBV; return flags; } @@ -2627,26 +1980,27 @@ static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_sta { const struct d3d12_root_signature *root_signature = state->root_signature; memset(shader_interface, 0, sizeof(*shader_interface)); - shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature, state->pipeline_type); - shader_interface->min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); - shader_interface->descriptor_tables.offset = root_signature->descriptor_table_offset; - shader_interface->descriptor_tables.count = root_signature->descriptor_table_count; + shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature); + shader_interface->min_ssbo_alignment = device->bindless_state.min_ssbo_alignment; shader_interface->bindings = root_signature->bindings; shader_interface->binding_count = root_signature->binding_count; shader_interface->push_constant_buffers = root_signature->root_constants; shader_interface->push_constant_buffer_count = root_signature->root_constant_count; + shader_interface->num_root_descriptors = root_signature->root_parameters_raw_va_count; + shader_interface->num_root_constants = root_signature->root_parameters_constant_dwords; shader_interface->root_parameter_mappings = root_signature->root_parameter_mappings; shader_interface->root_parameter_mapping_count = root_signature->root_parameter_mappings_count; shader_interface->root_signature_blob = root_signature->root_signature_blob; shader_interface->root_signature_blob_size = root_signature->root_signature_blob_size; - 
shader_interface->push_constant_ubo_binding = &root_signature->push_constant_ubo_binding; shader_interface->offset_buffer_binding = &root_signature->offset_buffer_binding; + shader_interface->descriptor_table_offset_words = root_signature->descriptor_table_offset / sizeof(uint32_t); shader_interface->stage = stage; shader_interface->xfb_info = state->pipeline_type == VKD3D_PIPELINE_TYPE_GRAPHICS && stage == state->graphics.cached_desc.xfb_stage ? state->graphics.cached_desc.xfb_info : NULL; shader_interface->descriptor_size_cbv_srv_uav = d3d12_device_get_descriptor_handle_increment_size( device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + shader_interface->descriptor_raw_va_offset = root_signature->device->bindless_state.uav_counter_embedded_offset; shader_interface->descriptor_size_sampler = d3d12_device_get_descriptor_handle_increment_size( device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); @@ -2717,13 +2071,13 @@ static void d3d12_pipeline_state_init_compile_arguments(struct d3d12_pipeline_st static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, struct d3d12_device *device, VkPipelineShaderStageCreateInfo *stage_desc, VkShaderStageFlagBits stage, VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *required_subgroup_size_info, - const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier_create_info, + VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifier_create_info, const struct vkd3d_shader_code *spirv_code) { bool override_subgroup_size = false; stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_desc->pNext = NULL; + stage_desc->pNext = &state->root_signature->mapping_info; stage_desc->flags = 0; stage_desc->stage = stage; stage_desc->pName = "main"; @@ -2734,7 +2088,10 @@ static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, stru return E_INVALIDARG; if (!spirv_code->size && identifier_create_info && identifier_create_info->identifierSize) + { + 
identifier_create_info->pNext = stage_desc->pNext; stage_desc->pNext = identifier_create_info; + } if ((spirv_code->meta.flags & VKD3D_SHADER_META_FLAG_USES_SUBGROUP_OPERATIONS) || spirv_code->meta.cs_wave_size_min) @@ -2808,6 +2165,8 @@ static HRESULT vkd3d_setup_shader_stage(struct d3d12_pipeline_state *state, stru } } + assert(state->root_signature->mapping_info.pNext == NULL); + if (spirv_code->size) return d3d12_pipeline_state_create_shader_module(device, &stage_desc->module, spirv_code); else @@ -3040,6 +2399,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, struct vkd3d_queue_timeline_trace_cookie cookie; VkPipelineCreationFeedbackEXT feedbacks[1]; VkComputePipelineCreateInfo pipeline_info; + VkPipelineCreateFlags2CreateInfo flags2; struct vkd3d_shader_spec_info spec_info; VkPipelineCreationFeedbackEXT feedback; struct vkd3d_shader_code *spirv_code; @@ -3050,14 +2410,18 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, vk_cache = state->vk_pso_cache; spirv_code = &state->compute.code; + memset(&flags2, 0, sizeof(flags2)); + flags2.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2.flags |= VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_DEBUG_UTILS) spirv_code_debug = &state->compute.code_debug; else spirv_code_debug = NULL; + memset(&pipeline_info, 0, sizeof(pipeline_info)); pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_info.pNext = NULL; - pipeline_info.flags = 0; + pipeline_info.pNext = &flags2; if (state->compute.identifier_create_info.identifierSize == 0) { @@ -3082,7 +2446,6 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, &state->compute.identifier)); } - pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; @@ -3107,13 +2470,13 @@ static 
HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, feedback_info.pipelineStageCreationFeedbackCount = 0; if (pipeline_info.stage.module == VK_NULL_HANDLE) - pipeline_info.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; + flags2.flags |= VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; - if (d3d12_device_uses_descriptor_buffers(device)) - pipeline_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + //if (d3d12_device_uses_descriptor_buffers(device)) + // pipeline_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; if (state->compute.code.meta.flags & VKD3D_SHADER_META_FLAG_DISABLE_OPTIMIZATIONS) - pipeline_info.flags |= VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + flags2.flags |= VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT; cookie = vkd3d_queue_timeline_trace_register_pso_compile(&device->queue_timeline_trace); @@ -3156,7 +2519,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, /* Fallback. 
*/ if (vr == VK_PIPELINE_COMPILE_REQUIRED) { - pipeline_info.flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; + flags2.flags &= ~VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; if (FAILED(hr = vkd3d_compile_shader_stage(state, device, VK_SHADER_STAGE_COMPUTE_BIT, code, spirv_code, spirv_code_debug))) @@ -4042,6 +3405,7 @@ VkPipeline vkd3d_vertex_input_pipeline_create(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkGraphicsPipelineLibraryCreateInfoEXT library_create_info; struct vkd3d_vertex_input_pipeline_desc desc_copy = *desc; + VkPipelineCreateFlags2CreateInfo flags2_info; VkGraphicsPipelineCreateInfo create_info; VkPipeline vk_pipeline; VkResult vr; @@ -4061,8 +3425,14 @@ VkPipeline vkd3d_vertex_input_pipeline_create(struct d3d12_device *device, create_info.pDynamicState = &desc_copy.dy_info; create_info.basePipelineIndex = -1; - if (d3d12_device_uses_descriptor_buffers(device)) - create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + //if (d3d12_device_uses_descriptor_buffers(device)) + // create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + memset(&flags2_info, 0, sizeof(flags2_info)); + flags2_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2_info.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + flags2_info.flags |= create_info.flags; + create_info.flags = 0; + vk_prepend_struct(&create_info, &flags2_info); if ((vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, VK_NULL_HANDLE, 1, &create_info, NULL, &vk_pipeline)))) @@ -4192,6 +3562,7 @@ VkPipeline vkd3d_fragment_output_pipeline_create(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_fragment_output_pipeline_desc desc_copy = *desc; VkGraphicsPipelineLibraryCreateInfoEXT library_create_info; + VkPipelineCreateFlags2CreateInfo flags2_info; VkGraphicsPipelineCreateInfo create_info; VkPipeline 
vk_pipeline; VkResult vr; @@ -4212,8 +3583,14 @@ VkPipeline vkd3d_fragment_output_pipeline_create(struct d3d12_device *device, create_info.pDynamicState = &desc_copy.dy_info; create_info.basePipelineIndex = -1; - if (d3d12_device_uses_descriptor_buffers(device)) - create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + //if (d3d12_device_uses_descriptor_buffers(device)) + // create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + memset(&flags2_info, 0, sizeof(flags2_info)); + flags2_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2_info.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + flags2_info.flags |= create_info.flags; + create_info.flags = 0; + vk_prepend_struct(&create_info, &flags2_info); if ((vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, VK_NULL_HANDLE, 1, &create_info, NULL, &vk_pipeline)))) @@ -5616,7 +4993,6 @@ static HRESULT d3d12_pipeline_state_init_static_pipeline(struct d3d12_pipeline_s can_compile_pipeline_early = true; library_flags &= ~VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT; - graphics->pipeline_layout = state->root_signature->mesh.vk_pipeline_layout; } else { @@ -5639,8 +5015,6 @@ static HRESULT d3d12_pipeline_state_init_static_pipeline(struct d3d12_pipeline_s * create a pipeline library if dynamic patch control points are unsupported. 
*/ if (has_tess && !state->device->device_info.extended_dynamic_state2_features.extendedDynamicState2PatchControlPoints) create_library = false; - - graphics->pipeline_layout = state->root_signature->graphics.vk_pipeline_layout; } graphics->pipeline = VK_NULL_HANDLE; @@ -6193,6 +5567,7 @@ static VkResult d3d12_pipeline_state_link_pipeline_variant(struct d3d12_pipeline struct vkd3d_fragment_output_pipeline_desc fragment_output_desc; struct vkd3d_vertex_input_pipeline_desc vertex_input_desc; struct vkd3d_queue_timeline_trace_cookie cookie; + VkPipelineCreateFlags2CreateInfo flags2_info; VkPipelineLibraryCreateInfoKHR library_info; VkGraphicsPipelineCreateInfo create_info; VkPipeline vk_libraries[3]; @@ -6224,11 +5599,10 @@ static VkResult d3d12_pipeline_state_link_pipeline_variant(struct d3d12_pipeline create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; create_info.pNext = &library_info; create_info.flags = graphics->library_create_flags; - create_info.layout = graphics->pipeline_layout; create_info.basePipelineIndex = -1; - if (d3d12_device_uses_descriptor_buffers(state->device)) - create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + //if (d3d12_device_uses_descriptor_buffers(state->device)) + // create_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; if (graphics->disable_optimization) create_info.flags |= VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; @@ -6241,6 +5615,13 @@ static VkResult d3d12_pipeline_state_link_pipeline_variant(struct d3d12_pipeline cookie = vkd3d_queue_timeline_trace_register_pso_compile(&state->device->queue_timeline_trace); + memset(&flags2_info, 0, sizeof(flags2_info)); + flags2_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2_info.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + flags2_info.flags |= create_info.flags; + create_info.flags = 0; + vk_prepend_struct(&create_info, &flags2_info); + vr = VK_CALL(vkCreateGraphicsPipelines(state->device->vk_device, 
vk_cache, 1, &create_info, NULL, vk_pipeline)); @@ -6278,6 +5659,7 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st VkPipelineMultisampleStateCreateInfo multisample_info; VkPipelineDynamicStateCreateInfo dynamic_create_info; struct vkd3d_queue_timeline_trace_cookie cookie; + VkPipelineCreateFlags2CreateInfo flags2_info; struct d3d12_device *device = state->device; VkGraphicsPipelineCreateInfo pipeline_desc; VkPipelineViewportStateCreateInfo vp_desc; @@ -6337,7 +5719,6 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st pipeline_desc.pRasterizationState = &graphics->rs_desc; pipeline_desc.pDepthStencilState = &graphics->ds_desc; pipeline_desc.pDynamicState = &dynamic_create_info; - pipeline_desc.layout = graphics->pipeline_layout; pipeline_desc.basePipelineIndex = -1; if (d3d12_device_supports_variable_shading_rate_tier_2(device)) @@ -6369,16 +5750,15 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st { TRACE("Compiling pipeline library for %p with flags %#x.\n", state, library_flags); + memset(&library_create_info, 0, sizeof(library_create_info)); library_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT; - /* Explicit cast to silence a constness warning, this seems to be a Vulkan header bug */ - library_create_info.pNext = (void*)pipeline_desc.pNext; library_create_info.flags = library_flags; - pipeline_desc.pNext = &library_create_info; pipeline_desc.flags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT; graphics->library_flags = library_flags; + vk_prepend_struct(&pipeline_desc, &library_create_info); } /* A workaround for SottR, which creates pipelines with DSV_UNKNOWN, but still insists on using a depth buffer. 
@@ -6425,22 +5805,27 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st if (pipeline_desc.pStages[i].module == VK_NULL_HANDLE) pipeline_desc.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; - if (d3d12_device_uses_descriptor_buffers(device)) - pipeline_desc.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; - + //if (d3d12_device_uses_descriptor_buffers(device)) + // pipeline_desc.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; if (graphics->disable_optimization) pipeline_desc.flags |= VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + memset(&flags2_info, 0, sizeof(flags2_info)); + flags2_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2_info.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + flags2_info.flags |= pipeline_desc.flags; + pipeline_desc.flags = 0; + vk_prepend_struct(&pipeline_desc, &flags2_info); + TRACE("Calling vkCreateGraphicsPipelines.\n"); if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) { feedback_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT; - feedback_info.pNext = pipeline_desc.pNext; feedback_info.pPipelineStageCreationFeedbacks = feedbacks; feedback_info.pipelineStageCreationFeedbackCount = pipeline_desc.stageCount; feedback_info.pPipelineCreationFeedback = &feedback; - pipeline_desc.pNext = &feedback_info; + vk_prepend_struct(&pipeline_desc, &feedback_info); } else feedback_info.pipelineStageCreationFeedbackCount = 0; @@ -6455,7 +5840,7 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st if (vr == VK_SUCCESS) { - if (pipeline_desc.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) + if (flags2_info.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) kind = "GFX IDENT OK"; else kind = "GFX OK"; @@ -6471,7 +5856,7 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st if (vkd3d_config_flags & 
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG) { - if (pipeline_desc.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) + if (flags2_info.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { if (vr == VK_SUCCESS) INFO("[IDENTIFIER] Successfully created graphics pipeline from identifier.\n"); @@ -6491,7 +5876,7 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st goto err; } - pipeline_desc.flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; + flags2_info.flags &= ~VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; /* Internal modules are known to be non-null now. */ pipeline_desc.pStages = state->graphics.stages; @@ -6521,7 +5906,7 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st vkd3d_report_pipeline_creation_feedback_results(&feedback_info); if (library_flags) - graphics->library_create_flags = pipeline_desc.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; + graphics->library_create_flags = flags2_info.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; err: /* Clean up any temporary SPIR-V modules we created. 
*/ @@ -6658,67 +6043,6 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta return vk_pipeline; } -static uint32_t d3d12_max_descriptor_count_from_heap_type(struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE heap_type) -{ - uint32_t count = d3d12_device_get_max_descriptor_heap_size(device, heap_type); - - if (heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && - vkd3d_descriptor_debug_active_descriptor_qa_checks()) - count += VKD3D_DESCRIPTOR_DEBUG_NUM_PAD_DESCRIPTORS; - - return count; -} - -static uint32_t d3d12_max_host_descriptor_count_from_heap_type(struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE heap_type) -{ - const VkPhysicalDeviceVulkan12Properties *limits = &device->device_info.vulkan_1_2_properties; - - switch (heap_type) - { - case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: - { - uint32_t cbv_count = device->bindless_state.flags & VKD3D_BINDLESS_CBV_AS_SSBO - ? limits->maxDescriptorSetUpdateAfterBindStorageBuffers - : limits->maxDescriptorSetUpdateAfterBindUniformBuffers; - uint32_t srv_count = limits->maxDescriptorSetUpdateAfterBindSampledImages; - uint32_t uav_count = min(limits->maxDescriptorSetUpdateAfterBindStorageBuffers, - limits->maxDescriptorSetUpdateAfterBindStorageImages); - return min(cbv_count, min(srv_count, uav_count)); - } - - case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER: - return limits->maxDescriptorSetUpdateAfterBindSamplers; - - default: - ERR("Invalid descriptor heap type %d.\n", heap_type); - return 0; - } -} - -static uint32_t vkd3d_bindless_build_mutable_type_list(VkDescriptorType *list, uint32_t bindless_flags, uint32_t set_flags) -{ - uint32_t count = 0; - - if (set_flags & VKD3D_BINDLESS_SET_MUTABLE_RAW) - { - list[count++] = (bindless_flags & VKD3D_BINDLESS_CBV_AS_SSBO) ? 
- VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - - if ((bindless_flags & VKD3D_BINDLESS_RAW_SSBO) && !(bindless_flags & VKD3D_BINDLESS_CBV_AS_SSBO)) - list[count++] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - } - - if (set_flags & VKD3D_BINDLESS_SET_MUTABLE_TYPED) - { - list[count++] = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - list[count++] = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - list[count++] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - list[count++] = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - } - - return count; -} - /* Make sure copy sizes are deducible to constants by compiler, especially the single descriptor case. * We can get a linear stream of SIMD copies this way. * Potentially we can also use alignment hints to get aligned moves here, @@ -6744,857 +6068,132 @@ VKD3D_DECL_DESCRIPTOR_COPY_SIZE(48) VKD3D_DECL_DESCRIPTOR_COPY_SIZE(64) VKD3D_DECL_DESCRIPTOR_COPY_SIZE(128) -static pfn_vkd3d_host_mapping_copy_template vkd3d_bindless_find_copy_template(uint32_t descriptor_size) -{ - switch (descriptor_size) - { - case 4: - return vkd3d_descriptor_copy_desc_4; - case 8: - return vkd3d_descriptor_copy_desc_8; - case 16: - return vkd3d_descriptor_copy_desc_16; - case 32: - return vkd3d_descriptor_copy_desc_32; - case 48: - return vkd3d_descriptor_copy_desc_48; - case 64: - return vkd3d_descriptor_copy_desc_64; - case 128: - return vkd3d_descriptor_copy_desc_128; - default: - break; - } - - return NULL; -} - -static pfn_vkd3d_host_mapping_copy_template_single vkd3d_bindless_find_copy_template_single(uint32_t descriptor_size) -{ - switch (descriptor_size) - { - case 4: - return vkd3d_descriptor_copy_desc_4_single; - case 8: - return vkd3d_descriptor_copy_desc_8_single; - case 16: - return vkd3d_descriptor_copy_desc_16_single; - case 32: - return vkd3d_descriptor_copy_desc_32_single; - case 48: - return vkd3d_descriptor_copy_desc_48_single; - case 64: - return vkd3d_descriptor_copy_desc_64_single; - case 128: - return 
vkd3d_descriptor_copy_desc_128_single; - default: - break; - } - - return NULL; -} - -static uint32_t vkd3d_get_descriptor_size_for_type(struct d3d12_device *device, VkDescriptorType vk_descriptor_type) -{ - const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props = &device->device_info.descriptor_buffer_properties; - switch (vk_descriptor_type) - { - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - return props->sampledImageDescriptorSize; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - return props->storageImageDescriptorSize; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - return props->robustUniformBufferDescriptorSize; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - return props->robustStorageBufferDescriptorSize; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - return props->robustUniformTexelBufferDescriptorSize; - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - return props->robustStorageTexelBufferDescriptorSize; - case VK_DESCRIPTOR_TYPE_SAMPLER: - return props->samplerDescriptorSize; - default: - assert(0 && "Invalid descriptor type."); - return 0; - } -} - -static uint32_t vkd3d_get_descriptor_size_for_binding(struct d3d12_device *device, - const VkDescriptorSetLayoutCreateInfo *set_layout_info, uint32_t binding_index) -{ - const VkDescriptorSetLayoutBinding *vk_binding = &set_layout_info->pBindings[binding_index]; - const VkMutableDescriptorTypeCreateInfoEXT *mutable; - const VkMutableDescriptorTypeListEXT *type_list; - uint32_t type_size; - uint32_t max_size; - uint32_t i; - - if (vk_binding->descriptorType != VK_DESCRIPTOR_TYPE_MUTABLE_EXT) - max_size = vkd3d_get_descriptor_size_for_type(device, vk_binding->descriptorType); - else - { - mutable = vk_find_pnext(set_layout_info->pNext, VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT); - type_list = &mutable->pMutableDescriptorTypeLists[binding_index]; - - max_size = 0; - for (i = 0; i < type_list->descriptorTypeCount; i++) - { - type_size = vkd3d_get_descriptor_size_for_type(device, 
type_list->pDescriptorTypes[i]); - max_size = max(max_size, type_size); - } - } - - return max_size; -} - -static HRESULT vkd3d_bindless_state_add_binding(struct vkd3d_bindless_state *bindless_state, - struct d3d12_device *device, uint32_t flags, - VkDescriptorType vk_descriptor_type, VkDescriptorType vk_init_null_descriptor_type) -{ - VkMutableDescriptorTypeListEXT mutable_descriptor_list[VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS + 1]; - struct vkd3d_bindless_set_info *set_info = &bindless_state->set_info[bindless_state->set_count]; - VkDescriptorSetLayoutBinding vk_binding_info[VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS + 1]; - VkDescriptorBindingFlags vk_binding_flags[VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS + 1]; - VkDescriptorType mutable_descriptor_types[VKD3D_MAX_MUTABLE_DESCRIPTOR_TYPES]; - VkDescriptorSetLayoutBindingFlagsCreateInfo vk_binding_flags_info; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorSetLayoutCreateInfo vk_set_layout_info; - VkMutableDescriptorTypeCreateInfoEXT mutable_info; - VkDescriptorSetLayoutBinding *vk_binding; - VkDeviceSize desc_offset; - unsigned int i; - VkResult vr; - - set_info->vk_descriptor_type = vk_descriptor_type; - set_info->vk_init_null_descriptor_type = vk_init_null_descriptor_type; - set_info->heap_type = flags & VKD3D_BINDLESS_SET_SAMPLER - ? D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER - : D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - set_info->flags = flags; - set_info->binding_index = vkd3d_popcount(flags & VKD3D_BINDLESS_SET_EXTRA_MASK); - - bindless_state->vk_descriptor_buffer_indices[bindless_state->set_count] = - set_info->heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ? 
0 : 1; - - if (set_info->heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - set_info->set_index = bindless_state->cbv_srv_uav_count++; - else - set_info->set_index = 0; - - for (i = 0; i < set_info->binding_index; i++) - { - /* all extra bindings are storage buffers right now */ - vk_binding_info[i].binding = i; - vk_binding_info[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - - /* Coerce drivers into doing what we want w.r.t. alignment. - * When we map a host pointer (page aligned) and offset it, - * we need the offset to be aligned to at least 32. - * That way we can use lower bits to encode other things. - * We cannot control exactly how drivers allocate this. - * Even if we only access a descriptor as non-arrayed descriptor, - * it is allowed to use descriptorCount > 1, - * see https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#interfaces-resources-setandbinding. - * To improve potential false sharing if different threads poke at adjacent descriptors, - * align to 64 byte. Should also improve write-combined performance. 
*/ - if (d3d12_device_use_embedded_mutable_descriptors(device) && - set_info->binding_index == 1 && - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize < 64) - { - vk_binding_info[i].descriptorCount = - 64u / device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize; - } - else - vk_binding_info[i].descriptorCount = 1; - - vk_binding_info[i].stageFlags = VK_SHADER_STAGE_ALL; - vk_binding_info[i].pImmutableSamplers = NULL; - - vk_binding_flags[i] = 0; - } - - vk_binding = &vk_binding_info[set_info->binding_index]; - vk_binding->binding = set_info->binding_index; - vk_binding->descriptorType = set_info->vk_descriptor_type; - vk_binding->descriptorCount = d3d12_max_descriptor_count_from_heap_type(device, set_info->heap_type); - vk_binding->stageFlags = VK_SHADER_STAGE_ALL; - vk_binding->pImmutableSamplers = NULL; - - if (d3d12_device_uses_descriptor_buffers(device)) - { - /* All update-after-bind features are implied when using descriptor buffers. 
*/ - vk_binding_flags[set_info->binding_index] = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - vk_set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; - } - else - { - vk_binding_flags[set_info->binding_index] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | - VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | - VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | - VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - vk_set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; - } - - vk_binding_flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; - vk_binding_flags_info.pNext = NULL; - vk_binding_flags_info.bindingCount = set_info->binding_index + 1; - vk_binding_flags_info.pBindingFlags = vk_binding_flags; - - vk_set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - vk_set_layout_info.pNext = &vk_binding_flags_info; - vk_set_layout_info.bindingCount = set_info->binding_index + 1; - vk_set_layout_info.pBindings = vk_binding_info; - - if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) - { - vk_binding_flags_info.pNext = &mutable_info; - - mutable_info.sType = VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT; - mutable_info.pNext = NULL; - mutable_info.pMutableDescriptorTypeLists = mutable_descriptor_list; - mutable_info.mutableDescriptorTypeListCount = set_info->binding_index + 1; - - memset(mutable_descriptor_list, 0, sizeof(mutable_descriptor_list)); - mutable_descriptor_list[set_info->binding_index].descriptorTypeCount = - vkd3d_bindless_build_mutable_type_list(mutable_descriptor_types, device->bindless_state.flags, flags); - mutable_descriptor_list[set_info->binding_index].pDescriptorTypes = mutable_descriptor_types; - } - - if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, - &vk_set_layout_info, NULL, &set_info->vk_set_layout))) < 0) - ERR("Failed to create descriptor set layout, vr %d.\n", vr); - - /* If we're 
able, we should implement descriptor copies with functions we roll ourselves. */ - if (d3d12_device_uses_descriptor_buffers(device)) - { - INFO("Device supports VK_EXT_descriptor_buffer!\n"); - VK_CALL(vkGetDescriptorSetLayoutBindingOffsetEXT(device->vk_device, set_info->vk_set_layout, - set_info->binding_index, &desc_offset)); - set_info->host_mapping_offset = desc_offset; - set_info->host_mapping_descriptor_size = vkd3d_get_descriptor_size_for_binding(device, - &vk_set_layout_info, set_info->binding_index); - - set_info->host_copy_template = - vkd3d_bindless_find_copy_template(set_info->host_mapping_descriptor_size); - set_info->host_copy_template_single = - vkd3d_bindless_find_copy_template_single(set_info->host_mapping_descriptor_size); - - if (!set_info->host_copy_template || !set_info->host_copy_template_single) - { - FIXME("Couldn't find suitable host copy template.\n"); - set_info->host_copy_template = NULL; - set_info->host_copy_template_single = NULL; - } - } - else - { - set_info->host_mapping_offset = 0; - set_info->host_mapping_descriptor_size = 0; - set_info->host_copy_template = NULL; - set_info->host_copy_template_single = NULL; - } - - /* If we have descriptor buffers, we don't need host descriptor set layouts at all. We'll just malloc manually. */ - if (!d3d12_device_uses_descriptor_buffers(device)) - { - vk_binding->descriptorCount = d3d12_max_host_descriptor_count_from_heap_type(device, set_info->heap_type); - - if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_TYPE) - { - /* If we have mutable descriptor extension, we will allocate these descriptors with - * HOST_BIT and not UPDATE_AFTER_BIND, since that is enough to get threading guarantees. 
*/ - vk_binding_flags[set_info->binding_index] = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - vk_set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_HOST_ONLY_POOL_BIT_EXT; - } - - if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, - &vk_set_layout_info, NULL, &set_info->vk_host_set_layout))) < 0) - ERR("Failed to create descriptor set layout, vr %d.\n", vr); - } - - bindless_state->set_count++; - - return hresult_from_vk_result(vr); -} - -uint32_t vkd3d_bindless_get_mutable_descriptor_type_size(struct d3d12_device *device) -{ - VkDescriptorType descriptor_types[VKD3D_MAX_MUTABLE_DESCRIPTOR_TYPES]; - uint32_t descriptor_type_count, i; - uint32_t max_size, type_size; - - descriptor_type_count = vkd3d_bindless_build_mutable_type_list(descriptor_types, - VKD3D_BINDLESS_RAW_SSBO, - VKD3D_BINDLESS_SET_MUTABLE_RAW | - VKD3D_BINDLESS_SET_MUTABLE_TYPED); - - max_size = 0; - for (i = 0; i < descriptor_type_count; i++) - { - type_size = vkd3d_get_descriptor_size_for_type(device, descriptor_types[i]); - max_size = max(max_size, type_size); - } - - return max_size; -} - -static uint32_t vkd3d_bindless_embedded_mutable_packed_metadata_offset(struct d3d12_device *device) -{ - const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props = &device->device_info.descriptor_buffer_properties; - uint32_t metadata_offset; - - /* Metadata is required for UAVs to implement ClearUAV. 
*/ - metadata_offset = vkd3d_bindless_embedded_mutable_raw_buffer_offset(device); - metadata_offset += max(props->robustStorageBufferDescriptorSize, props->robustUniformBufferDescriptorSize); - metadata_offset = max(metadata_offset, props->storageImageDescriptorSize); - metadata_offset = align(metadata_offset, 16); - return metadata_offset; -} - -static bool vkd3d_bindless_supports_embedded_packed_metadata(struct d3d12_device *device) -{ - return vkd3d_bindless_embedded_mutable_packed_metadata_offset(device) + - sizeof(struct vkd3d_descriptor_metadata_view) <= - vkd3d_bindless_get_mutable_descriptor_type_size(device); -} - -bool vkd3d_bindless_supports_embedded_mutable_type(struct d3d12_device *device, uint32_t flags) -{ - const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props = &device->device_info.descriptor_buffer_properties; - uint32_t max_size; - -#ifdef VKD3D_ENABLE_PROFILING - /* For now, we don't do vtable variant shenanigans for profiled devices. - * This can be fixed, but it's not that important at this time. */ - if (vkd3d_uses_profiling()) - return false; -#endif - - /* If we're using descriptor QA, we need more complex CPU VA decode to decode heap, offsets, types, etc, - * so the fast path is not feasible. */ - if (vkd3d_descriptor_debug_active_descriptor_qa_checks()) - return false; - - /* We don't want to keep metadata around for shader visible heap. - * If this can be supported on NV later, we can remove static table hoisting. */ - if (flags & VKD3D_HOIST_STATIC_TABLE_CBV) - return false; - - /* For now, assume we're not using mutable_single_set. Fewer code paths to test. - * That workaround is not needed for this style anyway. */ - if (flags & VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO) - return false; - - /* Assume we're actually using SSBOs and not typed buffer for everything. 
*/ - if (!(flags & VKD3D_BINDLESS_RAW_SSBO)) - return false; - - /* It is unsure at this time if DLSS requires us to be able to create shader image view handles - * from shader visible heap. (See d3d12_device_vkd3d_ext_GetCudaSurfaceObject.) - * That would require metadata to stick around, which we do not want. - * If this can be figured out, we can ignore this check on NV. */ - if (device->vk_info.NVX_image_view_handle) - return false; - - /* Checks if we can do some interesting shenanigans. */ - max_size = vkd3d_bindless_get_mutable_descriptor_type_size(device); - - /* The mutable size has to align to POT. */ - if (max_size & (max_size - 1)) - return false; - - /* Increment size must be large enough that we don't end up mis-decoding. - * The minimum is 32, which should match any driver that exposes the true heap. - * Image descriptors in 16 bytes is more or less impossible ... */ - if (max_size < VKD3D_RESOURCE_EMBEDDED_METADATA_OFFSET_LOG2_MASK) - return false; - - /* Sampler descriptor size has to align. */ - if (device->device_info.descriptor_buffer_properties.samplerDescriptorSize & - (device->device_info.descriptor_buffer_properties.samplerDescriptorSize - 1)) - return false; - - /* Sampler descriptor has to be at least 16 byte, so we can use fast path for copies. */ - if (device->device_info.descriptor_buffer_properties.samplerDescriptorSize < 16) - return false; - - /* If descriptor buffers must be bound at large alignment, we cannot do magic packing tricks. */ - if (device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment > - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize) - return false; - - /* The goal here is to embed all descriptor information into a single mutable element. - * We can make use of the fact that sampled images take up far more space than buffer descriptors. - * This should work if implementations expose descriptor heaps directly instead of going through - * indirections. 
- * We can bind the same descriptor buffer, at an offset with same stride to support multiple descriptor types. - * - set = 1, binding = 0: Bind descriptor buffer as SSBO. - * Pass down an extra stride parameter to shader compiler. - * VAs should be loaded with stride = max_size. - * - set = 1, binding = 1: Bind descriptor buffer with all mutable types. Place typed descriptors here. - * - set = 2, binding = 0: Bind descriptor buffer with all mutable types, but use descriptor offset equal to - * align(max(props->robustStorageTexelBufferSize, props->robustUniformTexelBufferDescriptorSize), - * props->descriptorBufferOffsetAlignment). Place untyped descriptors here. - * - Proposed layout that can fit in 32 bytes on e.g. AMD: - * - Images take up their full mutable size. - * - CBV: { NULL texel buffer, CBV (fixed offset), padding } - * - SRV buffer: SSBO / texel buffers: { Texel buffer, SSBO (fixed offset), padding } - * - UAV buffer w/o counter: SSBO / texel buffers: { Texel buffer, SSBO (fixed offset), padding } - * - UAV buffer w/ counter: { Texel buffer pointing to counter, SSBO (fixed offset), padding } - * - SRV RTAS: { RTAS ptr, padding } - */ - - /* If UAV counter is used, we pilfer the texel buffer instead of using raw VAs. - * This aids robustness in case where mismatched descriptor types or non-null resource, but null counter - * is used. This scenario is UB, and it behaves oddly on native drivers, so this is fine. */ - - /* We need the descriptor size to be at least 32, otherwise we cannot implement CPU VA encoding scheme. - * To deal with metadata on CPU-side descriptors, we will pilfer the lower 5 bits to encode an offset - * to metadata structure. - * This should be the case on all implementations that actually expose descriptors directly. */ - if (max_size < 32) - return false; - - if (max_size < sizeof(struct vkd3d_descriptor_metadata_view)) - return false; - - /* Make sure we can implement SRV buffer with side by side texel buffer and SSBO/UBO. 
*/ - if (vkd3d_bindless_embedded_mutable_raw_buffer_offset(device) + - max(props->robustStorageBufferDescriptorSize, props->robustUniformBufferDescriptorSize) > max_size) - return false; - - return true; -} - -static bool vkd3d_bindless_supports_mutable_type(struct d3d12_device *device, uint32_t bindless_flags) -{ - VkDescriptorType descriptor_types[VKD3D_MAX_MUTABLE_DESCRIPTOR_TYPES]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorSetLayoutBindingFlagsCreateInfo binding_flags; - VkMutableDescriptorTypeCreateInfoEXT mutable_info; - VkDescriptorSetLayoutCreateInfo set_layout_info; - VkMutableDescriptorTypeListVALVE mutable_list; - VkDescriptorSetLayoutSupport supported; - VkDescriptorBindingFlags binding_flag; - VkDescriptorSetLayoutBinding binding; - - binding_flag = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - if (!d3d12_device_uses_descriptor_buffers(device)) - { - binding_flag |= VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | - VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | - VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT; - } - - if (!device->device_info.mutable_descriptor_features.mutableDescriptorType) - return false; - - mutable_info.sType = VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT; - mutable_info.pNext = NULL; - mutable_info.pMutableDescriptorTypeLists = &mutable_list; - mutable_info.mutableDescriptorTypeListCount = 1; - - mutable_list.descriptorTypeCount = vkd3d_bindless_build_mutable_type_list(descriptor_types, bindless_flags, - VKD3D_BINDLESS_SET_MUTABLE_RAW | VKD3D_BINDLESS_SET_MUTABLE_TYPED); - mutable_list.pDescriptorTypes = descriptor_types; - - binding.binding = 0; - binding.descriptorType = VK_DESCRIPTOR_TYPE_MUTABLE_EXT; - binding.descriptorCount = d3d12_max_descriptor_count_from_heap_type(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - binding.pImmutableSamplers = NULL; - binding.stageFlags = VK_SHADER_STAGE_ALL; - - binding_flags.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; - binding_flags.pNext = &mutable_info; - binding_flags.bindingCount = 1; - binding_flags.pBindingFlags = &binding_flag; - - set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - set_layout_info.pNext = &binding_flags; - - if (d3d12_device_uses_descriptor_buffers(device)) - set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; - else - set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; - - set_layout_info.bindingCount = 1; - set_layout_info.pBindings = &binding; - - supported.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT; - supported.pNext = NULL; - VK_CALL(vkGetDescriptorSetLayoutSupport(device->vk_device, &set_layout_info, &supported)); - if (!supported.supported) - return false; - - if (!d3d12_device_uses_descriptor_buffers(device)) - { - set_layout_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_HOST_ONLY_POOL_BIT_EXT; - binding_flag = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - VK_CALL(vkGetDescriptorSetLayoutSupport(device->vk_device, &set_layout_info, &supported)); - } - - return supported.supported == VK_TRUE; -} - -static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *device) -{ - const struct vkd3d_physical_device_info *device_info = &device->device_info; - uint32_t flags = 0; - - if (!d3d12_device_uses_descriptor_buffers(device)) - { - if (device_info->vulkan_1_2_properties.maxPerStageDescriptorUpdateAfterBindUniformBuffers < VKD3D_MIN_VIEW_DESCRIPTOR_COUNT || - !device_info->vulkan_1_2_features.descriptorBindingUniformBufferUpdateAfterBind || - !device_info->vulkan_1_2_features.shaderUniformBufferArrayNonUniformIndexing) - flags |= VKD3D_BINDLESS_CBV_AS_SSBO; - } - - /* 16 is the cutoff due to requirements on ByteAddressBuffer. - * We need tight 16 byte robustness on those and trying to emulate that with offset buffers - * is too much of an ordeal. 
*/ - if (device_info->properties2.properties.limits.minStorageBufferOffsetAlignment <= 16) - { - flags |= VKD3D_BINDLESS_RAW_SSBO; - - /* Descriptor buffers do not support SINGLE_SET layout. - * We only enable descriptor buffers if we have verified that MUTABLE_SINGLE_SET hack is not required. */ - if (!d3d12_device_uses_descriptor_buffers(device)) - { - /* Intel GPUs have smol descriptor heaps and only way we can fit a D3D12 heap is with - * single set mutable. */ - if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET) || - device_info->properties2.properties.vendorID == VKD3D_VENDOR_ID_INTEL) - { - INFO("Enabling single descriptor set path for MUTABLE.\n"); - flags |= VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO; - } - } - - if (device_info->properties2.properties.limits.minStorageBufferOffsetAlignment > 4) - flags |= VKD3D_SSBO_OFFSET_BUFFER; - } - - /* Always use a typed offset buffer. Otherwise, we risk ending up with unbounded size on view maps. - * Fortunately, we can place descriptors directly if we have descriptor buffers, so this is not required. */ - if (!d3d12_device_uses_descriptor_buffers(device)) - flags |= VKD3D_TYPED_OFFSET_BUFFER; - - /* We must use root SRV and UAV due to alignment requirements for 16-bit storage, - * but root CBV is more lax. */ - flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV; - /* CBV's really require push descriptors on NVIDIA and Qualcomm to get maximum performance. - * The difference in performance is profound (~15% in some cases). - * On ACO, BDA with NonWritable can be promoted directly to scalar loads, - * which is great. 
*/ - if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV) || - (device_info->properties2.properties.vendorID != VKD3D_VENDOR_ID_NVIDIA && - device_info->properties2.properties.vendorID != VKD3D_VENDOR_ID_QUALCOMM)) - flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV; - - if (device_info->properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA && - !(flags & VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV)) - { - /* On NVIDIA, it's preferable to hoist CBVs to push descriptors if we can. - * Hoisting is only safe with push descriptors since we need to consider - * robustness as well for STATIC_KEEPING_BUFFER_BOUNDS_CHECKS. */ - flags |= VKD3D_HOIST_STATIC_TABLE_CBV; - } - - if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES) && - !device->device_info.device_generated_commands_compute_features_nv.deviceGeneratedCompute && - !device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) - { - INFO("Forcing push UBO path for compute root parameters.\n"); - flags |= VKD3D_FORCE_COMPUTE_ROOT_PARAMETERS_PUSH_UBO; - } - - if (device->device_info.device_generated_commands_features_ext.deviceGeneratedCommands) - { - INFO("Enabling fast paths for advanced ExecuteIndirect() graphics and compute (EXT_dgc).\n"); - } - else - { - if (device->device_info.device_generated_commands_compute_features_nv.deviceGeneratedCompute) - INFO("Enabling fast paths for advanced ExecuteIndirect() compute (NV_dgc).\n"); - if (device->device_info.device_generated_commands_features_nv.deviceGeneratedCommands) - INFO("Enabling fast paths for advanced ExecuteIndirect() graphics (NV_dgc).\n"); - } - - if (vkd3d_bindless_supports_mutable_type(device, flags)) - { - INFO("Device supports VK_%s_mutable_descriptor_type.\n", - device->vk_info.EXT_mutable_descriptor_type ? "EXT" : "VALVE"); - flags |= VKD3D_BINDLESS_MUTABLE_TYPE; - - /* If we can, opt in to extreme speed mode. 
*/ - if (d3d12_device_uses_descriptor_buffers(device) && - vkd3d_bindless_supports_embedded_mutable_type(device, flags)) - { - flags |= VKD3D_BINDLESS_MUTABLE_EMBEDDED; - INFO("Device supports ultra-fast path for descriptor copies.\n"); - - if (vkd3d_bindless_supports_embedded_packed_metadata(device)) - { - flags |= VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA; - INFO("Device supports packed metadata path for descriptor copies.\n"); - } - } - } - else - { - INFO("Device does not support VK_EXT_mutable_descriptor_type (or VALVE).\n"); - flags &= ~VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO; - } - - /* Shorthand formulation to make future checks nicer. */ - if ((flags & VKD3D_BINDLESS_MUTABLE_TYPE) && - (flags & VKD3D_BINDLESS_RAW_SSBO) && - !(flags & VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO)) - { - flags |= VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED; - } - - return flags; -} - -static void vkd3d_bindless_state_init_null_descriptor_payloads(struct vkd3d_bindless_state *bindless_state, - struct d3d12_device *device) -{ - const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props = &device->device_info.descriptor_buffer_properties; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - const struct - { - VkDescriptorType vk_descriptor_type; - uint32_t size; - } types[] = { - { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, props->sampledImageDescriptorSize }, - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, props->storageImageDescriptorSize }, - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, props->robustUniformBufferDescriptorSize }, - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, props->robustStorageBufferDescriptorSize }, - { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, props->robustUniformTexelBufferDescriptorSize }, - { VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, props->robustStorageTexelBufferDescriptorSize }, - }; - VkDescriptorGetInfoEXT get_info; - uint8_t *payload; - uint32_t i; - - bindless_state->descriptor_buffer_cbv_srv_uav_size = 0; - - get_info.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT; - get_info.pNext = NULL; - memset(&get_info.data, 0, sizeof(get_info.data)); - - for (i = 0; i < ARRAY_SIZE(types); i++) - { - payload = vkd3d_bindless_state_get_null_descriptor_payload(bindless_state, types[i].vk_descriptor_type); - - /* When we write a NULL descriptor for a given type, we actually need to embed multiple NULL descriptors - * of different types if we're using embedded mutable. - * On many GPUs, a NULL descriptor is just zero memory, but not necessarily the case. - * Write UBO -> also write a NULL texel buffer in the first bytes. - * Write SRV/UAV buffer -> Write a NULL texel buffer template. - * That template conveniently has both texel buffer + SSBO NULL descriptors. - * Note that there is no SSBO template since it's ambiguous whether to use SAMPLED or STORAGE_TEXEL_BUFFER. - * Write storage image -> potentially place a NULL SSBO in the upper half. */ - - if ((bindless_state->flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED) && - types[i].vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) - { - get_info.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustUniformTexelBufferDescriptorSize, - payload)); - payload += bindless_state->descriptor_buffer_packed_raw_buffer_offset; - } - - get_info.type = types[i].vk_descriptor_type; - - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, types[i].size, payload)); - - if (bindless_state->flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED) - { - bool write_null_ssbo = types[i].vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER || - types[i].vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - - /* If we pack SSBOs + metadata above the storage image (embedded packed metadata), - * add NULL SSBO descriptor as well. 
*/ - if (types[i].vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && - bindless_state->descriptor_buffer_packed_raw_buffer_offset >= - device->device_info.descriptor_buffer_properties.storageImageDescriptorSize) - { - write_null_ssbo = true; - } - - if (write_null_ssbo) - { - /* Buffer types are always emitted side by side. - * Emit NULL typed buffer in first half, and NULL SSBO after. - * When creating a NULL buffer descriptor we'll always use the typed template, - * since SSBO is ambiguous (we don't know UAV vs SRV necessarily). */ - payload += bindless_state->descriptor_buffer_packed_raw_buffer_offset; - get_info.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - VK_CALL(vkGetDescriptorEXT(device->vk_device, &get_info, - device->device_info.descriptor_buffer_properties.robustStorageBufferDescriptorSize, - payload)); - } - } - - bindless_state->descriptor_buffer_cbv_srv_uav_size = - max(bindless_state->descriptor_buffer_cbv_srv_uav_size, types[i].size); - } -} - HRESULT vkd3d_bindless_state_init(struct vkd3d_bindless_state *bindless_state, struct d3d12_device *device) { const struct vkd3d_physical_device_info *device_info = &device->device_info; - uint32_t extra_bindings = 0; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + uint32_t minimum_buffer_offset, minimum_metadata_offset; HRESULT hr = E_FAIL; memset(bindless_state, 0, sizeof(*bindless_state)); - bindless_state->flags = vkd3d_bindless_state_get_bindless_flags(device); - if (!device_info->vulkan_1_2_features.descriptorIndexing || - /* Some extra features not covered by descriptorIndexing meta-feature. 
*/ - !device_info->vulkan_1_2_features.shaderStorageTexelBufferArrayNonUniformIndexing || - !device_info->vulkan_1_2_features.shaderStorageImageArrayNonUniformIndexing || - !device_info->vulkan_1_2_features.descriptorBindingVariableDescriptorCount) + if (!device_info->descriptor_heap_features.descriptorHeap) { ERR("Insufficient descriptor indexing support.\n"); goto fail; } - if (!d3d12_device_uses_descriptor_buffers(device)) - { - /* UBO is optional. We can fall back to SSBO if required. */ - if (device_info->vulkan_1_2_properties.maxPerStageDescriptorUpdateAfterBindSampledImages < VKD3D_MIN_VIEW_DESCRIPTOR_COUNT || - device_info->vulkan_1_2_properties.maxPerStageDescriptorUpdateAfterBindStorageImages < VKD3D_MIN_VIEW_DESCRIPTOR_COUNT || - device_info->vulkan_1_2_properties.maxPerStageDescriptorUpdateAfterBindStorageBuffers < VKD3D_MIN_VIEW_DESCRIPTOR_COUNT) - { - ERR("Insufficient descriptor indexing support.\n"); - goto fail; - } - } + /* cbv_srv_uav_size is computed while setting up null payload. 
*/ + bindless_state->descriptor_heap_cbv_srv_uav_size = max( + device->device_info.descriptor_heap_properties.imageDescriptorSize, + device->device_info.descriptor_heap_properties.bufferDescriptorSize); + bindless_state->descriptor_heap_cbv_srv_uav_size = align(bindless_state->descriptor_heap_cbv_srv_uav_size, + max(device->device_info.descriptor_heap_properties.imageDescriptorAlignment, + device->device_info.descriptor_heap_properties.bufferDescriptorAlignment)); - extra_bindings |= VKD3D_BINDLESS_SET_EXTRA_RAW_VA_AUX_BUFFER; - if (bindless_state->flags & (VKD3D_SSBO_OFFSET_BUFFER | VKD3D_TYPED_OFFSET_BUFFER)) - extra_bindings |= VKD3D_BINDLESS_SET_EXTRA_OFFSET_BUFFER; - - if (vkd3d_descriptor_debug_active_descriptor_qa_checks()) +#if 0 + if (device->device_info.vulkan_1_2_properties.driverID == VK_DRIVER_ID_MESA_RADV) { - extra_bindings |= VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_PAYLOAD_INFO_BUFFER | - VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_CONTROL_INFO_BUFFER; + /* Temporary hack to test a few things ... */ + bindless_state->descriptor_heap_cbv_srv_uav_size = 64; } +#endif - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER))) - goto fail; - - if (bindless_state->flags & VKD3D_BINDLESS_MUTABLE_TYPE) - { - bool uses_raw_typed_split = !!(bindless_state->flags & VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED); - uint32_t flags; - - flags = VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_SRV | - VKD3D_BINDLESS_SET_BUFFER | VKD3D_BINDLESS_SET_IMAGE | - VKD3D_BINDLESS_SET_MUTABLE_TYPED | VKD3D_BINDLESS_SET_MUTABLE | - extra_bindings; - - if (!uses_raw_typed_split) - { - flags |= VKD3D_BINDLESS_SET_CBV | VKD3D_BINDLESS_SET_MUTABLE_RAW; - if (bindless_state->flags & VKD3D_BINDLESS_RAW_SSBO) - flags |= VKD3D_BINDLESS_SET_RAW_SSBO; - } - - /* Ensure that the descriptor size matches the other set, since we'll be overlaying them - * on the same memory. 
*/ - if (bindless_state->flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED) - flags |= VKD3D_BINDLESS_SET_MUTABLE_RAW; - - /* If we can, prefer to use one universal descriptor type which works for any descriptor. - * The exception is SSBOs since we need to workaround buggy applications which create typed buffers, - * but assume they can be read as untyped buffers. Move CBVs to the SSBO set as well if we go that route, - * since it works around similar app bugs. - * If we opt-in to it, we can move everything into the mutable set. */ - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, flags, - VK_DESCRIPTOR_TYPE_MUTABLE_EXT, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE))) - goto fail; - - /* We never use CBV in second set unless SSBO does as well. */ - if (uses_raw_typed_split) - { - bool use_mutable = (bindless_state->flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED) || - !(bindless_state->flags & VKD3D_BINDLESS_CBV_AS_SSBO); - - flags = VKD3D_BINDLESS_SET_UAV | - VKD3D_BINDLESS_SET_SRV | - VKD3D_BINDLESS_SET_RAW_SSBO | - VKD3D_BINDLESS_SET_CBV; + bindless_state->descriptor_heap_sampler_size = + device->device_info.descriptor_heap_properties.samplerDescriptorSize; + + bindless_state->descriptor_heap_sampler_size = align(bindless_state->descriptor_heap_sampler_size, + device->device_info.descriptor_heap_properties.samplerDescriptorAlignment); + + bindless_state->descriptor_heap_cbv_srv_uav_size_log2 = + vkd3d_log2i_ceil(bindless_state->descriptor_heap_cbv_srv_uav_size); + bindless_state->descriptor_heap_sampler_size_log2 = + vkd3d_log2i_ceil(bindless_state->descriptor_heap_sampler_size); + + /* If we cannot place two buffers size by side, we may need to pad the descriptor. + * Not all implementations support this. More recent Intel GPUs will be able to support this. 
*/ + if (device->device_info.descriptor_heap_properties.bufferDescriptorSize * 2 > + device->device_info.descriptor_heap_properties.imageDescriptorSize && + (2u << bindless_state->descriptor_heap_cbv_srv_uav_size_log2) * ((1 << 20) - (1 << 15)) <= + device->device_info.descriptor_heap_properties.maxResourceHeapSize) + { + bindless_state->descriptor_heap_cbv_srv_uav_size_log2 += 1; + } + + /* Just in case we get an implementation that wants non-POT descriptor sizes for whatever reason. */ + bindless_state->descriptor_heap_cbv_srv_uav_size = + 1u << bindless_state->descriptor_heap_cbv_srv_uav_size_log2; + bindless_state->descriptor_heap_sampler_size = + 1u << bindless_state->descriptor_heap_sampler_size_log2; + + minimum_buffer_offset = max( + device->device_info.descriptor_heap_properties.bufferDescriptorSize, + device->device_info.descriptor_heap_properties.bufferDescriptorAlignment); + + bindless_state->storage_image_size = + VK_CALL(vkGetPhysicalDeviceDescriptorSizeEXT(device->vk_physical_device, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)); + bindless_state->sampled_image_size = + VK_CALL(vkGetPhysicalDeviceDescriptorSizeEXT(device->vk_physical_device, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)); + bindless_state->storage_texel_buffer_size = + VK_CALL(vkGetPhysicalDeviceDescriptorSizeEXT(device->vk_physical_device, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)); + bindless_state->uniform_texel_buffer_size = + VK_CALL(vkGetPhysicalDeviceDescriptorSizeEXT(device->vk_physical_device, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)); + bindless_state->ubo_size = + VK_CALL(vkGetPhysicalDeviceDescriptorSizeEXT(device->vk_physical_device, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)); + bindless_state->ssbo_size = + VK_CALL(vkGetPhysicalDeviceDescriptorSizeEXT(device->vk_physical_device, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)); + + bindless_state->uav_buffer_size = max(bindless_state->storage_texel_buffer_size, bindless_state->ssbo_size); + + /* TODO: Only do this if we need it for misc reasons. 
+ * These reasons include: + * - May have to do descriptor QA + * - Descriptor heap robustness + * - Need raw VA UAV counters. */ +#if 1 + /* Support up to 4 SSBO meta buffers. */ + bindless_state->heap_redzone_size = device->device_info.descriptor_heap_properties.bufferDescriptorSize * 4; + bindless_state->heap_redzone_size = align(bindless_state->heap_redzone_size, + max(device->device_info.descriptor_heap_properties.imageDescriptorAlignment, + device->device_info.descriptor_heap_properties.bufferDescriptorAlignment)); +#endif - if (use_mutable) - flags |= VKD3D_BINDLESS_SET_MUTABLE | VKD3D_BINDLESS_SET_MUTABLE_RAW; + bindless_state->supports_universal_byte_address_ssbo = + device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 16; + bindless_state->supports_universal_structured_ssbo = + device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment <= 4; - /* Ensure that the descriptor size matches the other set, since we'll be overlaying them - * on the same memory. */ - if (bindless_state->flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED) - flags |= VKD3D_BINDLESS_SET_MUTABLE_TYPED; + bindless_state->min_ssbo_alignment = + device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment; + if (bindless_state->min_ssbo_alignment <= 4) + bindless_state->min_ssbo_alignment = 1; - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - flags, use_mutable ? 
VK_DESCRIPTOR_TYPE_MUTABLE_EXT : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER))) - goto fail; - } - } - else + if (bindless_state->supports_universal_byte_address_ssbo && + bindless_state->supports_universal_structured_ssbo && + minimum_buffer_offset * 2 <= bindless_state->descriptor_heap_cbv_srv_uav_size) { - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_CBV | extra_bindings, - vkd3d_bindless_state_get_cbv_descriptor_type(bindless_state), - vkd3d_bindless_state_get_cbv_descriptor_type(bindless_state)))) - goto fail; + uint32_t required_packed_metadata_size; + bindless_state->descriptor_heap_packed_raw_buffer_offset = bindless_state->descriptor_heap_cbv_srv_uav_size / 2; - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_BUFFER, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)) || - FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_IMAGE, - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE))) - goto fail; + minimum_metadata_offset = max(bindless_state->storage_image_size, bindless_state->storage_texel_buffer_size); + minimum_metadata_offset = max(minimum_metadata_offset, bindless_state->descriptor_heap_packed_raw_buffer_offset + + device->device_info.descriptor_heap_properties.bufferDescriptorSize); - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_BUFFER, - VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)) || - FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_IMAGE, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE))) - goto fail; + required_packed_metadata_size = minimum_metadata_offset + sizeof(struct 
vkd3d_descriptor_metadata_view); - if (bindless_state->flags & VKD3D_BINDLESS_RAW_SSBO) + if (required_packed_metadata_size <= bindless_state->descriptor_heap_cbv_srv_uav_size) { - if (FAILED(hr = vkd3d_bindless_state_add_binding(bindless_state, device, - VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_RAW_SSBO, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER))) - goto fail; + /* If we have space to sneak the metadata into upper half of the descriptor, do that. + * This is mostly relevant for RADV with 64 byte descriptors. */ + bindless_state->descriptor_heap_packed_metadata_offset = + bindless_state->descriptor_heap_cbv_srv_uav_size - sizeof(struct vkd3d_descriptor_metadata_view); } } - if (d3d12_device_uses_descriptor_buffers(device)) - { - vkd3d_bindless_state_init_null_descriptor_payloads(bindless_state, device); - /* cbv_srv_uav_size is computed while setting up null payload. */ - bindless_state->descriptor_buffer_sampler_size = - device->device_info.descriptor_buffer_properties.samplerDescriptorSize; - bindless_state->descriptor_buffer_cbv_srv_uav_size_log2 = - vkd3d_log2i(bindless_state->descriptor_buffer_cbv_srv_uav_size); - bindless_state->descriptor_buffer_sampler_size_log2 = - vkd3d_log2i(bindless_state->descriptor_buffer_sampler_size); - bindless_state->descriptor_buffer_packed_raw_buffer_offset = - vkd3d_bindless_embedded_mutable_raw_buffer_offset(device); - bindless_state->descriptor_buffer_packed_metadata_offset = - vkd3d_bindless_embedded_mutable_packed_metadata_offset(device); - } + /* Temporary hack for testing. 
*/ + bindless_state->uav_counter_embedded_offset = + bindless_state->descriptor_heap_cbv_srv_uav_size - sizeof(VkDeviceAddress); return S_OK; @@ -7606,96 +6205,9 @@ HRESULT vkd3d_bindless_state_init(struct vkd3d_bindless_state *bindless_state, void vkd3d_bindless_state_cleanup(struct vkd3d_bindless_state *bindless_state, struct d3d12_device *device) { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i; - - for (i = 0; i < bindless_state->set_count; i++) - { - VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, bindless_state->set_info[i].vk_set_layout, NULL)); - VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, bindless_state->set_info[i].vk_host_set_layout, NULL)); - } } static inline uint32_t vkd3d_bindless_state_get_extra_binding_index(uint32_t extra_flag, uint32_t set_flags) { return vkd3d_popcount(set_flags & VKD3D_BINDLESS_SET_EXTRA_MASK & (extra_flag - 1)); } - -bool vkd3d_bindless_state_find_binding(const struct vkd3d_bindless_state *bindless_state, - uint32_t flags, struct vkd3d_shader_descriptor_binding *binding) -{ - unsigned int i; - - for (i = 0; i < bindless_state->set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &bindless_state->set_info[i]; - - if ((set_info->flags & flags) == flags) - { - binding->set = i; - binding->binding = set_info->binding_index; - - if (flags & VKD3D_BINDLESS_SET_EXTRA_MASK) - binding->binding = vkd3d_bindless_state_get_extra_binding_index(flags, set_info->flags); - return true; - } - } - - return false; -} - -struct vkd3d_descriptor_binding vkd3d_bindless_state_find_set(const struct vkd3d_bindless_state *bindless_state, uint32_t flags) -{ - struct vkd3d_descriptor_binding binding; - D3D12_DESCRIPTOR_HEAP_TYPE heap_type; - unsigned int i, set_index = 0; - - heap_type = flags & VKD3D_BINDLESS_SET_SAMPLER - ? 
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER - : D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - - for (i = 0; i < bindless_state->set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &bindless_state->set_info[i]; - - if (set_info->heap_type == heap_type) - { - if ((set_info->flags & flags) == flags) - { - binding.set = set_index; - binding.binding = set_info->binding_index; - - if (flags & VKD3D_BINDLESS_SET_EXTRA_MASK) - binding.binding = vkd3d_bindless_state_get_extra_binding_index(flags, set_info->flags); - return binding; - } - - set_index++; - } - } - - ERR("No set found for flags %#x.\n", flags); - binding.set = 0; - binding.binding = 0; - return binding; -} - -uint32_t vkd3d_bindless_state_find_set_info_index(const struct vkd3d_bindless_state *bindless_state, uint32_t flags) -{ - D3D12_DESCRIPTOR_HEAP_TYPE heap_type; - unsigned int i; - - heap_type = flags & VKD3D_BINDLESS_SET_SAMPLER - ? D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER - : D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - - for (i = 0; i < bindless_state->set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &bindless_state->set_info[i]; - if (set_info->heap_type == heap_type && (set_info->flags & flags) == flags) - return i; - } - - ERR("No set found for flags %#x.\n", flags); - return 0; -} diff --git a/libs/vkd3d/state_object_common.c b/libs/vkd3d/state_object_common.c index 468ce93da9..0e1a4f1263 100644 --- a/libs/vkd3d/state_object_common.c +++ b/libs/vkd3d/state_object_common.c @@ -217,4 +217,121 @@ const struct d3d12_state_object_association *d3d12_state_object_find_association return association; } +struct vkd3d_fused_root_signature_mappings *d3d12_state_object_fuse_root_signature_mappings( + struct d3d12_root_signature *global, struct d3d12_root_signature *local) +{ + /* Need a fused mapping table. 
*/ + uint32_t num_mappings = global->mapping_info.mappingCount + local->mapping_info.mappingCount; + struct vkd3d_fused_root_signature_mappings *fused; + + fused = vkd3d_calloc(1, offsetof(struct vkd3d_fused_root_signature_mappings, mappings) + + num_mappings * sizeof(VkDescriptorSetAndBindingMappingEXT)); + fused->mapping_info.sType = VK_STRUCTURE_TYPE_SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT; + fused->mapping_info.mappingCount = num_mappings; + fused->mapping_info.pMappings = fused->mappings; + + memcpy(fused->mappings, + global->mapping_info.pMappings, + global->mapping_info.mappingCount * sizeof(*fused->mappings)); + + memcpy(fused->mappings + global->mapping_info.mappingCount, + local->mapping_info.pMappings, + local->mapping_info.mappingCount * sizeof(*fused->mappings)); + + return fused; +} + +struct vkd3d_fused_root_signature_mappings *d3d12_state_object_build_workgraph_root_signature_mappings( + struct d3d12_root_signature *global, struct d3d12_root_signature *local) +{ + struct vkd3d_fused_root_signature_mappings *fused; + uint32_t num_mappings = 0; + uint32_t i; + + if (global) + num_mappings += global->mapping_info.mappingCount; + if (local) + num_mappings += local->mapping_info.mappingCount; + + fused = vkd3d_calloc(1, offsetof(struct vkd3d_fused_root_signature_mappings, mappings) + + num_mappings * sizeof(VkDescriptorSetAndBindingMappingEXT)); + fused->mapping_info.sType = VK_STRUCTURE_TYPE_SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT; + fused->mapping_info.mappingCount = num_mappings; + fused->mapping_info.pMappings = fused->mappings; + + if (global) + { + memcpy(fused->mappings, + global->mapping_info.pMappings, + global->mapping_info.mappingCount * sizeof(*fused->mappings)); + } + + if (local) + { + memcpy(fused->mappings + (global ? global->mapping_info.mappingCount : 0), + local->mapping_info.pMappings, + local->mapping_info.mappingCount * sizeof(*fused->mappings)); + } + + /* Reroute to INDIRECT tokens instead. 
Gotta love how flexible this is :3 */ + for (i = 0; i < num_mappings; i++) + { + VkDescriptorSetAndBindingMappingEXT tmp = fused->mappings[i]; + switch (tmp.source) + { + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT: + fused->mappings[i].source = VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT; + fused->mappings[i].sourceData.pushAddressOffset = + offsetof(struct vkd3d_shader_node_input_push_signature, root_parameter_bda); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + fused->mappings[i].source = VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT; + fused->mappings[i].sourceData.indirectAddress.pushOffset = + offsetof(struct vkd3d_shader_node_input_push_signature, root_parameter_bda); + fused->mappings[i].sourceData.indirectAddress.addressOffset = tmp.sourceData.pushAddressOffset; + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: + fused->mappings[i].source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT; + memset(&fused->mappings[i].sourceData.indirectIndex, 0, sizeof(fused->mappings[i].sourceData.indirectIndex)); + fused->mappings[i].sourceData.indirectIndex.pushOffset = + offsetof(struct vkd3d_shader_node_input_push_signature, root_parameter_bda); + fused->mappings[i].sourceData.indirectIndex.heapOffset = tmp.sourceData.pushIndex.heapOffset; + fused->mappings[i].sourceData.indirectIndex.heapIndexStride = tmp.sourceData.pushIndex.heapIndexStride; + fused->mappings[i].sourceData.indirectIndex.heapArrayStride = tmp.sourceData.pushIndex.heapArrayStride; + fused->mappings[i].sourceData.indirectIndex.addressOffset = tmp.sourceData.pushIndex.pushOffset; + break; + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + fused->mappings[i].source = VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT; + fused->mappings[i].sourceData.pushAddressOffset = + offsetof(struct vkd3d_shader_node_input_push_signature, local_root_signature_bda); + if (tmp.sourceData.shaderRecordDataOffset != 0) + FIXME("WorkGraph with 
SHADER_RECORD_DATA_EXT needs explicit local root signature lowering.\n"); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: + fused->mappings[i].source = VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT; + fused->mappings[i].sourceData.indirectAddress.pushOffset = + offsetof(struct vkd3d_shader_node_input_push_signature, local_root_signature_bda); + fused->mappings[i].sourceData.indirectAddress.addressOffset = tmp.sourceData.pushAddressOffset; + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: + fused->mappings[i].source = VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT; + fused->mappings[i].sourceData.indirectIndex.pushOffset = + offsetof(struct vkd3d_shader_node_input_push_signature, local_root_signature_bda); + fused->mappings[i].sourceData.indirectIndex.heapOffset = tmp.sourceData.shaderRecordIndex.heapOffset; + fused->mappings[i].sourceData.indirectIndex.heapIndexStride = tmp.sourceData.shaderRecordIndex.heapIndexStride; + fused->mappings[i].sourceData.indirectIndex.heapArrayStride = tmp.sourceData.shaderRecordIndex.heapArrayStride; + fused->mappings[i].sourceData.indirectIndex.addressOffset = tmp.sourceData.shaderRecordIndex.shaderRecordOffset; + break; + + default: + break; + } + } + + return fused; +} diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c index 39e289f824..fcb4be6b47 100644 --- a/libs/vkd3d/swapchain.c +++ b/libs/vkd3d/swapchain.c @@ -319,7 +319,7 @@ static void dxgi_vk_swap_chain_wait_acquire_semaphore(struct dxgi_vk_swap_chain wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; wait_info.semaphore = vk_semaphore; - wait_info.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + wait_info.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT; submit_info.pSignalSemaphoreInfos = &signal_info; submit_info.signalSemaphoreInfoCount = 1; @@ -1965,7 +1965,7 @@ static void 
dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk swapchain_create_info.imageColorSpace = surface_format.colorSpace; swapchain_create_info.imageFormat = surface_format.format; swapchain_create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchain_create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + swapchain_create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; swapchain_create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; swapchain_create_info.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; swapchain_create_info.presentMode = chain->present.selected_present_mode; @@ -2139,7 +2139,7 @@ static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *cha { const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; VkRenderingAttachmentInfo attachment_info; - VkImageMemoryBarrier2 image_barrier; + VkImageMemoryBarrier2 image_barrier[2]; VkDescriptorImageInfo image_info; VkWriteDescriptorSet write_info; struct d3d12_resource *resource; @@ -2147,6 +2147,7 @@ static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *cha VkDependencyInfo dep_info; VkViewport viewport; bool blank_present; + bool blit_command; /* If application intends to present before we have rendered to it, * it is valid, but we need to ignore the blit, just clear backbuffer. */ @@ -2190,24 +2191,54 @@ static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *cha viewport.height = chain->present.backbuffer_height; } + /* Avoids having to go back to legacy heap. 
*/ + blit_command = !blank_present && + viewport.width == (float)chain->present.backbuffer_width && + viewport.height == (float)chain->present.backbuffer_height; + memset(&dep_info, 0, sizeof(dep_info)); dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; - dep_info.imageMemoryBarrierCount = 1; - dep_info.pImageMemoryBarriers = &image_barrier; + dep_info.imageMemoryBarrierCount = blit_command ? 2 : 1; + dep_info.pImageMemoryBarriers = image_barrier; /* srcStage = COLOR_ATTACHMENT to link up to acquire semaphore. */ - memset(&image_barrier, 0, sizeof(image_barrier)); - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2; - image_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; - image_barrier.dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; - image_barrier.dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; - image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.image = chain->present.vk_backbuffer_images[swapchain_index]; - image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - image_barrier.subresourceRange.levelCount = 1; - image_barrier.subresourceRange.layerCount = 1; + memset(image_barrier, 0, sizeof(image_barrier)); + image_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2; + image_barrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2; + + if (blit_command) + { + image_barrier[0].srcStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT; + image_barrier[0].dstStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT; + image_barrier[0].dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT; + image_barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + image_barrier[1].srcStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT; + image_barrier[1].dstStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT; + image_barrier[1].dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT; + 
image_barrier[1].oldLayout = chain->user.backbuffers[chain->request.user_index]->common_layout; + image_barrier[1].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + image_barrier[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier[1].image = chain->user.backbuffers[chain->request.user_index]->res.vk_image; + image_barrier[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_barrier[1].subresourceRange.levelCount = 1; + image_barrier[1].subresourceRange.layerCount = 1; + } + else + { + image_barrier[0].srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + image_barrier[0].dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + image_barrier[0].dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; + image_barrier[0].newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } + + image_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier[0].image = chain->present.vk_backbuffer_images[swapchain_index]; + image_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_barrier[0].subresourceRange.levelCount = 1; + image_barrier[0].subresourceRange.layerCount = 1; if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_DEBUG_UTILS) && chain->queue->device->vk_info.EXT_debug_utils) @@ -2224,43 +2255,86 @@ static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *cha } VK_CALL(vkCmdPipelineBarrier2(vk_cmd, &dep_info)); - VK_CALL(vkCmdBeginRendering(vk_cmd, &rendering_info)); - if (!blank_present) + if (blit_command) { - VK_CALL(vkCmdSetViewport(vk_cmd, 0, 1, &viewport)); - VK_CALL(vkCmdSetScissor(vk_cmd, 0, 1, &rendering_info.renderArea)); - VK_CALL(vkCmdBindPipeline(vk_cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + VkImageBlit blit; + memset(&blit, 0, sizeof(blit)); + blit.dstSubresource.layerCount = 1; + blit.dstSubresource.aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT; + blit.srcSubresource = blit.dstSubresource; + + blit.srcOffsets[1].x = (int)chain->desc.Width; + blit.srcOffsets[1].y = (int)chain->desc.Height; + blit.srcOffsets[1].z = 1; + + blit.dstOffsets[1].x = (int)chain->present.backbuffer_width; + blit.dstOffsets[1].y = (int)chain->present.backbuffer_height; + blit.dstOffsets[1].z = 1; + + VK_CALL(vkCmdBlitImage(vk_cmd, chain->user.backbuffers[chain->request.user_index]->res.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + chain->present.vk_backbuffer_images[swapchain_index], + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &blit, VK_FILTER_LINEAR)); + } + else + { + VK_CALL(vkCmdBeginRendering(vk_cmd, &rendering_info)); + + if (!blank_present) + { + VK_CALL(vkCmdSetViewport(vk_cmd, 0, 1, &viewport)); + VK_CALL(vkCmdSetScissor(vk_cmd, 0, 1, &rendering_info.renderArea)); + VK_CALL(vkCmdBindPipeline(vk_cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, chain->present.pipeline.vk_pipeline)); - write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write_info.pNext = NULL; - write_info.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - write_info.pBufferInfo = NULL; - write_info.dstSet = VK_NULL_HANDLE; - write_info.pTexelBufferView = NULL; - write_info.pImageInfo = &image_info; - write_info.dstBinding = 0; - write_info.dstArrayElement = 0; - write_info.descriptorCount = 1; - image_info.imageView = chain->user.vk_image_views[chain->request.user_index]; - image_info.imageLayout = d3d12_resource_pick_layout(chain->user.backbuffers[chain->request.user_index], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - image_info.sampler = VK_NULL_HANDLE; - - VK_CALL(vkCmdPushDescriptorSetKHR(vk_cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_info.pNext = NULL; + write_info.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write_info.pBufferInfo = NULL; + write_info.dstSet = VK_NULL_HANDLE; + write_info.pTexelBufferView = NULL; + 
write_info.pImageInfo = &image_info; + write_info.dstBinding = 0; + write_info.dstArrayElement = 0; + write_info.descriptorCount = 1; + image_info.imageView = chain->user.vk_image_views[chain->request.user_index]; + image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info.imageLayout = d3d12_resource_pick_layout(chain->user.backbuffers[chain->request.user_index], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + image_info.sampler = VK_NULL_HANDLE; + + /* We will never have a heap here, so it's meaningless to try to use heap. + * It's expected that driver will inherit the heap if it cares. + * We could try using vkCmdBlitImage instead when possible. */ + VK_CALL(vkCmdPushDescriptorSetKHR(vk_cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, chain->present.pipeline.vk_pipeline_layout, 0, 1, &write_info)); - VK_CALL(vkCmdDraw(vk_cmd, 3, 1, 0, 0)); + VK_CALL(vkCmdDraw(vk_cmd, 3, 1, 0, 0)); + } + + VK_CALL(vkCmdEndRendering(vk_cmd)); } - VK_CALL(vkCmdEndRendering(vk_cmd)); + if (blit_command) + { + image_barrier[1].srcStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT; + image_barrier[1].srcAccessMask = VK_ACCESS_2_NONE; + image_barrier[1].dstStageMask = VK_PIPELINE_STAGE_2_NONE; + image_barrier[1].dstAccessMask = VK_ACCESS_2_NONE; + image_barrier[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + image_barrier[1].newLayout = chain->user.backbuffers[chain->request.user_index]->common_layout; + } + else + { + image_barrier[0].srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + image_barrier[0].srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; + } - image_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; - image_barrier.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; - image_barrier.dstStageMask = VK_PIPELINE_STAGE_2_NONE; - image_barrier.dstAccessMask = VK_ACCESS_2_NONE; - image_barrier.oldLayout = image_barrier.newLayout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + image_barrier[0].dstStageMask = 
VK_PIPELINE_STAGE_2_NONE; + image_barrier[0].dstAccessMask = VK_ACCESS_2_NONE; + image_barrier[0].oldLayout = image_barrier[0].newLayout; + image_barrier[0].newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; VK_CALL(vkCmdPipelineBarrier2(vk_cmd, &dep_info)); @@ -2336,7 +2410,7 @@ static bool dxgi_vk_swap_chain_submit_blit(struct dxgi_vk_swap_chain *chain, uin memset(&wait_semaphore_info, 0, sizeof(wait_semaphore_info)); wait_semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; wait_semaphore_info.semaphore = chain->present.vk_acquire_semaphore[chain->present.acquire_semaphore_index]; - wait_semaphore_info.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + wait_semaphore_info.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT; memset(signal_semaphore_info, 0, sizeof(signal_semaphore_info)); signal_semaphore_info[0].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; diff --git a/libs/vkd3d/va_map.c b/libs/vkd3d/va_map.c index d471977d74..d213a9450f 100644 --- a/libs/vkd3d/va_map.c +++ b/libs/vkd3d/va_map.c @@ -386,5 +386,89 @@ void vkd3d_va_map_cleanup(struct vkd3d_va_map *va_map) vkd3d_va_map_cleanup_tree(&va_map->va_tree); pthread_mutex_destroy(&va_map->mutex); vkd3d_free(va_map->small_entries); + vkd3d_free(va_map->resource_mappings); + vkd3d_free(va_map->sampler_mappings); } +void vkd3d_va_map_insert_descriptor_heap(struct vkd3d_va_map *va_map, + uintptr_t va, size_t range, D3D12_DESCRIPTOR_HEAP_TYPE type) +{ + struct vkd3d_descriptor_heap_mapping *mapping; + pthread_mutex_lock(&va_map->mutex); + + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + vkd3d_array_reserve((void **)&va_map->resource_mappings, &va_map->resource_mappings_size, + va_map->resource_mappings_count + 1, sizeof(*va_map->resource_mappings)); + mapping = &va_map->resource_mappings[va_map->resource_mappings_count++]; + } + else + { + vkd3d_array_reserve((void **)&va_map->sampler_mappings, &va_map->sampler_mappings_size, + 
va_map->sampler_mappings_count + 1, sizeof(*va_map->sampler_mappings)); + mapping = &va_map->sampler_mappings[va_map->sampler_mappings_count++]; + } + + mapping->va = va; + mapping->range = range; + + pthread_mutex_unlock(&va_map->mutex); +} + +void vkd3d_va_map_remove_descriptor_heap(struct vkd3d_va_map *va_map, + uintptr_t va, D3D12_DESCRIPTOR_HEAP_TYPE type) +{ + size_t i; + + pthread_mutex_lock(&va_map->mutex); + + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + for (i = 0; i < va_map->resource_mappings_count; i++) + { + if (va_map->resource_mappings[i].va == va) + { + va_map->resource_mappings[i] = va_map->resource_mappings[--va_map->resource_mappings_count]; + break; + } + } + } + else + { + for (i = 0; i < va_map->sampler_mappings_count; i++) + { + if (va_map->sampler_mappings[i].va == va) + { + va_map->sampler_mappings[i] = va_map->sampler_mappings[--va_map->sampler_mappings_count]; + break; + } + } + } + + pthread_mutex_unlock(&va_map->mutex); +} + +size_t vkd3d_va_map_query_descriptor_heap_offset(struct vkd3d_va_map *va_map, + uintptr_t va, D3D12_DESCRIPTOR_HEAP_TYPE type) +{ + const struct vkd3d_descriptor_heap_mapping *mappings = type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + ? va_map->resource_mappings : va_map->sampler_mappings; + size_t count = type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + ? 
va_map->resource_mappings_count : va_map->sampler_mappings_count; + size_t ret = SIZE_MAX; + size_t i; + + pthread_mutex_lock(&va_map->mutex); + + for (i = 0; i < count; i++) + { + if (va >= mappings[i].va && va < mappings[i].va + mappings[i].range) + { + ret = va - mappings[i].va; + break; + } + } + + pthread_mutex_unlock(&va_map->mutex); + return ret; +} diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 8044ebe2aa..fd8c885cfe 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -85,7 +85,6 @@ struct d3d12_command_allocator; struct d3d12_device; struct d3d12_resource; -struct vkd3d_bindless_set_info; struct vkd3d_dynamic_state; struct vkd3d_vk_global_procs @@ -149,6 +148,8 @@ struct vkd3d_vulkan_info bool KHR_compute_shader_derivatives; bool KHR_calibrated_timestamps; bool KHR_cooperative_matrix; + bool KHR_shader_untyped_pointers; + bool EXT_descriptor_heap; bool KHR_unified_image_layouts; bool KHR_present_mode_fifo_latest_ready; /* EXT device extensions */ @@ -167,10 +168,8 @@ struct vkd3d_vulkan_info bool EXT_external_memory_host; bool EXT_shader_image_atomic_int64; bool EXT_mesh_shader; - bool EXT_mutable_descriptor_type; /* EXT promotion of VALVE one. 
*/ bool EXT_hdr_metadata; bool EXT_shader_module_identifier; - bool EXT_descriptor_buffer; bool EXT_pipeline_library_group_handles; bool EXT_image_sliced_view_of_3d; bool EXT_graphics_pipeline_library; @@ -201,10 +200,8 @@ struct vkd3d_vulkan_info bool NV_fragment_shader_barycentric; bool NV_compute_shader_derivatives; bool NV_device_diagnostic_checkpoints; - bool NV_device_generated_commands; bool NV_shader_subgroup_partitioned; bool NV_memory_decompression; - bool NV_device_generated_commands_compute; bool NV_low_latency2; bool NV_raw_access_chains; bool NV_cooperative_matrix2; @@ -355,6 +352,12 @@ struct vkd3d_va_range VkDeviceSize size; }; +struct vkd3d_descriptor_heap_mapping +{ + uintptr_t va; + size_t range; +}; + struct vkd3d_va_map { struct vkd3d_va_tree va_tree; @@ -364,6 +367,14 @@ struct vkd3d_va_map struct vkd3d_unique_resource **small_entries; size_t small_entries_size; size_t small_entries_count; + + struct vkd3d_descriptor_heap_mapping *resource_mappings; + size_t resource_mappings_count; + size_t resource_mappings_size; + + struct vkd3d_descriptor_heap_mapping *sampler_mappings; + size_t sampler_mappings_count; + size_t sampler_mappings_size; }; void vkd3d_va_map_insert(struct vkd3d_va_map *va_map, struct vkd3d_unique_resource *resource); @@ -382,6 +393,13 @@ VkMicromapEXT vkd3d_va_map_place_opacity_micromap(struct vkd3d_va_map *va_map, void vkd3d_va_map_init(struct vkd3d_va_map *va_map); void vkd3d_va_map_cleanup(struct vkd3d_va_map *va_map); +void vkd3d_va_map_insert_descriptor_heap(struct vkd3d_va_map *va_map, + uintptr_t va, size_t range, D3D12_DESCRIPTOR_HEAP_TYPE type); +void vkd3d_va_map_remove_descriptor_heap(struct vkd3d_va_map *va_map, + uintptr_t va, D3D12_DESCRIPTOR_HEAP_TYPE type); +size_t vkd3d_va_map_query_descriptor_heap_offset(struct vkd3d_va_map *va_map, + uintptr_t va, D3D12_DESCRIPTOR_HEAP_TYPE type); + struct vkd3d_private_store { pthread_mutex_t mutex; @@ -1356,11 +1374,7 @@ bool vkd3d_create_texture_view(struct d3d12_device 
*device, enum vkd3d_descriptor_flag { VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW = (1 << 0), - VKD3D_DESCRIPTOR_FLAG_RAW_VA_AUX_BUFFER = (1 << 1), - VKD3D_DESCRIPTOR_FLAG_BUFFER_OFFSET = (1 << 2), - VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE = (1 << 3), - VKD3D_DESCRIPTOR_FLAG_NON_NULL = (1 << 4), - VKD3D_DESCRIPTOR_FLAG_SINGLE_DESCRIPTOR = (1 << 5), + VKD3D_DESCRIPTOR_FLAG_BUFFER_VA_RANGE = (1 << 1), }; struct vkd3d_descriptor_binding @@ -1406,7 +1420,12 @@ struct vkd3d_descriptor_metadata_buffer_view struct vkd3d_descriptor_metadata_image_view { uint8_t flags; - struct vkd3d_view *view; + uint8_t vk_dimension; + uint8_t dxgi_format; + uint8_t mip_slice; + uint16_t first_array_slice; + uint16_t array_size; + uint8_t plane_slice; }; struct vkd3d_descriptor_metadata_view @@ -1512,6 +1531,7 @@ struct vkd3d_host_visible_buffer_range void *host_ptr; }; +#if 0 union vkd3d_descriptor_info { VkBufferView buffer_view; @@ -1519,6 +1539,7 @@ union vkd3d_descriptor_info VkDescriptorImageInfo image; VkDeviceAddress va; }; +#endif /* ID3D12DescriptorHeap */ struct d3d12_null_descriptor_template @@ -1564,12 +1585,9 @@ struct d3d12_descriptor_heap_set struct d3d12_descriptor_heap { - /* Used by special optimizations where we can take advantage of knowledge of the binding model - * without awkward lookups. Optimized vtable overrides define what these pointers mean. 
*/ - void *fast_pointer_bank[3]; - ID3D12DescriptorHeap ID3D12DescriptorHeap_iface; LONG refcount; + LONG internal_refcount; uint64_t gpu_va; D3D12_DESCRIPTOR_HEAP_DESC desc; @@ -1579,20 +1597,19 @@ struct d3d12_descriptor_heap { VkBuffer vk_buffer; VkDeviceAddress va; + VkDeviceSize size; + VkDeviceSize reserved_offset; struct vkd3d_device_memory_allocation device_allocation; uint8_t *host_allocation; - VkDeviceSize offsets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; } descriptor_buffer; VkDescriptorPool vk_descriptor_pool; - struct d3d12_descriptor_heap_set sets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; struct vkd3d_device_memory_allocation device_allocation; VkBuffer vk_buffer; void *host_memory; struct vkd3d_host_visible_buffer_range raw_va_aux_buffer; - struct vkd3d_host_visible_buffer_range buffer_ranges; #ifdef VKD3D_ENABLE_DESCRIPTOR_QA struct vkd3d_host_visible_buffer_range descriptor_heap_info; struct vkd3d_cookie cookie; @@ -1605,6 +1622,11 @@ struct d3d12_descriptor_heap struct vkd3d_private_store private_store; struct d3d_destruction_notifier destruction_notifier; +#define VKD3D_DESCRIPTOR_HEAP_META_DESCRIPTOR_COUNT 4096 + pthread_mutex_t meta_descriptor_lock; + uint32_t *meta_descriptor_indices; + size_t meta_descriptor_index_count; + /* Here we pack metadata data structures for CBV_SRV_UAV and SAMPLER. * For RTV/DSV heaps, we just encode rtv_desc structs inline. 
*/ DECLSPEC_ALIGN(D3D12_DESC_ALIGNMENT) BYTE descriptors[]; @@ -1615,6 +1637,11 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, void d3d12_descriptor_heap_cleanup(struct d3d12_descriptor_heap *descriptor_heap); bool d3d12_descriptor_heap_require_padding_descriptors(struct d3d12_device *device); +uint32_t d3d12_descriptor_heap_allocate_meta_index(struct d3d12_descriptor_heap *heap); +void d3d12_descriptor_heap_free_meta_index(struct d3d12_descriptor_heap *heap, uint32_t index); +uint32_t d3d12_device_find_shader_visible_descriptor_heap_offset( + struct d3d12_device *device, vkd3d_cpu_descriptor_va_t va, D3D12_DESCRIPTOR_HEAP_TYPE type); + static inline struct d3d12_descriptor_heap *impl_from_ID3D12DescriptorHeap(ID3D12DescriptorHeap *iface) { extern CONST_VTBL struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl; @@ -1642,7 +1669,8 @@ struct d3d12_desc_split_embedded struct vkd3d_descriptor_metadata_view *metadata; }; -static inline struct d3d12_desc_split_embedded d3d12_desc_decode_embedded_resource_va(vkd3d_cpu_descriptor_va_t va) +static inline struct d3d12_desc_split_embedded d3d12_desc_decode_embedded_resource_va( + vkd3d_cpu_descriptor_va_t va, uint32_t packed_metadata_offset) { struct d3d12_desc_split_embedded split; @@ -1650,6 +1678,7 @@ static inline struct d3d12_desc_split_embedded d3d12_desc_decode_embedded_resour if (log2_offset > VKD3D_RESOURCE_EMBEDDED_CACHED_MASK) { + /* Planar metadata. We implicitly know it's host visible. */ va -= log2_offset; split.payload = (uint8_t *)va; va += 1u << log2_offset; @@ -1657,10 +1686,15 @@ static inline struct d3d12_desc_split_embedded d3d12_desc_decode_embedded_resour } else { + bool is_host = (va & VKD3D_RESOURCE_EMBEDDED_CACHED_MASK) != 0; va &= ~VKD3D_RESOURCE_EMBEDDED_CACHED_MASK; - /* Shader visible VA. We don't care about metadata at this point. */ - split.metadata = NULL; split.payload = (uint8_t *)va; + + /* Metadata is only used by host heaps. 
*/ + if (is_host) + split.metadata = (struct vkd3d_descriptor_metadata_view *)(split.payload + packed_metadata_offset); + else + split.metadata = NULL; } return split; @@ -1671,8 +1705,8 @@ static inline void d3d12_desc_copy_embedded_resource(vkd3d_cpu_descriptor_va_t d { struct d3d12_desc_split_embedded dst, src; - dst = d3d12_desc_decode_embedded_resource_va(dst_va); - src = d3d12_desc_decode_embedded_resource_va(src_va); + dst = d3d12_desc_decode_embedded_resource_va(dst_va, 0); + src = d3d12_desc_decode_embedded_resource_va(src_va, 0); /* Copy metadata if we're doing CPU -> CPU descriptor copy. * Copying from GPU descriptor heap is not allowed. */ @@ -1694,8 +1728,8 @@ static inline void d3d12_desc_copy_embedded_resource_single_32(vkd3d_cpu_descrip { struct d3d12_desc_split_embedded dst, src; - dst = d3d12_desc_decode_embedded_resource_va(dst_va); - src = d3d12_desc_decode_embedded_resource_va(src_va); + dst = d3d12_desc_decode_embedded_resource_va(dst_va, 0); + src = d3d12_desc_decode_embedded_resource_va(src_va, 0); /* Copy metadata if we're doing CPU -> CPU descriptor copy. * Copying from GPU descriptor heap is not allowed. 
*/ @@ -1755,14 +1789,6 @@ static inline uint32_t d3d12_desc_heap_offset_from_gpu_handle(D3D12_GPU_DESCRIPT return (uint32_t)handle.ptr / VKD3D_RESOURCE_DESC_INCREMENT; } -static inline void *d3d12_descriptor_heap_get_mapped_payload(struct d3d12_descriptor_heap *heap, - unsigned int set_index, unsigned int desc_index) -{ - uint8_t *payload = heap->sets[set_index].mapped_set; - payload += desc_index * heap->sets[set_index].stride; - return payload; -} - /* ID3D12QueryHeap */ struct d3d12_query_heap { @@ -1819,13 +1845,6 @@ static inline bool d3d12_query_heap_type_is_inline(D3D12_QUERY_HEAP_TYPE heap_ty heap_type == D3D12_QUERY_HEAP_TYPE_SO_STATISTICS; } -enum vkd3d_root_signature_flag -{ - VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK = 0x00000001u, - VKD3D_ROOT_SIGNATURE_USE_SSBO_OFFSET_BUFFER = 0x00000002u, - VKD3D_ROOT_SIGNATURE_USE_TYPED_OFFSET_BUFFER = 0x00000004u, -}; - enum vkd3d_pipeline_type { VKD3D_PIPELINE_TYPE_NONE, @@ -1853,28 +1872,11 @@ static inline VkPipelineBindPoint vk_bind_point_from_pipeline_type(enum vkd3d_pi return VK_PIPELINE_BIND_POINT_MAX_ENUM; } -/* ID3D12RootSignature */ -struct d3d12_bind_point_layout +enum vkd3d_root_signature_heap_redzone_style { - VkPipelineLayout vk_pipeline_layout; - VkShaderStageFlags vk_push_stages; - unsigned int flags; /* vkd3d_root_signature_flag */ - uint32_t num_set_layouts; - VkPushConstantRange push_constant_range; -}; - -#define VKD3D_MAX_HOISTED_DESCRIPTORS 16 -struct vkd3d_descriptor_hoist_desc -{ - uint32_t table_index; - uint32_t table_offset; - uint32_t parameter_index; -}; - -struct vkd3d_descriptor_hoist_info -{ - struct vkd3d_descriptor_hoist_desc desc[VKD3D_MAX_HOISTED_DESCRIPTORS]; - unsigned int num_desc; + VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_NONE = 0, + VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_INLINE, + VKD3D_ROOT_SIGNATURE_HEAP_REDZONE_STYLE_DESCRIPTOR }; struct d3d12_root_signature @@ -1888,23 +1890,15 @@ struct d3d12_root_signature /* Compatiblity for ABI in RTPSOs. 
Match if the VkPipelineLayouts are equivalent. */ vkd3d_shader_hash_t layout_compatibility_hash; - struct d3d12_bind_point_layout graphics, mesh, compute, raygen; - VkDescriptorSetLayout vk_sampler_descriptor_layout; - VkDescriptorSetLayout vk_root_descriptor_layout; - - VkDescriptorPool vk_sampler_pool; - VkDescriptorSet vk_sampler_set; - struct vkd3d_shader_root_parameter *parameters; unsigned int parameter_count; - uint32_t sampler_descriptor_set; - uint32_t root_descriptor_set; - uint64_t descriptor_table_mask; uint64_t root_constant_mask; uint64_t root_descriptor_raw_va_mask; - uint64_t root_descriptor_push_mask; + + uint32_t root_parameters_raw_va_count; + uint32_t root_parameters_constant_dwords; D3D12_ROOT_SIGNATURE_FLAGS d3d12_flags; @@ -1920,7 +1914,6 @@ struct d3d12_root_signature void *root_signature_blob; size_t root_signature_blob_size; - struct vkd3d_shader_descriptor_binding push_constant_ubo_binding; struct vkd3d_shader_descriptor_binding raw_va_aux_buffer_binding; struct vkd3d_shader_descriptor_binding offset_buffer_binding; #ifdef VKD3D_ENABLE_DESCRIPTOR_QA @@ -1928,16 +1921,27 @@ struct d3d12_root_signature struct vkd3d_shader_descriptor_binding descriptor_qa_control_binding; #endif - VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; + enum vkd3d_root_signature_heap_redzone_style redzone_style; + /* Ideal: Push descriptor heap size + descriptor heap VA straight into PushData. 
*/ + uint32_t heap_redzone_inline_heap_count_offset; + uint32_t heap_redzone_inline_heap_va_offset; uint32_t descriptor_table_offset; uint32_t descriptor_table_count; unsigned int static_sampler_count; D3D12_STATIC_SAMPLER_DESC1 *static_samplers_desc; - VkSampler *static_samplers; - struct vkd3d_descriptor_hoist_info hoist_info; + VkShaderDescriptorSetAndBindingMappingInfoEXT mapping_info; + VkDescriptorSetAndBindingMappingEXT *mappings; + size_t mappings_size; + size_t mappings_count; + + struct + { + VkSamplerCreateInfo desc; + VkSamplerReductionModeCreateInfoEXT reduction; + } *vk_static_samplers_desc; struct d3d12_device *device; @@ -1964,48 +1968,10 @@ static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12R return CONTAINING_RECORD(iface, struct d3d12_root_signature, ID3D12RootSignature_iface); } -unsigned int d3d12_root_signature_get_shader_interface_flags(const struct d3d12_root_signature *root_signature, - enum vkd3d_pipeline_type pipeline_type); -HRESULT d3d12_root_signature_create_local_static_samplers_layout(struct d3d12_root_signature *root_signature, - VkDescriptorSetLayout vk_set_layout, VkPipelineLayout *vk_pipeline_layout); -HRESULT d3d12_root_signature_create_work_graph_layout(struct d3d12_root_signature *root_signature, - VkDescriptorSetLayout *vk_push_set_layout, VkPipelineLayout *vk_pipeline_layout); -HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, - unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts, - unsigned int push_constant_count, const VkPushConstantRange *push_constants, - VkPipelineLayout *pipeline_layout); +unsigned int d3d12_root_signature_get_shader_interface_flags(const struct d3d12_root_signature *root_signature); VkShaderStageFlags vkd3d_vk_stage_flags_from_visibility(D3D12_SHADER_VISIBILITY visibility); enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility); -HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device 
*device, - VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, - const VkDescriptorSetLayoutBinding *bindings, - VkDescriptorSetLayoutCreateFlags descriptor_buffer_flags, - VkDescriptorSetLayout *set_layout); - -static inline const struct d3d12_bind_point_layout *d3d12_root_signature_get_layout( - const struct d3d12_root_signature *root_signature, enum vkd3d_pipeline_type pipeline_type) -{ - switch (pipeline_type) - { - case VKD3D_PIPELINE_TYPE_NONE: - return NULL; - - case VKD3D_PIPELINE_TYPE_GRAPHICS: - return &root_signature->graphics; - - case VKD3D_PIPELINE_TYPE_MESH_GRAPHICS: - return &root_signature->mesh; - - case VKD3D_PIPELINE_TYPE_COMPUTE: - return &root_signature->compute; - - case VKD3D_PIPELINE_TYPE_RAY_TRACING: - return &root_signature->raygen; - } - - return NULL; -} static inline bool d3d12_root_signature_is_pipeline_compatible( const struct d3d12_root_signature *a, const struct d3d12_root_signature *b) @@ -2026,6 +1992,12 @@ static inline bool d3d12_root_signature_is_pipeline_compatible( static inline bool d3d12_root_signature_is_layout_compatible( const struct d3d12_root_signature *a, const struct d3d12_root_signature *b) { + /* We don't have this restriction anymore. 
*/ + (void)a; + (void)b; + return true; + +#if 0 if (a && a->layout_compatibility_hash == 0) a = NULL; if (b && b->layout_compatibility_hash == 0) @@ -2037,6 +2009,7 @@ static inline bool d3d12_root_signature_is_layout_compatible( return false; else return a->layout_compatibility_hash == b->layout_compatibility_hash; +#endif } enum vkd3d_dynamic_state_flag @@ -2216,7 +2189,6 @@ struct d3d12_graphics_pipeline_state uint32_t explicit_dynamic_states; uint32_t pipeline_dynamic_states; - VkPipelineLayout pipeline_layout; VkPipeline pipeline; VkPipeline library; VkGraphicsPipelineLibraryFlagsEXT library_flags; @@ -2602,6 +2574,12 @@ enum vkd3d_scratch_pool_kind VKD3D_SCRATCH_POOL_KIND_COUNT }; +struct vkd3d_descriptor_heap_meta_allocation +{ + struct d3d12_descriptor_heap *heap; + uint32_t index; +}; + struct d3d12_command_allocator_command_pool_list { VkCommandBuffer *command_buffers; @@ -2648,6 +2626,10 @@ struct d3d12_command_allocator size_t query_pools_size; size_t query_pool_count; + struct vkd3d_descriptor_heap_meta_allocation *meta_allocs; + size_t meta_allocs_size; + size_t meta_allocs_count; + struct vkd3d_query_pool active_query_pools[VKD3D_VIRTUAL_QUERY_TYPE_COUNT]; struct d3d12_command_list *current_command_list; @@ -2671,6 +2653,9 @@ bool d3d12_command_allocator_allocate_query_from_type_index( struct d3d12_command_allocator *allocator, uint32_t type_index, VkQueryPool *query_pool, uint32_t *query_index); +uint32_t d3d12_command_allocator_allocate_meta_index( + struct d3d12_command_allocator *allocator, struct d3d12_descriptor_heap *heap); + struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandList *iface); void d3d12_command_list_decay_tracked_state(struct d3d12_command_list *list); @@ -2687,31 +2672,25 @@ bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_alloca VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types, struct vkd3d_scratch_allocation *allocation); -enum vkd3d_pipeline_dirty_flag -{ - 
VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET = 0x00000001u, - VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS = 0x00000002u, - VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS = 0x00000004u, -}; - +#if 0 struct vkd3d_root_descriptor_info { VkDescriptorType vk_descriptor_type; union vkd3d_descriptor_info info; }; +#endif struct vkd3d_pipeline_bindings { const struct d3d12_root_signature *root_signature; - VkDescriptorSet static_sampler_set; - uint32_t dirty_flags; /* vkd3d_pipeline_dirty_flags */ - uint32_t descriptor_tables[D3D12_MAX_ROOT_COST]; - uint64_t descriptor_heap_dirty_mask; + + bool dirty_table_offsets; + bool dirty_inline_redzone; /* Needed when VK_KHR_push_descriptor is not available. */ - struct vkd3d_root_descriptor_info root_descriptors[D3D12_MAX_ROOT_COST]; + VkDeviceAddress root_descriptors_va[D3D12_MAX_ROOT_COST]; uint64_t root_descriptor_dirty_mask; uint64_t root_descriptor_active_mask; @@ -3011,22 +2990,17 @@ struct d3d12_rtas_batch_state size_t omm_usage_info_size; }; -union vkd3d_descriptor_heap_state +struct vkd3d_descriptor_heap_state { struct { - VkDeviceAddress heap_va_resource; - VkDeviceAddress heap_va_sampler; - VkBuffer vk_buffer_resource; - bool heap_dirty; - - VkDeviceSize vk_offsets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - } buffers; + VkDeviceAddress va; + VkDeviceSize size; + VkDeviceSize reserved_offset; + struct d3d12_descriptor_heap *heap; + } resource, sampler; - struct - { - VkDescriptorSet vk_sets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - } sets; + bool heap_dirty; }; struct d3d12_rtv_resolve @@ -3234,7 +3208,7 @@ struct d3d12_command_list struct vkd3d_pipeline_bindings compute_bindings; enum vkd3d_pipeline_type active_pipeline_type; - union vkd3d_descriptor_heap_state descriptor_heap; + struct vkd3d_descriptor_heap_state descriptor_heap; struct d3d12_pipeline_state *state; struct d3d12_rt_state_object *rt_state; @@ -3266,8 +3240,6 @@ struct d3d12_command_list size_t pending_queries_size; size_t pending_queries_count; - const struct 
vkd3d_descriptor_metadata_view *cbv_srv_uav_descriptors_view; - struct d3d12_resource *vrs_image; struct d3d12_resource_tracking *dsv_resource_tracking; @@ -3334,10 +3306,17 @@ void d3d12_command_list_debug_mark_begin_region( void d3d12_command_list_debug_mark_end_region(struct d3d12_command_list *list); void d3d12_command_list_invalidate_current_pipeline(struct d3d12_command_list *list, bool meta_shader); -void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list, - struct vkd3d_pipeline_bindings *bindings, bool invalidate_descriptor_heaps, - struct vkd3d_pipeline_bindings *sibling_push_domain); -void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_list *list); +void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list); +void d3d12_command_list_invalidate_descriptor_heap(struct d3d12_command_list *list); +void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list *list); + +void d3d12_command_list_meta_push_data(struct d3d12_command_list *list, + VkCommandBuffer vk_command_buffer, + VkPipelineLayout vk_pipeline_layout, VkShaderStageFlags stages, + uint32_t size, const void *data); + +void d3d12_command_list_meta_push_descriptor_index(struct d3d12_command_list *list, + VkCommandBuffer vk_command_buffer, uint32_t binding, uint32_t heap_index); union vkd3d_root_parameter_data { @@ -3732,8 +3711,6 @@ struct d3d12_command_signature VkBuffer buffer; VkDeviceAddress buffer_va; struct vkd3d_device_memory_allocation memory; - VkIndirectCommandsLayoutNV layout_implicit_nv; - VkIndirectCommandsLayoutNV layout_preprocess_nv; VkIndirectCommandsLayoutEXT layout_implicit_ext; VkIndirectCommandsLayoutEXT layout_preprocess_ext; uint32_t stride; @@ -3768,17 +3745,29 @@ static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(I return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface); } +struct vkd3d_sampler_custom_border_color +{ + 
VkBorderColor border_color; + VkClearColorValue color; + uint32_t index; +}; + /* Static samplers */ struct vkd3d_sampler_state { pthread_mutex_t mutex; - struct hash_map map; - - VkDescriptorPool *vk_descriptor_pools; - size_t vk_descriptor_pools_size; - size_t vk_descriptor_pool_count; + struct vkd3d_sampler_custom_border_color *border_colors; + size_t border_color_bank_size; + size_t border_color_count; + bool noop_registration; + uint32_t noop_registration_index; }; +uint32_t vkd3d_sampler_state_register_custom_border_color( + struct d3d12_device *device, + struct vkd3d_sampler_state *state, VkBorderColor border_color, + const VkSamplerCustomBorderColorCreateInfoEXT *info); + struct vkd3d_shader_debug_ring { VkBuffer host_buffer; @@ -3805,30 +3794,16 @@ HRESULT vkd3d_sampler_state_init(struct vkd3d_sampler_state *state, struct d3d12_device *device); void vkd3d_sampler_state_cleanup(struct vkd3d_sampler_state *state, struct d3d12_device *device); -HRESULT vkd3d_sampler_state_create_static_sampler(struct vkd3d_sampler_state *state, - struct d3d12_device *device, const D3D12_STATIC_SAMPLER_DESC1 *desc, VkSampler *vk_sampler); +void vkd3d_sampler_state_init_static_sampler(struct vkd3d_sampler_state *state, + struct d3d12_device *device, const D3D12_STATIC_SAMPLER_DESC1 *desc, + VkSamplerCreateInfo *vk_sampler_info, + VkSamplerReductionModeCreateInfoEXT *vk_reduction_mode); HRESULT vkd3d_sampler_state_allocate_descriptor_set(struct vkd3d_sampler_state *state, struct d3d12_device *device, VkDescriptorSetLayout vk_layout, VkDescriptorSet *vk_set, VkDescriptorPool *vk_pool); void vkd3d_sampler_state_free_descriptor_set(struct vkd3d_sampler_state *state, struct d3d12_device *device, VkDescriptorSet vk_set, VkDescriptorPool vk_pool); -struct vkd3d_global_descriptor_buffer -{ - struct - { - VkBuffer vk_buffer; - VkDeviceAddress va; - struct vkd3d_device_memory_allocation device_allocation; - VkBufferUsageFlags2KHR usage; - } resource, sampler; -}; - -HRESULT 
vkd3d_global_descriptor_buffer_init(struct vkd3d_global_descriptor_buffer *global_descriptor_buffer, - struct d3d12_device *device); -void vkd3d_global_descriptor_buffer_cleanup(struct vkd3d_global_descriptor_buffer *global_descriptor_buffer, - struct d3d12_device *device); - HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *state, struct d3d12_device *device); void vkd3d_shader_debug_ring_cleanup(struct vkd3d_shader_debug_ring *state, @@ -4195,22 +4170,15 @@ static inline void vkd3d_breadcrumb_buffer_copy( #endif /* VKD3D_ENABLE_BREADCRUMBS */ /* Bindless */ +#if 0 enum vkd3d_bindless_flags { - VKD3D_BINDLESS_CBV_AS_SSBO = (1u << 0), - VKD3D_BINDLESS_RAW_SSBO = (1u << 1), - VKD3D_SSBO_OFFSET_BUFFER = (1u << 2), - VKD3D_TYPED_OFFSET_BUFFER = (1u << 3), - VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV = (1u << 4), - VKD3D_RAW_VA_ROOT_DESCRIPTOR_SRV_UAV = (1u << 5), VKD3D_BINDLESS_MUTABLE_TYPE = (1u << 6), - VKD3D_HOIST_STATIC_TABLE_CBV = (1u << 7), - VKD3D_BINDLESS_MUTABLE_TYPE_RAW_SSBO = (1u << 8), VKD3D_BINDLESS_MUTABLE_EMBEDDED = (1u << 9), VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA = (1u << 10), - VKD3D_FORCE_COMPUTE_ROOT_PARAMETERS_PUSH_UBO = (1u << 11), VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED = (1u << 12), }; +#endif #define VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS 8 @@ -4228,7 +4196,6 @@ enum vkd3d_bindless_set_flag VKD3D_BINDLESS_SET_MUTABLE_TYPED = (1u << 9), VKD3D_BINDLESS_SET_EXTRA_RAW_VA_AUX_BUFFER = (1u << 24), - VKD3D_BINDLESS_SET_EXTRA_OFFSET_BUFFER = (1u << 25), VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_PAYLOAD_INFO_BUFFER = (1u << 26), VKD3D_BINDLESS_SET_EXTRA_FEEDBACK_CONTROL_INFO_BUFFER = (1u << 27), VKD3D_BINDLESS_SET_EXTRA_MASK = 0xff000000u @@ -4243,45 +4210,42 @@ enum vkd3d_bindless_state_info_indices VKD3D_BINDLESS_STATE_INFO_INDEX_MUTABLE_SINGLE = 1, }; -struct vkd3d_bindless_set_info -{ - VkDescriptorType vk_descriptor_type; - VkDescriptorType vk_init_null_descriptor_type; - D3D12_DESCRIPTOR_HEAP_TYPE heap_type; - uint32_t flags; /* 
vkd3d_bindless_set_flag */ - uint32_t set_index; - uint32_t binding_index; - - /* For VK_EXT_descriptor_buffer (or VK_VALVE_descriptor_set_host_mapping). */ - size_t host_mapping_offset; - size_t host_mapping_descriptor_size; - pfn_vkd3d_host_mapping_copy_template host_copy_template; - pfn_vkd3d_host_mapping_copy_template_single host_copy_template_single; - - VkDescriptorSetLayout vk_set_layout; - /* Unused for descriptor buffers. */ - VkDescriptorSetLayout vk_host_set_layout; -}; - struct vkd3d_bindless_state { - uint32_t flags; /* vkd3d_bindless_flags */ + //uint32_t flags; /* vkd3d_bindless_flags */ /* For descriptor buffers, pre-baked array passed directly to vkCmdBindDescriptorBuffersEXT. */ - uint32_t vk_descriptor_buffer_indices[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - struct vkd3d_bindless_set_info set_info[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - unsigned int set_count; - unsigned int cbv_srv_uav_count; + //uint32_t vk_descriptor_buffer_indices[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + //struct vkd3d_bindless_set_info set_info[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + //unsigned int set_count; + //unsigned int cbv_srv_uav_count; /* NULL descriptor payloads are not necessarily all zero. * Access the array with vkd3d_bindless_state_get_null_descriptor_payload(). 
*/ - DECLSPEC_ALIGN(16) uint8_t null_descriptor_payloads[6][VKD3D_MAX_DESCRIPTOR_SIZE]; - size_t descriptor_buffer_cbv_srv_uav_size; - size_t descriptor_buffer_sampler_size; - unsigned int descriptor_buffer_cbv_srv_uav_size_log2; - unsigned int descriptor_buffer_sampler_size_log2; - unsigned int descriptor_buffer_packed_raw_buffer_offset; - unsigned int descriptor_buffer_packed_metadata_offset; + //DECLSPEC_ALIGN(16) uint8_t null_descriptor_payloads[6][VKD3D_MAX_DESCRIPTOR_SIZE]; + + size_t descriptor_heap_cbv_srv_uav_size; + size_t descriptor_heap_sampler_size; + unsigned int descriptor_heap_cbv_srv_uav_size_log2; + unsigned int descriptor_heap_sampler_size_log2; + unsigned int descriptor_heap_packed_raw_buffer_offset; + unsigned int descriptor_heap_packed_metadata_offset; + + unsigned int storage_image_size; + unsigned int sampled_image_size; + unsigned int storage_texel_buffer_size; + unsigned int uniform_texel_buffer_size; + unsigned int ubo_size; + unsigned int ssbo_size; + unsigned int uav_buffer_size; + + /* Here we place internal descriptors and other special per-heap data. 
*/ + unsigned int heap_redzone_size; + unsigned int uav_counter_embedded_offset; + + bool supports_universal_structured_ssbo; + bool supports_universal_byte_address_ssbo; + unsigned int min_ssbo_alignment; }; HRESULT vkd3d_bindless_state_init(struct vkd3d_bindless_state *bindless_state, @@ -4294,29 +4258,9 @@ struct vkd3d_descriptor_binding vkd3d_bindless_state_find_set(const struct vkd3d uint32_t vkd3d_bindless_state_find_set_info_index(const struct vkd3d_bindless_state *bindless_state, uint32_t flags); -static inline struct vkd3d_descriptor_binding vkd3d_bindless_state_binding_from_info_index( - const struct vkd3d_bindless_state *bindless_state, uint32_t index) -{ - struct vkd3d_descriptor_binding binding; - binding.binding = bindless_state->set_info[index].binding_index; - binding.set = bindless_state->set_info[index].set_index; - return binding; -} - static inline VkDescriptorType vkd3d_bindless_state_get_cbv_descriptor_type(const struct vkd3d_bindless_state *bindless_state) { - return bindless_state->flags & VKD3D_BINDLESS_CBV_AS_SSBO - ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER - : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -} - -static inline uint8_t *vkd3d_bindless_state_get_null_descriptor_payload(struct vkd3d_bindless_state *bindless_state, - VkDescriptorType type) -{ - /* The descriptor types we care about are laid out nicely in enum-space. 
*/ - int index = type; - assert(index >= 2 && index < 8); - return bindless_state->null_descriptor_payloads[index - 2]; + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; } enum vkd3d_format_type @@ -4866,16 +4810,20 @@ struct vkd3d_meta_ops { struct d3d12_device *device; struct vkd3d_meta_ops_common common; - struct vkd3d_clear_uav_ops clear_uav; - struct vkd3d_copy_image_ops copy_image; - struct vkd3d_resolve_image_ops resolve_image; + struct vkd3d_clear_uav_ops clear_uav_heap; + struct vkd3d_clear_uav_ops clear_uav_legacy; + struct vkd3d_copy_image_ops copy_image_heap; + struct vkd3d_copy_image_ops copy_image_legacy; + struct vkd3d_resolve_image_ops resolve_image_heap; + struct vkd3d_resolve_image_ops resolve_image_legacy; struct vkd3d_swapchain_ops swapchain; struct vkd3d_query_ops query; struct vkd3d_predicate_ops predicate; struct vkd3d_execute_indirect_ops execute_indirect; struct vkd3d_multi_dispatch_indirect_ops multi_dispatch_indirect; struct vkd3d_dstorage_ops dstorage; - struct vkd3d_sampler_feedback_resolve_ops sampler_feedback; + struct vkd3d_sampler_feedback_resolve_ops sampler_feedback_heap; + struct vkd3d_sampler_feedback_resolve_ops sampler_feedback_legacy; struct vkd3d_workgraph_indirect_ops workgraph; }; @@ -4883,9 +4831,9 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device); struct vkd3d_clear_uav_pipeline vkd3d_meta_get_clear_buffer_uav_pipeline(struct vkd3d_meta_ops *meta_ops, - bool as_uint, bool raw); + bool as_uint, bool raw, bool heap); struct vkd3d_clear_uav_pipeline vkd3d_meta_get_clear_image_uav_pipeline(struct vkd3d_meta_ops *meta_ops, - VkImageViewType image_view_type, bool as_uint); + VkImageViewType image_view_type, bool as_uint, bool heap); VkExtent3D vkd3d_meta_get_clear_image_uav_workgroup_size(VkImageViewType view_type); static inline VkExtent3D vkd3d_meta_get_clear_buffer_uav_workgroup_size() @@ -4895,13 
+4843,14 @@ static inline VkExtent3D vkd3d_meta_get_clear_buffer_uav_workgroup_size() } HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_info *info); + const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_info *info, bool use_heap); VkImageViewType vkd3d_meta_get_copy_image_view_type(D3D12_RESOURCE_DIMENSION dim); const struct vkd3d_format *vkd3d_meta_get_copy_image_attachment_format(struct vkd3d_meta_ops *meta_ops, const struct vkd3d_format *dst_format, const struct vkd3d_format *src_format, VkImageAspectFlags dst_aspect, VkImageAspectFlags src_aspect); HRESULT vkd3d_meta_get_resolve_image_pipeline(struct vkd3d_meta_ops *meta_ops, - const struct vkd3d_resolve_image_pipeline_key *key, struct vkd3d_resolve_image_info *info); + const struct vkd3d_resolve_image_pipeline_key *key, struct vkd3d_resolve_image_info *info, + bool use_heap); HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops, const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_info *info); @@ -4925,7 +4874,8 @@ HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info); void vkd3d_meta_get_sampler_feedback_resolve_pipeline(struct vkd3d_meta_ops *meta_ops, - enum vkd3d_sampler_feedback_resolve_type type, struct vkd3d_sampler_feedback_resolve_info *info); + enum vkd3d_sampler_feedback_resolve_type type, struct vkd3d_sampler_feedback_resolve_info *info, + bool use_heap); static inline VkExtent3D vkd3d_meta_get_sampler_feedback_workgroup_size(void) { @@ -4983,11 +4933,9 @@ struct vkd3d_physical_device_info VkPhysicalDeviceAccelerationStructurePropertiesKHR acceleration_structure_properties; VkPhysicalDeviceFragmentShadingRatePropertiesKHR fragment_shading_rate_properties; VkPhysicalDeviceConservativeRasterizationPropertiesEXT 
conservative_rasterization_properties; - VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV device_generated_commands_properties_nv; VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT device_generated_commands_properties_ext; VkPhysicalDeviceMeshShaderPropertiesEXT mesh_shader_properties; VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT shader_module_identifier_properties; - VkPhysicalDeviceDescriptorBufferPropertiesEXT descriptor_buffer_properties; VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT graphics_pipeline_library_properties; VkPhysicalDeviceMemoryDecompressionPropertiesNV memory_decompression_properties; VkPhysicalDeviceMaintenance5PropertiesKHR maintenance_5_properties; @@ -4998,6 +4946,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceLineRasterizationPropertiesEXT line_rasterization_properties; VkPhysicalDeviceComputeShaderDerivativesPropertiesKHR compute_shader_derivatives_properties_khr; VkPhysicalDeviceCooperativeMatrixPropertiesKHR cooperative_matrix_properties; + VkPhysicalDeviceDescriptorHeapPropertiesEXT descriptor_heap_properties; VkPhysicalDeviceProperties2KHR properties2; @@ -5025,13 +4974,11 @@ struct vkd3d_physical_device_info VkPhysicalDeviceImageViewMinLodFeaturesEXT image_view_min_lod_features; VkPhysicalDeviceCoherentMemoryFeaturesAMD device_coherent_memory_features_amd; VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR ray_tracing_maintenance1_features; - VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV device_generated_commands_features_nv; VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT device_generated_commands_features_ext; VkPhysicalDeviceMeshShaderFeaturesEXT mesh_shader_features; VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT shader_module_identifier_features; VkPhysicalDevicePresentIdFeaturesKHR present_id_features; VkPhysicalDevicePresentWaitFeaturesKHR present_wait_features; - VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer_features; VkPhysicalDevicePipelineLibraryGroupHandlesFeaturesEXT 
pipeline_library_group_handles_features; VkPhysicalDeviceImageSlicedViewOf3DFeaturesEXT image_sliced_view_of_3d_features; VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT graphics_pipeline_library_features; @@ -5040,7 +4987,6 @@ struct vkd3d_physical_device_info VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT pageable_device_memory_features; VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT dynamic_rendering_unused_attachments_features; VkPhysicalDeviceMemoryDecompressionFeaturesNV memory_decompression_features; - VkPhysicalDeviceDeviceGeneratedCommandsComputeFeaturesNV device_generated_commands_compute_features_nv; VkPhysicalDeviceMaintenance5FeaturesKHR maintenance_5_features; VkPhysicalDeviceMaintenance6FeaturesKHR maintenance_6_features; VkPhysicalDeviceMaintenance7FeaturesKHR maintenance_7_features; @@ -5066,6 +5012,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceShaderFloat8FeaturesEXT shader_float8_features; VkPhysicalDeviceCooperativeMatrix2FeaturesNV cooperative_matrix2_features_nv; VkPhysicalDeviceAntiLagFeaturesAMD anti_lag_amd; + VkPhysicalDeviceDescriptorHeapFeaturesEXT descriptor_heap_features; VkPhysicalDeviceUnifiedImageLayoutsFeaturesKHR unified_image_layouts_features; VkPhysicalDeviceShaderMixedFloatDotProductFeaturesVALVE shader_mixed_float_dot_product_features; VkPhysicalDevicePresentModeFifoLatestReadyFeaturesKHR present_mode_fifo_latest_ready_features; @@ -5513,7 +5460,6 @@ struct d3d12_device struct vkd3d_sampler_state sampler_state; struct vkd3d_shader_debug_ring debug_ring; struct vkd3d_pipeline_library_disk_cache disk_cache; - struct vkd3d_global_descriptor_buffer global_descriptor_buffer; struct vkd3d_address_binding_tracker address_binding_tracker; rwlock_t vertex_input_lock; struct hash_map vertex_input_pipelines; @@ -5605,11 +5551,6 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk uint64_t d3d12_device_get_descriptor_heap_gpu_va(struct d3d12_device *device, 
D3D12_DESCRIPTOR_HEAP_TYPE type); void d3d12_device_return_descriptor_heap_gpu_va(struct d3d12_device *device, uint64_t va); -static inline bool d3d12_device_uses_descriptor_buffers(const struct d3d12_device *device) -{ - return device->global_descriptor_buffer.resource.va != 0; -} - static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties) { if (properties->Type == D3D12_HEAP_TYPE_DEFAULT) @@ -5637,6 +5578,7 @@ static inline bool is_cpu_accessible_system_memory_heap(const D3D12_HEAP_PROPERT return true; } +#if 0 static inline uint32_t vkd3d_bindless_state_find_set_info_index_fast(struct d3d12_device *device, enum vkd3d_bindless_state_info_indices split_type, uint32_t fallback_lookup_types) { @@ -5647,6 +5589,7 @@ static inline uint32_t vkd3d_bindless_state_find_set_info_index_fast(struct d3d1 else return vkd3d_bindless_state_find_set_info_index(&device->bindless_state, fallback_lookup_types); } +#endif static inline const struct vkd3d_memory_info_domain *d3d12_device_get_memory_info_domain( struct d3d12_device *device, @@ -5682,59 +5625,6 @@ static inline ULONG d3d12_device_release(struct d3d12_device *device) return refcount; } -static inline bool d3d12_device_use_embedded_mutable_descriptors(struct d3d12_device *device) -{ - return (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED) != 0; -} - -struct d3d12_desc_split_metadata -{ - struct vkd3d_descriptor_metadata_view *view; - struct vkd3d_descriptor_metadata_types *types; -}; - -static inline struct d3d12_desc_split_metadata d3d12_desc_decode_metadata( - struct d3d12_device *device, vkd3d_cpu_descriptor_va_t va) -{ - struct d3d12_desc_split_metadata meta; - - if (d3d12_device_use_embedded_mutable_descriptors(device)) - { - /* If the descriptor is large enough we can just inline the metadata side by side with the actual descriptor. - * If the descriptor is smaller, we can use the planar method where we encode log2 offset. 
*/ - if (device->bindless_state.flags & VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA) - { - struct vkd3d_descriptor_metadata_view *m; - va &= ~VKD3D_RESOURCE_EMBEDDED_CACHED_MASK; - m = (void *)(uintptr_t)(va + device->bindless_state.descriptor_buffer_packed_metadata_offset); - meta.view = m; - meta.types = NULL; - } - else - { - struct d3d12_desc_split_embedded d = d3d12_desc_decode_embedded_resource_va(va); - if (d.metadata) - { - meta.view = d.metadata; - meta.types = NULL; - } - else - { - meta.view = NULL; - meta.types = NULL; - } - } - } - else - { - struct d3d12_desc_split d = d3d12_desc_decode_va(va); - meta.view = d.view; - meta.types = d.types; - } - - return meta; -} - static inline unsigned int d3d12_device_get_descriptor_handle_increment_size( struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) @@ -5742,11 +5632,9 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size( switch (descriptor_heap_type) { case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: - return d3d12_device_use_embedded_mutable_descriptors(device) ? - device->bindless_state.descriptor_buffer_cbv_srv_uav_size : VKD3D_RESOURCE_DESC_INCREMENT; + return device->bindless_state.descriptor_heap_cbv_srv_uav_size; case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER: - return d3d12_device_use_embedded_mutable_descriptors(device) ? 
- device->bindless_state.descriptor_buffer_sampler_size : VKD3D_RESOURCE_DESC_INCREMENT; + return device->bindless_state.descriptor_heap_sampler_size; case D3D12_DESCRIPTOR_HEAP_TYPE_RTV: case D3D12_DESCRIPTOR_HEAP_TYPE_DSV: @@ -5761,6 +5649,7 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size( uint32_t vkd3d_bindless_get_mutable_descriptor_type_size(struct d3d12_device *device); bool vkd3d_bindless_supports_embedded_mutable_type(struct d3d12_device *device, uint32_t flags); +#if 0 static inline uint32_t vkd3d_bindless_embedded_mutable_raw_buffer_offset(struct d3d12_device *device) { const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props = &device->device_info.descriptor_buffer_properties; @@ -5776,24 +5665,7 @@ static inline uint32_t vkd3d_bindless_embedded_mutable_raw_buffer_offset(struct raw_buffer_descriptor_offset = align(texel_buffer_size, props->descriptorBufferOffsetAlignment); return raw_buffer_descriptor_offset; } - -static inline bool d3d12_device_use_ssbo_raw_buffer(struct d3d12_device *device) -{ - return (device->bindless_state.flags & VKD3D_BINDLESS_RAW_SSBO) != 0; -} - -static inline VkDeviceSize d3d12_device_get_ssbo_alignment(struct d3d12_device *device) -{ - return device->device_info.properties2.properties.limits.minStorageBufferOffsetAlignment; -} - -static inline bool d3d12_device_use_ssbo_root_descriptors(struct d3d12_device *device) -{ - /* We only know the VA of root SRV/UAVs, so we cannot - * make any better assumptions about the alignment */ - return d3d12_device_use_ssbo_raw_buffer(device) && - d3d12_device_get_ssbo_alignment(device) <= 4; -} +#endif bool d3d12_device_supports_variable_shading_rate_tier_1(struct d3d12_device *device); bool d3d12_device_supports_variable_shading_rate_tier_2(struct d3d12_device *device); @@ -5932,16 +5804,16 @@ struct d3d12_rt_state_object_variant uint32_t stages_count; uint32_t groups_count; +#if 0 struct { VkDescriptorSetLayout set_layout; VkPipelineLayout 
pipeline_layout; - VkDescriptorSet desc_set; - VkDescriptorPool desc_pool; uint32_t set_index; uint64_t compatibility_hash; bool owned_handles; } local_static_sampler; +#endif }; struct d3d12_rt_state_object_pipeline_data; @@ -6035,6 +5907,17 @@ HRESULT d3d12_rt_state_object_add(struct d3d12_device *device, const D3D12_STATE struct d3d12_rt_state_object *parent, struct d3d12_rt_state_object **object); +struct vkd3d_fused_root_signature_mappings +{ + VkShaderDescriptorSetAndBindingMappingInfoEXT mapping_info; + VkDescriptorSetAndBindingMappingEXT mappings[]; +}; +struct vkd3d_fused_root_signature_mappings *d3d12_state_object_fuse_root_signature_mappings( + struct d3d12_root_signature *global, struct d3d12_root_signature *local); + +struct vkd3d_fused_root_signature_mappings *d3d12_state_object_build_workgraph_root_signature_mappings( + struct d3d12_root_signature *global, struct d3d12_root_signature *local); + static inline struct d3d12_rt_state_object *rt_impl_from_ID3D12StateObject(ID3D12StateObject *iface) { return CONTAINING_RECORD(iface, struct d3d12_rt_state_object, ID3D12StateObject_iface); @@ -6443,6 +6326,18 @@ static inline unsigned int d3d12_resource_get_sub_resource_count(const struct d3 (resource->format ? vkd3d_popcount(resource->format->vk_aspect_mask) : 1); } +struct vkd3d_texture_view_create_info +{ + VkImageViewUsageCreateInfo image_usage_create_info; + VkImageViewMinLodCreateInfoEXT min_lod_desc; + VkImageViewSlicedCreateInfoEXT sliced_desc; + VkImageViewCreateInfo view_desc; +}; + +bool vkd3d_setup_texture_view(struct d3d12_device *device, + const struct vkd3d_texture_view_desc *desc, + struct vkd3d_texture_view_create_info *info); + static inline uint32_t d3d12_resource_desc_default_alignment(const D3D12_RESOURCE_DESC1 *desc) { return desc->SampleDesc.Count > 1 ? 
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index db47bad1c8..3d54da0314 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -200,6 +200,7 @@ VK_DEVICE_PFN(vkWaitForFences) VK_DEVICE_PFN(vkWaitSemaphores) VK_DEVICE_PFN(vkCmdSetDepthWriteEnable) VK_DEVICE_PFN(vkResetQueryPool) +VK_DEVICE_PFN(vkCmdBlitImage) /* VK_KHR_push_descriptor */ VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR) @@ -330,6 +331,7 @@ VK_DEVICE_EXT_PFN(vkCmdCuLaunchKernelNVX) VK_DEVICE_EXT_PFN(vkGetImageViewHandle64NVX) VK_DEVICE_EXT_PFN(vkGetImageViewHandleNVX) VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX) +VK_DEVICE_EXT_PFN(vkGetDeviceCombinedImageSamplerIndexNVX) /* VK_VALVE_descriptor_set_host_mapping */ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE) @@ -355,14 +357,6 @@ VK_DEVICE_EXT_PFN(vkGetShaderModuleIdentifierEXT) /* VK_KHR_present_wait */ VK_DEVICE_EXT_PFN(vkWaitForPresentKHR) -/* VK_EXT_descriptor_buffer */ -VK_DEVICE_EXT_PFN(vkGetDescriptorEXT) -VK_DEVICE_EXT_PFN(vkCmdBindDescriptorBuffersEXT) -VK_DEVICE_EXT_PFN(vkCmdBindDescriptorBufferEmbeddedSamplersEXT) -VK_DEVICE_EXT_PFN(vkCmdSetDescriptorBufferOffsetsEXT) -VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSizeEXT) -VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutBindingOffsetEXT) - /* VK_EXT_pageable_device_local_memory */ VK_DEVICE_EXT_PFN(vkSetDeviceMemoryPriorityEXT) @@ -394,6 +388,17 @@ VK_DEVICE_EXT_PFN(vkCmdCopyMicromapEXT) /* VK_AMD_anti_lag */ VK_DEVICE_EXT_PFN(vkAntiLagUpdateAMD) +/* VK_KHR_descriptor_heap */ +VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceDescriptorSizeEXT) +VK_DEVICE_EXT_PFN(vkWriteSamplerDescriptorsEXT) +VK_DEVICE_EXT_PFN(vkWriteResourceDescriptorsEXT) +VK_DEVICE_EXT_PFN(vkCmdBindSamplerHeapEXT) +VK_DEVICE_EXT_PFN(vkCmdBindResourceHeapEXT) +VK_DEVICE_EXT_PFN(vkCmdPushDataEXT) +VK_DEVICE_EXT_PFN(vkGetImageOpaqueCaptureDataEXT) +VK_DEVICE_EXT_PFN(vkRegisterCustomBorderColorEXT) +VK_DEVICE_EXT_PFN(vkUnregisterCustomBorderColorEXT) + #undef VK_INSTANCE_PFN #undef 
VK_INSTANCE_EXT_PFN #undef VK_DEVICE_PFN diff --git a/libs/vkd3d/workgraphs.c b/libs/vkd3d/workgraphs.c index 5e4db7f6a1..d61876e8b2 100644 --- a/libs/vkd3d/workgraphs.c +++ b/libs/vkd3d/workgraphs.c @@ -144,10 +144,8 @@ struct d3d12_wg_state_object_program struct d3d12_wg_state_object_module { VkShaderModule vk_module; - VkDescriptorSetLayout vk_set_layout; - VkPipelineLayout vk_pipeline_layout; struct d3d12_root_signature *root_signature; - uint32_t push_set_index; + struct vkd3d_fused_root_signature_mappings *mappings; }; struct d3d12_wg_state_object_data @@ -1213,8 +1211,7 @@ static void d3d12_wg_state_object_cleanup(struct d3d12_wg_state_object *state_ob for (i = 0; i < state_object->modules_count; i++) { VK_CALL(vkDestroyShaderModule(state_object->device->vk_device, state_object->modules[i].vk_module, NULL)); - VK_CALL(vkDestroyDescriptorSetLayout(state_object->device->vk_device, state_object->modules[i].vk_set_layout, NULL)); - VK_CALL(vkDestroyPipelineLayout(state_object->device->vk_device, state_object->modules[i].vk_pipeline_layout, NULL)); + vkd3d_free(state_object->modules[i].mappings); if (state_object->modules[i].root_signature) d3d12_root_signature_dec_ref(state_object->modules[i].root_signature); } @@ -1771,6 +1768,7 @@ static HRESULT d3d12_wg_state_object_compile_pipeline( { const struct vkd3d_vk_device_procs *vk_procs = &object->device->vk_procs; const struct vkd3d_shader_library_entry_point *entry; + VkPipelineCreateFlags2CreateInfo flags2_info; VkComputePipelineCreateInfo pipeline_info; VkSpecializationInfo spec_info; uint32_t spec_constant_index; @@ -1820,9 +1818,9 @@ static HRESULT d3d12_wg_state_object_compile_pipeline( vkd3d_array_reserve((void **)&tmp->map_entries, &tmp->map_entries_size, tmp->spec_data_count, sizeof(*tmp->map_entries)); pipeline_info.stage.module = object->modules[entry_point_index].vk_module; - pipeline_info.layout = object->modules[entry_point_index].vk_pipeline_layout; - if 
(d3d12_device_uses_descriptor_buffers(object->device)) - pipeline_info.flags |= VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT; + if (object->modules[entry_point_index].mappings) + pipeline_info.stage.pNext = &object->modules[entry_point_index].mappings->mapping_info; + spec_info.pData = tmp->spec_data; spec_info.dataSize = tmp->spec_data_count * sizeof(uint32_t); spec_info.pMapEntries = tmp->map_entries; @@ -1897,6 +1895,11 @@ static HRESULT d3d12_wg_state_object_compile_pipeline( } } + memset(&flags2_info, 0, sizeof(flags2_info)); + flags2_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO; + flags2_info.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT | pipeline_info.flags; + vk_prepend_struct(&pipeline_info, &flags2_info); + /* CPU node entry, static payload. */ if (is_broadcast) { @@ -2167,7 +2170,6 @@ static HRESULT d3d12_wg_state_object_convert_entry_point( const struct d3d12_state_object_association *global_rs_assoc; const struct d3d12_state_object_association *local_rs_assoc; struct vkd3d_shader_interface_info shader_interface_info; - struct vkd3d_shader_descriptor_binding push_ubo_binding; struct vkd3d_shader_compile_arguments compile_args; struct vkd3d_shader_code dxil, spirv; @@ -2191,8 +2193,6 @@ static HRESULT d3d12_wg_state_object_convert_entry_point( memset(&shader_interface_info, 0, sizeof(shader_interface_info)); memset(&shader_interface_local_info, 0, sizeof(shader_interface_local_info)); - shader_interface_info.min_ssbo_alignment = d3d12_device_get_ssbo_alignment(object->device); - shader_interface_info.stage = VK_SHADER_STAGE_COMPUTE_BIT; shader_interface_info.xfb_info = NULL; @@ -2226,70 +2226,57 @@ static HRESULT d3d12_wg_state_object_convert_entry_point( d3d12_root_signature_inc_ref(module->root_signature = global_rs_assoc->root_signature); } - /* Create a modified pipeline layout which uses the work graph layout. - * It uses push constants for various metadata, and moves root parameters to push UBO. 
*/ - if (FAILED(hr = d3d12_root_signature_create_work_graph_layout( - module->root_signature, &module->vk_set_layout, &module->vk_pipeline_layout))) - return hr; - if (module->root_signature) { struct d3d12_root_signature *rs = module->root_signature; /* We might have different bindings per PSO, even if they are considered pipeline layout compatible. * Register/space declaration could differ, but those don't change the Vulkan pipeline layout. */ - shader_interface_info.flags = d3d12_root_signature_get_shader_interface_flags(rs, VKD3D_PIPELINE_TYPE_COMPUTE); - shader_interface_info.descriptor_tables.offset = rs->descriptor_table_offset; - shader_interface_info.descriptor_tables.count = rs->descriptor_table_count; + shader_interface_info.flags = d3d12_root_signature_get_shader_interface_flags(rs); + /* Cannot lower these in any useful way. */ + shader_interface_info.flags &= ~VKD3D_SHADER_INTERFACE_INLINE_REDZONE_CBV; shader_interface_info.bindings = rs->bindings; shader_interface_info.binding_count = rs->binding_count; - shader_interface_info.push_constant_buffers = rs->root_constants; - shader_interface_info.push_constant_buffer_count = rs->root_constant_count; + shader_interface_info.push_constant_buffers = module->root_signature->root_constants; + shader_interface_info.push_constant_buffer_count = module->root_signature->root_constant_count; + shader_interface_info.num_root_descriptors = module->root_signature->root_parameters_raw_va_count; + shader_interface_info.num_root_constants = module->root_signature->root_parameters_constant_dwords; + shader_interface_info.push_constant_ubo_binding.set = VKD3D_SHADER_ROOT_CONSTANTS_VIRTUAL_DESCRIPTOR_SET; + shader_interface_info.push_constant_ubo_binding.binding = 0; + shader_interface_info.descriptor_table_offset_words = + module->root_signature->descriptor_table_offset / sizeof(uint32_t); + shader_interface_info.descriptor_raw_va_offset = object->device->bindless_state.uav_counter_embedded_offset; + 
shader_interface_info.min_ssbo_alignment = object->device->bindless_state.min_ssbo_alignment; + shader_interface_info.flags |= VKD3D_SHADER_INTERFACE_PUSH_CONSTANTS_AS_UNIFORM_BUFFER; /* TODO: EXTENDED_DEBUG_UTILS mapping. */ - shader_interface_info.push_constant_ubo_binding = &rs->push_constant_ubo_binding; - shader_interface_info.offset_buffer_binding = &rs->offset_buffer_binding; #ifdef VKD3D_ENABLE_DESCRIPTOR_QA shader_interface_info.descriptor_qa_payload_binding = &rs->descriptor_qa_payload_binding; shader_interface_info.descriptor_qa_control_binding = &rs->descriptor_qa_control_binding; #endif - - if (!(shader_interface_info.flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK)) - { - push_ubo_binding.binding = 0; - push_ubo_binding.set = rs->compute.num_set_layouts; - shader_interface_info.push_constant_ubo_binding = &push_ubo_binding; - shader_interface_info.flags |= VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK; - } - - module->push_set_index = rs->compute.push_constant_range.size ? push_ubo_binding.set : UINT32_MAX; - } - else - { - module->push_set_index = UINT32_MAX; } if (local_rs_assoc) { struct d3d12_root_signature *rs = local_rs_assoc->root_signature; + + shader_interface_local_info.bindings = rs->bindings; + shader_interface_local_info.binding_count = rs->binding_count; shader_interface_local_info.local_root_parameters = rs->parameters; shader_interface_local_info.local_root_parameter_count = rs->parameter_count; shader_interface_local_info.shader_record_constant_buffers = rs->root_constants; shader_interface_local_info.shader_record_buffer_count = rs->root_constant_count; - - if (rs->static_sampler_count) - { - FIXME("Static samplers not implemented yet.\n"); - return E_NOTIMPL; - } - - shader_interface_local_info.bindings = rs->bindings; - shader_interface_local_info.binding_count = rs->binding_count; - /* Promote state which might only be active in local root signature. 
*/ - shader_interface_info.flags |= d3d12_root_signature_get_shader_interface_flags(rs, VKD3D_PIPELINE_TYPE_COMPUTE); - if (rs->compute.flags & (VKD3D_ROOT_SIGNATURE_USE_SSBO_OFFSET_BUFFER | VKD3D_ROOT_SIGNATURE_USE_TYPED_OFFSET_BUFFER)) - shader_interface_info.offset_buffer_binding = &rs->offset_buffer_binding; + shader_interface_info.flags |= d3d12_root_signature_get_shader_interface_flags(rs); } + if (module->root_signature && local_rs_assoc) + module->mappings = d3d12_state_object_build_workgraph_root_signature_mappings(module->root_signature, local_rs_assoc->root_signature); + else if (module->root_signature) + module->mappings = d3d12_state_object_build_workgraph_root_signature_mappings(module->root_signature, NULL); + else if (local_rs_assoc) + module->mappings = d3d12_state_object_build_workgraph_root_signature_mappings(NULL, local_rs_assoc->root_signature); + else + module->mappings = NULL; + memset(&dxil, 0, sizeof(dxil)); memset(&spirv, 0, sizeof(spirv)); @@ -2569,73 +2556,6 @@ void d3d12_command_list_workgraph_initialize_scratch(struct d3d12_command_list * d3d12_command_list_debug_mark_end_region(list); } -static void d3d12_command_list_workgraph_bind_resources(struct d3d12_command_list *list, - const struct d3d12_wg_state_object *state, - const struct d3d12_wg_state_object_program *program, - const struct d3d12_wg_state_object_module *module, - VkBuffer vk_root_parameter_buffer, VkDeviceSize vk_root_parameter_buffer_offset) -{ - const struct vkd3d_bindless_state *bindless_state = &list->device->bindless_state; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - VkDescriptorBufferInfo buffer_info; - VkWriteDescriptorSet write; - unsigned int i; - - if (d3d12_device_uses_descriptor_buffers(list->device)) - { - VK_CALL(vkCmdSetDescriptorBufferOffsetsEXT(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - module->vk_pipeline_layout, 0, bindless_state->set_count, - bindless_state->vk_descriptor_buffer_indices, - 
list->descriptor_heap.buffers.vk_offsets)); - } - else - { - for (i = 0; i < bindless_state->set_count; i++) - { - if (list->descriptor_heap.sets.vk_sets[i]) - { - VK_CALL(vkCmdBindDescriptorSets(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - module->vk_pipeline_layout, i, 1, - &list->descriptor_heap.sets.vk_sets[i], 0, NULL)); - } - } - } - - if (module->push_set_index != UINT32_MAX) - { - memset(&write, 0, sizeof(write)); - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.descriptorCount = 1; - write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - write.pBufferInfo = &buffer_info; - - buffer_info.offset = vk_root_parameter_buffer_offset; - buffer_info.buffer = vk_root_parameter_buffer; - buffer_info.range = sizeof(union vkd3d_root_parameter_data); - - VK_CALL(vkCmdPushDescriptorSetKHR(list->cmd.vk_command_buffer, - VK_PIPELINE_BIND_POINT_COMPUTE, module->vk_pipeline_layout, module->push_set_index, 1, &write)); - } - - if (module->root_signature) - { - if (module->root_signature->vk_sampler_set) - { - VK_CALL(vkCmdBindDescriptorSets(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - module->vk_pipeline_layout, - module->root_signature->sampler_descriptor_set, - 1, &module->root_signature->vk_sampler_set, 0, NULL)); - } - else if (module->root_signature->vk_sampler_descriptor_layout) - { - VK_CALL(vkCmdBindDescriptorBufferEmbeddedSamplersEXT(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - module->vk_pipeline_layout, module->root_signature->sampler_descriptor_set)); - } - } - - /* TODO: Bind local sampler set. 
*/ -} - static unsigned int d3d12_command_list_workgraph_remaining_levels( const struct d3d12_wg_state_object_program *program, unsigned int level, @@ -2679,17 +2599,14 @@ static void d3d12_command_list_workgraph_execute_node_cpu_entry(struct d3d12_com const struct vkd3d_shader_node_input_data *node_input; struct vkd3d_shader_node_input_push_signature push; struct vkd3d_scratch_allocation offset_scratch; - VkPipelineLayout vk_layout; + VkPushDataInfoEXT push_info; uint32_t table_index; memset(&push, 0, sizeof(push)); - vk_layout = state->modules[node_index].vk_pipeline_layout; + d3d12_command_list_update_descriptor_heaps(list); - /* Just rebind resources every time. We have to execute intermediate shaders anyway, - * which clobbers all descriptor state. */ - d3d12_command_list_workgraph_bind_resources(list, state, program, - &state->modules[node_index], - vk_root_parameter_buffer, vk_root_parameter_buffer_offset); + push.root_parameter_bda = vkd3d_get_buffer_device_address(list->device, vk_root_parameter_buffer) + + vk_root_parameter_buffer_offset; node_input = state->entry_points[node_index].node_input; @@ -2726,9 +2643,12 @@ static void d3d12_command_list_workgraph_execute_node_cpu_entry(struct d3d12_com program->pipelines[node_index].vk_static_cpu_node_entry_pipeline : program->pipelines[node_index].vk_cpu_node_entry_pipeline)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(push), &push)); + memset(&push_info, 0, sizeof(push_info)); + push_info.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + push_info.data.address = &push; + push_info.data.size = sizeof(push); + push_info.offset = 0; + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &push_info)); if (desc->NumRecords == 1 && node_input->launch_type == VKD3D_SHADER_NODE_LAUNCH_TYPE_BROADCASTING) { @@ -2840,10 +2760,7 @@ static void d3d12_command_list_workgraph_execute_node_cpu_entry(struct d3d12_com 
VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, WG_DIVIDER, num_wgs_x / WG_DIVIDER, amplification)); push.node_linear_offset_bda += sizeof(uint32_t); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, - offsetof(struct vkd3d_shader_node_input_push_signature, node_linear_offset_bda), - sizeof(uint32_t), &push.node_linear_offset_bda)); + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &push_info)); /* Secondary offset */ if (num_wgs_x % WG_DIVIDER) @@ -2870,10 +2787,7 @@ static void d3d12_command_list_workgraph_execute_node_cpu_entry(struct d3d12_com VK_CALL(vkCmdDispatch(list->cmd.vk_command_buffer, WG_DIVIDER, num_wgs_x / WG_DIVIDER, 1)); push.node_linear_offset_bda += sizeof(uint32_t); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, - offsetof(struct vkd3d_shader_node_input_push_signature, node_linear_offset_bda), - sizeof(uint32_t), &push.node_linear_offset_bda)); + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &push_info)); /* Secondary offset */ if (num_wgs_x % WG_DIVIDER) @@ -2928,9 +2842,9 @@ static void d3d12_command_list_emit_distribute_workgroups(struct d3d12_command_l args.node_share_mapping_va = program->share_mapping_scratch_offset + list->wg_state.BackingMemory.StartAddress; args.num_nodes = state->entry_points_count; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - program->workgroup_distributor.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(args), &args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + program->workgroup_distributor.vk_pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(args), &args); if (list->device->vk_info.EXT_debug_utils) d3d12_command_list_debug_mark_label(list, "Distribute Work Groups", 1.0f, 0.8f, 0.8f, 1.0f); @@ -2969,10 +2883,9 @@ static void d3d12_command_list_emit_distribute_payload_offsets(struct d3d12_comm args.unrolled_offsets = 
state->unrolled_offsets.va; args.packed_offset_counts = list->wg_state.BackingMemory.StartAddress + program->required_scratch_size; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, program->payload_offset_expander.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(args), &args)); + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(args), &args); if (list->device->vk_info.EXT_debug_utils) { @@ -3003,10 +2916,9 @@ static void d3d12_command_list_emit_distribute_payload_offsets(struct d3d12_comm VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, program->complete_compaction.vk_pipeline)); - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, program->complete_compaction.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(complete_args), &complete_args)); + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(complete_args), &complete_args); wgx = complete_args.node_count; wgx = align(complete_args.node_count, vkd3d_meta_get_workgraph_complete_compaction_workgroup_size()) / @@ -3047,19 +2959,16 @@ static void d3d12_command_list_workgraph_execute_node_gpu( VkDeviceAddress primary_linear_offset_bda; VkDeviceSize vk_secondary_indirect_offset; VkDeviceSize vk_primary_indirect_offset; + VkPushDataInfoEXT push_info; VkBuffer vk_indirect_buffer; - VkPipelineLayout vk_layout; unsigned int table_index; memset(&push, 0, sizeof(push)); - vk_layout = state->modules[node_index].vk_pipeline_layout; node_input = state->entry_points[node_index].node_input; - /* Just rebind resources every time. We have to execute intermediate shaders anyway, - * which clobbers all descriptor state. 
*/ - d3d12_command_list_workgraph_bind_resources(list, state, program, - &state->modules[node_index], - vk_root_parameter_buffer, vk_root_parameter_buffer_offset); + d3d12_command_list_update_descriptor_heaps(list); + push.root_parameter_bda = vkd3d_get_buffer_device_address(list->device, vk_root_parameter_buffer) + + vk_root_parameter_buffer_offset; push.node_payload_output_bda = output_va; push.node_remaining_recursion_levels = d3d12_command_list_workgraph_remaining_levels(program, level, node_index); @@ -3134,9 +3043,13 @@ static void d3d12_command_list_workgraph_execute_node_gpu( } push.node_linear_offset_bda = primary_linear_offset_bda; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(push), &push)); + + memset(&push_info, 0, sizeof(push_info)); + push_info.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT; + push_info.data.address = &push; + push_info.data.size = sizeof(push); + push_info.offset = 0; + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &push_info)); if (list->device->vk_info.EXT_debug_utils) { @@ -3146,21 +3059,10 @@ static void d3d12_command_list_workgraph_execute_node_gpu( } if (d3d12_wg_requires_primary_execution(state)) - { - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, - offsetof(struct vkd3d_shader_node_input_push_signature, node_linear_offset_bda), - sizeof(VkDeviceAddress), &push.node_linear_offset_bda)); - VK_CALL(vkCmdDispatchIndirect(list->cmd.vk_command_buffer, vk_indirect_buffer, vk_primary_indirect_offset)); - } push.node_linear_offset_bda = secondary_linear_offset_bda; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, - vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, - offsetof(struct vkd3d_shader_node_input_push_signature, node_linear_offset_bda), - sizeof(VkDeviceAddress), &push.node_linear_offset_bda)); - + VK_CALL(vkCmdPushDataEXT(list->cmd.vk_command_buffer, &push_info)); 
VK_CALL(vkCmdDispatchIndirect(list->cmd.vk_command_buffer, vk_indirect_buffer, vk_secondary_indirect_offset)); VKD3D_BREADCRUMB_AUX32(level); @@ -3278,8 +3180,10 @@ static bool d3d12_command_list_workgraph_setup_indirect( args.entry_point_mapping_va = entry_scratch.va; args.num_entry_points = program->num_pipelines; - VK_CALL(vkCmdPushConstants(list->cmd.vk_command_buffer, program->gpu_input_setup.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args)); + d3d12_command_list_meta_push_data(list, list->cmd.vk_command_buffer, + program->gpu_input_setup.vk_pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(args), &args); + VK_CALL(vkCmdBindPipeline(list->cmd.vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, program->gpu_input_setup.vk_pipeline)); num_wgs = align(args.num_entry_points, vkd3d_meta_get_workgraph_setup_gpu_input_workgroup_size()) / @@ -3447,8 +3351,7 @@ void d3d12_command_list_workgraph_dispatch(struct d3d12_command_list *list, cons d3d12_command_list_debug_mark_begin_region(list, "WGDispatch"); d3d12_command_list_invalidate_current_pipeline(list, true); - d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); - d3d12_command_list_update_descriptor_buffers(list); + d3d12_command_list_invalidate_root_parameters(list); d3d12_command_list_workgraph_barrier(list, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, diff --git a/tests/d3d12_clear.c b/tests/d3d12_clear.c index 7aff628efd..91bb8d089e 100644 --- a/tests/d3d12_clear.c +++ b/tests/d3d12_clear.c @@ -1083,11 +1083,16 @@ void test_clear_unordered_access_view_image(void) void test_uav_clear_exhaustive_descriptors(void) { + /* Engineer the range so that we get to test clear_uav_with_copy. 
*/ + const uint32_t SLOW_INDEX_LO = 32 * 1024 + 4000; + const uint32_t SLOW_INDEX_HI = 32 * 1024 + 4200; + struct resource_readback rb_rt; struct test_context context; struct resource_readback rb; ID3D12DescriptorHeap *gpu; ID3D12DescriptorHeap *cpu; ID3D12Resource *buffer; + ID3D12Resource *tex_rt; ID3D12Resource *tex; unsigned int i, j; @@ -1102,6 +1107,10 @@ void test_uav_clear_exhaustive_descriptors(void) /* Pick a format that is deliberately hard to deal with w.r.t. NaN clears. */ tex = create_default_texture2d(context.device, 64, 64, 1024, 1, DXGI_FORMAT_R16G16B16A16_FLOAT, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + /* We don't end up with full uint MUTABLE on these resources. Need fallback clears most likely. */ + tex_rt = create_default_texture2d(context.device, 64, 64, 1024, 1, DXGI_FORMAT_R16G16B16A16_FLOAT, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS | + D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); /* 64k descriptors. */ for (j = 0; j < 64; j++) @@ -1109,6 +1118,8 @@ void test_uav_clear_exhaustive_descriptors(void) for (i = 0; i < 1024; i++) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; + unsigned int heap_index; + bool use_rtv_tex; memset(&uav_desc, 0, sizeof(uav_desc)); uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; @@ -1116,10 +1127,13 @@ void test_uav_clear_exhaustive_descriptors(void) uav_desc.Texture2DArray.FirstArraySlice = i; uav_desc.Texture2DArray.ArraySize = 1; - ID3D12Device_CreateUnorderedAccessView(context.device, tex, NULL, &uav_desc, - get_cpu_descriptor_handle(&context, cpu, i + 1024 * j)); - ID3D12Device_CreateUnorderedAccessView(context.device, tex, NULL, &uav_desc, - get_cpu_descriptor_handle(&context, gpu, i + 1024 * j)); + heap_index = i + 1024 * j; + use_rtv_tex = heap_index >= SLOW_INDEX_LO && heap_index <= SLOW_INDEX_HI; + + ID3D12Device_CreateUnorderedAccessView(context.device, use_rtv_tex ? 
tex_rt : tex, NULL, &uav_desc, + get_cpu_descriptor_handle(&context, cpu, heap_index)); + ID3D12Device_CreateUnorderedAccessView(context.device, use_rtv_tex ? tex_rt : tex, NULL, &uav_desc, + get_cpu_descriptor_handle(&context, gpu, heap_index)); } } @@ -1162,6 +1176,7 @@ void test_uav_clear_exhaustive_descriptors(void) for (i = 32 * 1024; i < 64 * 1024; i++) { uint32_t values[4] = { (i >> 0) & 0xff, (i >> 8) & 0xff, 0, 0 }; + bool use_rtv_tex; D3D12_RECT rect; rect.left = 0; @@ -1177,9 +1192,11 @@ void test_uav_clear_exhaustive_descriptors(void) values[3] = 0xffff; } + use_rtv_tex = i >= SLOW_INDEX_LO && i <= SLOW_INDEX_HI; + ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list, get_gpu_descriptor_handle(&context, gpu, i), - get_cpu_descriptor_handle(&context, cpu, i), tex, + get_cpu_descriptor_handle(&context, cpu, i), use_rtv_tex ? tex_rt : tex, values, 1, &rect); } @@ -1229,11 +1246,16 @@ void test_uav_clear_exhaustive_descriptors(void) D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); transition_resource_state(context.list, tex, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + transition_resource_state(context.list, tex_rt, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); for (i = 0; i < 1024; i++) { + bool use_rtv_tex; uint32_t y; get_texture_readback_with_command_list(tex, i, &rb, context.queue, context.list); + reset_command_list(context.list, context.allocator); + get_texture_readback_with_command_list(tex_rt, i, &rb_rt, context.queue, context.list); for (y = 0; y < 64; y++) { @@ -1242,6 +1264,7 @@ void test_uav_clear_exhaustive_descriptors(void) uint64_t value; descriptor_index = i + y * 1024; + use_rtv_tex = descriptor_index >= SLOW_INDEX_LO && descriptor_index <= SLOW_INDEX_HI; if (descriptor_index < 32 * 1024) { @@ -1258,12 +1281,13 @@ void test_uav_clear_exhaustive_descriptors(void) expected = 0xfffffffefffdfffc; } - value = get_readback_uint64(&rb, 0, y); + 
value = get_readback_uint64(use_rtv_tex ? &rb_rt : &rb, 0, y); ok(expected == value, "desc %u, line %u: Expected %"PRIx64", got %"PRIx64".\n", i, y, expected, value); } release_resource_readback(&rb); + release_resource_readback(&rb_rt); reset_command_list(context.list, context.allocator); } @@ -1319,6 +1343,7 @@ void test_uav_clear_exhaustive_descriptors(void) ID3D12DescriptorHeap_Release(cpu); ID3D12Resource_Release(buffer); ID3D12Resource_Release(tex); + ID3D12Resource_Release(tex_rt); destroy_test_context(&context); } diff --git a/tests/d3d12_copy.c b/tests/d3d12_copy.c index efda23ddca..f5d2e4e396 100644 --- a/tests/d3d12_copy.c +++ b/tests/d3d12_copy.c @@ -329,6 +329,7 @@ void test_copy_texture(void) void test_copy_texture_ds_edge_cases(void) { ID3D12GraphicsCommandList *command_list; + ID3D12DescriptorHeap *resource_heap; struct depth_stencil_resource ds; struct test_context_desc desc; struct test_context context; @@ -380,6 +381,10 @@ void test_copy_texture_ds_edge_cases(void) queue = context.queue; heap = create_cpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1); + resource_heap = create_gpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + + /* Ensure that we have some test coverage for fallback copy + heap. 
*/ + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &resource_heap); for (i = 0; i < ARRAY_SIZE(tests); i++) { @@ -460,6 +465,7 @@ void test_copy_texture_ds_edge_cases(void) } vkd3d_test_set_context(NULL); + ID3D12DescriptorHeap_Release(resource_heap); ID3D12DescriptorHeap_Release(heap); destroy_test_context(&context); } @@ -1706,6 +1712,7 @@ void test_multisample_resolve_formats(void) D3D12_HEAP_PROPERTIES heap_properties; D3D12_ROOT_SIGNATURE_DESC rs_desc; D3D12_RESOURCE_DESC resource_desc; + ID3D12DescriptorHeap *desc_heap; struct test_context_desc desc; D3D12_ROOT_PARAMETER rs_param; D3D12_RECT scissor, src_rect; @@ -1961,6 +1968,8 @@ void test_multisample_resolve_formats(void) transition_resource_state(context.list, rt_s32_ms, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); transition_resource_state(context.list, ds_ms, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + desc_heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + for (i = 0; i < ARRAY_SIZE(resolve_modes); i++) { D3D12_RESOLVE_MODE mode = resolve_modes[i]; @@ -2036,6 +2045,10 @@ void test_multisample_resolve_formats(void) ID3D12GraphicsCommandList1_ResolveSubresourceRegion(command_list1, image_u32, 0, dst_x, dst_y, rt_u32_ms, 0, j ? &src_rect : NULL, DXGI_FORMAT_R32_UINT, mode); + + /* Bind this late. Ensure we get some coverage for both heap based compute resolve and + * legacy path. */ + ID3D12GraphicsCommandList1_SetDescriptorHeaps(command_list1, 1, &desc_heap); ID3D12GraphicsCommandList1_ResolveSubresourceRegion(command_list1, image_s32, 0, dst_x, dst_y, rt_s32_ms, 0, j ? 
&src_rect : NULL, DXGI_FORMAT_R32_SINT, mode); @@ -2196,6 +2209,7 @@ void test_multisample_resolve_formats(void) ID3D12RootSignature_Release(rs_setup_stencil); ID3D12GraphicsCommandList1_Release(command_list1); + ID3D12DescriptorHeap_Release(desc_heap); destroy_test_context(&context); } diff --git a/tests/d3d12_descriptors.c b/tests/d3d12_descriptors.c index 4cc3909858..e10046a8a5 100644 --- a/tests/d3d12_descriptors.c +++ b/tests/d3d12_descriptors.c @@ -510,7 +510,7 @@ void test_update_root_descriptors(void) hr = create_root_signature(device, &root_signature_desc, &root_signature); ok(SUCCEEDED(hr), "Failed to create root signature, hr %#x.\n", hr); - pipeline_state = create_compute_pipeline_state(device, root_signature, update_root_descriptors_dxbc); + pipeline_state = create_compute_pipeline_state(device, root_signature, update_root_descriptors_dxil); ID3D12GraphicsCommandList_SetPipelineState(command_list, pipeline_state); ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, root_signature); @@ -4896,7 +4896,8 @@ void test_undefined_descriptor_heap_mismatch_types(void) reset_command_list(context.list, context.allocator); radv_32b_layout = is_radv_device(context.device) && - is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_buffer") && + (is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_buffer") || + is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_heap")) && ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) == 32; for (j = 0; j < TYPE_COUNT; j++) @@ -6148,6 +6149,13 @@ void test_custom_border_color_limits(void) ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 2, ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(sampler_heaps[i])); ID3D12GraphicsCommandList_Dispatch(context.list, 2048 / 64, 1, 1); + +#if 0 + /* Temporary hack to make the test pass on first beta driver. 
*/ + ID3D12GraphicsCommandList_Close(context.list); + exec_command_list(context.queue, context.list); + ID3D12GraphicsCommandList_Reset(context.list, context.allocator, NULL); +#endif } transition_resource_state(context.list, output, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); @@ -6169,7 +6177,7 @@ void test_custom_border_color_limits(void) /* NV will fail around 4k unique samplers. */ if (is_nvidia_device(context.device)) - is_todo = flat_index >= 4000; + is_todo = !is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_heap") && flat_index >= 4000; else if (is_amd_vulkan_device(context.device) || is_adreno_device(context.device) || is_mesa_intel_device(context.device)) diff --git a/tests/d3d12_robustness.c b/tests/d3d12_robustness.c index 5ca30649d4..3675d1ddef 100644 --- a/tests/d3d12_robustness.c +++ b/tests/d3d12_robustness.c @@ -88,7 +88,8 @@ void test_buffers_oob_behavior_vectorized_structured_16bit(void) heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, ARRAY_SIZE(output_buffers)); - if (is_mesa_intel_device(context.device)) + if (is_mesa_intel_device(context.device) || + (is_nvidia_device(context.device) && is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_heap"))) { /* There appears to be driver issues. * SSBO not aligned to the advertised 4 bytes seems to break down for unknown reasons. @@ -955,6 +956,7 @@ static void test_undefined_structured_raw_read_typed(bool use_dxil) ID3D12Resource *input; unsigned int i, j; bool is_amd_win; + bool is_nv_heap; #include "shaders/robustness/headers/undefined_structured_raw_read_typed.h" @@ -1044,6 +1046,8 @@ static void test_undefined_structured_raw_read_typed(bool use_dxil) /* AMD behavior: RAW emits a descriptor without stride. Typed always reads same value at offset = 0. * Structured: Passed down as stride to typed. 
*/ is_amd_win = is_amd_windows_device(context.device); + is_nv_heap = is_nvidia_device(context.device) && + is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_heap"); /* Validate structured. */ for (i = 0; i < 8; i++) @@ -1079,6 +1083,11 @@ static void test_undefined_structured_raw_read_typed(bool use_dxil) expected.w = expected.x; } } + else if (is_nv_heap) + { + /* Magic driver behavior. SSBO is expressed as a R32_UINT texel buffer when read as one. */ + expected.x = j < in_bounds_dwords ? (j + stride_dwords) : 0; + } else { /* NV native behavior. We try to match this in vkd3d-proton since it's implementable. */ @@ -1232,6 +1241,7 @@ static void test_undefined_typed_read_structured_raw(bool use_dxil) ID3D12Resource *input; unsigned int i, j; bool is_amd_win; + bool is_nv_heap; bool is_nv_win; #include "shaders/robustness/headers/undefined_typed_read_structured_raw.h" @@ -1327,8 +1337,9 @@ static void test_undefined_typed_read_structured_raw(bool use_dxil) /* AMD behavior: Typed buffer has stride + element count. Structured buffer works as expected. * RAW access works as expected, except that robustness is completely disabled. */ is_amd_win = is_amd_windows_device(context.device); - is_nv_win = is_nvidia_windows_device(context.device); + is_nv_heap = is_nvidia_device(context.device) && + is_vk_device_extension_supported(context.device, "VK_EXT_descriptor_heap"); /* Validate the buffer read. */ for (i = 0; i < 16; i++) @@ -1344,8 +1355,43 @@ static void test_undefined_typed_read_structured_raw(bool use_dxil) { struct uvec4 expected = {0}; const struct uvec4 *value; + bool is_bug = false; - if (is_nv_win && i < 8) + if (is_nv_heap && i >= 8) + { + /* Structured buffer path. */ + unsigned int ssbo_size_words = 12; + unsigned int ssbo_offset_words = vecsize * 4; + unsigned int accessed_word = j * vecsize; + + /* SSBO size is treated as num elements * 4, + * so we always get 12 dwords here. 
*/ + if (accessed_word < ssbo_size_words) + { + expected.x = ssbo_offset_words + j * vecsize; + expected.y = expected.x + 1; + expected.z = expected.y + 1; + expected.w = expected.z + 1; + /* Driver does not seem to like RGBA8 aliasing with SSBO, but it works on native, + * so that's odd. */ + is_bug = i == 8; + } + } + else if (is_nv_heap) + { + /* For raw buffer, number of elements is treated as number of dwords. */ + if (j < 12 / vecsize) + { + expected.x = vecsize * (4 + j); + expected.y = expected.x + 1; + expected.z = expected.y + 1; + expected.w = expected.z + 1; + /* Driver does not seem to like RGBA8 aliasing with SSBO, but it works on native, + * so that's odd. */ + is_bug = i == 0; + } + } + else if (is_nv_win && i < 8) { /* For raw buffer, number of elements is treated as number of dwords. */ if (j < 12 / vecsize) @@ -1373,6 +1419,7 @@ static void test_undefined_typed_read_structured_raw(bool use_dxil) expected.w = expected.z; value = get_readback_uvec4(&rb, j, 0); + bug_if(is_bug) ok(compare_uvec4(value, &expected), "output %u, index %u, expected (%u, %u, %u, %u), got (%u, %u, %u, %u)\n", i, j, expected.x, expected.y, expected.z, expected.w, diff --git a/tests/d3d12_sampler_feedback.c b/tests/d3d12_sampler_feedback.c index 8e8d2128db..a41c3ad562 100644 --- a/tests/d3d12_sampler_feedback.c +++ b/tests/d3d12_sampler_feedback.c @@ -874,6 +874,7 @@ void test_sampler_feedback_decode_encode_min_mip(void) struct test_context_desc context_desc; ID3D12GraphicsCommandList1 *list1; D3D12_HEAP_PROPERTIES heap_props; + ID3D12DescriptorHeap *desc_heap; struct test_context context; struct resource_readback rb; ID3D12Resource *resolve_tex; @@ -908,6 +909,8 @@ void test_sampler_feedback_decode_encode_min_mip(void) hr = ID3D12GraphicsCommandList_QueryInterface(context.list, &IID_ID3D12GraphicsCommandList1, (void **)&list1); ok(SUCCEEDED(hr), "Failed to query GraphicsCommandList1, hr #%x.\n", hr); + desc_heap = create_gpu_descriptor_heap(context.device, 
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + memset(&desc, 0, sizeof(desc)); memset(&heap_props, 0, sizeof(heap_props)); @@ -948,6 +951,7 @@ void test_sampler_feedback_decode_encode_min_mip(void) /* DstX/Y for buffers are ignored on NV, but not AMD. Inherit NV behavior here, it's the only one that makes some kind of sense ... */ /* SrcRect is ignored on NV (spec says it's not allowed for MIN_MIP), but not AMD. Inherit NV behavior here. */ + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &desc_heap); ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, feedback_min_mip_single, UINT_MAX, 0, 0, upload, 0, NULL, DXGI_FORMAT_R8_UINT, D3D12_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK); transition_resource_state(context.list, feedback_min_mip_single, D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, resolve, 0, 0, 0, feedback_min_mip_single, UINT_MAX, NULL, DXGI_FORMAT_R8_UINT, D3D12_RESOLVE_MODE_DECODE_SAMPLER_FEEDBACK); @@ -1006,6 +1010,7 @@ void test_sampler_feedback_decode_encode_min_mip(void) } transition_resource_state(context.list, upload_tex, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &desc_heap); /* On ENCODE, dst subresource is always -1, and source subresource index is the slice to resolve. * This implies two rules: We can only resolve layer N to layer N, and layer size of source and dest must be the same. 
*/ @@ -1090,6 +1095,7 @@ void test_sampler_feedback_decode_encode_min_mip(void) } ID3D12GraphicsCommandList1_Release(list1); + ID3D12DescriptorHeap_Release(desc_heap); ID3D12Resource_Release(feedback_min_mip_single); ID3D12Resource_Release(feedback_min_mip_array); ID3D12Resource_Release(upload_tex); @@ -1123,6 +1129,7 @@ void test_sampler_feedback_decode_encode_mip_used(void) struct test_context_desc context_desc; ID3D12GraphicsCommandList1 *list1; D3D12_HEAP_PROPERTIES heap_props; + ID3D12DescriptorHeap *desc_heap; struct test_context context; struct resource_readback rb; ID3D12Resource *resolve_tex; @@ -1150,6 +1157,8 @@ void test_sampler_feedback_decode_encode_mip_used(void) return; } + desc_heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + hr = ID3D12Device_QueryInterface(context.device, &IID_ID3D12Device8, (void **)&device8); ok(SUCCEEDED(hr), "Failed to query Device8, hr #%x.\n", hr); hr = ID3D12GraphicsCommandList_QueryInterface(context.list, &IID_ID3D12GraphicsCommandList1, (void **)&list1); @@ -1206,6 +1215,7 @@ void test_sampler_feedback_decode_encode_mip_used(void) transition_resource_state(context.list, upload_tex, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &desc_heap); ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, feedback, UINT_MAX, 0, 0, upload_tex, UINT_MAX, NULL, DXGI_FORMAT_R8_UINT, D3D12_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK); transition_resource_state(context.list, feedback, D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); @@ -1238,6 +1248,7 @@ void test_sampler_feedback_decode_encode_mip_used(void) { /* Target Layer = 1, Level = 1 with Level = 0, Layer = 0 as source. This should not work, but it does. * Now DstX/Y works. 
*/ + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &desc_heap); ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, feedback, LEVELS + 1, 1, 2, upload_tex, 0, NULL, DXGI_FORMAT_R8_UINT, D3D12_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK); transition_resource_state(context.list, resolve_tex, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RESOLVE_DEST); transition_resource_state(context.list, feedback, D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); @@ -1269,6 +1280,7 @@ void test_sampler_feedback_decode_encode_mip_used(void) { D3D12_RECT decode_rect = { 1, 2, 9, 10 }; D3D12_RECT encode_rect = { 4, 0, 5, 1 }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &desc_heap); ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, feedback, 0, decode_rect.left, decode_rect.top, upload_tex, 0, &encode_rect, DXGI_FORMAT_R8_UINT, D3D12_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK); transition_resource_state(context.list, resolve_tex, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RESOLVE_DEST); transition_resource_state(context.list, feedback, D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); @@ -1304,6 +1316,7 @@ void test_sampler_feedback_decode_encode_mip_used(void) /* With MIP_USED region both dst and src subresource matters. */ ID3D12GraphicsCommandList1_Release(list1); + ID3D12DescriptorHeap_Release(desc_heap); ID3D12Resource_Release(feedback); ID3D12Resource_Release(upload_tex); ID3D12Resource_Release(resolve_tex); diff --git a/tests/d3d12_tessellation.c b/tests/d3d12_tessellation.c index 875229fbd3..4a90caec34 100644 --- a/tests/d3d12_tessellation.c +++ b/tests/d3d12_tessellation.c @@ -381,8 +381,10 @@ static void test_quad_tessellation(bool use_dxil, bool wrong_pso_topology, bool ID3D12PipelineState_Release(context.pipeline_state); pso_desc.HS = use_dxil ? 
quad_tess_hs_cw_dxil : quad_tess_hs_cw_dxbc; + vkd3d_mute_validation_message("08743", "blah"); hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, &IID_ID3D12PipelineState, (void **)&context.pipeline_state); + vkd3d_unmute_validation_message("08743"); ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr); ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); diff --git a/tests/d3d12_tests.h b/tests/d3d12_tests.h index b561ea646b..79ffc9b3aa 100644 --- a/tests/d3d12_tests.h +++ b/tests/d3d12_tests.h @@ -302,6 +302,7 @@ decl_test(test_buffers_oob_behavior_vectorized_structured_16bit); decl_test(test_typed_buffers_many_objects_dxbc); decl_test(test_typed_buffers_many_objects_dxil); decl_test(test_create_pipeline_with_null_root_signature); +decl_test(test_undefined_descriptor_heap_mismatch_types); decl_test(test_undefined_read_typed_buffer_as_untyped_simple_dxbc); decl_test(test_undefined_read_typed_buffer_as_untyped_simple_dxil); decl_test(test_undefined_structured_raw_alias_dxbc); @@ -529,6 +530,7 @@ decl_test(test_uav_clear_exhaustive_descriptors); decl_test(test_resolve_image_exhaustive_descriptors); decl_test(test_root_constant_indexing_dxil); decl_test(test_root_constant_indexing_dxbc); +decl_test(test_clear_uav_mismatch_heap); decl_test(test_nvx_cubin); decl_test(test_use_before_alloc_stress); decl_test(test_clear_uav_mismatch_heap); diff --git a/tests/shaders/descriptors/headers/update_root_descriptors.h b/tests/shaders/descriptors/headers/update_root_descriptors.h index 5ec4d72ba3..841588eacf 100644 --- a/tests/shaders/descriptors/headers/update_root_descriptors.h +++ b/tests/shaders/descriptors/headers/update_root_descriptors.h @@ -15,3 +15,62 @@ static const DWORD update_root_descriptors_code_dxbc[] = #endif UNUSED_ARRAY_ATTR static const D3D12_SHADER_BYTECODE update_root_descriptors_dxbc = { update_root_descriptors_code_dxbc, sizeof(update_root_descriptors_code_dxbc) }; #undef UNUSED_ARRAY_ATTR +static 
const BYTE update_root_descriptors_code_dxil[] = +{ + 0x44, 0x58, 0x42, 0x43, 0xae, 0x63, 0x0c, 0xbf, 0x79, 0x4d, 0xd0, 0x1a, 0xa3, 0x4d, 0xb7, 0x35, 0x80, 0x2b, 0xd7, 0xa3, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x0c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x4d, 0xb2, 0x0f, + 0xfb, 0x50, 0xbe, 0x43, 0x6e, 0xbf, 0x73, 0xbe, 0x1c, 0x6c, 0xda, 0x12, 0x44, 0x58, 0x49, 0x4c, 0x08, 0x05, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x42, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, + 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xf0, 0x04, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x39, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x13, 
0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, + 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, + 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, + 0x40, 0x02, 0xaa, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0x00, 0x00, 0x00, 0x49, 0x18, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x00, 0x00, 0x00, + 0x89, 0x20, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, + 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x54, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xe0, 0xa8, 0xe1, 0xf2, 0x27, 0xec, 0x21, 0x24, 0x9f, 0xdb, 0xa8, 0x62, 0x25, 0x26, 0x1f, 0xb9, 0x6d, 0x44, + 0x8c, 0x31, 0xc6, 0x1c, 0x01, 0x42, 0xe5, 0x9e, 0xe1, 0xf2, 0x27, 0xec, 0x21, 0x24, 0x3f, 0x04, 0x9a, 0x61, 0x21, 0x50, 0x60, 0x0a, 0x41, 0xc6, 0x19, 0x84, 0xe6, 0x08, 0x82, 0x62, 0x9c, 0x51, + 0xc6, 0x50, 0xb4, 0x8a, 0x02, 0xc6, 0x19, 0x63, 0x8c, 0x31, 0x0a, 0xb5, 0x81, 0x80, 0xd3, 0xa4, 0x29, 0xa2, 0x84, 0xc9, 0x5f, 0xe1, 0x0d, 0x9b, 0x08, 0x6d, 0x18, 0x22, 0x42, 0x92, 0x36, 0xaa, + 0x28, 0x88, 0x08, 0x05, 0x83, 0xe0, 0x09, 0x42, 0x80, 0x84, 0x31, 0x48, 0xce, 0x11, 0x80, 0x02, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, + 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, + 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 
0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, + 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, + 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x08, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x16, 0x20, 0x00, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x34, 0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x05, 0x02, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, + 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x30, 0x02, 0x50, 0x10, 0xc5, 0x50, 0x16, 0x85, 0x40, 0x6a, 0x04, 0x80, 0x68, 0x81, 0xd0, 0x9c, 0x01, 0xa0, 0x38, + 0x03, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x8f, 0x0c, 0x6f, 0xec, 0xed, 0x4d, 0x0c, 0x24, 0xc6, 0x05, 0xc7, 0x45, + 0x86, 0x06, 0xa6, 0xc6, 0x25, 0xa6, 0x06, 0x04, 0xc5, 0x8c, 0xec, 0xa6, 0xac, 0x86, 0x46, 0x6c, 0x8c, 0x2c, 0x65, 0x43, 0x10, 0x4c, 0x10, 0x06, 0x62, 0x82, 0x30, 0x14, 0x1b, 0x84, 0x81, 0x98, + 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x30, 0x28, 0x8c, 0xcd, 0x4d, 0x10, 0x86, 0x63, 0xc3, 0x80, 0x24, 0xc4, 0x04, 0x21, 0x6a, 0x08, 0x4c, 0x10, 0x06, 0x64, 0x82, 0xa0, 0x28, 0x1b, 0x16, 0x62, 0x61, + 0x08, 0x62, 0x68, 0x1c, 0xc7, 0x01, 0x36, 0x04, 0xcf, 0x04, 0x61, 0x62, 0x36, 0x20, 0x44, 0xc4, 0x10, 0xc4, 0x60, 0x00, 0x1b, 0x02, 0x69, 0x03, 0x01, 0x40, 0x13, 0x30, 0x41, 0x10, 0x00, 0x12, + 0x6d, 0x61, 0x69, 0x6e, 0x13, 0x04, 0x6a, 0x99, 0x20, 0x0c, 0xc9, 0x86, 0x61, 0x18, 0x86, 0x0d, 0x04, 0x71, 0x61, 0xd9, 0x86, 
0xa2, 0xb2, 0x00, 0x4a, 0xab, 0xc2, 0xc6, 0x66, 0xd7, 0xe6, 0x92, + 0x46, 0x56, 0xe6, 0x46, 0x37, 0x25, 0x08, 0xaa, 0x90, 0xe1, 0xb9, 0xd8, 0x95, 0xc9, 0xcd, 0xa5, 0xbd, 0xb9, 0x4d, 0x09, 0x88, 0x26, 0x64, 0x78, 0x2e, 0x76, 0x61, 0x6c, 0x76, 0x65, 0x72, 0x53, + 0x02, 0xa3, 0x0e, 0x19, 0x9e, 0xcb, 0x1c, 0x5a, 0x18, 0x59, 0x99, 0x5c, 0xd3, 0x1b, 0x59, 0x19, 0xdb, 0x94, 0x20, 0x29, 0x43, 0x86, 0xe7, 0x22, 0x57, 0x36, 0xf7, 0x56, 0x27, 0x37, 0x56, 0x36, + 0x37, 0x25, 0x98, 0xea, 0x90, 0xe1, 0xb9, 0x94, 0xb9, 0xd1, 0xc9, 0xe5, 0x41, 0xbd, 0xa5, 0xb9, 0xd1, 0xcd, 0x4d, 0x09, 0x34, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, + 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, + 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, + 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, + 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, + 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, + 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, + 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 
0xca, + 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, + 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x8c, 0xc8, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x72, 0x10, 0x87, 0x73, 0x70, 0x03, 0x7b, 0x08, 0x07, 0x79, 0x60, 0x87, 0x70, 0xc8, 0x87, 0x77, 0xa8, 0x07, 0x7a, + 0x98, 0x81, 0x3c, 0xe4, 0x80, 0x0f, 0x6e, 0x40, 0x0f, 0xe5, 0xd0, 0x0e, 0xf0, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x36, 0x50, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, + 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x16, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb7, + 0x6d, 0x02, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0x18, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a, 0xae, 0xec, 0x8a, 0xa0, 0x14, 0x03, 0xa8, 0x94, 0x40, 0x11, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, + 0x82, 0x60, 0xb0, 0x4c, 0x86, 0x00, 0x41, 0xcb, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x2c, 0xd4, 0x21, 0x44, 0x11, 0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x88, 0x75, 0x04, 0xd2, 0x68, 0x42, + 0x10, 0x8c, 0x26, 0x08, 0x40, 0x05, 0xc9, 0x8d, 0x18, 0x34, 0x00, 0x08, 0x82, 0x41, 0x83, 0x25, 0x46, 0xd0, 0x0c, 0x4d, 0xd3, 0x20, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, +}; +#ifdef __GNUC__ +#define UNUSED_ARRAY_ATTR __attribute__((unused)) +#else +#define UNUSED_ARRAY_ATTR +#endif +UNUSED_ARRAY_ATTR static const D3D12_SHADER_BYTECODE update_root_descriptors_dxil = { update_root_descriptors_code_dxil, sizeof(update_root_descriptors_code_dxil) }; +#undef UNUSED_ARRAY_ATTR diff --git a/tests/shaders/descriptors/update_root_descriptors.cs_5_0.hlsl b/tests/shaders/descriptors/update_root_descriptors.cs_5_0.cs_6_0.hlsl similarity 
index 100% rename from tests/shaders/descriptors/update_root_descriptors.cs_5_0.hlsl rename to tests/shaders/descriptors/update_root_descriptors.cs_5_0.cs_6_0.hlsl diff --git a/tests/test-runner.sh b/tests/test-runner.sh index 74efc11a16..662d9fe22d 100755 --- a/tests/test-runner.sh +++ b/tests/test-runner.sh @@ -86,6 +86,7 @@ fi run_tests() { while (($counter < $nr_cpus)) ; do # output to /dev/null by default + echo "Running ${tests[$test_idx]} ..." if [[ -z "$output_dir" ]] ; then VKD3D_TEST_MATCH=${tests[$test_idx]} "$d3d12_bin" &>/dev/null & else