Skip to content

Commit 8e966eb

Browse files
authored
New Fragment Density Map Sample (#1411)
* Sample for fragment density map * Fix rebase conflicts * Add shaders spirv * fix format issues in CLI * PR improvements
1 parent 24b5d90 commit 8e966eb

35 files changed

+2772
-8
lines changed

antora/modules/ROOT/nav.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
** xref:samples/extensions/dynamic_rendering/README.adoc[Dynamic rendering]
6464
** xref:samples/extensions/dynamic_rendering_local_read/README.adoc[Dynamic rendering local read]
6565
** xref:samples/extensions/extended_dynamic_state2/README.adoc[Extended dynamic state2]
66+
** xref:samples/extensions/fragment_density_map/README.adoc[Fragment density map]
6667
** xref:samples/extensions/fragment_shader_barycentric/README.adoc[Fragment shader barycentric]
6768
** xref:samples/extensions/fragment_shading_rate/README.adoc[Fragment shading rate]
6869
** xref:samples/extensions/fragment_shading_rate_dynamic/README.adoc[Fragment shading rate dynamic]

framework/api_vulkan_sample.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/* Copyright (c) 2019-2025, Sascha Willems
2+
* Copyright (c) 2025, Arm Limited and Contributors
23
*
34
* SPDX-License-Identifier: Apache-2.0
45
*
@@ -147,7 +148,7 @@ class ApiVulkanSample : public vkb::VulkanSampleC
147148
std::vector<VkCommandBuffer> draw_cmd_buffers;
148149

149150
// Global render pass for frame buffer writes
150-
VkRenderPass render_pass;
151+
VkRenderPass render_pass = VK_NULL_HANDLE;
151152

152153
// List of available frame buffers (same as number of swap chain images)
153154
std::vector<VkFramebuffer> framebuffers;

framework/common/vk_initializers.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* Copyright (c) 2019-2024, Sascha Willems
1+
/* Copyright (c) 2019-2025, Sascha Willems
2+
* Copyright (c) 2025, Arm Limited and Contributors
23
*
34
* SPDX-License-Identifier: Apache-2.0
45
*
@@ -72,6 +73,15 @@ inline VkCommandBufferInheritanceInfo command_buffer_inheritance_info()
7273
return command_buffer_inheritance_info;
7374
}
7475

76+
inline VkComponentMapping component_mapping()
77+
{
78+
return {
79+
.r = VK_COMPONENT_SWIZZLE_R,
80+
.g = VK_COMPONENT_SWIZZLE_G,
81+
.b = VK_COMPONENT_SWIZZLE_B,
82+
.a = VK_COMPONENT_SWIZZLE_A};
83+
}
84+
7585
inline VkRenderPassBeginInfo render_pass_begin_info()
7686
{
7787
VkRenderPassBeginInfo render_pass_begin_info{};

framework/gui.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,12 @@ class Gui
253253

254254
bool update_buffers();
255255

256+
/**
257+
* @brief Shows a child with statistics
258+
* @param stats Statistics to show
259+
*/
260+
void show_stats(const StatsType &stats);
261+
256262
private:
257263
static constexpr char const *default_font = "Roboto-Regular"; // The name of the default font file to use
258264
static constexpr char const *default_window_font = "RobotoMono-Regular"; // The name of the default window font file to use
@@ -282,12 +288,6 @@ class Gui
282288
*/
283289
void show_debug_window(DebugInfo &debug_info, const ImVec2 &position);
284290

285-
/**
286-
* @brief Shows a child with statistics
287-
* @param stats Statistics to show
288-
*/
289-
void show_stats(const StatsType &stats);
290-
291291
/**
292292
* @brief Updates Vulkan buffers
293293
* @param command_buffer Command buffer to draw into

framework/vulkan_type_mapping.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ struct HPPType<VkPhysicalDeviceFloat16Int8FeaturesKHR>
163163
using Type = vk::PhysicalDeviceFloat16Int8FeaturesKHR;
164164
};
165165

166+
template <>
167+
struct HPPType<VkPhysicalDeviceFragmentDensityMapFeaturesEXT>
168+
{
169+
using Type = vk::PhysicalDeviceFragmentDensityMapFeaturesEXT;
170+
};
171+
166172
template <>
167173
struct HPPType<VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR>
168174
{
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright (c) 2025, Arm Limited and Contributors
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 the "License";
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME)
19+
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH)
20+
get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME)
21+
22+
add_sample(
23+
ID ${FOLDER_NAME}
24+
CATEGORY ${CATEGORY_NAME}
25+
AUTHOR "Arm"
26+
NAME "Fragment Density Map"
27+
DESCRIPTION "Demonstration of how to use VK_EXT_fragment_density_map"
28+
SHADER_FILES_GLSL
29+
"fragment_density_map/forward.frag"
30+
"fragment_density_map/forward.vert"
31+
"fragment_density_map/forward_debug.frag"
32+
"fragment_density_map/generate_density_map.comp"
33+
"fragment_density_map/generate_density_map.frag"
34+
"fragment_density_map/quad_uv.vert"
35+
"fragment_density_map/quad_uvw.vert"
36+
"fragment_density_map/sky.frag"
37+
"fragment_density_map/sky_debug.frag"
38+
"fragment_density_map/texture.frag")
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
////
2+
- Copyright (c) 2025 Arm Limited and Contributors
3+
-
4+
- SPDX-License-Identifier: Apache-2.0
5+
-
6+
- Licensed under the Apache License, Version 2.0 the "License";
7+
- you may not use this file except in compliance with the License.
8+
- You may obtain a copy of the License at
9+
-
10+
- http://www.apache.org/licenses/LICENSE-2.0
11+
-
12+
- Unless required by applicable law or agreed to in writing, software
13+
- distributed under the License is distributed on an "AS IS" BASIS,
14+
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
- See the License for the specific language governing permissions and
16+
- limitations under the License.
17+
-
18+
////
19+
20+
= Fragment Density Map
21+
22+
ifdef::site-gen-antora[]
23+
TIP: The source for this sample can be found in the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/fragment_density_map[Khronos Vulkan Samples GitHub repository].
24+
endif::[]
25+
26+
== Overview
27+
28+
This sample demonstrates the `VK_EXT_fragment_density_map` extension. The extension allows different regions of a render target to be shaded at different rates, broadcasting a single fragment invocation to multiple texels and then compositing the results for presentation. Control is provided through a dedicated attachment in the render pass that defines the approximate shading density of regions of the target image.
29+
30+
=== Potential uses of Fragment Density Map
31+
32+
By lowering the shading rate in parts of the image that are less important perceptually, applications can reclaim performance. A common use case is foveated rendering in Virtual Reality (VR): with eye tracking, you render the gaze region at full resolution and peripheral regions at lower resolution. When done correctly (with sufficiently low latency), this can significantly reduce fragment invocations without perceptually sacrificing image quality.
33+
34+
NOTE: VR is not a requirement for fragment density map, but foveated rendering in VR is its main use case.
35+
36+
== Enabling Fragment Density Map
37+
=== Enabling extensions
38+
39+
The key extension used in this sample is `VK_EXT_fragment_density_map`, along with its dependencies `VK_KHR_create_renderpass2` and `VK_KHR_get_physical_device_properties2`. While `VK_EXT_fragment_density_map2` exists and can reduce host-to-device latency, it is not used in this sample.
40+
41+
Developers who want to use `VK_EXT_fragment_density_map` should also consider reviewing xref:samples/extensions/fragment_shading_rate/README.adoc[Fragment Shading Rate] (`VK_KHR_fragment_shading_rate`). The KHR extension offers applications more precise control over the shading rate and provides additional options, allowing developers to select a rate per draw call, per primitive, or—similar to `VK_EXT_fragment_density_map`—per region of the framebuffer.
42+
43+
=== Feature description
44+
45+
[,C++]
46+
----
47+
VkPhysicalDeviceFragmentDensityMapFeaturesEXT fdm_features{
48+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT
49+
};
50+
51+
fdm_features.fragmentDensityMap = VK_TRUE;
52+
fdm_features.fragmentDensityMapDynamic = VK_TRUE;
53+
fdm_features.fragmentDensityMapNonSubsampledImages = VK_FALSE;
54+
----
55+
56+
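Fragment density map exposes optional features. Query support for them with `vkGetPhysicalDeviceFeatures2` (or `vkGetPhysicalDeviceFeatures2KHR`), then enable the supported ones at device creation by adding the features structure to the `pNext` chain of `VkDeviceCreateInfo`.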
57+
58+
* `fragmentDensityMap` should always be enabled; without it the extension has no effect.
59+
* `fragmentDensityMapDynamic` reduces latency between command-buffer recording and draw execution by removing the requirement that the density map be ready at record time. This is not supported on all devices. On devices that do not support it, `VK_EXT_fragment_density_map2` can help reduce latency.
60+
* `fragmentDensityMapNonSubsampledImages` (not universally supported) allows render passes to use non-subsampled attachments, potentially enabling direct rendering to the swapchain.
61+
62+
=== Density map image
63+
64+
The density map must:
65+
66+
* Use a format with support for `VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT` (for example, `VK_FORMAT_R8G8_UNORM`).
67+
* Store values in the range `(0.0, 1.0]`, where `1.0` represents full density on the respective X/Y axis. Specific fragment sizes are implementation-defined.
68+
* Use image layout `VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT` whenever it is ready for use.
69+
* Be created with usage flag `VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT`.
70+
71+
CAUTION: Some devices report fragment density map support only for the trivial density `(1,1)`, which is the default shading rate for conventional rasterization.
72+
73+
The density map image extent must be a factor of the render-target extent. Query `VkPhysicalDeviceFragmentDensityMapPropertiesEXT` via `vkGetPhysicalDeviceProperties2` to determine supported factors. The fields `minFragmentDensityTexelSize` and `maxFragmentDensityTexelSize` define how many render-target texels each fragment density map texel can influence. A simple approach is to divide the render-target extent by `maxFragmentDensityTexelSize` (rounding up) to compute the density-map extent.
74+
75+
If `fragmentDensityMapDynamic` is used, create the image view with the `VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DYNAMIC_BIT_EXT` flag.
76+
77+
NOTE: When using fragment density map without `fragmentDensityMapNonSubsampledImages`, all images used within the affected render pass must be created with `VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT`.
78+
79+
=== Read time of the density map
80+
81+
The density map is read at different times depending on the `fragmentDensityMapDynamic` setting and the `VK_EXT_fragment_density_map2` configuration:
82+
83+
- If `fragmentDensityMapDynamic = false`, the density map must be available at command-buffer record time, and the map cannot be modified after calling `vkCmdBeginRenderPass(...)`. The map is still used after submission, which can introduce noticeable lag between density-map updates and the displayed frame.
84+
- If `fragmentDensityMapDynamic = true`, the density map is read during `VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT`, eliminating the need to produce the map before recording the command buffer. This yields the lowest latency but requires driver support.
85+
- `VK_EXT_fragment_density_map2` improves performance on devices without `fragmentDensityMapDynamic` by allowing modifications to the density map between `vkCmdBeginRenderPass(...)` and `vkEndCommandBuffer(...)`. See https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_fragment_density_map2.html[VK_EXT_fragment_density_map2] for more details.
86+
87+
=== Render pass setup
88+
89+
Fragment density map operates at the render-pass level. Provide a `VkRenderPassFragmentDensityMapCreateInfoEXT` in the `pNext` chain of `VkRenderPassCreateInfo`. This structure specifies the attachment and its layout. Regardless of subpass count, it is recommended to add a subpass dependency with `.srcSubpass = VK_SUBPASS_EXTERNAL` and `.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT` to ensure the density map is ready before use. You may omit this if readiness is otherwise guaranteed (e.g., a static density map created at startup).
90+
91+
Fragment density map requires a specific attachment configuration:
92+
93+
[,C++]
94+
----
95+
VkAttachmentDescription2KHR fdm_attachment{
96+
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR,
97+
.format = VK_FORMAT_R8G8_UNORM,
98+
.samples = VK_SAMPLE_COUNT_1_BIT,
99+
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
100+
.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
101+
.initialLayout = VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
102+
.finalLayout = VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
103+
// ...
104+
};
105+
----
106+
107+
Unless `fragmentDensityMapNonSubsampledImages` is enabled, fragment density map render passes cannot render directly to a swapchain image because the result is subsampled/sparse. Use an intermediate pass to sample the subsampled color and write to the swapchain. This pass requires a specific sampler configuration (see below). It is also recommended to render the UI and perform other composition at full resolution in this pass.
108+
109+
IMPORTANT: If `fragmentDensityMapNonSubsampledImages` is enabled, fragment density map render passes may render directly to the swapchain image. However, if your application does additional composition (e.g., UI), an intermediate pass is still recommended.
110+
111+
TIP: The intermediate pass typically also draws/composites the UI at full surface resolution.
112+
113+
Below is the simple pipeline arrangement used in this sample.
114+
115+
1. The density map is computed once outside the render loop and reused each frame. This is done in a separate command buffer; all other commands are recorded into a single main command buffer. The sample also includes a UI option to update the fragment density map attachment every frame using `fragmentDensityMapDynamic`. If this option is selected, an additional pass at the beginning of the main command buffer produces the fragment density map attachment.
116+
117+
2. The sample renders a simple scene using forward rendering. The main pass uses the fragment density map as an attachment. Typically, this pass is rendered at a lower resolution, but in our sample it is rendered at a higher resolution to make the performance difference more noticeable.
118+
119+
3. A final pass samples the main pass and renders the UI. Note that this present/composition pass is rendered at full resolution.
120+
121+
NOTE: To make the performance benefit of fragment density map more noticeable, we render the main pass at an increased resolution.
122+
123+
To help visualize and better understand the extension, the sample provides the following options:
124+
125+
- *Enable FDM:* Enables or disables the fragment density map. This option can be used to evaluate the performance benefits of the extension.
126+
- *Update FDM each frame:* Uses `fragmentDensityMapDynamic` to update the fragment density map every frame.
127+
- *Generate FDM with compute:* If enabled, the fragment density map is generated using a compute shader; otherwise, it is generated using a fragment shader.
128+
- *Show FDM:* Displays the image used as the fragment density map attachment.
129+
- *Debug FDM:* Uses `gl_FragSizeEXT` to display the fragment size used when rendering the image.
130+
- *Show stats:* Displays a graph with useful statistics such as frame time and GPU activity.
131+
132+
To help visualize the current fragment size, the sample has an option to draw the selected fragment size. This is done using different shaders that employ `gl_FragSizeEXT` and `GL_EXT_fragment_invocation_density`.
133+
134+
These images show the configured density map and the resulting image. Actual results may differ from expectations. Implementations may clamp the requested fragment area to a supported one. The clamped area must be less than or equal in size to the requested area, and the supported set may vary across framebuffer regions.
135+
136+
NOTE: Implementations may choose areas with lower size in one dimension. For example, if the requested fragment area is `(1,4)`, an implementation may clamp to `(2,2)` because it has the same area. The selected density must be higher, but this allows an implementation to render a dimension at a lower resolution.
137+
138+
NOTE: Implementations may fetch additional density map texels around a window, selecting a fragment size for an entire region. This can produce results that differ from what one would expect. For example, link:fdm_comparison.png[this image, window=_blank] shows how the same fragment density map sample is interpreted by two GPUs.
139+
140+
This first image shows the fragment density map attachment specified in the sample:
141+
142+
image::fdm_show.png[Density Map Attachment]
143+
144+
This second image shows the result on a Mali G715 GPU:
145+
146+
image::fdm_g715.png[Density Map Final Result on Mali G715]
147+
148+
As observed, the attachment is composed of a series of inner circles of different values:
149+
150+
* The innermost circle has a density of `(1.0, 1.0)`, corresponding to `1×1`.
151+
* The second circle has densities of `(1.0, 0.5)` and `(0.5, 1.0)`. This corresponds to desired sizes of `1×2` and `2×1`, but the current implementation selects a higher density (smaller fragment size) of `1×1`.
152+
* The third circle has a density of `(0.5, 0.5)`, corresponding to `2×2`.
153+
* The fourth circle has densities of `(0.5, 0.25)` and `(0.25, 0.5)`, corresponding to `2×4` and `4×2`. The implementation is selecting a higher density of `2×2`.
154+
* The periphery has a density of `(0.25, 0.25)`, corresponding to a fragment size of `4×4`.
155+
156+
This attachment emulates foveated rendering: the gaze-centered region (eye center) is in focus and rendered at higher resolution, while the periphery is out of focus and rendered with less detail.
157+
158+
=== Sampler
159+
160+
Images that interact with fragment density map must be sampled with a specially configured sampler:
161+
162+
[,C++]
163+
----
164+
VkSamplerCreateInfo sampler_create_info{
165+
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
166+
.pNext = nullptr,
167+
.flags = VK_SAMPLER_CREATE_SUBSAMPLED_BIT_EXT,
168+
.magFilter = VK_FILTER_NEAREST,
169+
.minFilter = VK_FILTER_NEAREST,
170+
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
171+
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
172+
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
173+
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
174+
.mipLodBias = 0.0f,
175+
.anisotropyEnable = VK_FALSE,
176+
.maxAnisotropy = 0.0f,
177+
.compareEnable = VK_FALSE,
178+
.compareOp = VK_COMPARE_OP_ALWAYS,
179+
.unnormalizedCoordinates = VK_FALSE,
180+
// ...
181+
};
182+
----
183+
184+
The most important fields are `.flags`, `.minFilter`, `.magFilter`, and `.mipmapMode`. The sampler used to read subsampled images must match these settings exactly. Use this sampler for any image influenced by fragment density map.
185+
186+
== Conclusion
187+
188+
////
189+
This is necessary to have the sample show up on the build for the docs site under https://docs.vulkan.org
190+
////
191+
192+
`VK_EXT_fragment_density_map` is particularly effective with VR and `VK_KHR_multiview`, enabling techniques like foveated rendering that reduce peripheral shading work while preserving perceived quality.
193+
194+
However, we suggest that developers also consider xref:samples/extensions/fragment_shading_rate/README.adoc[Fragment Shading Rate] (`VK_KHR_fragment_shading_rate`), which generally offers broader device support, simpler setup, more options, and greater control when specifying where lower shading rates apply.
1.89 MB
Loading
1.14 MB
Loading
985 KB
Loading

0 commit comments

Comments
 (0)