Skip to content

Commit d1a51dd

Browse files
tests: Add test for groupshared overflow behavior.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
1 parent ac10032 commit d1a51dd

File tree

4 files changed

+241
-0
lines changed

4 files changed

+241
-0
lines changed

tests/d3d12_shaders.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15148,4 +15148,102 @@ void test_alloca_out_of_bounds_dxbc(void)
1514815148
/* Runs the alloca out-of-bounds test with the DXIL flag set. */
void test_alloca_out_of_bounds_dxil(void)
{
    const bool use_dxil = true;

    test_alloca_out_of_bounds(use_dxil);
}
15152+
15153+
static void test_groupshared_out_of_bounds(bool use_dxil)
15154+
{
15155+
D3D12_ROOT_SIGNATURE_DESC rs_desc;
15156+
D3D12_ROOT_PARAMETER rs_param[2];
15157+
struct test_context context;
15158+
struct resource_readback rb;
15159+
ID3D12Resource *output;
15160+
ID3D12Resource *input;
15161+
unsigned int i, j, k;
15162+
bool known_behavior;
15163+
15164+
#include "shaders/shaders/headers/groupshared_out_of_bounds.h"
15165+
15166+
/* Just arbitrary test stimuli. Make sure to test negative and huge u32 indices. */
15167+
static const int32_t input_data[8][16][2] = {
15168+
{ { 1, 4 }, { 3, 10 }, { 7, -100 }, {8, 400}, {15, 2}, {16, 40}, {-1, 500}, {1000000, 1}, {0x20000000, 50}, {0x40000000, 80} },
15169+
{ { 1, 4 }, { 3, 10 }, { -7, -100 }, { 8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15170+
{ { 1, 4 }, { 3, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { -16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15171+
{ { 1, 4 }, { 3, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15172+
{ { 1, 4 }, { 3, 10 }, { 7, -100 }, { -8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15173+
{ { 10000, 4 }, { 10000, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15174+
{ { 1, 4 }, { 3, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { -16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15175+
{ { 1, 4 }, { 1, 10 }, { 1, -100 }, { 1, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15176+
};
15177+
15178+
if (!init_compute_test_context(&context))
15179+
return;
15180+
15181+
memset(rs_param, 0, sizeof(rs_param));
15182+
memset(&rs_desc, 0, sizeof(rs_desc));
15183+
rs_desc.NumParameters = ARRAY_SIZE(rs_param);
15184+
rs_desc.pParameters = rs_param;
15185+
rs_param[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
15186+
rs_param[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
15187+
rs_param[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
15188+
rs_param[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
15189+
create_root_signature(context.device, &rs_desc, &context.root_signature);
15190+
context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature,
15191+
use_dxil ? groupshared_out_of_bounds_dxil : groupshared_out_of_bounds_dxbc);
15192+
15193+
output = create_default_buffer(context.device, 16 * sizeof(int32_t), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COMMON);
15194+
input = create_upload_buffer(context.device, sizeof(input_data), input_data);
15195+
15196+
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
15197+
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
15198+
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(output));
15199+
ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 1, ID3D12Resource_GetGPUVirtualAddress(input));
15200+
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
15201+
transition_resource_state(context.list, output, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
15202+
get_buffer_readback_with_command_list(output, DXGI_FORMAT_UNKNOWN, &rb, context.queue, context.list);
15203+
15204+
known_behavior = is_nvidia_device(context.device) || is_amd_vulkan_device(context.device) || is_amd_windows_device(context.device);
15205+
15206+
/* This modelling matches AMD and NV hardware it seems. */
15207+
for (j = 0; j < 16; j++)
15208+
{
15209+
int32_t expected = 0;
15210+
int32_t value;
15211+
15212+
for (i = 0; i < 8; i++)
15213+
{
15214+
for (k = 0; k < 16; k++)
15215+
{
15216+
uint32_t write_addr = input_data[i][k][0];
15217+
bool overflow_write = write_addr >= 0x40000000;
15218+
/* Internally, we assume a u32 LDS offset is computed. This can overflow. */
15219+
write_addr &= 0x3fffffff;
15220+
if (write_addr == j)
15221+
expected += (overflow_write ? 2 : 1) * input_data[i][k][1];
15222+
if (j >= 8 && write_addr == j)
15223+
expected += input_data[i][k][1];
15224+
}
15225+
}
15226+
15227+
value = get_readback_uint(&rb, j, 0, 0);
15228+
15229+
/* We don't know exact behavior for other GPUs, but we shouldn't hang the GPU at least.
15230+
* Depending on how the driver lays out the groupshared, we can get unexpected result in these indices, but that's okay. */
15231+
todo_if(!known_behavior || j == 0 || j == 8 || j == 9 || j == 15)
15232+
ok(value == expected, "value %u: expected %d, got %d\n", j, expected, value);
15233+
}
15234+
15235+
ID3D12Resource_Release(input);
15236+
ID3D12Resource_Release(output);
15237+
release_resource_readback(&rb);
15238+
destroy_test_context(&context);
15239+
}
15240+
15241+
/* Groupshared out-of-bounds test using the DXBC build of the shader. */
void test_groupshared_out_of_bounds_dxbc(void)
{
    const bool use_dxil = false;

    test_groupshared_out_of_bounds(use_dxil);
}
15245+
15246+
/* Groupshared out-of-bounds test using the DXIL build of the shader. */
void test_groupshared_out_of_bounds_dxil(void)
{
    const bool use_dxil = true;

    test_groupshared_out_of_bounds(use_dxil);
}

tests/d3d12_tests.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,3 +444,5 @@ decl_test(test_constant_lut_out_of_bounds_dxbc);
444444
decl_test(test_constant_lut_out_of_bounds_dxil);
445445
decl_test(test_alloca_out_of_bounds_dxbc);
446446
decl_test(test_alloca_out_of_bounds_dxil);
447+
/* Entries for the groupshared out-of-bounds tests; the DXBC and DXIL entry
 * points are defined in tests/d3d12_shaders.c.
 * NOTE(review): decl_test is defined outside this view; presumably it
 * declares/registers the named test function - confirm. */
decl_test(test_groupshared_out_of_bounds_dxbc);
448+
decl_test(test_groupshared_out_of_bounds_dxil);
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Compute shader for the groupshared out-of-bounds test.
// Each of the 8 threads reads 16 (index, addend) pairs and atomically adds the
// addend into groupshared memory at the given index. The index is never range
// checked, so negative or huge indices access groupshared memory out of bounds.

// Input: (index, addend) pairs, 16 consecutive pairs per thread.
StructuredBuffer<int2> IndexPairs : register(t0);
2+
// Output: 16 ints; [0..7] mirror local_array, [8..15] mirror local_array2.
RWStructuredBuffer<int> Outputs : register(u0);
3+
4+
// Try to expose overflow within a spill region or something.
// Two 8-element arrays, so an out-of-range access to one of them may become
// visible in the other (depends on how the driver lays out groupshared memory).
5+
groupshared int local_array[8];
6+
groupshared int local_array2[8];
7+
8+
[numthreads(8, 1, 1)]
9+
void main(uint gid : SV_DispatchThreadID)
10+
{
11+
// Each thread clears its own slot in both arrays before any atomics run.
local_array[gid] = 0;
12+
local_array2[gid] = 0;
13+
GroupMemoryBarrierWithGroupSync();
14+
// 'o' only receives the original value returned by InterlockedAdd; it is never read.
int i, o;
15+
16+
[loop]
17+
for (i = 0; i < 16; i++)
18+
{
19+
// Thread 'gid' consumes pairs [16 * gid, 16 * gid + 15].
int2 pair = IndexPairs[16 * gid + i];
20+
// Unchecked index: pair.x may be negative or far beyond the array size.
InterlockedAdd(local_array[pair.x], pair.y, o);
21+
// Second array is indexed with pair.x - 8; the addend is forced to 0 when
// pair.x < 8, but the (possibly out-of-range) atomic is still issued.
InterlockedAdd(local_array2[pair.x - 8], pair.x >= 8 ? pair.y : 0, o);
22+
}
23+
24+
// Wait for all atomics in the group before reading the results back.
GroupMemoryBarrierWithGroupSync();
25+
26+
// Each thread writes one element of each array to the output buffer.
Outputs[gid + 0] = local_array[gid];
27+
Outputs[gid + 8] = local_array2[gid];
28+
}

0 commit comments

Comments
 (0)