@@ -15148,4 +15148,102 @@ void test_alloca_out_of_bounds_dxbc(void)
/* Public entry point for the DXIL flavour of the alloca out-of-bounds test:
 * forwards to test_alloca_out_of_bounds() with its flag set to true. */
void test_alloca_out_of_bounds_dxil(void)
{
    const bool dxil_variant = true;

    test_alloca_out_of_bounds(dxil_variant);
}
15152+
15153+ static void test_groupshared_out_of_bounds(bool use_dxil)
15154+ {
15155+ D3D12_ROOT_SIGNATURE_DESC rs_desc;
15156+ D3D12_ROOT_PARAMETER rs_param[2];
15157+ struct test_context context;
15158+ struct resource_readback rb;
15159+ ID3D12Resource *output;
15160+ ID3D12Resource *input;
15161+ unsigned int i, j, k;
15162+ bool known_behavior;
15163+
15164+ #include "shaders/shaders/headers/groupshared_out_of_bounds.h"
15165+
15166+ /* Just arbitrary test stimuli. Make sure to test negative and huge u32 indices. */
15167+ static const int32_t input_data[8][16][2] = {
15168+ { { 1, 4 }, { 3, 10 }, { 7, -100 }, {8, 400}, {15, 2}, {16, 40}, {-1, 500}, {1000000, 1}, {0x20000000, 50}, {0x40000000, 80} },
15169+ { { 1, 4 }, { 3, 10 }, { -7, -100 }, { 8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15170+ { { 1, 4 }, { 3, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { -16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15171+ { { 1, 4 }, { 3, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15172+ { { 1, 4 }, { 3, 10 }, { 7, -100 }, { -8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15173+ { { 10000, 4 }, { 10000, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15174+ { { 1, 4 }, { 3, 10 }, { 7, -100 }, { 8, 400 }, { 15, 2 }, { -16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15175+ { { 1, 4 }, { 1, 10 }, { 1, -100 }, { 1, 400 }, { 15, 2 }, { 16, 40 }, { -1, 500 }, { 1000000, 1 }, { 0x20000000, 50 }, { 0x40000000, 80 } },
15176+ };
15177+
15178+ if (!init_compute_test_context(&context))
15179+ return;
15180+
15181+ memset(rs_param, 0, sizeof(rs_param));
15182+ memset(&rs_desc, 0, sizeof(rs_desc));
15183+ rs_desc.NumParameters = ARRAY_SIZE(rs_param);
15184+ rs_desc.pParameters = rs_param;
15185+ rs_param[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
15186+ rs_param[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
15187+ rs_param[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
15188+ rs_param[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
15189+ create_root_signature(context.device, &rs_desc, &context.root_signature);
15190+ context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature,
15191+ use_dxil ? groupshared_out_of_bounds_dxil : groupshared_out_of_bounds_dxbc);
15192+
15193+ output = create_default_buffer(context.device, 16 * sizeof(int32_t), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COMMON);
15194+ input = create_upload_buffer(context.device, sizeof(input_data), input_data);
15195+
15196+ ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
15197+ ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
15198+ ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(output));
15199+ ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 1, ID3D12Resource_GetGPUVirtualAddress(input));
15200+ ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
15201+ transition_resource_state(context.list, output, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
15202+ get_buffer_readback_with_command_list(output, DXGI_FORMAT_UNKNOWN, &rb, context.queue, context.list);
15203+
15204+ known_behavior = is_nvidia_device(context.device) || is_amd_vulkan_device(context.device) || is_amd_windows_device(context.device);
15205+
15206+ /* This modelling matches AMD and NV hardware it seems. */
15207+ for (j = 0; j < 16; j++)
15208+ {
15209+ int32_t expected = 0;
15210+ int32_t value;
15211+
15212+ for (i = 0; i < 8; i++)
15213+ {
15214+ for (k = 0; k < 16; k++)
15215+ {
15216+ uint32_t write_addr = input_data[i][k][0];
15217+ bool overflow_write = write_addr >= 0x40000000;
15218+ /* Internally, we assume a u32 LDS offset is computed. This can overflow. */
15219+ write_addr &= 0x3fffffff;
15220+ if (write_addr == j)
15221+ expected += (overflow_write ? 2 : 1) * input_data[i][k][1];
15222+ if (j >= 8 && write_addr == j)
15223+ expected += input_data[i][k][1];
15224+ }
15225+ }
15226+
15227+ value = get_readback_uint(&rb, j, 0, 0);
15228+
15229+ /* We don't know exact behavior for other GPUs, but we shouldn't hang the GPU at least.
15230+ * Depending on how the driver lays out the groupshared, we can get unexpected result in these indices, but that's okay. */
15231+ todo_if(!known_behavior || j == 0 || j == 8 || j == 9 || j == 15)
15232+ ok(value == expected, "value %u: expected %d, got %d\n", j, expected, value);
15233+ }
15234+
15235+ ID3D12Resource_Release(input);
15236+ ID3D12Resource_Release(output);
15237+ release_resource_readback(&rb);
15238+ destroy_test_context(&context);
15239+ }
15240+
/* Public entry point for the DXBC flavour of the groupshared out-of-bounds
 * test: forwards to test_groupshared_out_of_bounds() with use_dxil = false. */
void test_groupshared_out_of_bounds_dxbc(void)
{
    const bool use_dxil = false;

    test_groupshared_out_of_bounds(use_dxil);
}
15245+
/* Public entry point for the DXIL flavour of the groupshared out-of-bounds
 * test: forwards to test_groupshared_out_of_bounds() with use_dxil = true. */
void test_groupshared_out_of_bounds_dxil(void)
{
    const bool use_dxil = true;

    test_groupshared_out_of_bounds(use_dxil);
}
0 commit comments