Skip to content

Commit 4bea2f6

Browse files
authored
[NFC] Manually optimize writes (#346)
This manually applies an optimization that Clang performs, to see whether the NV driver can be tripped up through DXC as well as through Clang.
1 parent 8b04ac1 commit 4bea2f6

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

test/Feature/WaveOps/WaveIsFirstLane.test

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,21 @@ RWStructuredBuffer<uint> Out : register(u1);
44

55
[numthreads(4, 1, 1)]
66
void main(uint3 threadID : SV_DispatchThreadID) {
7+
uint tmp = 0xFF;
78
switch (value[threadID.x]) {
89
case 0:
9-
Out[threadID.x] = WaveIsFirstLane(); // threads 0 and 1; 0 is first for both
10+
tmp = WaveIsFirstLane(); // threads 0 and 1; 0 is first for both
1011
break;
1112
case 2:
12-
Out[threadID.x] = WaveIsFirstLane(); // thread 3; 3 is first
13+
tmp = WaveIsFirstLane(); // thread 3; 3 is first
1314
break;
1415
default:
15-
Out[threadID.x] = WaveIsFirstLane(); // thread 2; 2 is first
16+
tmp = WaveIsFirstLane(); // thread 2; 2 is first
1617
break;
1718
}
19+
// Using a temporary value that is wave-divergent here seems to hit a driver
20+
// bug in the NV 50-series GPUs
21+
Out[threadID.x] = tmp;
1822
Out[threadID.x + 4] = WaveIsFirstLane(); // 0 is first for all
1923
}
2024

@@ -64,6 +68,8 @@ DescriptorSets:
6468
# https://github.com/llvm/llvm-project/issues/145513
6569
# XFAIL: Clang-Vulkan
6670

71+
# XFAIL: NV-Reconvergence-Issue-320
72+
6773
# RUN: split-file %s %t
6874
# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
6975
# RUN: %offloader %t/pipeline.yaml %t.o

test/lit.cfg.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ def setDeviceFeatures(config, device, compiler):
6363
config.available_features.add("Intel-Memory-Coherence-Issue-226")
6464
if "NVIDIA" in device["Description"]:
6565
config.available_features.add("%s-NV" % API)
66+
NV50SeriesRegex = re.compile("NVIDIA GeForce [A-Z]+ 50[0-9]+")
67+
NV50SeriesMatch = NV50SeriesRegex.match(device["Description"])
68+
if NV50SeriesMatch and API == "DirectX":
69+
config.available_features.add("NV-Reconvergence-Issue-320")
6670
if "AMD" in device["Description"]:
6771
config.available_features.add("%s-AMD" % API)
6872

0 commit comments

Comments
 (0)