|
1 | | -; RUN: opt -S -passes='early-cse<memssa>' %s | FileCheck %s |
| 1 | +; RUN: opt -S -passes='early-cse<memssa>' %s -o %t |
| 2 | +; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t |
| 3 | +; Finish compiling to verify that dxil-op-lower removes the globals entirely. |
| 4 | +; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s |
| 5 | +; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s |
2 | 6 |
|
3 | 7 | ; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad. |
| 8 | +; Also ensure that DXILOpLowering eliminates the globals entirely. |
4 | 9 |
|
5 | 10 | target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" |
6 | 11 | target triple = "dxilv1.6-unknown-shadermodel6.6-compute" |
7 | 12 |
|
8 | 13 | %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) } |
9 | 14 |
|
| 15 | +; LLC-NOT: @In = global |
| 16 | +; LLC-NOT: @Out = global |
10 | 17 | @In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4 |
11 | 18 | @Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4 |
12 | 19 |
|
13 | 20 | ; Function Attrs: convergent noinline norecurse |
14 | 21 | ; CHECK-LABEL: define void @main() |
; @main is the body of this test. It creates two typed-buffer handles with
; llvm.dx.handle.fromBinding and stores them to the globals @In and @Out.
; It then loads the handle back from @In twice and passes each loaded handle
; to typedBufferLoad, indexed by the flattened thread id. The two loaded
; vectors are added and written with typedBufferStore through %Out_h.i.
; The FileCheck directives interleaved with the code expect that no load
; from @In remains in the output and, for the llc runs, that calls to
; dx.op.createHandle named %In_h.i1 and %Out_h.i2 are present.
; Rows marked "-" are removed by this patch (the %tmp alloca and the store
; into it, plus the old directive spellings); rows marked "+" are added.
15 | 22 | define void @main() local_unnamed_addr #0 { |
16 | 23 | entry: |
17 | | - %tmp = alloca target("dx.TypedBuffer", <4 x float>, 1, 0, 0), align 4 |
| 24 | + ; LLC: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle |
| 25 | + ; LLC: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle |
; Create the input handle and publish it through the global @In.
18 | 26 | %In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false) |
19 | 27 | store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4 |
; Create the output handle and publish it through the global @Out.
20 | 28 | %Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false) |
21 | 29 | store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4 |
22 | | - ; CHECK: call i32 @llvm.dx.flattened.thread.id.in.group() |
| 30 | + ; CSE: call i32 @llvm.dx.flattened.thread.id.in.group() |
23 | 31 | %0 = call i32 @llvm.dx.flattened.thread.id.in.group() |
24 | 32 | ; CHECK-NOT: load {{.*}} ptr @In |
; First reload of the handle from @In, immediately after the store above.
25 | 33 | %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4 |
26 | | - ; CHECK call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t |
| 34 | + ; CSE: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t |
27 | 35 | %2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0) |
28 | 36 | ; CHECK-NOT: load {{.*}} ptr @In |
; Second reload of @In, separated from the first by a typedBufferLoad call;
; this is the load "across typedBufferLoad" that the test is about.
29 | 37 | %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4 |
30 | 38 | %4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0) |
31 | 39 | %add.i = fadd <4 x float> %2, %4 |
32 | | - store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr %tmp, align 4 |
; The result is written through the %Out_h.i value directly, not through a
; reload of @Out.
33 | 40 | call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i) |
| 41 | + ; CHECK: ret void |
34 | 42 | ret void |
35 | 43 | } |
36 | 44 |
|
37 | 45 | ; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) |
38 | 46 | declare i32 @llvm.dx.flattened.thread.id.in.group() #1 |
39 | 47 |
|
40 | 48 | ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn |
41 | | -; CHECK: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]] |
| 49 | +; CSE: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]] |
42 | 50 | declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2 |
43 | 51 |
|
44 | 52 | ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn |
45 | | -; CHECK: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]] |
| 53 | +; CSE: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]] |
46 | 54 | declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2 |
47 | 55 |
|
48 | 56 | ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) |
49 | 57 | declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3 |
50 | 58 |
|
51 | | -; CHECK: attributes [[ROAttr]] = { {{.*}} memory(read) } |
52 | | -; CHECK: attributes [[WOAttr]] = { {{.*}} memory(write) } |
| 59 | +; CSE: attributes [[ROAttr]] = { {{.*}} memory(read) } |
| 60 | +; CSE: attributes [[WOAttr]] = { {{.*}} memory(write) } |
53 | 61 |
|
54 | 62 | attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } |
55 | 63 | attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) } |
|
0 commit comments