|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
1 | 2 | ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s |
2 | 3 |
|
3 | | -declare i32 @llvm.amdgcn.readfirstlane(i32) #0 |
4 | | - |
5 | | -; CHECK-LABEL: {{^}}test_readfirstlane: |
6 | | -; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v2 |
7 | | -define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 { |
| 4 | +define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) { |
| 5 | +; CHECK-LABEL: test_readfirstlane: |
| 6 | +; CHECK: ; %bb.0: |
| 7 | +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8 | +; CHECK-NEXT: v_readfirstlane_b32 s4, v2 |
| 9 | +; CHECK-NEXT: v_mov_b32_e32 v2, s4 |
| 10 | +; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| 11 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 12 | +; CHECK-NEXT: s_setpc_b64 s[30:31] |
8 | 13 | %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src) |
9 | 14 | store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
10 | 15 | ret void |
11 | 16 | } |
12 | 17 |
|
13 | | -; CHECK-LABEL: {{^}}test_readfirstlane_imm: |
14 | | -; CHECK: s_mov_b32 [[SGPR_VAL:s[0-9]]], 32 |
15 | | -; CHECK-NOT: [[SGPR_VAL]] |
16 | | -; CHECK: ; use [[SGPR_VAL]] |
17 | | -define amdgpu_kernel void @test_readfirstlane_imm(ptr addrspace(1) %out) #1 { |
| 18 | +define amdgpu_kernel void @test_readfirstlane_imm(ptr addrspace(1) %out) { |
| 19 | +; CHECK-LABEL: test_readfirstlane_imm: |
| 20 | +; CHECK: ; %bb.0: |
| 21 | +; CHECK-NEXT: s_mov_b32 s0, 32 |
| 22 | +; CHECK-NEXT: ;;#ASMSTART |
| 23 | +; CHECK-NEXT: ; use s0 |
| 24 | +; CHECK-NEXT: ;;#ASMEND |
| 25 | +; CHECK-NEXT: s_endpgm |
18 | 26 | %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32) |
19 | 27 | call void asm sideeffect "; use $0", "s"(i32 %readfirstlane) |
20 | 28 | ret void |
21 | 29 | } |
22 | 30 |
|
23 | | -; CHECK-LABEL: {{^}}test_readfirstlane_imm_fold: |
24 | | -; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], 32 |
25 | | -; CHECK-NOT: [[VVAL]] |
26 | | -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]] |
27 | | -define amdgpu_kernel void @test_readfirstlane_imm_fold(ptr addrspace(1) %out) #1 { |
| 31 | +define amdgpu_kernel void @test_readfirstlane_imm_fold(ptr addrspace(1) %out) { |
| 32 | +; CHECK-LABEL: test_readfirstlane_imm_fold: |
| 33 | +; CHECK: ; %bb.0: |
| 34 | +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| 35 | +; CHECK-NEXT: v_mov_b32_e32 v2, 32 |
| 36 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 37 | +; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| 38 | +; CHECK-NEXT: v_mov_b32_e32 v1, s1 |
| 39 | +; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| 40 | +; CHECK-NEXT: s_endpgm |
28 | 41 | %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32) |
29 | 42 | store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
30 | 43 | ret void |
31 | 44 | } |
32 | 45 |
|
33 | | -; CHECK-LABEL: {{^}}test_readfirstlane_m0: |
34 | | -; CHECK: s_mov_b32 m0, -1 |
35 | | -; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0 |
36 | | -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]] |
37 | | -define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 { |
| 46 | +define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) { |
| 47 | +; CHECK-LABEL: test_readfirstlane_m0: |
| 48 | +; CHECK: ; %bb.0: |
| 49 | +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| 50 | +; CHECK-NEXT: ;;#ASMSTART |
| 51 | +; CHECK-NEXT: s_mov_b32 m0, -1 |
| 52 | +; CHECK-NEXT: ;;#ASMEND |
| 53 | +; CHECK-NEXT: v_mov_b32_e32 v2, m0 |
| 54 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 55 | +; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| 56 | +; CHECK-NEXT: v_mov_b32_e32 v1, s1 |
| 57 | +; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| 58 | +; CHECK-NEXT: s_endpgm |
38 | 59 | %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"() |
39 | 60 | %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0) |
40 | 61 | store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
41 | 62 | ret void |
42 | 63 | } |
43 | 64 |
|
44 | | -; CHECK-LABEL: {{^}}test_readfirstlane_copy_from_sgpr: |
45 | | -; CHECK: ;;#ASMSTART |
46 | | -; CHECK-NEXT: s_mov_b32 [[SGPR:s[0-9]+]] |
47 | | -; CHECK: ;;#ASMEND |
48 | | -; CHECK-NOT: [[SGPR]] |
49 | | -; CHECK-NOT: readfirstlane |
50 | | -; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]] |
51 | | -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]] |
52 | | -define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(ptr addrspace(1) %out) #1 { |
| 65 | +define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(ptr addrspace(1) %out) { |
| 66 | +; CHECK-LABEL: test_readfirstlane_copy_from_sgpr: |
| 67 | +; CHECK: ; %bb.0: |
| 68 | +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| 69 | +; CHECK-NEXT: ;;#ASMSTART |
| 70 | +; CHECK-NEXT: s_mov_b32 s2, 0 |
| 71 | +; CHECK-NEXT: ;;#ASMEND |
| 72 | +; CHECK-NEXT: v_mov_b32_e32 v2, s2 |
| 73 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 74 | +; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| 75 | +; CHECK-NEXT: v_mov_b32_e32 v1, s1 |
| 76 | +; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| 77 | +; CHECK-NEXT: s_endpgm |
53 | 78 | %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"() |
54 | 79 | %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %sgpr) |
55 | 80 | store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
56 | 81 | ret void |
57 | 82 | } |
58 | 83 |
|
59 | | -; Make sure this doesn't crash. |
60 | | -; CHECK-LABEL: {{^}}test_readfirstlane_fi: |
61 | | -; CHECK: s_mov_b32 [[FIVAL:s[0-9]]], 0 |
62 | | -define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 { |
| 84 | +define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) { |
| 85 | +; CHECK-LABEL: test_readfirstlane_fi: |
| 86 | +; CHECK: ; %bb.0: |
| 87 | +; CHECK-NEXT: s_add_u32 s0, s0, s9 |
| 88 | +; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| 89 | +; CHECK-NEXT: s_mov_b32 s4, 0 |
| 90 | +; CHECK-NEXT: ;;#ASMSTART |
| 91 | +; CHECK-NEXT: ; use s4 |
| 92 | +; CHECK-NEXT: ;;#ASMEND |
| 93 | +; CHECK-NEXT: s_endpgm |
63 | 94 | %alloca = alloca i32, addrspace(5) |
64 | 95 | %int = ptrtoint ptr addrspace(5) %alloca to i32 |
65 | 96 | %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int) |
66 | 97 | call void asm sideeffect "; use $0", "s"(i32 %readfirstlane) |
67 | 98 | ret void |
68 | 99 | } |
69 | | - |
70 | | -attributes #0 = { nounwind readnone convergent } |
71 | | -attributes #1 = { nounwind } |
|
0 commit comments