@@ -1,11 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s

-; GCN-LABEL: {{^}}alignbit_shr_pat:
-; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]
-; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
-; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
-
 define amdgpu_kernel void @alignbit_shr_pat(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; GCN-LABEL: alignbit_shr_pat:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dword s8, s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_and_b32 s0, s8, 31
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], s0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
 bb:
   %tmp = load i64, ptr addrspace(1) %arg, align 8
   %tmp3 = and i32 %arg2, 31
@@ -16,12 +29,24 @@
   ret void
 }

-; GCN-LABEL: {{^}}alignbit_shr_pat_v:
-; GCN-DAG: load_dword v[[SHR:[0-9]+]],
-; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
-; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]
-
 define amdgpu_kernel void @alignbit_shr_pat_v(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; GCN-LABEL: alignbit_shr_pat_v:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v0, v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v4, v3, v0
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: s_endpgm
 bb:
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep1 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid
@@ -36,12 +61,24 @@
   ret void
 }

-; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and30:
-; Negative test, wrong constant
-; GCN: v_lshr_b64
-; GCN-NOT: v_alignbit_b32
-
 define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; GCN-LABEL: alignbit_shr_pat_wrong_and30:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dword s8, s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_and_b32 s0, s8, 30
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], s0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
 bb:
   %tmp = load i64, ptr addrspace(1) %arg, align 8
   %tmp3 = and i32 %arg2, 30
@@ -52,12 +89,23 @@
   ret void
 }

-; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and63:
-; Negative test, wrong constant
-; GCN: v_lshr_b64
-; GCN-NOT: v_alignbit_b32
-
 define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; GCN-LABEL: alignbit_shr_pat_wrong_and63:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dword s8, s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], s8
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
 bb:
   %tmp = load i64, ptr addrspace(1) %arg, align 8
   %tmp3 = and i32 %arg2, 63
@@ -68,11 +116,22 @@
   ret void
 }

-; GCN-LABEL: {{^}}alignbit_shr_pat_const30:
-; GCN: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
-; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30
-
 define amdgpu_kernel void @alignbit_shr_pat_const30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; GCN-LABEL: alignbit_shr_pat_const30:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], 30
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
 bb:
   %tmp = load i64, ptr addrspace(1) %arg, align 8
   %tmp5 = lshr i64 %tmp, 30
@@ -81,12 +140,22 @@
   ret void
 }

-; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_const33:
-; Negative test, shift amount more than 31
-; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; GCN-NOT: v_alignbit_b32
-
 define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; GCN-LABEL: alignbit_shr_pat_wrong_const33:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_mov_b32 s2, s6
+; GCN-NEXT: s_mov_b32 s3, s7
+; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
 bb:
   %tmp = load i64, ptr addrspace(1) %arg, align 8
   %tmp5 = lshr i64 %tmp, 33