Skip to content

Commit a0d4230

Browse files
committed
compact vreg numbers, adding new tests
1 parent 4119be5 commit a0d4230

File tree

2 files changed

+59
-24
lines changed

2 files changed

+59
-24
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Description: an end-to-end IR test for https://github.com/llvm/llvm-project/issues/139752
5+
; Tests combining G_SHUFFLE_VECTOR into G_BUILD_VECTOR
6+
7+
define amdgpu_gs <4 x float> @_amdgpu_gs_main() {
8+
; CHECK-LABEL: _amdgpu_gs_main:
9+
; CHECK: ; %bb.0: ; %bb
10+
; CHECK-NEXT: v_mov_b32_e32 v0, 16
11+
; CHECK-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
12+
; CHECK-NEXT: s_mov_b32 s0, 0
13+
; CHECK-NEXT: s_mov_b32 s1, s0
14+
; CHECK-NEXT: s_mov_b32 s2, s0
15+
; CHECK-NEXT: s_mov_b32 s3, s0
16+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
17+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
18+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v2, s[0:3], 0 idxen
19+
; CHECK-NEXT: s_nop 0
20+
; CHECK-NEXT: v_mov_b32_e32 v0, v1
21+
; CHECK-NEXT: s_waitcnt vmcnt(0)
22+
; CHECK-NEXT: ; return to shader part epilog
23+
bb:
24+
%i = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 16), align 4
25+
%i1 = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 20), align 4
26+
%i2 = shufflevector <1 x float> %i, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
27+
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %i2, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
28+
%i3 = shufflevector <1 x float> %i1, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
29+
ret <4 x float> %i3
30+
}
31+
32+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
33+
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #0
34+
35+

llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ body: |
2020
; CHECK-NEXT: SI_RETURN
2121
%0:_(p3) = COPY $vgpr0
2222
%1:_(p3) = COPY $vgpr1
23-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
24-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
25-
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, 6, 7)
26-
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
23+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
24+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
25+
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, 6, 7)
26+
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
2727
SI_RETURN
2828
...
2929

@@ -46,10 +46,10 @@ body: |
4646
; CHECK-NEXT: SI_RETURN
4747
%0:_(p3) = COPY $vgpr0
4848
%1:_(p3) = COPY $vgpr1
49-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
50-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
51-
%11:_(<2 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(3, 4)
52-
G_STORE %11(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
49+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
50+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
51+
%4:_(<2 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(3, 4)
52+
G_STORE %4(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
5353
SI_RETURN
5454
5555
...
@@ -73,10 +73,10 @@ body: |
7373
; CHECK-NEXT: SI_RETURN
7474
%0:_(p3) = COPY $vgpr0
7575
%1:_(p3) = COPY $vgpr1
76-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
77-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
78-
%11:_(<3 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(0, 1, 2)
79-
G_STORE %11(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
76+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
77+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
78+
%4:_(<3 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(0, 1, 2)
79+
G_STORE %4(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
8080
SI_RETURN
8181
...
8282

@@ -101,10 +101,10 @@ body: |
101101
; CHECK-NEXT: SI_RETURN
102102
%0:_(p3) = COPY $vgpr0
103103
%1:_(p3) = COPY $vgpr1
104-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
105-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
106-
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, -1, 7)
107-
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
104+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
105+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
106+
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, -1, 7)
107+
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
108108
SI_RETURN
109109
...
110110

@@ -128,10 +128,10 @@ body: |
128128
; CHECK-NEXT: SI_RETURN
129129
%0:_(p3) = COPY $vgpr0
130130
%1:_(p3) = COPY $vgpr1
131-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
132-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
133-
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(6, 7, 8, 9)
134-
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
131+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
132+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
133+
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(6, 7, 8, 9)
134+
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
135135
SI_RETURN
136136
...
137137

@@ -153,9 +153,9 @@ body: |
153153
; CHECK-NEXT: SI_RETURN
154154
%0:_(p3) = COPY $vgpr0
155155
%1:_(p3) = COPY $vgpr1
156-
%12:_(<8 x s16>) = G_IMPLICIT_DEF
157-
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
158-
%11:_(s16) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4)
159-
G_STORE %11(s16), %1(p3) :: (store (s16), addrspace 3)
156+
%2:_(<8 x s16>) = G_IMPLICIT_DEF
157+
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
158+
%4:_(s16) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4)
159+
G_STORE %4(s16), %1(p3) :: (store (s16), addrspace 3)
160160
SI_RETURN
161161
...

0 commit comments

Comments (0)