Skip to content

Commit 0b14e3a

Browse files
committed
[SinkGEPConstOffset] Precommit unit test for sinking constant offsets down a GEP chain to the tail, to reduce register usage.
1 parent 29db305 commit 0b14e3a

File tree

1 file changed

+105
-0
lines changed

1 file changed

+105
-0
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -O3 -S | \
3+
; RUN: FileCheck %s --check-prefix=CHECK-O3
4+
5+
; @kernel__0 is the input IR for this test. It builds several getelementptr
; chains over %pin (addrspace(3)) and %pout (addrspace(5)) in which a constant
; index is applied either at the head of the chain (directly on the base
; pointer) or at the tail (after the runtime indices). The runtime indices are
; derived from %num and %ofst0..%ofst3. All half-typed GEPs are computed in the
; entry block; the loop body loads and stores through those pointers and also
; contains two i32 GEPs indexed by %ofst4.
; The autogenerated assertions that follow the signature record what the -O3
; pipeline from the run line currently produces for this input.
define void @kernel__0(ptr addrspace(5) noalias %pout, ptr addrspace(3) noalias %pin, i32 %num, i32 %ofst0, i32 %ofst1, i32 %ofst2, i32 %ofst3, i32 %ofst4) {
6+
; CHECK-O3-LABEL: @kernel__0(
7+
; CHECK-O3-NEXT: entry:
8+
; CHECK-O3-NEXT: [[SREM:%.*]] = srem i32 [[NUM:%.*]], 1024
9+
; CHECK-O3-NEXT: [[ADD1:%.*]] = add nsw i32 [[SREM]], [[OFST0:%.*]]
10+
; CHECK-O3-NEXT: [[ADD2:%.*]] = add nsw i32 [[OFST1:%.*]], [[SREM]]
11+
; CHECK-O3-NEXT: [[ADD3:%.*]] = add nsw i32 [[OFST2:%.*]], [[SREM]]
12+
; CHECK-O3-NEXT: [[ADD4:%.*]] = add nsw i32 [[OFST3:%.*]], [[SREM]]
13+
; CHECK-O3-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[PIN:%.*]], i32 222
14+
; CHECK-O3-NEXT: [[GEP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD1]]
15+
; CHECK-O3-NEXT: [[GEP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD2]]
16+
; CHECK-O3-NEXT: [[GEP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[PIN]], i32 444
17+
; CHECK-O3-NEXT: [[GEP4:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP3]], i32 [[ADD1]]
18+
; CHECK-O3-NEXT: [[GEP5:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP3]], i32 [[ADD2]]
19+
; CHECK-O3-NEXT: [[GEP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[GEP4]], i32 666
20+
; CHECK-O3-NEXT: [[GEP7:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP5]], i32 -888
21+
; CHECK-O3-NEXT: [[MUL:%.*]] = mul nsw i32 [[SREM]], [[OFST0]]
22+
; CHECK-O3-NEXT: [[GEP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[POUT:%.*]], i32 1110
23+
; CHECK-O3-NEXT: [[GEP9:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP8]], i32 [[MUL]]
24+
; CHECK-O3-NEXT: [[GEP10:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD3]]
25+
; CHECK-O3-NEXT: [[GEP11:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD4]]
26+
; CHECK-O3-NEXT: [[GEP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[POUT]], i32 1332
27+
; CHECK-O3-NEXT: [[GEP13:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP12]], i32 [[MUL]]
28+
; CHECK-O3-NEXT: [[GEP14:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD3]]
29+
; CHECK-O3-NEXT: [[GEP15:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD4]]
30+
; CHECK-O3-NEXT: [[GEP16:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[GEP14]], i32 1554
31+
; CHECK-O3-NEXT: [[GEP17:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP15]], i32 -1776
32+
; CHECK-O3-NEXT: [[GEP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[PIN]], i32 [[OFST4:%.*]]
33+
; CHECK-O3-NEXT: [[GEP20:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[POUT]], i32 [[OFST4]]
34+
; CHECK-O3-NEXT: br label [[LOOP:%.*]]
35+
; CHECK-O3: loop:
36+
; CHECK-O3-NEXT: [[LOAD:%.*]] = load half, ptr addrspace(3) [[GEP1]], align 2
37+
; CHECK-O3-NEXT: [[LOAD1:%.*]] = load half, ptr addrspace(3) [[GEP2]], align 2
38+
; CHECK-O3-NEXT: [[LOAD2:%.*]] = load half, ptr addrspace(3) [[GEP6]], align 2
39+
; CHECK-O3-NEXT: [[LOAD3:%.*]] = load half, ptr addrspace(3) [[GEP7]], align 2
40+
; CHECK-O3-NEXT: store half [[LOAD]], ptr addrspace(5) [[GEP10]], align 2
41+
; CHECK-O3-NEXT: store half [[LOAD1]], ptr addrspace(5) [[GEP11]], align 2
42+
; CHECK-O3-NEXT: store half [[LOAD2]], ptr addrspace(5) [[GEP16]], align 2
43+
; CHECK-O3-NEXT: store half [[LOAD3]], ptr addrspace(5) [[GEP17]], align 2
44+
; CHECK-O3-NEXT: [[GEP19:%.*]] = load i32, ptr addrspace(3) [[GEP18]], align 4
45+
; CHECK-O3-NEXT: [[GEP21:%.*]] = load i32, ptr addrspace(5) [[GEP20]], align 4
46+
; CHECK-O3-NEXT: store i32 [[GEP21]], ptr addrspace(3) [[PIN]], align 4
47+
; CHECK-O3-NEXT: [[ICMP:%.*]] = icmp eq i32 [[GEP19]], [[GEP21]]
48+
; CHECK-O3-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
49+
; CHECK-O3: exit:
50+
; CHECK-O3-NEXT: ret void
51+
;
52+
entry:
53+
%srem = srem i32 %num, 1024
54+
; NOTE(review): %add has no users anywhere in this function; the expected
; output above contains no corresponding instruction.
%add = add nsw i32 %srem, 3
55+
%add1 = add nsw i32 %srem, %ofst0
56+
%add2 = add nsw i32 %srem, %ofst1
57+
%add3 = add nsw i32 %srem, %ofst2
58+
%add4 = add nsw i32 %srem, %ofst3
59+
; Chains rooted at %pin. %gep and %gep3 apply a constant index (111 and 222
; half elements) directly to the base, and the runtime indices %add1/%add2
; follow. %gep6 and %gep7 then append another constant (333 and -444) at the
; tail of the %gep3 chains. In the expected output the constants appear as
; byte offsets, i.e. doubled (222, 444, 666, -888).
60+
%gep = getelementptr inbounds half, ptr addrspace(3) %pin, i32 111
61+
%gep1 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add1
62+
%gep2 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add2
63+
%gep3 = getelementptr inbounds half, ptr addrspace(3) %pin, i32 222
64+
%gep4 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add1
65+
%gep5 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add2
66+
%gep6 = getelementptr inbounds half, ptr addrspace(3) %gep4, i32 333
67+
%gep7 = getelementptr inbounds half, ptr addrspace(3) %gep5, i32 -444
68+
; Shared runtime index used as the middle link of every %pout chain below.
69+
%mul = mul nsw i32 %srem, %ofst0
70+
; Chains rooted at %pout, one link longer than the %pin chains: constant head
; (555 or 666), then %mul, then %add3/%add4. %gep16 and %gep17 append a
; constant (777 and -888) at the tail of the 666-based chains.
71+
%gep8 = getelementptr inbounds half, ptr addrspace(5) %pout, i32 555
72+
%gep9 = getelementptr inbounds half, ptr addrspace(5) %gep8, i32 %mul
73+
%gep10 = getelementptr inbounds half, ptr addrspace(5) %gep9, i32 %add3
74+
%gep11 = getelementptr inbounds half, ptr addrspace(5) %gep9, i32 %add4
75+
%gep12 = getelementptr inbounds half, ptr addrspace(5) %pout, i32 666
76+
%gep13 = getelementptr inbounds half, ptr addrspace(5) %gep12, i32 %mul
77+
%gep14 = getelementptr inbounds half, ptr addrspace(5) %gep13, i32 %add3
78+
%gep15 = getelementptr inbounds half, ptr addrspace(5) %gep13, i32 %add4
79+
%gep16 = getelementptr inbounds half, ptr addrspace(5) %gep14, i32 777
80+
%gep17 = getelementptr inbounds half, ptr addrspace(5) %gep15, i32 -888
81+
82+
br label %loop
83+
; Loop body: every pointer computed in the entry block is used here, so all of
; them are live across the loop.
84+
loop: ; loop
85+
%load = load half, ptr addrspace(3) %gep1, align 2
86+
%load1 = load half, ptr addrspace(3) %gep2, align 2
87+
%load2 = load half, ptr addrspace(3) %gep6, align 2
88+
%load3 = load half, ptr addrspace(3) %gep7, align 2
89+
; Copy the four loaded halves to the addrspace(5) destinations.
90+
store half %load, ptr addrspace(5) %gep10, align 2
91+
store half %load1, ptr addrspace(5) %gep11, align 2
92+
store half %load2, ptr addrspace(5) %gep16, align 2
93+
store half %load3, ptr addrspace(5) %gep17, align 2
94+
; Loop control. %gep19 and %gep21 are i32 loads (despite the gep-style names)
; from %pin[%ofst4] and %pout[%ofst4]. The %pout value is stored to %pin, and
; the loop repeats while the two loaded values are equal. The two GEPs here
; depend only on function arguments; the expected output shows them placed in
; the entry block.
95+
%gep18 = getelementptr inbounds i32, ptr addrspace(3) %pin, i32 %ofst4
96+
%gep19 = load i32, ptr addrspace(3) %gep18, align 4
97+
%gep20 = getelementptr inbounds i32, ptr addrspace(5) %pout, i32 %ofst4
98+
%gep21 = load i32, ptr addrspace(5) %gep20, align 4
99+
store i32 %gep21, ptr addrspace(3) %pin, align 4
100+
%icmp = icmp eq i32 %gep19, %gep21
101+
br i1 %icmp, label %loop, label %exit
102+
103+
exit:
104+
ret void
105+
}

0 commit comments

Comments
 (0)