Skip to content

Commit fd8d619

Browse files
committed
[AMDGPU] Update a test using tool-generated check according to review
1 parent 79b4a28 commit fd8d619

File tree

1 file changed

+57
-42
lines changed

1 file changed

+57
-42
lines changed
Lines changed: 57 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,79 @@
1-
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s
2-
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s
33

44
; Checks that there is no crash when there are multiple tails
55
; for the same head starting a chain.
66
@0 = internal addrspace(3) global [16384 x i32] undef
77

8-
; GCN-LABEL: @no_crash(
9-
; GCN: store <2 x i32> zeroinitializer
10-
; GCN: store i32 0
11-
; GCN: store i32 0
12-
138
define amdgpu_kernel void @no_crash(i32 %arg) {
14-
%tmp2 = add i32 %arg, 14
15-
%tmp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp2
16-
%tmp4 = add i32 %arg, 15
17-
%tmp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp4
9+
; GCN-LABEL: define amdgpu_kernel void @no_crash(
10+
; GCN-SAME: i32 [[ARG:%.*]]) {
11+
; GCN-NEXT: [[TEMP2:%.*]] = add i32 [[ARG]], 14
12+
; GCN-NEXT: [[TEMP3:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0:[0-9]+]], i32 0, i32 [[TEMP2]]
13+
; GCN-NEXT: [[TEMP4:%.*]] = add i32 [[ARG]], 15
14+
; GCN-NEXT: [[TEMP5:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[TEMP4]]
15+
; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP3]], align 4
16+
; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4
17+
; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4
18+
; GCN-NEXT: ret void
19+
;
20+
%temp2 = add i32 %arg, 14
21+
%temp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %temp2
22+
%temp4 = add i32 %arg, 15
23+
%temp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %temp4
1824

19-
store i32 0, ptr addrspace(3) %tmp3, align 4
20-
store i32 0, ptr addrspace(3) %tmp5, align 4
21-
store i32 0, ptr addrspace(3) %tmp5, align 4
22-
store i32 0, ptr addrspace(3) %tmp5, align 4
25+
store i32 0, ptr addrspace(3) %temp3, align 4
26+
store i32 0, ptr addrspace(3) %temp5, align 4
27+
store i32 0, ptr addrspace(3) %temp5, align 4
28+
store i32 0, ptr addrspace(3) %temp5, align 4
2329

2430
ret void
2531
}
2632

2733
; Check that adjacent memory locations are properly matched and that the
2834
; longest chain is vectorized.
29-
30-
; GCN-LABEL: @interleave_get_longest
31-
32-
; GCN: load <2 x i32>{{.*}} %tmp1
33-
; GCN: store <2 x i32> zeroinitializer{{.*}} %tmp1
34-
; GCN: load <2 x i32>{{.*}} %tmp2
35-
; GCN: load <2 x i32>{{.*}} %tmp4
36-
; GCN: extractelement <2 x i32>
37-
; GCN: extractelement <2 x i32>
38-
; GCN: extractelement <2 x i32>
39-
; GCN: extractelement <2 x i32>
40-
4135
define amdgpu_kernel void @interleave_get_longest(i32 %arg) {
36+
; GCN-LABEL: define amdgpu_kernel void @interleave_get_longest(
37+
; GCN-SAME: i32 [[ARG:%.*]]) {
38+
; GCN-NEXT: [[A1:%.*]] = add i32 [[ARG]], 1
39+
; GCN-NEXT: [[A3:%.*]] = add i32 [[ARG]], 3
40+
; GCN-NEXT: [[TEMP1:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[ARG]]
41+
; GCN-NEXT: [[TEMP2:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[A1]]
42+
; GCN-NEXT: [[TEMP4:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[A3]]
43+
; GCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP1]], align 4
44+
; GCN-NEXT: [[L21:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
45+
; GCN-NEXT: [[L12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
46+
; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP1]], align 4
47+
; GCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP2]], align 4
48+
; GCN-NEXT: [[L33:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
49+
; GCN-NEXT: [[L44:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
50+
; GCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP4]], align 4
51+
; GCN-NEXT: [[L55:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
52+
; GCN-NEXT: [[L66:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
53+
; GCN-NEXT: [[L77:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
54+
; GCN-NEXT: [[L88:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
55+
; GCN-NEXT: ret void
56+
;
4257
%a1 = add i32 %arg, 1
4358
%a2 = add i32 %arg, 2
4459
%a3 = add i32 %arg, 3
4560
%a4 = add i32 %arg, 4
46-
%tmp1 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %arg
47-
%tmp2 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a1
48-
%tmp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a2
49-
%tmp4 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a3
50-
%tmp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a4
61+
%temp1 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %arg
62+
%temp2 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a1
63+
%temp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a2
64+
%temp4 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a3
65+
%temp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a4
5166

52-
%l1 = load i32, ptr addrspace(3) %tmp2, align 4
53-
%l2 = load i32, ptr addrspace(3) %tmp1, align 4
54-
store i32 0, ptr addrspace(3) %tmp2, align 4
55-
store i32 0, ptr addrspace(3) %tmp1, align 4
56-
%l3 = load i32, ptr addrspace(3) %tmp2, align 4
57-
%l4 = load i32, ptr addrspace(3) %tmp3, align 4
58-
%l5 = load i32, ptr addrspace(3) %tmp4, align 4
59-
%l6 = load i32, ptr addrspace(3) %tmp5, align 4
60-
%l7 = load i32, ptr addrspace(3) %tmp5, align 4
61-
%l8 = load i32, ptr addrspace(3) %tmp5, align 4
67+
%l1 = load i32, ptr addrspace(3) %temp2, align 4
68+
%l2 = load i32, ptr addrspace(3) %temp1, align 4
69+
store i32 0, ptr addrspace(3) %temp2, align 4
70+
store i32 0, ptr addrspace(3) %temp1, align 4
71+
%l3 = load i32, ptr addrspace(3) %temp2, align 4
72+
%l4 = load i32, ptr addrspace(3) %temp3, align 4
73+
%l5 = load i32, ptr addrspace(3) %temp4, align 4
74+
%l6 = load i32, ptr addrspace(3) %temp5, align 4
75+
%l7 = load i32, ptr addrspace(3) %temp5, align 4
76+
%l8 = load i32, ptr addrspace(3) %temp5, align 4
6277

6378
ret void
6479
}

0 commit comments

Comments
 (0)