|
1 | | -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s |
2 | | -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| 2 | +; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s |
3 | 3 |
|
4 | 4 | ; Checks that there is no crash when there are multiple tails |
5 | 5 | ; for the same head starting a chain. |
6 | 6 | @0 = internal addrspace(3) global [16384 x i32] undef |
7 | 7 |
|
8 | | -; GCN-LABEL: @no_crash( |
9 | | -; GCN: store <2 x i32> zeroinitializer |
10 | | -; GCN: store i32 0 |
11 | | -; GCN: store i32 0 |
12 | | - |
13 | 8 | define amdgpu_kernel void @no_crash(i32 %arg) { |
14 | | - %tmp2 = add i32 %arg, 14 |
15 | | - %tmp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp2 |
16 | | - %tmp4 = add i32 %arg, 15 |
17 | | - %tmp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp4 |
| 9 | +; GCN-LABEL: define amdgpu_kernel void @no_crash( |
| 10 | +; GCN-SAME: i32 [[ARG:%.*]]) { |
| 11 | +; GCN-NEXT: [[TEMP2:%.*]] = add i32 [[ARG]], 14 |
| 12 | +; GCN-NEXT: [[TEMP3:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0:[0-9]+]], i32 0, i32 [[TEMP2]] |
| 13 | +; GCN-NEXT: [[TEMP4:%.*]] = add i32 [[ARG]], 15 |
| 14 | +; GCN-NEXT: [[TEMP5:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[TEMP4]] |
| 15 | +; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP3]], align 4 |
| 16 | +; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4 |
| 17 | +; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4 |
| 18 | +; GCN-NEXT: ret void |
| 19 | +; |
| 20 | + %temp2 = add i32 %arg, 14 |
| 21 | + %temp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %temp2 |
| 22 | + %temp4 = add i32 %arg, 15 |
| 23 | + %temp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %temp4 |
18 | 24 |
|
19 | | - store i32 0, ptr addrspace(3) %tmp3, align 4 |
20 | | - store i32 0, ptr addrspace(3) %tmp5, align 4 |
21 | | - store i32 0, ptr addrspace(3) %tmp5, align 4 |
22 | | - store i32 0, ptr addrspace(3) %tmp5, align 4 |
| 25 | + store i32 0, ptr addrspace(3) %temp3, align 4 |
| 26 | + store i32 0, ptr addrspace(3) %temp5, align 4 |
| 27 | + store i32 0, ptr addrspace(3) %temp5, align 4 |
| 28 | + store i32 0, ptr addrspace(3) %temp5, align 4 |
23 | 29 |
|
24 | 30 | ret void |
25 | 31 | } |
26 | 32 |
|
27 | 33 | ; Check adjacent memory locations are properly matched and the |
28 | 34 | ; longest chain is vectorized |
29 | | - |
30 | | -; GCN-LABEL: @interleave_get_longest |
31 | | - |
32 | | -; GCN: load <2 x i32>{{.*}} %tmp1 |
33 | | -; GCN: store <2 x i32> zeroinitializer{{.*}} %tmp1 |
34 | | -; GCN: load <2 x i32>{{.*}} %tmp2 |
35 | | -; GCN: load <2 x i32>{{.*}} %tmp4 |
36 | | -; GCN: extractelement <2 x i32> |
37 | | -; GCN: extractelement <2 x i32> |
38 | | -; GCN: extractelement <2 x i32> |
39 | | -; GCN: extractelement <2 x i32> |
40 | | - |
41 | 35 | define amdgpu_kernel void @interleave_get_longest(i32 %arg) { |
| 36 | +; GCN-LABEL: define amdgpu_kernel void @interleave_get_longest( |
| 37 | +; GCN-SAME: i32 [[ARG:%.*]]) { |
| 38 | +; GCN-NEXT: [[A1:%.*]] = add i32 [[ARG]], 1 |
| 39 | +; GCN-NEXT: [[A3:%.*]] = add i32 [[ARG]], 3 |
| 40 | +; GCN-NEXT: [[TEMP1:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[ARG]] |
| 41 | +; GCN-NEXT: [[TEMP2:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[A1]] |
| 42 | +; GCN-NEXT: [[TEMP4:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[A3]] |
| 43 | +; GCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP1]], align 4 |
| 44 | +; GCN-NEXT: [[L21:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 |
| 45 | +; GCN-NEXT: [[L12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 |
| 46 | +; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP1]], align 4 |
| 47 | +; GCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP2]], align 4 |
| 48 | +; GCN-NEXT: [[L33:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 |
| 49 | +; GCN-NEXT: [[L44:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 |
| 50 | +; GCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP4]], align 4 |
| 51 | +; GCN-NEXT: [[L55:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 |
| 52 | +; GCN-NEXT: [[L66:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 |
| 53 | +; GCN-NEXT: [[L77:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 |
| 54 | +; GCN-NEXT: [[L88:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 |
| 55 | +; GCN-NEXT: ret void |
| 56 | +; |
42 | 57 | %a1 = add i32 %arg, 1 |
43 | 58 | %a2 = add i32 %arg, 2 |
44 | 59 | %a3 = add i32 %arg, 3 |
45 | 60 | %a4 = add i32 %arg, 4 |
46 | | - %tmp1 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %arg |
47 | | - %tmp2 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a1 |
48 | | - %tmp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a2 |
49 | | - %tmp4 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a3 |
50 | | - %tmp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a4 |
| 61 | + %temp1 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %arg |
| 62 | + %temp2 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a1 |
| 63 | + %temp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a2 |
| 64 | + %temp4 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a3 |
| 65 | + %temp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a4 |
51 | 66 |
|
52 | | - %l1 = load i32, ptr addrspace(3) %tmp2, align 4 |
53 | | - %l2 = load i32, ptr addrspace(3) %tmp1, align 4 |
54 | | - store i32 0, ptr addrspace(3) %tmp2, align 4 |
55 | | - store i32 0, ptr addrspace(3) %tmp1, align 4 |
56 | | - %l3 = load i32, ptr addrspace(3) %tmp2, align 4 |
57 | | - %l4 = load i32, ptr addrspace(3) %tmp3, align 4 |
58 | | - %l5 = load i32, ptr addrspace(3) %tmp4, align 4 |
59 | | - %l6 = load i32, ptr addrspace(3) %tmp5, align 4 |
60 | | - %l7 = load i32, ptr addrspace(3) %tmp5, align 4 |
61 | | - %l8 = load i32, ptr addrspace(3) %tmp5, align 4 |
| 67 | + %l1 = load i32, ptr addrspace(3) %temp2, align 4 |
| 68 | + %l2 = load i32, ptr addrspace(3) %temp1, align 4 |
| 69 | + store i32 0, ptr addrspace(3) %temp2, align 4 |
| 70 | + store i32 0, ptr addrspace(3) %temp1, align 4 |
| 71 | + %l3 = load i32, ptr addrspace(3) %temp2, align 4 |
| 72 | + %l4 = load i32, ptr addrspace(3) %temp3, align 4 |
| 73 | + %l5 = load i32, ptr addrspace(3) %temp4, align 4 |
| 74 | + %l6 = load i32, ptr addrspace(3) %temp5, align 4 |
| 75 | + %l7 = load i32, ptr addrspace(3) %temp5, align 4 |
| 76 | + %l8 = load i32, ptr addrspace(3) %temp5, align 4 |
62 | 77 |
|
63 | 78 | ret void |
64 | 79 | } |
0 commit comments