Skip to content

Commit 452e13e

Browse files
committed
[NFC] Precommit autogenerated test
1 parent c617466 commit 452e13e

File tree

1 file changed

+67
-25
lines changed

1 file changed

+67
-25
lines changed

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll

Lines changed: 67 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -mattr=+relaxed-buffer-oob-mode -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-RELAXED %s
23
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-STRICT %s
34

4-
; CHECK-LABEL: @merge_v2i32_v2i32(
5-
; CHECK: load <4 x i32>
6-
; CHECK: store <4 x i32> zeroinitializer
75
define amdgpu_kernel void @merge_v2i32_v2i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
6+
; CHECK-LABEL: define amdgpu_kernel void @merge_v2i32_v2i32(
7+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[B]], align 4
10+
; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
11+
; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
12+
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
13+
; CHECK-NEXT: ret void
14+
;
815
entry:
916
%a.1 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %a, i64 1
1017
%b.1 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %b, i64 1
@@ -18,10 +25,16 @@ entry:
1825
ret void
1926
}
2027

21-
; CHECK-LABEL: @merge_v1i32_v1i32(
22-
; CHECK: load <2 x i32>
23-
; CHECK: store <2 x i32> zeroinitializer
2428
define amdgpu_kernel void @merge_v1i32_v1i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
29+
; CHECK-LABEL: define amdgpu_kernel void @merge_v1i32_v1i32(
30+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
31+
; CHECK-NEXT: [[ENTRY:.*:]]
32+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[B]], align 4
33+
; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> zeroinitializer
34+
; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> <i32 1>
35+
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
36+
; CHECK-NEXT: ret void
37+
;
2538
entry:
2639
%a.1 = getelementptr inbounds <1 x i32>, ptr addrspace(1) %a, i64 1
2740
%b.1 = getelementptr inbounds <1 x i32>, ptr addrspace(1) %b, i64 1
@@ -35,12 +48,18 @@ entry:
3548
ret void
3649
}
3750

38-
; CHECK-LABEL: @no_merge_v3i32_v3i32(
39-
; CHECK: load <3 x i32>
40-
; CHECK: load <3 x i32>
41-
; CHECK: store <3 x i32> zeroinitializer
42-
; CHECK: store <3 x i32> zeroinitializer
4351
define amdgpu_kernel void @no_merge_v3i32_v3i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
52+
; CHECK-LABEL: define amdgpu_kernel void @no_merge_v3i32_v3i32(
53+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
54+
; CHECK-NEXT: [[ENTRY:.*:]]
55+
; CHECK-NEXT: [[A_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[A]], i64 1
56+
; CHECK-NEXT: [[B_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[B]], i64 1
57+
; CHECK-NEXT: [[LD_C:%.*]] = load <3 x i32>, ptr addrspace(1) [[B]], align 4
58+
; CHECK-NEXT: [[LD_C_IDX_1:%.*]] = load <3 x i32>, ptr addrspace(1) [[B_1]], align 4
59+
; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
60+
; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A_1]], align 4
61+
; CHECK-NEXT: ret void
62+
;
4463
entry:
4564
%a.1 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %a, i64 1
4665
%b.1 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %b, i64 1
@@ -54,10 +73,16 @@ entry:
5473
ret void
5574
}
5675

57-
; CHECK-LABEL: @merge_v2i16_v2i16(
58-
; CHECK: load <4 x i16>
59-
; CHECK: store <4 x i16> zeroinitializer
6076
define amdgpu_kernel void @merge_v2i16_v2i16(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
77+
; CHECK-LABEL: define amdgpu_kernel void @merge_v2i16_v2i16(
78+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
79+
; CHECK-NEXT: [[ENTRY:.*:]]
80+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 4
81+
; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
82+
; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
83+
; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(1) [[A]], align 4
84+
; CHECK-NEXT: ret void
85+
;
6186
entry:
6287
%a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %a, i64 1
6388
%b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %b, i64 1
@@ -71,15 +96,27 @@ entry:
7196
ret void
7297
}
7398

74-
; CHECK-OOB-RELAXED-LABEL: @merge_fat_ptrs(
75-
; CHECK-OOB-RELAXED: load <4 x i16>
76-
; CHECK-OOB-RELAXED: store <4 x i16> zeroinitializer
77-
; CHECK-OOB-STRICT-LABEL: @merge_fat_ptrs(
78-
; CHECK-OOB-STRICT: load <2 x i16>
79-
; CHECK-OOB-STRICT: load <2 x i16>
80-
; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer
81-
; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer
8299
define amdgpu_kernel void @merge_fat_ptrs(ptr addrspace(7) nocapture %a, ptr addrspace(7) nocapture readonly %b) #0 {
100+
; CHECK-OOB-RELAXED-LABEL: define amdgpu_kernel void @merge_fat_ptrs(
101+
; CHECK-OOB-RELAXED-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
102+
; CHECK-OOB-RELAXED-NEXT: [[ENTRY:.*:]]
103+
; CHECK-OOB-RELAXED-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(7) [[B]], align 4
104+
; CHECK-OOB-RELAXED-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
105+
; CHECK-OOB-RELAXED-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
106+
; CHECK-OOB-RELAXED-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4
107+
; CHECK-OOB-RELAXED-NEXT: ret void
108+
;
109+
; CHECK-OOB-STRICT-LABEL: define amdgpu_kernel void @merge_fat_ptrs(
110+
; CHECK-OOB-STRICT-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
111+
; CHECK-OOB-STRICT-NEXT: [[ENTRY:.*:]]
112+
; CHECK-OOB-STRICT-NEXT: [[A_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[A]], i32 1
113+
; CHECK-OOB-STRICT-NEXT: [[B_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[B]], i32 1
114+
; CHECK-OOB-STRICT-NEXT: [[LD_C:%.*]] = load <2 x i16>, ptr addrspace(7) [[B]], align 4
115+
; CHECK-OOB-STRICT-NEXT: [[LD_C_IDX_1:%.*]] = load <2 x i16>, ptr addrspace(7) [[B_1]], align 4
116+
; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4
117+
; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A_1]], align 4
118+
; CHECK-OOB-STRICT-NEXT: ret void
119+
;
83120
entry:
84121
%a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %a, i32 1
85122
%b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %b, i32 1
@@ -94,10 +131,15 @@ entry:
94131
}
95132

96133
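; Ideally the i32 load and the adjacent <2 x i16> load would be merged into a single access; currently they are left as two separate loads.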
97-
; CHECK-LABEL: @merge_load_i32_v2i16(
98-
; CHECK: load i32,
99-
; CHECK: load <2 x i16>
100134
define amdgpu_kernel void @merge_load_i32_v2i16(ptr addrspace(1) nocapture %a) #0 {
135+
; CHECK-LABEL: define amdgpu_kernel void @merge_load_i32_v2i16(
136+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]]) #[[ATTR0]] {
137+
; CHECK-NEXT: [[ENTRY:.*:]]
138+
; CHECK-NEXT: [[A_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i32 1
139+
; CHECK-NEXT: [[LD_0:%.*]] = load i32, ptr addrspace(1) [[A]], align 4
140+
; CHECK-NEXT: [[LD_1:%.*]] = load <2 x i16>, ptr addrspace(1) [[A_1]], align 4
141+
; CHECK-NEXT: ret void
142+
;
101143
entry:
102144
%a.1 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 1
103145

0 commit comments

Comments
 (0)