Skip to content

Commit 7a05ee3

Browse files
committed
Update test to account for change in sub-byte element type legalization for NVPTX
1 parent 6dc716d commit 7a05ee3

File tree

1 file changed

+20
-40
lines changed

1 file changed

+20
-40
lines changed

llvm/test/Transforms/LoadStoreVectorizer/NVPTX/gap-fill-vectors.ll

Lines changed: 20 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,49 +5,10 @@
55
; Currently, we do not gap-fill when the loads enclosing the gap are different sizes.
66
; Otherwise, vectors are treated the same as scalar types.
77

8-
define void @i1x8_gap_gap_i1x8(ptr %ptr) {
9-
; CHECK-LABEL: define void @i1x8_gap_gap_i1x8(
10-
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
11-
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i64 0
12-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i1> @llvm.masked.load.v32i1.p0(ptr align 4 [[PTR0]], <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i1> poison)
13-
; CHECK-NEXT: [[L03:%.*]] = shufflevector <32 x i1> [[TMP1]], <32 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
14-
; CHECK-NEXT: [[GAPFILL4:%.*]] = shufflevector <32 x i1> [[TMP1]], <32 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15-
; CHECK-NEXT: [[GAPFILL25:%.*]] = shufflevector <32 x i1> [[TMP1]], <32 x i1> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
16-
; CHECK-NEXT: [[L36:%.*]] = shufflevector <32 x i1> [[TMP1]], <32 x i1> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17-
; CHECK-NEXT: ret void
18-
;
19-
%ptr0 = getelementptr i8, ptr %ptr, i64 0
20-
%ptr3 = getelementptr i8, ptr %ptr, i64 3
21-
22-
%l0 = load <8 x i1>, ptr %ptr0, align 4
23-
%l3 = load <8 x i1>, ptr %ptr3, align 1
24-
25-
ret void
26-
}
27-
28-
; The chain elements are different sizes, gap filling won't kick in
29-
define void @i1x8_gap_gap_i1x16(ptr %ptr) {
30-
; CHECK-LABEL: define void @i1x8_gap_gap_i1x16(
31-
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
32-
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i64 0
33-
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR]], i64 3
34-
; CHECK-NEXT: [[L0:%.*]] = load <8 x i1>, ptr [[PTR0]], align 4
35-
; CHECK-NEXT: [[L3:%.*]] = load <16 x i1>, ptr [[PTR3]], align 2
36-
; CHECK-NEXT: ret void
37-
;
38-
%ptr0 = getelementptr i8, ptr %ptr, i64 0
39-
%ptr3 = getelementptr i8, ptr %ptr, i64 3
40-
41-
%l0 = load <8 x i1>, ptr %ptr0, align 4
42-
%l3 = load <16 x i1>, ptr %ptr3, align 2
43-
44-
ret void
45-
}
46-
478
; A gap the size of two <2 x i8> loads gets filled
489
define void @i8x2_gap_gap_i8x2(ptr %ptr) {
4910
; CHECK-LABEL: define void @i8x2_gap_gap_i8x2(
50-
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
11+
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
5112
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i64 0
5213
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr align 8 [[PTR0]], <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true>, <8 x i8> poison)
5314
; CHECK-NEXT: [[L03:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <2 x i32> <i32 0, i32 1>
@@ -184,3 +145,22 @@ define void @i64x2_i64x2_gap_i64x2(ptr addrspace(1) %in) {
184145
%vec1 = load <2 x i64>, ptr addrspace(1) %getElem1, align 8
185146
ret void
186147
}
148+
149+
; Masked loads are not supported for sub-byte element types, so the gap between these <8 x i1> loads is not filled.
150+
define void @i1x8_gap_gap_i1x8(ptr %ptr) {
151+
; CHECK-LABEL: define void @i1x8_gap_gap_i1x8(
152+
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
153+
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i64 0
154+
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR]], i64 3
155+
; CHECK-NEXT: [[L0:%.*]] = load <8 x i1>, ptr [[PTR0]], align 4
156+
; CHECK-NEXT: [[L3:%.*]] = load <8 x i1>, ptr [[PTR3]], align 1
157+
; CHECK-NEXT: ret void
158+
;
159+
%ptr0 = getelementptr i8, ptr %ptr, i64 0
160+
%ptr3 = getelementptr i8, ptr %ptr, i64 3
161+
162+
%l0 = load <8 x i1>, ptr %ptr0, align 4
163+
%l3 = load <8 x i1>, ptr %ptr3, align 1
164+
165+
ret void
166+
}

0 commit comments

Comments
 (0)