1- ; REQUIRES: asserts
2- ; RUN: not --crash opt -p loop-vectorize -mtriple=s390x-unknown-linux -mcpu=z16 %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: opt -p loop-vectorize -mtriple=s390x-unknown-linux -mcpu=z16 -S %s | FileCheck %s
33
44target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
55
@@ -9,6 +9,94 @@ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
99; all scalar lanes) and a VPInstruction that only demands the first lane.
1010; Test case for https://github.com/llvm/llvm-project/issues/88849.
1111define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst (ptr noalias %dst , ptr noalias %src.1 ) {
12+ ; CHECK-LABEL: define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(
13+ ; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
14+ ; CHECK-NEXT: [[ENTRY:.*]]:
15+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16+ ; CHECK: [[VECTOR_PH]]:
17+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
18+ ; CHECK: [[VECTOR_BODY]]:
19+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
20+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
21+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
22+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
23+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
24+ ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 [[TMP0]], 4
25+ ; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[TMP1]], 4
26+ ; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 [[TMP2]], 4
27+ ; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[TMP3]], 4
28+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
29+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP5]]
30+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP6]]
31+ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP7]]
32+ ; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
33+ ; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
34+ ; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
35+ ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1
36+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
37+ ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 1
38+ ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 2
39+ ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP18]], i8 [[TMP15]], i32 3
40+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
41+ ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP0]], 4
42+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 [[TMP21]]
43+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0
44+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
45+ ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
46+ ; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
47+ ; CHECK: [[PRED_STORE_IF]]:
48+ ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 0
49+ ; CHECK-NEXT: store i32 [[TMP25]], ptr [[DST]], align 4
50+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
51+ ; CHECK: [[PRED_STORE_CONTINUE]]:
52+ ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
53+ ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
54+ ; CHECK: [[PRED_STORE_IF1]]:
55+ ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 1
56+ ; CHECK-NEXT: store i32 [[TMP27]], ptr [[DST]], align 4
57+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
58+ ; CHECK: [[PRED_STORE_CONTINUE2]]:
59+ ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
60+ ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
61+ ; CHECK: [[PRED_STORE_IF3]]:
62+ ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
63+ ; CHECK-NEXT: store i32 [[TMP29]], ptr [[DST]], align 4
64+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
65+ ; CHECK: [[PRED_STORE_CONTINUE4]]:
66+ ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
67+ ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
68+ ; CHECK: [[PRED_STORE_IF5]]:
69+ ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
70+ ; CHECK-NEXT: store i32 [[TMP31]], ptr [[DST]], align 4
71+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
72+ ; CHECK: [[PRED_STORE_CONTINUE6]]:
73+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
74+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
75+ ; CHECK: [[MIDDLE_BLOCK]]:
76+ ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
77+ ; CHECK: [[SCALAR_PH]]:
78+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
79+ ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
80+ ; CHECK: [[LOOP_HEADER]]:
81+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
82+ ; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4
83+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]]
84+ ; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
85+ ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_1]], 0
86+ ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
87+ ; CHECK: [[THEN]]:
88+ ; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV]], 4
89+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr @src, i64 0, i64 [[IV_OR]]
90+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4
91+ ; CHECK-NEXT: store i32 [[L_2]], ptr [[DST]], align 4
92+ ; CHECK-NEXT: br label %[[LOOP_LATCH]]
93+ ; CHECK: [[LOOP_LATCH]]:
94+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
95+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4
96+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
97+ ; CHECK: [[EXIT]]:
98+ ; CHECK-NEXT: ret void
99+ ;
12100entry:
13101 br label %loop.header
14102
@@ -35,3 +123,9 @@ loop.latch:
35123exit:
36124 ret void
37125}
126+ ;.
127+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
128+ ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
129+ ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
130+ ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
131+ ;.
0 commit comments