Skip to content

Commit a15f966

Browse files
committed
[LV] Fix regression
1 parent 9c4a307 commit a15f966

File tree

4 files changed: 64 additions (+64) and 139 deletions (-139).

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -418,6 +418,9 @@ class LoopVectorizationLegality {
418418
return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
419419
}
420420

421+
/// Returns true if MaskedOp contains \p I.
422+
bool isMasked(Instruction *I) const { return MaskedOp.contains(I); }
423+
421424
/// Returns true if vector representation of the instruction \p I
422425
/// requires mask.
423426
bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6310,7 +6310,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
63106310
continue;
63116311
}
63126312

6313-
bool MaskRequired = Legal->isMaskRequired(CI, foldTailByMasking());
6313+
bool MaskRequired = Legal->isMasked(CI);
63146314
// Compute corresponding vector type for return value and arguments.
63156315
Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
63166316
for (Type *ScalarTy : ScalarTys)

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -64,9 +64,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
6464
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
6565
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
6666
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
67-
; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[LD]], i64 0
68-
; TFCOMMON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
69-
; TFCOMMON-NEXT: [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
67+
; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
68+
; TFCOMMON-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3]]
69+
; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
70+
; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
7071
; TFCOMMON-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
7172
; TFCOMMON-NEXT: [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
7273
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
@@ -106,10 +107,14 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
106107
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
107108
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
108109
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
109-
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
110-
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
111-
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
112-
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
110+
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
111+
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
112+
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
113+
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
114+
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
115+
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
116+
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
117+
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
113118
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
114119
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = fcmp ule <2 x double> [[TMP12]], zeroinitializer
115120
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 48 additions & 131 deletions
Original file line number | Diff line number | Diff line change
@@ -968,147 +968,64 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
968968
; TFNONE: [[END]]:
969969
; TFNONE-NEXT: ret void
970970
;
971-
; TFALWAYS-LABEL: define void @test_widen_exp_v2(
972-
; TFALWAYS-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
973-
; TFALWAYS-NEXT: [[ENTRY:.*]]:
974-
; TFALWAYS-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
975-
; TFALWAYS-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2
976-
; TFALWAYS-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
977-
; TFALWAYS-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
978-
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
979-
; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]]
980-
; TFALWAYS: [[VECTOR_BODY]]:
981-
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
982-
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
983-
; TFALWAYS-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
984-
; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
985-
; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
986-
; TFALWAYS-NEXT: [[TMP5:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
987-
; TFALWAYS-NEXT: [[TMP6:%.*]] = fcmp ule <2 x double> [[TMP5]], zeroinitializer
988-
; TFALWAYS-NEXT: [[TMP7:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
989-
; TFALWAYS-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
990-
; TFALWAYS-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
991-
; TFALWAYS-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
992-
; TFALWAYS: [[PRED_STORE_IF]]:
993-
; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
994-
; TFALWAYS-NEXT: store double [[TMP9]], ptr [[P]], align 8
995-
; TFALWAYS-NEXT: br label %[[PRED_STORE_CONTINUE]]
996-
; TFALWAYS: [[PRED_STORE_CONTINUE]]:
997-
; TFALWAYS-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
998-
; TFALWAYS-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
999-
; TFALWAYS: [[PRED_STORE_IF1]]:
1000-
; TFALWAYS-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
1001-
; TFALWAYS-NEXT: store double [[TMP11]], ptr [[P]], align 8
1002-
; TFALWAYS-NEXT: br label %[[PRED_STORE_CONTINUE2]]
1003-
; TFALWAYS: [[PRED_STORE_CONTINUE2]]:
1004-
; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1005-
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
1006-
; TFALWAYS-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
1007-
; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
1008-
; TFALWAYS-NEXT: br i1 [[TMP13]], label %[[END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
1009-
; TFALWAYS: [[END]]:
1010-
; TFALWAYS-NEXT: ret void
1011-
;
1012-
; TFFALLBACK-LABEL: define void @test_widen_exp_v2(
1013-
; TFFALLBACK-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
1014-
; TFFALLBACK-NEXT: [[ENTRY:.*]]:
1015-
; TFFALLBACK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
1016-
; TFFALLBACK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2
1017-
; TFFALLBACK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
1018-
; TFFALLBACK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
1019-
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
1020-
; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]]
1021-
; TFFALLBACK: [[VECTOR_BODY]]:
1022-
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
1023-
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
1024-
; TFFALLBACK-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
1025-
; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
1026-
; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
1027-
; TFFALLBACK-NEXT: [[TMP5:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
1028-
; TFFALLBACK-NEXT: [[TMP6:%.*]] = fcmp ule <2 x double> [[TMP5]], zeroinitializer
1029-
; TFFALLBACK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
1030-
; TFFALLBACK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
1031-
; TFFALLBACK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
1032-
; TFFALLBACK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1033-
; TFFALLBACK: [[PRED_STORE_IF]]:
1034-
; TFFALLBACK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
1035-
; TFFALLBACK-NEXT: store double [[TMP9]], ptr [[P]], align 8
1036-
; TFFALLBACK-NEXT: br label %[[PRED_STORE_CONTINUE]]
1037-
; TFFALLBACK: [[PRED_STORE_CONTINUE]]:
1038-
; TFFALLBACK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
1039-
; TFFALLBACK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
1040-
; TFFALLBACK: [[PRED_STORE_IF1]]:
1041-
; TFFALLBACK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
1042-
; TFFALLBACK-NEXT: store double [[TMP11]], ptr [[P]], align 8
1043-
; TFFALLBACK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
1044-
; TFFALLBACK: [[PRED_STORE_CONTINUE2]]:
1045-
; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1046-
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
1047-
; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
1048-
; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
1049-
; TFFALLBACK-NEXT: br i1 [[TMP13]], label %[[END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1050-
; TFFALLBACK: [[END]]:
1051-
; TFFALLBACK-NEXT: ret void
971+
; TFCOMMON-LABEL: define void @test_widen_exp_v2(
972+
; TFCOMMON-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
973+
; TFCOMMON-NEXT: [[ENTRY:.*]]:
974+
; TFCOMMON-NEXT: br label %[[LOOP:.*]]
975+
; TFCOMMON: [[LOOP]]:
976+
; TFCOMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
977+
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8
978+
; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]]
979+
; TFCOMMON-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00
980+
; TFCOMMON-NEXT: [[SINK:%.*]] = select i1 [[COND1]], double 0.000000e+00, double 1.000000e+00
981+
; TFCOMMON-NEXT: store double [[SINK]], ptr [[P]], align 8
982+
; TFCOMMON-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
983+
; TFCOMMON-NEXT: [[COND2:%.*]] = icmp eq i64 [[IV]], [[N]]
984+
; TFCOMMON-NEXT: br i1 [[COND2]], label %[[END:.*]], label %[[LOOP]]
985+
; TFCOMMON: [[END]]:
986+
; TFCOMMON-NEXT: ret void
1052987
;
1053988
; TFA_INTERLEAVE-LABEL: define void @test_widen_exp_v2(
1054989
; TFA_INTERLEAVE-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
1055990
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
1056991
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
1057-
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 4
1058-
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 4
992+
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2
993+
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
1059994
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
1060-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]])
1061-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 2, i64 [[TMP0]])
995+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = icmp ult i64 0, [[TMP0]]
996+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
1062997
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
1063998
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
1064-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT1:%.*]], %[[PRED_STORE_CONTINUE9:.*]] ]
1065-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE9]] ]
1066-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], %[[PRED_STORE_CONTINUE9]] ]
999+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP19:.*]] ]
1000+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ]
1001+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ]
10671002
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
1068-
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
1069-
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
1070-
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
1071-
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
1072-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ule <2 x double> [[TMP5]], zeroinitializer
1073-
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ule <2 x double> [[TMP6]], zeroinitializer
1074-
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer
1075-
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK2]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer
1076-
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
1077-
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP10]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
1078-
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
1079-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1080-
; TFA_INTERLEAVE: [[PRED_STORE_IF]]:
1081-
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
1003+
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
1004+
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
1005+
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
1006+
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00
1007+
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = xor i1 [[TMP7]], true
1008+
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP8]], true
1009+
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP9]], i1 false
1010+
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP10]], i1 false
1011+
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], double 1.000000e+00, double 0.000000e+00
1012+
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP12]], double 1.000000e+00, double 0.000000e+00
1013+
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[PREDPHI]]
1014+
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor i1 [[ACTIVE_LANE_MASK]], true
1015+
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor i1 [[ACTIVE_LANE_MASK2]], true
1016+
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
1017+
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
1018+
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
1019+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]]
1020+
; TFA_INTERLEAVE: [[BB18]]:
10821021
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
1083-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]]
1084-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]:
1085-
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
1086-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
1087-
; TFA_INTERLEAVE: [[PRED_STORE_IF4]]:
1088-
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
1089-
; TFA_INTERLEAVE-NEXT: store double [[TMP14]], ptr [[P]], align 8
1090-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]]
1091-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]:
1092-
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 0
1093-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
1094-
; TFA_INTERLEAVE: [[PRED_STORE_IF6]]:
1095-
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
1096-
; TFA_INTERLEAVE-NEXT: store double [[TMP16]], ptr [[P]], align 8
1097-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE7]]
1098-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE7]]:
1099-
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
1100-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9]]
1101-
; TFA_INTERLEAVE: [[PRED_STORE_IF8]]:
1102-
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
1103-
; TFA_INTERLEAVE-NEXT: store double [[TMP18]], ptr [[P]], align 8
1104-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE9]]
1105-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE9]]:
1106-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX]], 4
1107-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], 2
1108-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
1109-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP3]])
1110-
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
1111-
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP20]], i32 0
1022+
; TFA_INTERLEAVE-NEXT: br label %[[TMP19]]
1023+
; TFA_INTERLEAVE: [[TMP19]]:
1024+
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1025+
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
1026+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]
1027+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT6]] = icmp ult i64 [[TMP20]], [[TMP3]]
1028+
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor i1 [[ACTIVE_LANE_MASK_NEXT]], true
11121029
; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
11131030
; TFA_INTERLEAVE: [[END]]:
11141031
; TFA_INTERLEAVE-NEXT: ret void

0 commit comments

Comments
 (0)