Skip to content

Commit bafec6a

Browse files
committed
[VPlan] Use getOpcodeOrIntrinsicID to fix miscompile
1 parent 30a2be4 commit bafec6a

File tree

2 files changed: +33 -4 lines changed

2 files changed

+33
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4148,6 +4148,15 @@ static bool interleaveStoredValuesMatch(ArrayRef<VPValue *> StoredValues) {
41484148
if (!IR->getInterleaveGroup()->isFull() ||
41494149
!equal(DefI->definedValues(), Def0->definedValues()))
41504150
return false;
4151+
} else if (Def0 != DefI) {
4152+
auto *SingleDef0 = dyn_cast<VPSingleDefRecipe>(Def0);
4153+
auto *SingleDefI = dyn_cast<VPSingleDefRecipe>(DefI);
4154+
if (!SingleDef0 || !SingleDefI)
4155+
return false;
4156+
auto Opc0 = getOpcodeOrIntrinsicID(SingleDef0);
4157+
auto OpcI = getOpcodeOrIntrinsicID(SingleDefI);
4158+
if (!Opc0 || Opc0 != OpcI)
4159+
return false;
41514160
}
41524161
}
41534162
}
@@ -4305,11 +4314,12 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
43054314
NarrowedOps.insert(RepR);
43064315
return RepR;
43074316
}
4308-
auto *WideLoad = dyn_cast<VPWidenLoadRecipe>(R);
4309-
if (!WideLoad) {
4317+
if (isa<VPSingleDefRecipe>(R)) {
4318+
// Narrow any intervening single-def recipes.
43104319
NarrowedOps.insert(V);
43114320
return V;
43124321
}
4322+
auto *WideLoad = cast<VPWidenLoadRecipe>(R);
43134323

43144324
VPValue *PtrOp = WideLoad->getAddr();
43154325
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(PtrOp))

llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,27 @@ define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) {
107107
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
108108
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
109109
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
110-
; CHECK-NEXT: store <4 x i8> [[TMP4]], ptr [[NEXT_GEP]], align 1
111-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
110+
; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
111+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP23]], [[BROADCAST_SPLAT]]
112+
; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
113+
; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
114+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
115+
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
116+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
117+
; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
118+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
119+
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
120+
; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
121+
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
122+
; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
123+
; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
124+
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
125+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
126+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
127+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
128+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
129+
; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
130+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
112131
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
113132
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
114133
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)