Skip to content

Commit ab438a9

Browse files
committed
[VPlan] Use getOpcodeOrIntrinsicID to fix miscompile
1 parent 83a247a commit ab438a9

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4100,6 +4100,15 @@ static bool interleaveStoredValuesMatch(ArrayRef<VPValue *> StoredValues) {
41004100
if (!IR->getInterleaveGroup()->isFull() ||
41014101
!equal(DefI->definedValues(), Def0->definedValues()))
41024102
return false;
4103+
} else if (Def0 != DefI) {
4104+
auto *SingleDef0 = dyn_cast<VPSingleDefRecipe>(Def0);
4105+
auto *SingleDefI = dyn_cast<VPSingleDefRecipe>(DefI);
4106+
if (!SingleDef0 || !SingleDefI)
4107+
return false;
4108+
auto Opc0 = getOpcodeOrIntrinsicID(SingleDef0);
4109+
auto OpcI = getOpcodeOrIntrinsicID(SingleDefI);
4110+
if (!Opc0 || Opc0 != OpcI)
4111+
return false;
41034112
}
41044113
}
41054114
}
@@ -4257,11 +4266,12 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
42574266
NarrowedOps.insert(RepR);
42584267
return RepR;
42594268
}
4260-
auto *WideLoad = dyn_cast<VPWidenLoadRecipe>(R);
4261-
if (!WideLoad) {
4269+
if (isa<VPSingleDefRecipe>(R)) {
4270+
// Narrow any intervening single-def recipes.
42624271
NarrowedOps.insert(V);
42634272
return V;
42644273
}
4274+
auto *WideLoad = cast<VPWidenLoadRecipe>(R);
42654275

42664276
VPValue *PtrOp = WideLoad->getAddr();
42674277
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(PtrOp))

llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,27 @@ define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) {
107107
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
108108
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
109109
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
110-
; CHECK-NEXT: store <4 x i8> [[TMP4]], ptr [[NEXT_GEP]], align 1
111-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
110+
; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
111+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP23]], [[BROADCAST_SPLAT]]
112+
; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
113+
; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
114+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
115+
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
116+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
117+
; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
118+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
119+
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
120+
; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
121+
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
122+
; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
123+
; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
124+
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
125+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
126+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
127+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
128+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
129+
; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
130+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
112131
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
113132
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
114133
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)