Skip to content

Commit c359643

Browse files
XChy, Priyanshu3820
authored and committed
[DAGCombiner] Don't optimize insert_vector_elt into shuffle if implicit truncation exists (llvm#169022)
Fixes llvm#169017
1 parent 054e087 commit c359643

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23469,6 +23469,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
2346923469
EVT SubVecVT = SubVec.getValueType();
2347023470
EVT VT = DestVec.getValueType();
2347123471
unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23472+
// Bail out if the inserted value is larger than the vector element, as
23473+
// insert_vector_elt performs an implicit truncation in this case.
23474+
if (InsertVal.getValueType() != VT.getVectorElementType())
23475+
return SDValue();
2347223476
// If the source only has a single vector element, the cost of adding
2347323477
// it to a vector is likely to exceed the cost of an insert_vector_elt.
2347423478
if (NumSrcElts == 1)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,32 @@ define <4 x half> @insertelt_v4f16_idx(<4 x half> %a, half %y, i32 zeroext %idx)
11431143
%b = insertelement <4 x half> %a, half %y, i32 %idx
11441144
ret <4 x half> %b
11451145
}
1146+
1147+
; Regression test named after issue llvm#169017. The inserted i8 is produced by
; bitcasting the <4 x i16> argument to i64 and truncating the result, and is
; then written into lane 0 of %dst_vec. The CHECK and VISNI prefixes expect the
; same instruction sequence.
; NOTE(review): presumably guards the DAGCombiner bail-out for an
; insert_vector_elt whose inserted value is wider than the vector element
; (implicit truncation) -- confirm against the linked issue.
define <2 x i8> @pr169017(<4 x i16> %vecinit, <2 x i8> %dst_vec) {
1148+
; CHECK-LABEL: pr169017:
1149+
; CHECK: # %bb.0: # %entry
1150+
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1151+
; CHECK-NEXT: vmv.x.s a0, v8
1152+
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
1153+
; CHECK-NEXT: vmv.s.x v9, a0
1154+
; CHECK-NEXT: vmv1r.v v8, v9
1155+
; CHECK-NEXT: ret
1156+
;
1157+
; VISNI-LABEL: pr169017:
1158+
; VISNI: # %bb.0: # %entry
1159+
; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1160+
; VISNI-NEXT: vmv.x.s a0, v8
1161+
; VISNI-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
1162+
; VISNI-NEXT: vmv.s.x v9, a0
1163+
; VISNI-NEXT: vmv1r.v v8, v9
1164+
; VISNI-NEXT: ret
1165+
entry:
1166+
%cast = bitcast <4 x i16> %vecinit to i64
1167+
%trunc = trunc i64 %cast to i8
1168+
%2 = insertelement <2 x i8> %dst_vec, i8 %trunc, i64 0
1169+
ret <2 x i8> %2
1170+
}
1171+
11461172
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
11471173
; ZVFHMINRV32: {{.*}}
11481174
; ZVFHMINRV64: {{.*}}

0 commit comments

Comments
 (0)