Skip to content

Commit c1a53c7

Browse files
authored
Merge branch 'users/zhaoqi5/opt-extractelement-idx' into users/zhaoqi5/test-permute-and-shuffle-samelane
2 parents 8873a1b + 6bdb01a commit c1a53c7

File tree

3 files changed

+140
-68
lines changed

3 files changed

+140
-68
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 111 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
412412
setTargetDAGCombine(ISD::BITCAST);
413413
}
414414

415+
// Set DAG combine for 'LASX' feature.
416+
417+
if (Subtarget.hasExtLASX())
418+
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
419+
415420
// Compute derived properties from the register classes.
416421
computeRegisterProperties(Subtarget.getRegisterInfo());
417422

@@ -2612,26 +2617,87 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
26122617
SDValue Vec = Op->getOperand(0);
26132618
EVT VecTy = Vec->getValueType(0);
26142619
SDValue Idx = Op->getOperand(1);
2615-
unsigned NumElts = VecTy.getVectorNumElements();
26162620
SDLoc DL(Op);
2621+
MVT GRLenVT = Subtarget.getGRLenVT();
26172622

26182623
assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
26192624

2620-
if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
2625+
if (isa<ConstantSDNode>(Idx))
26212626
return Op;
26222627

2623-
// TODO: Deal with other legal 256-bits vector types?
2624-
if (!isa<ConstantSDNode>(Idx) &&
2625-
(VecTy == MVT::v8i32 || VecTy == MVT::v8f32)) {
2628+
switch (VecTy.getSimpleVT().SimpleTy) {
2629+
default:
2630+
llvm_unreachable("Unexpected type");
2631+
case MVT::v32i8:
2632+
case MVT::v16i16: {
2633+
// Consider the source vector as v8i32 type.
2634+
SDValue NewVec = DAG.getBitcast(MVT::v8i32, Vec);
2635+
2636+
// Compute the adjusted index and use it to broadcast the vector.
2637+
// The original desired i8/i16 element is now replicated in each
2638+
// i32 lane of the splatted vector.
2639+
SDValue NewIdx = DAG.getNode(
2640+
LoongArchISD::BSTRPICK, DL, GRLenVT, Idx,
2641+
DAG.getConstant(31, DL, GRLenVT),
2642+
DAG.getConstant(((VecTy == MVT::v32i8) ? 2 : 1), DL, GRLenVT));
2643+
SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, NewIdx);
2644+
SDValue SplatValue =
2645+
DAG.getNode(LoongArchISD::XVPERM, DL, MVT::v8i32, NewVec, SplatIdx);
2646+
SDValue SplatVec = DAG.getBitcast(VecTy, SplatValue);
2647+
2648+
// Compute the local index of the original i8/i16 element within the
2649+
// i32 element and then use it to broadcast the vector. Each element
2650+
// of the vector will be the desired element.
2651+
SDValue LocalIdx = DAG.getNode(
2652+
ISD::AND, DL, GRLenVT, Idx,
2653+
DAG.getConstant(((VecTy == MVT::v32i8) ? 3 : 1), DL, GRLenVT));
2654+
SDValue ExtractVec =
2655+
DAG.getNode(LoongArchISD::VREPLVE, DL, VecTy, SplatVec, LocalIdx);
2656+
2657+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ExtractVec,
2658+
DAG.getConstant(0, DL, GRLenVT));
2659+
}
2660+
case MVT::v8i32:
2661+
case MVT::v8f32: {
26262662
SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
26272663
SDValue SplatValue =
26282664
DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
26292665

26302666
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2631-
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2667+
DAG.getConstant(0, DL, GRLenVT));
26322668
}
2669+
case MVT::v4i64:
2670+
case MVT::v4f64: {
2671+
// Consider the source vector as v8i32 type.
2672+
SDValue NewVec = DAG.getBitcast(MVT::v8i32, Vec);
26332673

2634-
return SDValue();
2674+
// Split the original element index into low and high parts:
2675+
// Lo = Idx * 2, Hi = Idx * 2 + 1.
2676+
SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2677+
SDValue SplatIdxLo = DAG.getNode(LoongArchISD::VSLLI, DL, MVT::v8i32,
2678+
SplatIdx, DAG.getConstant(1, DL, GRLenVT));
2679+
SDValue SplatIdxHi =
2680+
DAG.getNode(ISD::ADD, DL, MVT::v8i32, SplatIdxLo,
2681+
DAG.getSplatBuildVector(MVT::v8i32, DL,
2682+
DAG.getConstant(1, DL, GRLenVT)));
2683+
2684+
// Use the broadcasted index to broadcast the low and high parts of the
2685+
// vector separately.
2686+
SDValue SplatVecLo =
2687+
DAG.getNode(LoongArchISD::XVPERM, DL, MVT::v8i32, NewVec, SplatIdxLo);
2688+
SDValue SplatVecHi =
2689+
DAG.getNode(LoongArchISD::XVPERM, DL, MVT::v8i32, NewVec, SplatIdxHi);
2690+
2691+
// Combine the low and high i32 parts to reconstruct the original i64/f64
2692+
// element.
2693+
SDValue SplatValue = DAG.getNode(LoongArchISD::VILVL, DL, MVT::v8i32,
2694+
SplatVecHi, SplatVecLo);
2695+
SDValue ExtractVec = DAG.getBitcast(VecTy, SplatValue);
2696+
2697+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ExtractVec,
2698+
DAG.getConstant(0, DL, GRLenVT));
2699+
}
2700+
}
26352701
}
26362702

26372703
SDValue
@@ -5846,6 +5912,42 @@ performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
58465912
return SDValue();
58475913
}
58485914

5915+
/// Target DAG combine for ISD::EXTRACT_VECTOR_ELT nodes.
///
/// Runs only before legalization, and only when the source vector is 256 bits
/// wide and the index is not a constant. It strips a truncate/extend round
/// trip from the index operand:
///
///   t2 = truncate t1
///   t3 = {zero/sign/any}_extend t2
///   t4 = extract_vector_elt t0, t3
/// becomes
///   t4 = extract_vector_elt t0, t1
///
/// The fold is applied only when t1 has the same type as t3, so the rewritten
/// node never carries an index type that the original node did not have.
///
/// \param N         The EXTRACT_VECTOR_ELT node being combined.
/// \param DAG       The SelectionDAG used to build the replacement node.
/// \param DCI       Combiner state; queried for the current combine phase.
/// \param Subtarget Unused here; kept so the signature matches the other
///                  perform*Combine helpers dispatched by PerformDAGCombine.
/// \returns The replacement node, or an empty SDValue when nothing changes.
static SDValue
performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (!DCI.isBeforeLegalize())
    return SDValue();

  SDValue Vec = N->getOperand(0);
  SDValue Idx = N->getOperand(1);

  if (!Vec.getValueType().is256BitVector() || isa<ConstantSDNode>(Idx))
    return SDValue();

  // Only an extension that sits directly on top of a truncate is of interest.
  switch (Idx.getOpcode()) {
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
    break;
  default:
    return SDValue();
  }

  SDValue Trunc = Idx.getOperand(0);
  if (Trunc.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  // Types are not yet legalized at this point, so the pre-truncate value may
  // be wider or narrower than the index. Substituting it blindly could hand
  // the legalizer an index of a different (possibly illegal) type; only fold
  // when the type is unchanged.
  SDValue WideIdx = Trunc.getOperand(0);
  if (WideIdx.getValueType() != Idx.getValueType())
    return SDValue();

  // NOTE(review): dropping the truncate/extend pair means the new index can
  // differ from the old one in the bits above the truncated width. Confirm
  // that every lowering of a variable-index extract only consumes the low
  // bits of the index.

  // Use EVT rather than MVT for the result type: before legalization the
  // result type is not guaranteed to be a simple value type.
  EVT EltVT = N->getValueType(0);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, Vec, WideIdx);
}
5950+
58495951
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
58505952
DAGCombinerInfo &DCI) const {
58515953
SelectionDAG &DAG = DCI.DAG;
@@ -5875,6 +5977,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
58755977
return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
58765978
case LoongArchISD::SPLIT_PAIR_F64:
58775979
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
5980+
case ISD::EXTRACT_VECTOR_ELT:
5981+
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
58785982
}
58795983
return SDValue();
58805984
}

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
def SDT_LoongArchXVPERM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
14-
SDTCisVec<2>, SDTCisInt<2>]>;
14+
SDTCisVec<2>, SDTCisInt<2>]>;
1515

1616
// Target nodes.
1717
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll

Lines changed: 28 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -76,21 +76,13 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
7676
define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
7777
; CHECK-LABEL: extract_32xi8_idx:
7878
; CHECK: # %bb.0:
79-
; CHECK-NEXT: addi.d $sp, $sp, -96
80-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
81-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
82-
; CHECK-NEXT: addi.d $fp, $sp, 96
83-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
8479
; CHECK-NEXT: xvld $xr0, $a0, 0
85-
; CHECK-NEXT: xvst $xr0, $sp, 32
86-
; CHECK-NEXT: addi.d $a0, $sp, 32
87-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0
88-
; CHECK-NEXT: ld.b $a0, $a0, 0
89-
; CHECK-NEXT: st.b $a0, $a1, 0
90-
; CHECK-NEXT: addi.d $sp, $fp, -96
91-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
92-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
93-
; CHECK-NEXT: addi.d $sp, $sp, 96
80+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 2
81+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
82+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
83+
; CHECK-NEXT: andi $a0, $a2, 3
84+
; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a0
85+
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
9486
; CHECK-NEXT: ret
9587
%v = load volatile <32 x i8>, ptr %src
9688
%e = extractelement <32 x i8> %v, i32 %idx
@@ -101,21 +93,13 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
10193
define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
10294
; CHECK-LABEL: extract_16xi16_idx:
10395
; CHECK: # %bb.0:
104-
; CHECK-NEXT: addi.d $sp, $sp, -96
105-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
106-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
107-
; CHECK-NEXT: addi.d $fp, $sp, 96
108-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
10996
; CHECK-NEXT: xvld $xr0, $a0, 0
110-
; CHECK-NEXT: xvst $xr0, $sp, 32
111-
; CHECK-NEXT: addi.d $a0, $sp, 32
112-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1
113-
; CHECK-NEXT: ld.h $a0, $a0, 0
114-
; CHECK-NEXT: st.h $a0, $a1, 0
115-
; CHECK-NEXT: addi.d $sp, $fp, -96
116-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
117-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
118-
; CHECK-NEXT: addi.d $sp, $sp, 96
97+
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 1
98+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
99+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
100+
; CHECK-NEXT: andi $a0, $a2, 1
101+
; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a0
102+
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
119103
; CHECK-NEXT: ret
120104
%v = load volatile <16 x i16>, ptr %src
121105
%e = extractelement <16 x i16> %v, i32 %idx
@@ -127,8 +111,7 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
127111
; CHECK-LABEL: extract_8xi32_idx:
128112
; CHECK: # %bb.0:
129113
; CHECK-NEXT: xvld $xr0, $a0, 0
130-
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
131-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
114+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
132115
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
133116
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
134117
; CHECK-NEXT: ret
@@ -141,21 +124,14 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
141124
define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
142125
; CHECK-LABEL: extract_4xi64_idx:
143126
; CHECK: # %bb.0:
144-
; CHECK-NEXT: addi.d $sp, $sp, -96
145-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
146-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
147-
; CHECK-NEXT: addi.d $fp, $sp, 96
148-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
149127
; CHECK-NEXT: xvld $xr0, $a0, 0
150-
; CHECK-NEXT: xvst $xr0, $sp, 32
151-
; CHECK-NEXT: addi.d $a0, $sp, 32
152-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
153-
; CHECK-NEXT: ld.d $a0, $a0, 0
154-
; CHECK-NEXT: st.d $a0, $a1, 0
155-
; CHECK-NEXT: addi.d $sp, $fp, -96
156-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
157-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
158-
; CHECK-NEXT: addi.d $sp, $sp, 96
128+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
129+
; CHECK-NEXT: xvslli.w $xr1, $xr1, 1
130+
; CHECK-NEXT: xvperm.w $xr2, $xr0, $xr1
131+
; CHECK-NEXT: xvaddi.wu $xr1, $xr1, 1
132+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
133+
; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr2
134+
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
159135
; CHECK-NEXT: ret
160136
%v = load volatile <4 x i64>, ptr %src
161137
%e = extractelement <4 x i64> %v, i32 %idx
@@ -167,8 +143,7 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
167143
; CHECK-LABEL: extract_8xfloat_idx:
168144
; CHECK: # %bb.0:
169145
; CHECK-NEXT: xvld $xr0, $a0, 0
170-
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
171-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
146+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
172147
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
173148
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
174149
; CHECK-NEXT: ret
@@ -181,21 +156,14 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
181156
define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
182157
; CHECK-LABEL: extract_4xdouble_idx:
183158
; CHECK: # %bb.0:
184-
; CHECK-NEXT: addi.d $sp, $sp, -96
185-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
186-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
187-
; CHECK-NEXT: addi.d $fp, $sp, 96
188-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
189159
; CHECK-NEXT: xvld $xr0, $a0, 0
190-
; CHECK-NEXT: xvst $xr0, $sp, 32
191-
; CHECK-NEXT: addi.d $a0, $sp, 32
192-
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
193-
; CHECK-NEXT: fld.d $fa0, $a0, 0
194-
; CHECK-NEXT: fst.d $fa0, $a1, 0
195-
; CHECK-NEXT: addi.d $sp, $fp, -96
196-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
197-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
198-
; CHECK-NEXT: addi.d $sp, $sp, 96
160+
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
161+
; CHECK-NEXT: xvslli.w $xr1, $xr1, 1
162+
; CHECK-NEXT: xvperm.w $xr2, $xr0, $xr1
163+
; CHECK-NEXT: xvaddi.wu $xr1, $xr1, 1
164+
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
165+
; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr2
166+
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
199167
; CHECK-NEXT: ret
200168
%v = load volatile <4 x double>, ptr %src
201169
%e = extractelement <4 x double> %v, i32 %idx

0 commit comments

Comments
 (0)