Skip to content

Commit e0b1b66

Browse files
committed
better choice
1 parent 94af475 commit e0b1b66

File tree

2 files changed

+35
-76
lines changed

2 files changed

+35
-76
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 20 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2799,28 +2799,26 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
27992799
default:
28002800
llvm_unreachable("Unexpected type");
28012801
case MVT::v32i8:
2802-
case MVT::v16i16: {
2803-
// Consider the source vector as v8i32 type.
2804-
SDValue NewVec = DAG.getBitcast(MVT::v8i32, Vec);
2805-
2806-
// Compute the adjusted index and use it to broadcast the vector.
2807-
// The original desired i8/i16 element is now replicated in each
2808-
// i32 lane of the splatted vector.
2809-
SDValue NewIdx = DAG.getNode(
2810-
ISD::SRA, DL, GRLenVT, Idx,
2811-
DAG.getConstant(((VecTy == MVT::v32i8) ? 2 : 1), DL, GRLenVT));
2812-
SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, NewIdx);
2813-
SDValue SplatValue =
2814-
DAG.getNode(LoongArchISD::XVPERM, DL, MVT::v8i32, NewVec, SplatIdx);
2815-
SDValue SplatVec = DAG.getBitcast(VecTy, SplatValue);
2816-
2817-
// The original i8/i16 elements in each i32 lane all share the same i32
2818-
// intra-element offset, use the original Idx to broadcast the vector.
2819-
// Each elements of the vector will be the desired element.
2820-
SDValue ExtractVec =
2821-
DAG.getNode(LoongArchISD::VREPLVE, DL, VecTy, SplatVec, Idx);
2822-
2823-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ExtractVec,
2802+
case MVT::v16i16:
2803+
case MVT::v4i64:
2804+
case MVT::v4f64: {
2805+
// Extract the high half subvector and place it in the low half of a new
2806+
// vector. It doesn't matter what the high half of the new vector is.
2807+
EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2808+
SDValue VecHi =
2809+
DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2810+
SDValue TmpVec =
2811+
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2812+
VecHi, DAG.getConstant(0, DL, GRLenVT));
2813+
2814+
// Shuffle the original Vec and the TmpVec. Each element of the low half of
2815+
// the ResVec will be the desired element.
2816+
SDValue SplatIdx = DAG.getSplatBuildVector(
2817+
(VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, DL, Idx);
2818+
SDValue ResVec =
2819+
DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, SplatIdx, TmpVec, Vec);
2820+
2821+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
28242822
DAG.getConstant(0, DL, GRLenVT));
28252823
}
28262824
case MVT::v8i32:
@@ -2832,37 +2830,6 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
28322830
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
28332831
DAG.getConstant(0, DL, GRLenVT));
28342832
}
2835-
case MVT::v4i64:
2836-
case MVT::v4f64: {
2837-
// Consider the source vector as v8i32 type.
2838-
SDValue NewVec = DAG.getBitcast(MVT::v8i32, Vec);
2839-
2840-
// Split the original element index into low and high parts:
2841-
// Lo = Idx * 2, Hi = Idx * 2 + 1.
2842-
SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2843-
SDValue SplatIdxLo = DAG.getNode(LoongArchISD::VSLLI, DL, MVT::v8i32,
2844-
SplatIdx, DAG.getConstant(1, DL, GRLenVT));
2845-
SDValue SplatIdxHi =
2846-
DAG.getNode(ISD::ADD, DL, MVT::v8i32, SplatIdxLo,
2847-
DAG.getSplatBuildVector(MVT::v8i32, DL,
2848-
DAG.getConstant(1, DL, GRLenVT)));
2849-
2850-
// Use the broadcasted index to broadcast the low and high parts of the
2851-
// vector separately.
2852-
SDValue SplatVecLo =
2853-
DAG.getNode(LoongArchISD::XVPERM, DL, MVT::v8i32, NewVec, SplatIdxLo);
2854-
SDValue SplatVecHi =
2855-
DAG.getNode(LoongArchISD::XVPERM, DL, MVT::v8i32, NewVec, SplatIdxHi);
2856-
2857-
// Combine the low and high i32 parts to reconstruct the original i64/f64
2858-
// element.
2859-
SDValue SplatValue = DAG.getNode(LoongArchISD::VILVL, DL, MVT::v8i32,
2860-
SplatVecHi, SplatVecLo);
2861-
SDValue ExtractVec = DAG.getBitcast(VecTy, SplatValue);
2862-
2863-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ExtractVec,
2864-
DAG.getConstant(0, DL, GRLenVT));
2865-
}
28662833
}
28672834
}
28682835

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,9 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
7777
; CHECK-LABEL: extract_32xi8_idx:
7878
; CHECK: # %bb.0:
7979
; CHECK-NEXT: xvld $xr0, $a0, 0
80-
; CHECK-NEXT: srai.d $a0, $a2, 2
81-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
82-
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
83-
; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2
80+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
81+
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a2
82+
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
8483
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
8584
; CHECK-NEXT: ret
8685
%v = load volatile <32 x i8>, ptr %src
@@ -93,11 +92,10 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
9392
; CHECK-LABEL: extract_16xi16_idx:
9493
; CHECK: # %bb.0:
9594
; CHECK-NEXT: xvld $xr0, $a0, 0
96-
; CHECK-NEXT: srai.d $a0, $a2, 1
97-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
98-
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
99-
; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2
100-
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
95+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
96+
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a2
97+
; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
98+
; CHECK-NEXT: xvstelm.h $xr2, $a1, 0, 0
10199
; CHECK-NEXT: ret
102100
%v = load volatile <16 x i16>, ptr %src
103101
%e = extractelement <16 x i16> %v, i32 %idx
@@ -123,13 +121,10 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
123121
; CHECK-LABEL: extract_4xi64_idx:
124122
; CHECK: # %bb.0:
125123
; CHECK-NEXT: xvld $xr0, $a0, 0
126-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
127-
; CHECK-NEXT: xvslli.w $xr1, $xr1, 1
128-
; CHECK-NEXT: xvperm.w $xr2, $xr0, $xr1
129-
; CHECK-NEXT: xvaddi.wu $xr1, $xr1, 1
130-
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
131-
; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr2
132-
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
124+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
125+
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
126+
; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
127+
; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
133128
; CHECK-NEXT: ret
134129
%v = load volatile <4 x i64>, ptr %src
135130
%e = extractelement <4 x i64> %v, i32 %idx
@@ -155,13 +150,10 @@ define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
155150
; CHECK-LABEL: extract_4xdouble_idx:
156151
; CHECK: # %bb.0:
157152
; CHECK-NEXT: xvld $xr0, $a0, 0
158-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
159-
; CHECK-NEXT: xvslli.w $xr1, $xr1, 1
160-
; CHECK-NEXT: xvperm.w $xr2, $xr0, $xr1
161-
; CHECK-NEXT: xvaddi.wu $xr1, $xr1, 1
162-
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
163-
; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr2
164-
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
153+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
154+
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
155+
; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
156+
; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
165157
; CHECK-NEXT: ret
166158
%v = load volatile <4 x double>, ptr %src
167159
%e = extractelement <4 x double> %v, i32 %idx

0 commit comments

Comments
 (0)