Skip to content

Commit e1a23dd

Browse files
committed
[LoongArch] Generate [x]vldi instructions with special constant splats
1 parent 368110e commit e1a23dd

File tree

15 files changed

+289
-369
lines changed

15 files changed

+289
-369
lines changed

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -113,44 +113,58 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
113113
APInt SplatValue, SplatUndef;
114114
unsigned SplatBitSize;
115115
bool HasAnyUndefs;
116-
unsigned Op;
116+
unsigned Op = 0;
117117
EVT ResTy = BVN->getValueType(0);
118118
bool Is128Vec = BVN->getValueType(0).is128BitVector();
119119
bool Is256Vec = BVN->getValueType(0).is256BitVector();
120+
SDNode *Res;
120121

121122
if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
122123
break;
123124
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
124125
HasAnyUndefs, 8))
125126
break;
126127

127-
switch (SplatBitSize) {
128-
default:
129-
break;
130-
case 8:
131-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
132-
break;
133-
case 16:
134-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
135-
break;
136-
case 32:
137-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
138-
break;
139-
case 64:
140-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
141-
break;
142-
}
143-
144-
SDNode *Res;
145128
// If we have a signed 10 bit integer, we can splat it directly.
146129
if (SplatValue.isSignedIntN(10)) {
130+
switch (SplatBitSize) {
131+
default:
132+
break;
133+
case 8:
134+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
135+
break;
136+
case 16:
137+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
138+
break;
139+
case 32:
140+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
141+
break;
142+
case 64:
143+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
144+
break;
145+
}
146+
147147
EVT EleType = ResTy.getVectorElementType();
148148
APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
149149
SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
150150
Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
151151
ReplaceNode(Node, Res);
152152
return;
153153
}
154+
155+
// Select appropriate [x]vldi instructions for some special constant splats,
156+
// i.e. those that [x]vldi can materialize with immediate bit `imm[12] == 1`.
157+
std::pair<bool, uint64_t> ConvertVLDI =
158+
LoongArchTargetLowering::isImmVLDILegalForMode1(SplatValue,
159+
SplatBitSize);
160+
if (ConvertVLDI.first) {
161+
Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
162+
SDValue Imm = CurDAG->getSignedTargetConstant(
163+
SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
164+
Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
165+
ReplaceNode(Node, Res);
166+
return;
167+
}
154168
break;
155169
}
156170
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2679,9 +2679,10 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
26792679

26802680
if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
26812681
// We can only handle 64-bit elements that are within
2682-
// the signed 10-bit range on 32-bit targets.
2682+
// the signed 10-bit range or match vldi patterns on 32-bit targets.
26832683
// See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2684-
if (!SplatValue.isSignedIntN(10))
2684+
if (!SplatValue.isSignedIntN(10) &&
2685+
!isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
26852686
return SDValue();
26862687
if ((Is128Vec && ResTy == MVT::v4i32) ||
26872688
(Is256Vec && ResTy == MVT::v8i32))
@@ -8194,6 +8195,88 @@ SDValue LoongArchTargetLowering::LowerReturn(
81948195
return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
81958196
}
81968197

8198+
// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8199+
// Note: The following prefixes are excluded:
8200+
// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8201+
// as they can be represented using [x]vrepli.[whb]
8202+
std::pair<bool, uint64_t>
8203+
LoongArchTargetLowering::isImmVLDILegalForMode1(const APInt &SplatValue,
8204+
const unsigned SplatBitSize) {
8205+
uint64_t RequiredImm = 0;
8206+
uint64_t V = SplatValue.getZExtValue();
8207+
if (SplatBitSize == 16 && !(V & 0x00FF)) {
8208+
// 4'b0101
8209+
RequiredImm = (0b10101 << 8) | (V >> 8);
8210+
return {true, RequiredImm};
8211+
} else if (SplatBitSize == 32) {
8212+
// 4'b0001
8213+
if (!(V & 0xFFFF00FF)) {
8214+
RequiredImm = (0b10001 << 8) | (V >> 8);
8215+
return {true, RequiredImm};
8216+
}
8217+
// 4'b0010
8218+
if (!(V & 0xFF00FFFF)) {
8219+
RequiredImm = (0b10010 << 8) | (V >> 16);
8220+
return {true, RequiredImm};
8221+
}
8222+
// 4'b0011
8223+
if (!(V & 0x00FFFFFF)) {
8224+
RequiredImm = (0b10011 << 8) | (V >> 24);
8225+
return {true, RequiredImm};
8226+
}
8227+
// 4'b0110
8228+
if ((V & 0xFFFF00FF) == 0xFF) {
8229+
RequiredImm = (0b10110 << 8) | (V >> 8);
8230+
return {true, RequiredImm};
8231+
}
8232+
// 4'b0111
8233+
if ((V & 0xFF00FFFF) == 0xFFFF) {
8234+
RequiredImm = (0b10111 << 8) | (V >> 16);
8235+
return {true, RequiredImm};
8236+
}
8237+
// 4'b1010
8238+
if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8239+
RequiredImm =
8240+
(0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8241+
return {true, RequiredImm};
8242+
}
8243+
} else if (SplatBitSize == 64) {
8244+
// 4'b1011
8245+
if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8246+
(V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8247+
RequiredImm =
8248+
(0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8249+
return {true, RequiredImm};
8250+
}
8251+
// 4'b1100
8252+
if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8253+
(V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8254+
RequiredImm =
8255+
(0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8256+
return {true, RequiredImm};
8257+
}
8258+
// 4'b1001
8259+
auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8260+
uint8_t res = 0;
8261+
for (int i = 0; i < 8; ++i) {
8262+
uint8_t byte = x & 0xFF;
8263+
if (byte == 0 || byte == 0xFF)
8264+
res |= ((byte & 1) << i);
8265+
else
8266+
return {false, 0};
8267+
x >>= 8;
8268+
}
8269+
return {true, res};
8270+
};
8271+
auto [IsSame, Suffix] = sameBitsPreByte(V);
8272+
if (IsSame) {
8273+
RequiredImm = (0b11001 << 8) | Suffix;
8274+
return {true, RequiredImm};
8275+
}
8276+
}
8277+
return {false, RequiredImm};
8278+
}
8279+
81978280
bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
81988281
EVT VT) const {
81998282
if (!Subtarget.hasExtLSX())

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,11 @@ class LoongArchTargetLowering : public TargetLowering {
332332
TargetLoweringOpt &TLO,
333333
unsigned Depth) const override;
334334

335+
/// Check if a constant splat can be generated using [x]vldi, where imm[12]
336+
/// is 1.
337+
static std::pair<bool, uint64_t>
338+
isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize);
339+
335340
private:
336341
/// Target-specific function used to lower LoongArch calling conventions.
337342
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,

llvm/test/CodeGen/LoongArch/lasx/build-vector.ll

Lines changed: 23 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,7 @@ entry:
201201
define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
202202
; CHECK-LABEL: buildvector_v8f32_const_splat:
203203
; CHECK: # %bb.0: # %entry
204-
; CHECK-NEXT: lu12i.w $a1, 260096
205-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
204+
; CHECK-NEXT: xvldi $xr0, -1424
206205
; CHECK-NEXT: xvst $xr0, $a0, 0
207206
; CHECK-NEXT: ret
208207
entry:
@@ -212,19 +211,11 @@ entry:
212211

213212
;; Also check buildvector_const_splat_xvldi_1100.
214213
define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
215-
; LA32-LABEL: buildvector_v4f64_const_splat:
216-
; LA32: # %bb.0: # %entry
217-
; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
218-
; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
219-
; LA32-NEXT: xvst $xr0, $a0, 0
220-
; LA32-NEXT: ret
221-
;
222-
; LA64-LABEL: buildvector_v4f64_const_splat:
223-
; LA64: # %bb.0: # %entry
224-
; LA64-NEXT: lu52i.d $a1, $zero, 1023
225-
; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
226-
; LA64-NEXT: xvst $xr0, $a0, 0
227-
; LA64-NEXT: ret
214+
; CHECK-LABEL: buildvector_v4f64_const_splat:
215+
; CHECK: # %bb.0: # %entry
216+
; CHECK-NEXT: xvldi $xr0, -912
217+
; CHECK-NEXT: xvst $xr0, $a0, 0
218+
; CHECK-NEXT: ret
228219
entry:
229220
store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst
230221
ret void
@@ -234,8 +225,7 @@ entry:
234225
define void @buildvector_const_splat_xvldi_0001(ptr %dst) nounwind {
235226
; CHECK-LABEL: buildvector_const_splat_xvldi_0001:
236227
; CHECK: # %bb.0: # %entry
237-
; CHECK-NEXT: ori $a1, $zero, 768
238-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
228+
; CHECK-NEXT: xvldi $xr0, -3837
239229
; CHECK-NEXT: xvst $xr0, $a0, 0
240230
; CHECK-NEXT: ret
241231
entry:
@@ -246,8 +236,7 @@ entry:
246236
define void @buildvector_const_splat_xvldi_0010(ptr %dst) nounwind {
247237
; CHECK-LABEL: buildvector_const_splat_xvldi_0010:
248238
; CHECK: # %bb.0: # %entry
249-
; CHECK-NEXT: lu12i.w $a1, 16
250-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
239+
; CHECK-NEXT: xvldi $xr0, -3583
251240
; CHECK-NEXT: xvst $xr0, $a0, 0
252241
; CHECK-NEXT: ret
253242
entry:
@@ -258,8 +247,7 @@ entry:
258247
define void @buildvector_const_splat_xvldi_0011(ptr %dst) nounwind {
259248
; CHECK-LABEL: buildvector_const_splat_xvldi_0011:
260249
; CHECK: # %bb.0: # %entry
261-
; CHECK-NEXT: lu12i.w $a1, 4096
262-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
250+
; CHECK-NEXT: xvldi $xr0, -3327
263251
; CHECK-NEXT: xvst $xr0, $a0, 0
264252
; CHECK-NEXT: ret
265253
entry:
@@ -270,8 +258,7 @@ entry:
270258
define void @buildvector_const_splat_xvldi_0101(ptr %dst) {
271259
; CHECK-LABEL: buildvector_const_splat_xvldi_0101:
272260
; CHECK: # %bb.0: # %entry
273-
; CHECK-NEXT: ori $a1, $zero, 768
274-
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1
261+
; CHECK-NEXT: xvldi $xr0, -2813
275262
; CHECK-NEXT: xvst $xr0, $a0, 0
276263
; CHECK-NEXT: ret
277264
entry:
@@ -282,8 +269,7 @@ entry:
282269
define void @buildvector_const_splat_xvldi_0110(ptr %dst) nounwind {
283270
; CHECK-LABEL: buildvector_const_splat_xvldi_0110:
284271
; CHECK: # %bb.0: # %entry
285-
; CHECK-NEXT: ori $a1, $zero, 1023
286-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
272+
; CHECK-NEXT: xvldi $xr0, -2557
287273
; CHECK-NEXT: xvst $xr0, $a0, 0
288274
; CHECK-NEXT: ret
289275
entry:
@@ -294,9 +280,7 @@ entry:
294280
define void @buildvector_const_splat_xvldi_0111(ptr %dst) nounwind {
295281
; CHECK-LABEL: buildvector_const_splat_xvldi_0111:
296282
; CHECK: # %bb.0: # %entry
297-
; CHECK-NEXT: lu12i.w $a1, 15
298-
; CHECK-NEXT: ori $a1, $a1, 4095
299-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
283+
; CHECK-NEXT: xvldi $xr0, -2305
300284
; CHECK-NEXT: xvst $xr0, $a0, 0
301285
; CHECK-NEXT: ret
302286
entry:
@@ -305,39 +289,22 @@ entry:
305289
}
306290

307291
define void @buildvector_const_splat_xvldi_1001(ptr %dst) nounwind {
308-
; LA32-LABEL: buildvector_const_splat_xvldi_1001:
309-
; LA32: # %bb.0: # %entry
310-
; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI21_0)
311-
; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI21_0)
312-
; LA32-NEXT: xvst $xr0, $a0, 0
313-
; LA32-NEXT: ret
314-
;
315-
; LA64-LABEL: buildvector_const_splat_xvldi_1001:
316-
; LA64: # %bb.0: # %entry
317-
; LA64-NEXT: lu12i.w $a1, 15
318-
; LA64-NEXT: ori $a1, $a1, 4095
319-
; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
320-
; LA64-NEXT: xvst $xr0, $a0, 0
321-
; LA64-NEXT: ret
292+
; CHECK-LABEL: buildvector_const_splat_xvldi_1001:
293+
; CHECK: # %bb.0: # %entry
294+
; CHECK-NEXT: xvldi $xr0, -1789
295+
; CHECK-NEXT: xvst $xr0, $a0, 0
296+
; CHECK-NEXT: ret
322297
entry:
323298
store <8 x i32> <i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
324299
ret void
325300
}
326301

327302
define void @buildvector_const_splat_xvldi_1011(ptr %dst) nounwind {
328-
; LA32-LABEL: buildvector_const_splat_xvldi_1011:
329-
; LA32: # %bb.0: # %entry
330-
; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI22_0)
331-
; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI22_0)
332-
; LA32-NEXT: xvst $xr0, $a0, 0
333-
; LA32-NEXT: ret
334-
;
335-
; LA64-LABEL: buildvector_const_splat_xvldi_1011:
336-
; LA64: # %bb.0: # %entry
337-
; LA64-NEXT: lu12i.w $a1, 262144
338-
; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
339-
; LA64-NEXT: xvst $xr0, $a0, 0
340-
; LA64-NEXT: ret
303+
; CHECK-LABEL: buildvector_const_splat_xvldi_1011:
304+
; CHECK: # %bb.0: # %entry
305+
; CHECK-NEXT: xvldi $xr0, -1280
306+
; CHECK-NEXT: xvst $xr0, $a0, 0
307+
; CHECK-NEXT: ret
341308
entry:
342309
store <8 x float> <float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0>, ptr %dst
343310
ret void
@@ -2458,8 +2425,7 @@ define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, flo
24582425
; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
24592426
; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
24602427
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
2461-
; CHECK-NEXT: lu12i.w $a1, 262144
2462-
; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1
2428+
; CHECK-NEXT: xvldi $xr4, -3264
24632429
; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1
24642430
; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2
24652431
; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5

0 commit comments

Comments
 (0)