Commit bc44ff2

[LoongArch] Make {sadd,ssub,uadd,usub}.sat legal for lsx/lasx (#158209)
1 parent c8b5b6e commit bc44ff2

File tree

11 files changed: +72 −240 lines

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (8 additions, 0 deletions)

@@ -311,6 +311,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
       setOperationAction(ISD::ABDS, VT, Legal);
       setOperationAction(ISD::ABDU, VT, Legal);
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -386,6 +390,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
       setOperationAction(ISD::ABDS, VT, Legal);
       setOperationAction(ISD::ABDU, VT, Legal);
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
     }
     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}
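Marking the four saturating ISD nodes Legal for the LSX and LASX integer vector types stops SelectionDAG from expanding them into the compare/select sequences visible in the old test CHECK lines; the patterns added in the .td files below then match the nodes directly. As a quick illustration (a minimal sketch, not part of the commit; the file name and function names are made up), the updated tests show each of these calls selecting a single LASX instruction:

; sat-demo.ll -- run with: llc -mtriple=loongarch64 -mattr=+lasx < sat-demo.ll
; Expected selection per the updated tests: xvsadd.w, xvssub.w,
; xvsadd.wu, xvssub.wu -- one instruction per intrinsic call.
declare <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>)

define <8 x i32> @sat_demo(<8 x i32> %a, <8 x i32> %b) {
  %s = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
  %t = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %s, <8 x i32> %b)
  %u = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %t, <8 x i32> %b)
  %v = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %u, <8 x i32> %b)
  ret <8 x i32> %v
}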

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (6 additions, 0 deletions)

@@ -1998,6 +1998,12 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
 defm : PatXrXr<abds, "XVABSD">;
 defm : PatXrXrU<abdu, "XVABSD">;
 
+// XVSADD_{B/H/W/D}[U], XVSSUB_{B/H/W/D}[U]
+defm : PatXrXr<saddsat, "XVSADD">;
+defm : PatXrXr<ssubsat, "XVSSUB">;
+defm : PatXrXrU<uaddsat, "XVSADD">;
+defm : PatXrXrU<usubsat, "XVSSUB">;
+
 // Vector mask set by condition
 def : Pat<(loongarch_xvmskltz (v32i8 LASX256:$vj)), (PseudoXVMSKLTZ_B LASX256:$vj)>;
 def : Pat<(loongarch_xvmskltz (v16i16 LASX256:$vj)), (PseudoXVMSKLTZ_H LASX256:$vj)>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (6 additions, 0 deletions)

@@ -2155,6 +2155,12 @@ def : Pat<(f64 f64imm_vldi:$in),
 defm : PatVrVr<abds, "VABSD">;
 defm : PatVrVrU<abdu, "VABSD">;
 
+// VSADD_{B/H/W/D}[U], VSSUB_{B/H/W/D}[U]
+defm : PatVrVr<saddsat, "VSADD">;
+defm : PatVrVr<ssubsat, "VSSUB">;
+defm : PatVrVrU<uaddsat, "VSADD">;
+defm : PatVrVrU<usubsat, "VSSUB">;
+
 // Vector mask set by condition
 def : Pat<(loongarch_vmskltz (v16i8 LSX128:$vj)), (PseudoVMSKLTZ_B LSX128:$vj)>;
 def : Pat<(loongarch_vmskltz (v8i16 LSX128:$vj)), (PseudoVMSKLTZ_H LSX128:$vj)>;
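The PatVrVr/PatXrXr and PatVrVrU/PatXrXrU multiclasses presumably expand each defm into one selection pattern per element width, appending _B/_H/_W/_D (signed) or _BU/_HU/_WU/_DU (unsigned) to the instruction base name, so these eight defm lines should cover all 32 new instruction mappings. A hedged LSX sketch of the unsigned byte case (the corresponding lsx uadd/usub test files are among the 11 changed files but are not shown in this excerpt):

; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @vuadd_b(<16 x i8> %a, <16 x i8> %b) {
; Expected by analogy with the LASX tests below (an assumption):
; CHECK: vsadd.bu $vr0, $vr0, $vr1
  %ret = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %ret
}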

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sadd-sat.ll (9 additions, 46 deletions)

@@ -1,17 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
 
 define <32 x i8> @xvsadd_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvsadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvadd.b $xr2, $xr0, $xr1
-; CHECK-NEXT: xvslt.b $xr0, $xr2, $xr0
-; CHECK-NEXT: xvslti.b $xr1, $xr1, 0
-; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0
-; CHECK-NEXT: xvsrai.b $xr1, $xr2, 7
-; CHECK-NEXT: xvbitrevi.b $xr1, $xr1, 7
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
   ret <32 x i8> %ret
@@ -20,13 +14,7 @@ define <32 x i8> @xvsadd_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvsadd_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvsadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvadd.h $xr2, $xr0, $xr1
-; CHECK-NEXT: xvslt.h $xr0, $xr2, $xr0
-; CHECK-NEXT: xvslti.h $xr1, $xr1, 0
-; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0
-; CHECK-NEXT: xvsrai.h $xr1, $xr2, 15
-; CHECK-NEXT: xvbitrevi.h $xr1, $xr1, 15
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
   ret <16 x i16> %ret
@@ -35,42 +23,17 @@ define <16 x i16> @xvsadd_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvsadd_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvsadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvadd.w $xr2, $xr0, $xr1
-; CHECK-NEXT: xvslt.w $xr0, $xr2, $xr0
-; CHECK-NEXT: xvslti.w $xr1, $xr1, 0
-; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0
-; CHECK-NEXT: xvsrai.w $xr1, $xr2, 31
-; CHECK-NEXT: xvbitrevi.w $xr1, $xr1, 31
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
   ret <8 x i32> %ret
 }
 
 define <4 x i64> @xvsadd_d(<4 x i64> %a, <4 x i64> %b) {
-; LA32-LABEL: xvsadd_d:
-; LA32: # %bb.0:
-; LA32-NEXT: xvadd.d $xr2, $xr0, $xr1
-; LA32-NEXT: xvslt.d $xr0, $xr2, $xr0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvslti.d $xr1, $xr1, 0
-; LA32-NEXT: xvxor.v $xr0, $xr1, $xr0
-; LA32-NEXT: xvsrai.d $xr1, $xr2, 63
-; LA32-NEXT: xvxor.v $xr1, $xr1, $xr3
-; LA32-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvsadd_d:
-; LA64: # %bb.0:
-; LA64-NEXT: xvadd.d $xr2, $xr0, $xr1
-; LA64-NEXT: xvslt.d $xr0, $xr2, $xr0
-; LA64-NEXT: xvslti.d $xr1, $xr1, 0
-; LA64-NEXT: xvxor.v $xr0, $xr1, $xr0
-; LA64-NEXT: xvsrai.d $xr1, $xr2, 63
-; LA64-NEXT: xvbitrevi.d $xr1, $xr1, 63
-; LA64-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvsadd_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
   %ret = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
   ret <4 x i64> %ret
 }

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ssub-sat.ll (9 additions, 51 deletions)

@@ -1,18 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
 
 define <32 x i8> @xvssub_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvssub_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, 0
-; CHECK-NEXT: xvslt.b $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsub.b $xr1, $xr0, $xr1
-; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr2, $xr0
-; CHECK-NEXT: xvsrai.b $xr2, $xr1, 7
-; CHECK-NEXT: xvbitrevi.b $xr2, $xr2, 7
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
   ret <32 x i8> %ret
@@ -21,14 +14,7 @@ define <32 x i8> @xvssub_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvssub_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvssub_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, 0
-; CHECK-NEXT: xvslt.h $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsub.h $xr1, $xr0, $xr1
-; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr2, $xr0
-; CHECK-NEXT: xvsrai.h $xr2, $xr1, 15
-; CHECK-NEXT: xvbitrevi.h $xr2, $xr2, 15
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
   ret <16 x i16> %ret
@@ -37,45 +23,17 @@ define <16 x i16> @xvssub_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvssub_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvssub_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, 0
-; CHECK-NEXT: xvslt.w $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsub.w $xr1, $xr0, $xr1
-; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr2, $xr0
-; CHECK-NEXT: xvsrai.w $xr2, $xr1, 31
-; CHECK-NEXT: xvbitrevi.w $xr2, $xr2, 31
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
   ret <8 x i32> %ret
 }
 
 define <4 x i64> @xvssub_d(<4 x i64> %a, <4 x i64> %b) {
-; LA32-LABEL: xvssub_d:
-; LA32: # %bb.0:
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: xvslt.d $xr2, $xr2, $xr1
-; LA32-NEXT: xvsub.d $xr1, $xr0, $xr1
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvslt.d $xr0, $xr1, $xr0
-; LA32-NEXT: xvxor.v $xr0, $xr2, $xr0
-; LA32-NEXT: xvsrai.d $xr2, $xr1, 63
-; LA32-NEXT: xvxor.v $xr2, $xr2, $xr3
-; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvssub_d:
-; LA64: # %bb.0:
-; LA64-NEXT: xvrepli.b $xr2, 0
-; LA64-NEXT: xvslt.d $xr2, $xr2, $xr1
-; LA64-NEXT: xvsub.d $xr1, $xr0, $xr1
-; LA64-NEXT: xvslt.d $xr0, $xr1, $xr0
-; LA64-NEXT: xvxor.v $xr0, $xr2, $xr0
-; LA64-NEXT: xvsrai.d $xr2, $xr1, 63
-; LA64-NEXT: xvbitrevi.d $xr2, $xr2, 63
-; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvssub_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
   %ret = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
   ret <4 x i64> %ret
 }

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uadd-sat.ll (4 additions, 15 deletions)

@@ -5,9 +5,7 @@
 define <32 x i8> @xvuadd_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvuadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvxori.b $xr2, $xr1, 255
-; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
   ret <32 x i8> %ret
@@ -16,10 +14,7 @@ define <32 x i8> @xvuadd_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvuadd_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvuadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr2, $xr1, $xr2
-; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
   ret <16 x i16> %ret
@@ -28,10 +23,7 @@ define <16 x i16> @xvuadd_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvuadd_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvuadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr2, $xr1, $xr2
-; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
   ret <8 x i32> %ret
@@ -40,10 +32,7 @@ define <8 x i32> @xvuadd_w(<8 x i32> %a, <8 x i32> %b) {
 define <4 x i64> @xvuadd_d(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: xvuadd_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr2, $xr1, $xr2
-; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
   ret <4 x i64> %ret

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/usub-sat.ll (4 additions, 8 deletions)

@@ -5,8 +5,7 @@
 define <32 x i8> @xvusub_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvusub_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
   ret <32 x i8> %ret
@@ -15,8 +14,7 @@ define <32 x i8> @xvusub_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvusub_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvusub_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
   ret <16 x i16> %ret
@@ -25,8 +23,7 @@ define <16 x i16> @xvusub_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvusub_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvusub_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
   ret <8 x i32> %ret
@@ -35,8 +32,7 @@ define <8 x i32> @xvusub_w(<8 x i32> %a, <8 x i32> %b) {
 define <4 x i64> @xvusub_d(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: xvusub_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
   %ret = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
   ret <4 x i64> %ret

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sadd-sat.ll (9 additions, 46 deletions)

@@ -1,17 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
 
 define <16 x i8> @vsadd_b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vsadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vadd.b $vr2, $vr0, $vr1
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vslti.b $vr1, $vr1, 0
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vsrai.b $vr1, $vr2, 7
-; CHECK-NEXT: vbitrevi.b $vr1, $vr1, 7
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
   %ret = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %ret
@@ -20,13 +14,7 @@ define <16 x i8> @vsadd_b(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @vsadd_h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vsadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vadd.h $vr2, $vr0, $vr1
-; CHECK-NEXT: vslt.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vslti.h $vr1, $vr1, 0
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vsrai.h $vr1, $vr2, 15
-; CHECK-NEXT: vbitrevi.h $vr1, $vr1, 15
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
   %ret = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
   ret <8 x i16> %ret
@@ -35,42 +23,17 @@ define <8 x i16> @vsadd_h(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @vsadd_w(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vsadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vadd.w $vr2, $vr0, $vr1
-; CHECK-NEXT: vslt.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vslti.w $vr1, $vr1, 0
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vsrai.w $vr1, $vr2, 31
-; CHECK-NEXT: vbitrevi.w $vr1, $vr1, 31
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
   %ret = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %ret
 }
 
 define <2 x i64> @vsadd_d(<2 x i64> %a, <2 x i64> %b) {
-; LA32-LABEL: vsadd_d:
-; LA32: # %bb.0:
-; LA32-NEXT: vadd.d $vr2, $vr0, $vr1
-; LA32-NEXT: vslt.d $vr0, $vr2, $vr0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: vslti.d $vr1, $vr1, 0
-; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT: vsrai.d $vr1, $vr2, 63
-; LA32-NEXT: vxor.v $vr1, $vr1, $vr3
-; LA32-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vsadd_d:
-; LA64: # %bb.0:
-; LA64-NEXT: vadd.d $vr2, $vr0, $vr1
-; LA64-NEXT: vslt.d $vr0, $vr2, $vr0
-; LA64-NEXT: vslti.d $vr1, $vr1, 0
-; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
-; LA64-NEXT: vsrai.d $vr1, $vr2, 63
-; LA64-NEXT: vbitrevi.d $vr1, $vr1, 63
-; LA64-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
-; LA64-NEXT: ret
+; CHECK-LABEL: vsadd_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
   %ret = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %ret
 }
