Skip to content

Commit e9d71ef

Browse files
authored
[AArch64] Mark [usp]mull, [us]addl, [us]abdl as commutative (#152158)
Fixes #61461.
1 parent 6ebb890 commit e9d71ef

File tree

7 files changed

+49
-66
lines changed

7 files changed

+49
-66
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -296,22 +296,22 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
296296
def int_aarch64_neon_sqrdmlah : AdvSIMD_3IntArg_Intrinsic;
297297
def int_aarch64_neon_sqrdmlsh : AdvSIMD_3IntArg_Intrinsic;
298298

299-
// Vector Polynomial Multiply
300-
def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
301-
302-
// Vector Long Multiply
303-
def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
304-
def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
305-
def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
306-
307-
// 64-bit polynomial multiply really returns an i128, which is not legal. Fake
308-
// it with a v16i8.
309-
def int_aarch64_neon_pmull64 :
310-
DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
311-
312-
// Vector Extending Multiply
313-
def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic {
314-
let IntrProperties = [IntrNoMem, Commutative];
299+
let IntrProperties = [IntrNoMem, Commutative] in {
300+
// Vector Polynomial Multiply
301+
def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
302+
303+
// Vector Long Multiply
304+
def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
305+
def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
306+
def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
307+
308+
// 64-bit polynomial multiply really returns an i128, which is not legal.
309+
// Fake it with a v16i8.
310+
def int_aarch64_neon_pmull64
311+
: DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty]>;
312+
313+
// Vector Extending Multiply
314+
def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic;
315315
}
316316

317317
// Vector Saturating Doubling Long Multiply

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7362,7 +7362,9 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
73627362
[(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
73637363
def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
73647364
V128, V128, V128,
7365-
asm#"2", ".8h", ".16b", ".16b", []>;
7365+
asm#"2", ".8h", ".16b", ".16b",
7366+
[(set (v8i16 V128:$Rd), (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
7367+
(v8i8 (extract_high_v16i8 (v16i8 V128:$Rm)))))]>;
73667368
let Predicates = [HasAES] in {
73677369
def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc,
73687370
V128, V64, V64,
@@ -7374,10 +7376,6 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
73747376
[(set (v16i8 V128:$Rd), (OpNode (extract_high_v2i64 (v2i64 V128:$Rn)),
73757377
(extract_high_v2i64 (v2i64 V128:$Rm))))]>;
73767378
}
7377-
7378-
def : Pat<(v8i16 (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
7379-
(v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))),
7380-
(!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
73817379
}
73827380

73837381
multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
@@ -7402,6 +7400,7 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
74027400
(extract_high_v4i32 (v4i32 V128:$Rm))))]>;
74037401
}
74047402

7403+
let isCommutable = 1 in
74057404
multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
74067405
SDPatternOperator OpNode = null_frag> {
74077406
def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
@@ -7483,6 +7482,7 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
74837482
(extract_high_v4i32 (v4i32 V128:$Rm)))))))]>;
74847483
}
74857484

7485+
let isCommutable = 1 in
74867486
multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
74877487
SDPatternOperator OpNode = null_frag> {
74887488
def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6055,6 +6055,7 @@ defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
60556055
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
60566056

60576057
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
6058+
let isCommutable = 1 in
60586059
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
60596060
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
60606061
TriOpFrag<(add node:$LHS, (abds node:$MHS, node:$RHS))> >;
@@ -6806,6 +6807,7 @@ defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>
68066807
defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
68076808
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
68086809
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
6810+
let isCommutable = 1 in
68096811
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
68106812
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>;
68116813
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>;
@@ -6822,6 +6824,7 @@ defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>;
68226824
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>;
68236825
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
68246826
int_aarch64_neon_sqdmull>;
6827+
let isCommutable = 0 in
68256828
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
68266829
BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
68276830
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
@@ -6836,6 +6839,7 @@ defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
68366839
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
68376840
TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
68386841
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
6842+
let isCommutable = 0 in
68396843
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
68406844
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
68416845
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,11 @@ entry:
7171
}
7272

7373
define void @test_commutable_vaddl_s8(<8 x i8> %a, <8 x i8> %b, ptr %c) {
74-
; CHECK-SD-LABEL: test_commutable_vaddl_s8:
75-
; CHECK-SD: // %bb.0: // %entry
76-
; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b
77-
; CHECK-SD-NEXT: stp q0, q0, [x0]
78-
; CHECK-SD-NEXT: ret
79-
;
80-
; CHECK-GI-LABEL: test_commutable_vaddl_s8:
81-
; CHECK-GI: // %bb.0: // %entry
82-
; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b
83-
; CHECK-GI-NEXT: saddl v0.8h, v1.8b, v0.8b
84-
; CHECK-GI-NEXT: stp q2, q0, [x0]
85-
; CHECK-GI-NEXT: ret
74+
; CHECK-LABEL: test_commutable_vaddl_s8:
75+
; CHECK: // %bb.0: // %entry
76+
; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b
77+
; CHECK-NEXT: stp q0, q0, [x0]
78+
; CHECK-NEXT: ret
8679
entry:
8780
%vmovl.i.i = sext <8 x i8> %a to <8 x i16>
8881
%vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
@@ -131,18 +124,11 @@ entry:
131124
}
132125

133126
define void @test_commutable_vaddl_u8(<8 x i8> %a, <8 x i8> %b, ptr %c) {
134-
; CHECK-SD-LABEL: test_commutable_vaddl_u8:
135-
; CHECK-SD: // %bb.0: // %entry
136-
; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
137-
; CHECK-SD-NEXT: stp q0, q0, [x0]
138-
; CHECK-SD-NEXT: ret
139-
;
140-
; CHECK-GI-LABEL: test_commutable_vaddl_u8:
141-
; CHECK-GI: // %bb.0: // %entry
142-
; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b
143-
; CHECK-GI-NEXT: uaddl v0.8h, v1.8b, v0.8b
144-
; CHECK-GI-NEXT: stp q2, q0, [x0]
145-
; CHECK-GI-NEXT: ret
127+
; CHECK-LABEL: test_commutable_vaddl_u8:
128+
; CHECK: // %bb.0: // %entry
129+
; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
130+
; CHECK-NEXT: stp q0, q0, [x0]
131+
; CHECK-NEXT: ret
146132
entry:
147133
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
148134
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>

llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,9 +1611,8 @@ define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
16111611
define <16 x i8> @commutable_poly_mul(<16 x i8> %lhs, <16 x i8> %rhs) {
16121612
; CHECK-LABEL: commutable_poly_mul:
16131613
; CHECK: // %bb.0:
1614-
; CHECK-NEXT: pmul v2.16b, v0.16b, v1.16b
1615-
; CHECK-NEXT: pmul v0.16b, v1.16b, v0.16b
1616-
; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
1614+
; CHECK-NEXT: pmul v0.16b, v0.16b, v1.16b
1615+
; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
16171616
; CHECK-NEXT: ret
16181617
%1 = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
16191618
%2 = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %rhs, <16 x i8> %lhs)

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,8 @@ define void @commutable_sabdl(ptr %A, ptr %B, ptr %C) nounwind {
5757
; CHECK-GI: // %bb.0:
5858
; CHECK-GI-NEXT: ldr d0, [x0]
5959
; CHECK-GI-NEXT: ldr d1, [x1]
60-
; CHECK-GI-NEXT: sabdl.8h v2, v0, v1
61-
; CHECK-GI-NEXT: sabdl.8h v0, v1, v0
62-
; CHECK-GI-NEXT: str q2, [x2]
60+
; CHECK-GI-NEXT: sabdl.8h v0, v0, v1
61+
; CHECK-GI-NEXT: str q0, [x2]
6362
; CHECK-GI-NEXT: str q0, [x2]
6463
; CHECK-GI-NEXT: ret
6564
%tmp1 = load <8 x i8>, ptr %A
@@ -198,9 +197,8 @@ define void @commutable_uabdl(ptr %A, ptr %B, ptr %C) nounwind {
198197
; CHECK-GI: // %bb.0:
199198
; CHECK-GI-NEXT: ldr d0, [x0]
200199
; CHECK-GI-NEXT: ldr d1, [x1]
201-
; CHECK-GI-NEXT: uabdl.8h v2, v0, v1
202-
; CHECK-GI-NEXT: uabdl.8h v0, v1, v0
203-
; CHECK-GI-NEXT: str q2, [x2]
200+
; CHECK-GI-NEXT: uabdl.8h v0, v0, v1
201+
; CHECK-GI-NEXT: str q0, [x2]
204202
; CHECK-GI-NEXT: str q0, [x2]
205203
; CHECK-GI-NEXT: ret
206204
%tmp1 = load <8 x i8>, ptr %A

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,8 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
8282
define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
8383
; CHECK-LABEL: commutable_smull:
8484
; CHECK: // %bb.0:
85-
; CHECK-NEXT: smull v2.2d, v0.2s, v1.2s
86-
; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
87-
; CHECK-NEXT: stp q2, q0, [x0]
85+
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
86+
; CHECK-NEXT: stp q0, q0, [x0]
8887
; CHECK-NEXT: ret
8988
%1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B)
9089
%2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A)
@@ -140,9 +139,8 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
140139
define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
141140
; CHECK-LABEL: commutable_umull:
142141
; CHECK: // %bb.0:
143-
; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s
144-
; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
145-
; CHECK-NEXT: stp q2, q0, [x0]
142+
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
143+
; CHECK-NEXT: stp q0, q0, [x0]
146144
; CHECK-NEXT: ret
147145
%1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B)
148146
%2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A)
@@ -246,9 +244,8 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
246244
define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) {
247245
; CHECK-LABEL: commutable_pmull8h:
248246
; CHECK: // %bb.0:
249-
; CHECK-NEXT: pmull v2.8h, v0.8b, v1.8b
250-
; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
251-
; CHECK-NEXT: stp q2, q0, [x0]
247+
; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b
248+
; CHECK-NEXT: stp q0, q0, [x0]
252249
; CHECK-NEXT: ret
253250
%1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B)
254251
%2 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %B, <8 x i8> %A)
@@ -3273,9 +3270,8 @@ define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind {
32733270
; CHECK: // %bb.0:
32743271
; CHECK-NEXT: fmov d0, x1
32753272
; CHECK-NEXT: fmov d1, x0
3276-
; CHECK-NEXT: pmull v2.1q, v1.1d, v0.1d
3277-
; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d
3278-
; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
3273+
; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
3274+
; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
32793275
; CHECK-NEXT: ret
32803276
%1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
32813277
%2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l)

0 commit comments

Comments
 (0)