Skip to content

Commit 35db481

Browse files
committed
[AArch64][GlobalISel] Added G_PMULL node
GlobalISel now selects the pmull intrinsic directly when given vector operands, without falling back to SelectionDAG (SDAG).
1 parent 0b1ef8c commit 35db481

File tree

9 files changed

+139
-96
lines changed

9 files changed

+139
-96
lines changed

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction {
197197
let hasSideEffects = 0;
198198
}
199199

200+
def G_PMULL : AArch64GenericInstruction {
201+
let OutOperandList = (outs type0:$dst);
202+
let InOperandList = (ins type1:$src1, type1:$src2);
203+
let hasSideEffects = 0;
204+
}
205+
200206
def G_UADDLP : AArch64GenericInstruction {
201207
let OutOperandList = (outs type0:$dst);
202208
let InOperandList = (ins type0:$src1);
@@ -273,6 +279,7 @@ def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
273279

274280
def : GINodeEquiv<G_BSP, AArch64bsp>;
275281

282+
def : GINodeEquiv<G_PMULL, AArch64pmull>;
276283
def : GINodeEquiv<G_UMULL, AArch64umull>;
277284
def : GINodeEquiv<G_SMULL, AArch64smull>;
278285

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,6 +1809,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
18091809
return LowerBinOp(TargetOpcode::G_FMAXNUM);
18101810
case Intrinsic::aarch64_neon_fminnm:
18111811
return LowerBinOp(TargetOpcode::G_FMINNUM);
1812+
case Intrinsic::aarch64_neon_pmull:
1813+
return LowerBinOp(AArch64::G_PMULL); // TODO: this lowering will probably need to be made 64-bit specific; revisit.
1814+
case Intrinsic::aarch64_neon_pmull64:
1815+
return LowerBinOp(AArch64::G_PMULL);
18121816
case Intrinsic::aarch64_neon_smull:
18131817
return LowerBinOp(AArch64::G_SMULL);
18141818
case Intrinsic::aarch64_neon_umull:

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
482482
case Intrinsic::aarch64_neon_sqrdmulh:
483483
case Intrinsic::aarch64_neon_sqadd:
484484
case Intrinsic::aarch64_neon_sqsub:
485+
case Intrinsic::aarch64_neon_pmull:
486+
case Intrinsic::aarch64_neon_pmull64:
485487
case Intrinsic::aarch64_crypto_sha1h:
486488
case Intrinsic::aarch64_crypto_sha1c:
487489
case Intrinsic::aarch64_crypto_sha1p:

llvm/test/CodeGen/AArch64/128bit_load_store.ll

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,31 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
12
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s
23

34
define void @test_store_f128(ptr %ptr, fp128 %val) #0 {
4-
; CHECK-LABEL: test_store_f128
5-
; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}]
5+
; CHECK-LABEL: test_store_f128:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: str q0, [x0]
8+
; CHECK-NEXT: ret
69
entry:
710
store fp128 %val, ptr %ptr, align 16
811
ret void
912
}
1013

1114
define fp128 @test_load_f128(ptr readonly %ptr) #2 {
12-
; CHECK-LABEL: test_load_f128
13-
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
15+
; CHECK-LABEL: test_load_f128:
16+
; CHECK: // %bb.0: // %entry
17+
; CHECK-NEXT: ldr q0, [x0]
18+
; CHECK-NEXT: ret
1419
entry:
1520
%0 = load fp128, ptr %ptr, align 16
1621
ret fp128 %0
1722
}
1823

1924
define void @test_vstrq_p128(ptr %ptr, i128 %val) #0 {
20-
; CHECK-LABEL: test_vstrq_p128
21-
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
25+
; CHECK-LABEL: test_vstrq_p128:
26+
; CHECK: // %bb.0: // %entry
27+
; CHECK-NEXT: stp x2, x3, [x0]
28+
; CHECK-NEXT: ret
2229

2330
entry:
2431
%0 = bitcast i128 %val to fp128
@@ -27,8 +34,11 @@ entry:
2734
}
2835

2936
define i128 @test_vldrq_p128(ptr readonly %ptr) #2 {
30-
; CHECK-LABEL: test_vldrq_p128
31-
; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
37+
; CHECK-LABEL: test_vldrq_p128:
38+
; CHECK: // %bb.0: // %entry
39+
; CHECK-NEXT: ldp x8, x1, [x0]
40+
; CHECK-NEXT: mov x0, x8
41+
; CHECK-NEXT: ret
3242

3343
entry:
3444
%0 = load fp128, ptr %ptr, align 16
@@ -37,9 +47,11 @@ entry:
3747
}
3848

3949
define void @test_ld_st_p128(ptr nocapture %ptr) #0 {
40-
; CHECK-LABEL: test_ld_st_p128
41-
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
42-
; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
50+
; CHECK-LABEL: test_ld_st_p128:
51+
; CHECK: // %bb.0: // %entry
52+
; CHECK-NEXT: ldr q0, [x0]
53+
; CHECK-NEXT: str q0, [x0, #16]
54+
; CHECK-NEXT: ret
4355
entry:
4456
%0 = load fp128, ptr %ptr, align 16
4557
%add.ptr = getelementptr inbounds i128, ptr %ptr, i64 1

llvm/test/CodeGen/AArch64/2s-complement-asm.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
12
; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s
23

34
; CHECK: Contents of (__DATA,__data) section

llvm/test/CodeGen/AArch64/aarch64-smull.ll

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
44
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
55

6-
; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
8-
96
define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
107
; CHECK-LABEL: smull_v8i8_v8i16:
118
; CHECK: // %bb.0:
@@ -1832,14 +1829,33 @@ entry:
18321829
}
18331830

18341831
define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
1835-
; CHECK-LABEL: pmlsl2_v8i16_uzp1:
1836-
; CHECK: // %bb.0:
1837-
; CHECK-NEXT: ldr q2, [x1, #16]
1838-
; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b
1839-
; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
1840-
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
1841-
; CHECK-NEXT: str q0, [x0]
1842-
; CHECK-NEXT: ret
1832+
; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1:
1833+
; CHECK-NEON: // %bb.0:
1834+
; CHECK-NEON-NEXT: ldr q2, [x1, #16]
1835+
; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b
1836+
; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
1837+
; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
1838+
; CHECK-NEON-NEXT: str q0, [x0]
1839+
; CHECK-NEON-NEXT: ret
1840+
;
1841+
; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1:
1842+
; CHECK-SVE: // %bb.0:
1843+
; CHECK-SVE-NEXT: ldr q2, [x1, #16]
1844+
; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b
1845+
; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
1846+
; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
1847+
; CHECK-SVE-NEXT: str q0, [x0]
1848+
; CHECK-SVE-NEXT: ret
1849+
;
1850+
; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1:
1851+
; CHECK-GI: // %bb.0:
1852+
; CHECK-GI-NEXT: ldr q2, [x1, #16]
1853+
; CHECK-GI-NEXT: mov d0, v0.d[1]
1854+
; CHECK-GI-NEXT: xtn v2.8b, v2.8h
1855+
; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
1856+
; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
1857+
; CHECK-GI-NEXT: str q0, [x0]
1858+
; CHECK-GI-NEXT: ret
18431859
%5 = getelementptr inbounds i32, ptr %3, i64 4
18441860
%6 = load <8 x i16>, ptr %5, align 4
18451861
%7 = trunc <8 x i16> %6 to <8 x i8>
@@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
19912007
}
19922008

19932009
define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) {
1994-
; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
1995-
; CHECK: // %bb.0: // %entry
1996-
; CHECK-NEXT: ldp q2, q3, [x1]
1997-
; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b
1998-
; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b
1999-
; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
2000-
; CHECK-NEXT: add v0.8h, v3.8h, v0.8h
2001-
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
2002-
; CHECK-NEXT: str q0, [x0]
2003-
; CHECK-NEXT: ret
2010+
; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
2011+
; CHECK-NEON: // %bb.0: // %entry
2012+
; CHECK-NEON-NEXT: ldp q2, q3, [x1]
2013+
; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b
2014+
; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b
2015+
; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
2016+
; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h
2017+
; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
2018+
; CHECK-NEON-NEXT: str q0, [x0]
2019+
; CHECK-NEON-NEXT: ret
2020+
;
2021+
; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
2022+
; CHECK-SVE: // %bb.0: // %entry
2023+
; CHECK-SVE-NEXT: ldp q2, q3, [x1]
2024+
; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b
2025+
; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b
2026+
; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
2027+
; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h
2028+
; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
2029+
; CHECK-SVE-NEXT: str q0, [x0]
2030+
; CHECK-SVE-NEXT: ret
2031+
;
2032+
; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
2033+
; CHECK-GI: // %bb.0: // %entry
2034+
; CHECK-GI-NEXT: ldp q2, q3, [x1]
2035+
; CHECK-GI-NEXT: mov d4, v0.d[1]
2036+
; CHECK-GI-NEXT: xtn v2.8b, v2.8h
2037+
; CHECK-GI-NEXT: xtn v3.8b, v3.8h
2038+
; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
2039+
; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b
2040+
; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
2041+
; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
2042+
; CHECK-GI-NEXT: str q0, [x0]
2043+
; CHECK-GI-NEXT: ret
20042044
entry:
20052045
%5 = load <8 x i16>, ptr %3, align 4
20062046
%6 = trunc <8 x i16> %5 to <8 x i8>

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5-
; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64
9-
105
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
116
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
127
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Lines changed: 26 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,6 @@
22
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5-
; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
13-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
14-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
15-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
16-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
17-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
18-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
19-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
20-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
21-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
22-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
23-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
24-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
25-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
26-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low
27-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high
28-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low
29-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high
30-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
31-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
32-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
33-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
34-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
35-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
36-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
37-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
38-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
39-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
40-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64
41-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64
42-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64
43-
445
define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind {
456
; CHECK-LABEL: smull8h:
467
; CHECK: // %bb.0:
@@ -2895,11 +2856,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) {
28952856
}
28962857

28972858
define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
2898-
; CHECK-LABEL: pmull_from_extract_dup_high:
2899-
; CHECK: // %bb.0:
2900-
; CHECK-NEXT: dup v1.16b, w0
2901-
; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
2902-
; CHECK-NEXT: ret
2859+
; CHECK-SD-LABEL: pmull_from_extract_dup_high:
2860+
; CHECK-SD: // %bb.0:
2861+
; CHECK-SD-NEXT: dup v1.16b, w0
2862+
; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
2863+
; CHECK-SD-NEXT: ret
2864+
;
2865+
; CHECK-GI-LABEL: pmull_from_extract_dup_high:
2866+
; CHECK-GI: // %bb.0:
2867+
; CHECK-GI-NEXT: dup v1.8b, w0
2868+
; CHECK-GI-NEXT: mov d0, v0.d[1]
2869+
; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
2870+
; CHECK-GI-NEXT: ret
29032871
%rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
29042872
%rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
29052873

@@ -2924,12 +2892,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs)
29242892
}
29252893

29262894
define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
2927-
; CHECK-LABEL: pmull_from_extract_duplane_high:
2928-
; CHECK: // %bb.0:
2929-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
2930-
; CHECK-NEXT: dup v1.16b, v1.b[0]
2931-
; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
2932-
; CHECK-NEXT: ret
2895+
; CHECK-SD-LABEL: pmull_from_extract_duplane_high:
2896+
; CHECK-SD: // %bb.0:
2897+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
2898+
; CHECK-SD-NEXT: dup v1.16b, v1.b[0]
2899+
; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
2900+
; CHECK-SD-NEXT: ret
2901+
;
2902+
; CHECK-GI-LABEL: pmull_from_extract_duplane_high:
2903+
; CHECK-GI: // %bb.0:
2904+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
2905+
; CHECK-GI-NEXT: mov d0, v0.d[1]
2906+
; CHECK-GI-NEXT: dup v1.8b, v1.b[0]
2907+
; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
2908+
; CHECK-GI-NEXT: ret
29332909
%lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
29342910
%rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
29352911

llvm/test/CodeGen/AArch64/highextractbitcast.ll

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE
44
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI
55

6-
; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64
8-
96
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
107
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
118
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
@@ -521,12 +518,12 @@ entry:
521518
}
522519

523520
define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) {
524-
; CHECK-LABEL: test_pmull_high_p8_128:
525-
; CHECK: // %bb.0: // %entry
526-
; CHECK-NEXT: fmov d0, x3
527-
; CHECK-NEXT: fmov d1, x1
528-
; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
529-
; CHECK-NEXT: ret
521+
; CHECK-LE-LABEL: test_pmull_high_p8_128:
522+
; CHECK-LE: // %bb.0: // %entry
523+
; CHECK-LE-NEXT: fmov d0, x3
524+
; CHECK-LE-NEXT: fmov d1, x1
525+
; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b
526+
; CHECK-LE-NEXT: ret
530527
;
531528
; CHECK-BE-LABEL: test_pmull_high_p8_128:
532529
; CHECK-BE: // %bb.0: // %entry
@@ -538,6 +535,15 @@ define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) {
538535
; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
539536
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
540537
; CHECK-BE-NEXT: ret
538+
;
539+
; CHECK-GI-LABEL: test_pmull_high_p8_128:
540+
; CHECK-GI: // %bb.0: // %entry
541+
; CHECK-GI-NEXT: mov v0.d[0], x0
542+
; CHECK-GI-NEXT: mov v1.d[0], x2
543+
; CHECK-GI-NEXT: mov v0.d[1], x1
544+
; CHECK-GI-NEXT: mov v1.d[1], x3
545+
; CHECK-GI-NEXT: pmull2 v0.8h, v0.16b, v1.16b
546+
; CHECK-GI-NEXT: ret
541547
entry:
542548
%a = bitcast i128 %aa to <16 x i8>
543549
%b = bitcast i128 %bb to <16 x i8>

0 commit comments

Comments
 (0)