|
3 | 3 | ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
|
4 | 4 |
|
5 | 5 | ; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
|
| 6 | +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h |
6 | 7 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
|
7 | 8 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
|
8 | 9 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
|
@@ -78,6 +79,21 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
|
78 | 79 | ret <2 x i64> %tmp3
|
79 | 80 | }
|
80 | 81 |
|
| 82 | +define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) { ; Calls llvm.aarch64.neon.smull on %A and %B in both operand orders and stores both <2 x i64> results through %C. NOTE(review): presumably a baseline for commutative-operand handling of this intrinsic - confirm. |
| 83 | +; CHECK-LABEL: commutable_smull: |
| 84 | +; CHECK: // %bb.0: |
| 85 | +; CHECK-NEXT: smull v2.2d, v0.2s, v1.2s |
| 86 | +; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s |
| 87 | +; CHECK-NEXT: stp q2, q0, [x0] |
| 88 | +; CHECK-NEXT: ret |
| 89 | + %1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B) ; operands in (A, B) order |
| 90 | + %2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A) ; same intrinsic with the operands swapped; the expected output above contains one smull per call |
| 91 | + store <2 x i64> %1, ptr %C ; first result goes to %C |
| 92 | + %3 = getelementptr i8, ptr %C, i64 16 ; byte offset 16 = size of one <2 x i64>, i.e. directly after the first result |
| 93 | + store <2 x i64> %2, ptr %3 ; second result goes to %C + 16 |
| 94 | + ret void |
| 95 | +} |
| 96 | + |
81 | 97 | declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
|
82 | 98 | declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
|
83 | 99 | declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
|
@@ -121,6 +137,21 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
|
121 | 137 | ret <2 x i64> %tmp3
|
122 | 138 | }
|
123 | 139 |
|
| 140 | +define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) { ; Calls llvm.aarch64.neon.umull on %A and %B in both operand orders and stores both <2 x i64> results through %C. NOTE(review): presumably a baseline for commutative-operand handling of this intrinsic - confirm. |
| 141 | +; CHECK-LABEL: commutable_umull: |
| 142 | +; CHECK: // %bb.0: |
| 143 | +; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s |
| 144 | +; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s |
| 145 | +; CHECK-NEXT: stp q2, q0, [x0] |
| 146 | +; CHECK-NEXT: ret |
| 147 | + %1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B) ; operands in (A, B) order |
| 148 | + %2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A) ; same intrinsic with the operands swapped; the expected output above contains one umull per call |
| 149 | + store <2 x i64> %1, ptr %C ; first result goes to %C |
| 150 | + %3 = getelementptr i8, ptr %C, i64 16 ; byte offset 16 = size of one <2 x i64>, i.e. directly after the first result |
| 151 | + store <2 x i64> %2, ptr %3 ; second result goes to %C + 16 |
| 152 | + ret void |
| 153 | +} |
| 154 | + |
124 | 155 | declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
|
125 | 156 | declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
|
126 | 157 | declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
|
@@ -212,6 +243,21 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
|
212 | 243 | ret <8 x i16> %tmp3
|
213 | 244 | }
|
214 | 245 |
|
| 246 | +define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) { ; Calls llvm.aarch64.neon.pmull on %A and %B in both operand orders and stores both <8 x i16> results through %C. NOTE(review): presumably a baseline for commutative-operand handling of this intrinsic - confirm. |
| 247 | +; CHECK-LABEL: commutable_pmull8h: |
| 248 | +; CHECK: // %bb.0: |
| 249 | +; CHECK-NEXT: pmull v2.8h, v0.8b, v1.8b |
| 250 | +; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b |
| 251 | +; CHECK-NEXT: stp q2, q0, [x0] |
| 252 | +; CHECK-NEXT: ret |
| 253 | + %1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B) ; operands in (A, B) order |
| 254 | + %2 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %B, <8 x i8> %A) ; same intrinsic with the operands swapped; the expected output above contains one pmull per call |
| 255 | + store <8 x i16> %1, ptr %C ; first result goes to %C |
| 256 | + %3 = getelementptr i8, ptr %C, i64 16 ; byte offset 16 = size of one <8 x i16>; i64 index type matches the commutable_smull/commutable_umull tests |
| 257 | + store <8 x i16> %2, ptr %3 ; second result goes to %C + 16 |
| 258 | + ret void |
| 259 | +} |
| 260 | + |
215 | 261 | declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
|
216 | 262 |
|
217 | 263 | define <4 x i16> @sqdmulh_4h(ptr %A, ptr %B) nounwind {
|
@@ -487,10 +533,10 @@ define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
|
487 | 533 | ; CHECK-GI-LABEL: smlal2d_chain_with_constant:
|
488 | 534 | ; CHECK-GI: // %bb.0:
|
489 | 535 | ; CHECK-GI-NEXT: mvn v3.8b, v2.8b
|
490 |
| -; CHECK-GI-NEXT: adrp x8, .LCPI27_0 |
| 536 | +; CHECK-GI-NEXT: adrp x8, .LCPI30_0 |
491 | 537 | ; CHECK-GI-NEXT: smull v1.2d, v1.2s, v3.2s
|
492 | 538 | ; CHECK-GI-NEXT: smlal v1.2d, v0.2s, v2.2s
|
493 |
| -; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI27_0] |
| 539 | +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI30_0] |
494 | 540 | ; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
|
495 | 541 | ; CHECK-GI-NEXT: str q0, [x0]
|
496 | 542 | ; CHECK-GI-NEXT: ret
|
@@ -566,8 +612,8 @@ define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
|
566 | 612 | ;
|
567 | 613 | ; CHECK-GI-LABEL: smlsl2d_chain_with_constant:
|
568 | 614 | ; CHECK-GI: // %bb.0:
|
569 |
| -; CHECK-GI-NEXT: adrp x8, .LCPI31_0 |
570 |
| -; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI31_0] |
| 615 | +; CHECK-GI-NEXT: adrp x8, .LCPI34_0 |
| 616 | +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI34_0] |
571 | 617 | ; CHECK-GI-NEXT: smlsl v3.2d, v0.2s, v2.2s
|
572 | 618 | ; CHECK-GI-NEXT: mvn v0.8b, v2.8b
|
573 | 619 | ; CHECK-GI-NEXT: smlsl v3.2d, v1.2s, v0.2s
|
@@ -829,10 +875,10 @@ define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
|
829 | 875 | ; CHECK-GI-LABEL: umlal2d_chain_with_constant:
|
830 | 876 | ; CHECK-GI: // %bb.0:
|
831 | 877 | ; CHECK-GI-NEXT: mvn v3.8b, v2.8b
|
832 |
| -; CHECK-GI-NEXT: adrp x8, .LCPI43_0 |
| 878 | +; CHECK-GI-NEXT: adrp x8, .LCPI46_0 |
833 | 879 | ; CHECK-GI-NEXT: umull v1.2d, v1.2s, v3.2s
|
834 | 880 | ; CHECK-GI-NEXT: umlal v1.2d, v0.2s, v2.2s
|
835 |
| -; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI43_0] |
| 881 | +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI46_0] |
836 | 882 | ; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
|
837 | 883 | ; CHECK-GI-NEXT: str q0, [x0]
|
838 | 884 | ; CHECK-GI-NEXT: ret
|
@@ -908,8 +954,8 @@ define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
|
908 | 954 | ;
|
909 | 955 | ; CHECK-GI-LABEL: umlsl2d_chain_with_constant:
|
910 | 956 | ; CHECK-GI: // %bb.0:
|
911 |
| -; CHECK-GI-NEXT: adrp x8, .LCPI47_0 |
912 |
| -; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI47_0] |
| 957 | +; CHECK-GI-NEXT: adrp x8, .LCPI50_0 |
| 958 | +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI50_0] |
913 | 959 | ; CHECK-GI-NEXT: umlsl v3.2d, v0.2s, v2.2s
|
914 | 960 | ; CHECK-GI-NEXT: mvn v0.8b, v2.8b
|
915 | 961 | ; CHECK-GI-NEXT: umlsl v3.2d, v1.2s, v0.2s
|
@@ -3222,6 +3268,21 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
|
3222 | 3268 | ret <16 x i8> %val
|
3223 | 3269 | }
|
3224 | 3270 |
|
| 3271 | +define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind { ; Calls llvm.aarch64.neon.pmull64 on %l and %r in both operand orders and returns the element-wise sum of the two <16 x i8> results. NOTE(review): presumably a baseline for commutative-operand handling of this intrinsic - confirm. |
| 3272 | +; CHECK-LABEL: test_commutable_pmull_64: |
| 3273 | +; CHECK: // %bb.0: |
| 3274 | +; CHECK-NEXT: fmov d0, x1 |
| 3275 | +; CHECK-NEXT: fmov d1, x0 |
| 3276 | +; CHECK-NEXT: pmull v2.1q, v1.1d, v0.1d |
| 3277 | +; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d |
| 3278 | +; CHECK-NEXT: add v0.16b, v2.16b, v0.16b |
| 3279 | +; CHECK-NEXT: ret |
| 3280 | + %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ; operands in (l, r) order |
| 3281 | + %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l) ; same intrinsic with the operands swapped; the expected output above contains one pmull per call |
| 3282 | + %3 = add <16 x i8> %1, %2 ; combine both results so each call feeds the return value |
| 3283 | + ret <16 x i8> %3 |
| 3284 | +} |
| 3285 | + |
3225 | 3286 | declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
|
3226 | 3287 |
|
3227 | 3288 | define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
|
|
0 commit comments