Skip to content

Commit 71e4e0d

Browse files
committed
[AArch64][GlobalISel] Removed fallback for uqshl, uqrshl, and sqrshl with <1 x i64> operands
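GlobalISel now places the operands of these intrinsics on floating-point registers. The generated code is slightly less efficient than SelectionDAG's in some cases: for example, for uqshl with a constant shift amount, GlobalISel materializes the constant in a general-purpose register and moves it to a floating-point register (mov + fmov) instead of using the immediate form of the instruction.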
1 parent df5a394 commit 71e4e0d

File tree

2 files changed

+119
-69
lines changed

2 files changed

+119
-69
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -484,6 +484,9 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
484484
case Intrinsic::aarch64_neon_sqsub:
485485
case Intrinsic::aarch64_crypto_sha1h:
486486
case Intrinsic::aarch64_neon_sqshl:
487+
case Intrinsic::aarch64_neon_uqshl:
488+
case Intrinsic::aarch64_neon_sqrshl:
489+
case Intrinsic::aarch64_neon_uqrshl:
487490
case Intrinsic::aarch64_crypto_sha1c:
488491
case Intrinsic::aarch64_crypto_sha1p:
489492
case Intrinsic::aarch64_crypto_sha1m:

llvm/test/CodeGen/AArch64/arm64-vshift.ll

Lines changed: 116 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -2,27 +2,7 @@
22
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5-
; CHECK-GI: warning: Instruction selection used fallback path for uqshl1d
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar_constant
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d_constant
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar_constant
13-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d
14-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d_constant
15-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar
16-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar_constant
17-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d
18-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d_constant
19-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar
20-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar_constant
21-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d
22-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d_constant
23-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar
24-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar_constant
25-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr1d
5+
; CHECK-GI: warning: Instruction selection used fallback path for urshr1d
266
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr_scalar
277
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr1d
288
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr_scalar
@@ -371,37 +351,64 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
371351
}
372352

373353
define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
374-
; CHECK-LABEL: uqshl1d_constant:
375-
; CHECK: // %bb.0:
376-
; CHECK-NEXT: ldr d0, [x0]
377-
; CHECK-NEXT: uqshl d0, d0, #1
378-
; CHECK-NEXT: ret
354+
; CHECK-SD-LABEL: uqshl1d_constant:
355+
; CHECK-SD: // %bb.0:
356+
; CHECK-SD-NEXT: ldr d0, [x0]
357+
; CHECK-SD-NEXT: uqshl d0, d0, #1
358+
; CHECK-SD-NEXT: ret
359+
;
360+
; CHECK-GI-LABEL: uqshl1d_constant:
361+
; CHECK-GI: // %bb.0:
362+
; CHECK-GI-NEXT: mov w8, #1 // =0x1
363+
; CHECK-GI-NEXT: ldr d0, [x0]
364+
; CHECK-GI-NEXT: fmov d1, x8
365+
; CHECK-GI-NEXT: uqshl d0, d0, d1
366+
; CHECK-GI-NEXT: ret
379367
%tmp1 = load <1 x i64>, ptr %A
380368
%tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
381369
ret <1 x i64> %tmp3
382370
}
383371

384372
define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
385-
; CHECK-LABEL: uqshl_scalar:
386-
; CHECK: // %bb.0:
387-
; CHECK-NEXT: ldr d0, [x0]
388-
; CHECK-NEXT: ldr d1, [x1]
389-
; CHECK-NEXT: uqshl d0, d0, d1
390-
; CHECK-NEXT: fmov x0, d0
391-
; CHECK-NEXT: ret
373+
; CHECK-SD-LABEL: uqshl_scalar:
374+
; CHECK-SD: // %bb.0:
375+
; CHECK-SD-NEXT: ldr x8, [x0]
376+
; CHECK-SD-NEXT: ldr x9, [x1]
377+
; CHECK-SD-NEXT: fmov d0, x8
378+
; CHECK-SD-NEXT: fmov d1, x9
379+
; CHECK-SD-NEXT: uqshl d0, d0, d1
380+
; CHECK-SD-NEXT: fmov x0, d0
381+
; CHECK-SD-NEXT: ret
382+
;
383+
; CHECK-GI-LABEL: uqshl_scalar:
384+
; CHECK-GI: // %bb.0:
385+
; CHECK-GI-NEXT: ldr d0, [x0]
386+
; CHECK-GI-NEXT: ldr d1, [x1]
387+
; CHECK-GI-NEXT: uqshl d0, d0, d1
388+
; CHECK-GI-NEXT: fmov x0, d0
389+
; CHECK-GI-NEXT: ret
392390
%tmp1 = load i64, ptr %A
393391
%tmp2 = load i64, ptr %B
394392
%tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
395393
ret i64 %tmp3
396394
}
397395

398396
define i64 @uqshl_scalar_constant(ptr %A) nounwind {
399-
; CHECK-LABEL: uqshl_scalar_constant:
400-
; CHECK: // %bb.0:
401-
; CHECK-NEXT: ldr d0, [x0]
402-
; CHECK-NEXT: uqshl d0, d0, #1
403-
; CHECK-NEXT: fmov x0, d0
404-
; CHECK-NEXT: ret
397+
; CHECK-SD-LABEL: uqshl_scalar_constant:
398+
; CHECK-SD: // %bb.0:
399+
; CHECK-SD-NEXT: ldr d0, [x0]
400+
; CHECK-SD-NEXT: uqshl d0, d0, #1
401+
; CHECK-SD-NEXT: fmov x0, d0
402+
; CHECK-SD-NEXT: ret
403+
;
404+
; CHECK-GI-LABEL: uqshl_scalar_constant:
405+
; CHECK-GI: // %bb.0:
406+
; CHECK-GI-NEXT: mov w8, #1 // =0x1
407+
; CHECK-GI-NEXT: ldr d0, [x0]
408+
; CHECK-GI-NEXT: fmov d1, x8
409+
; CHECK-GI-NEXT: uqshl d0, d0, d1
410+
; CHECK-GI-NEXT: fmov x0, d0
411+
; CHECK-GI-NEXT: ret
405412
%tmp1 = load i64, ptr %A
406413
%tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
407414
ret i64 %tmp3
@@ -905,28 +912,48 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
905912
}
906913

907914
define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
908-
; CHECK-LABEL: sqrshl_scalar:
909-
; CHECK: // %bb.0:
910-
; CHECK-NEXT: ldr d0, [x0]
911-
; CHECK-NEXT: ldr d1, [x1]
912-
; CHECK-NEXT: sqrshl d0, d0, d1
913-
; CHECK-NEXT: fmov x0, d0
914-
; CHECK-NEXT: ret
915+
; CHECK-SD-LABEL: sqrshl_scalar:
916+
; CHECK-SD: // %bb.0:
917+
; CHECK-SD-NEXT: ldr x8, [x0]
918+
; CHECK-SD-NEXT: ldr x9, [x1]
919+
; CHECK-SD-NEXT: fmov d0, x8
920+
; CHECK-SD-NEXT: fmov d1, x9
921+
; CHECK-SD-NEXT: sqrshl d0, d0, d1
922+
; CHECK-SD-NEXT: fmov x0, d0
923+
; CHECK-SD-NEXT: ret
924+
;
925+
; CHECK-GI-LABEL: sqrshl_scalar:
926+
; CHECK-GI: // %bb.0:
927+
; CHECK-GI-NEXT: ldr d0, [x0]
928+
; CHECK-GI-NEXT: ldr d1, [x1]
929+
; CHECK-GI-NEXT: sqrshl d0, d0, d1
930+
; CHECK-GI-NEXT: fmov x0, d0
931+
; CHECK-GI-NEXT: ret
915932
%tmp1 = load i64, ptr %A
916933
%tmp2 = load i64, ptr %B
917934
%tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
918935
ret i64 %tmp3
919936
}
920937

921938
define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
922-
; CHECK-LABEL: sqrshl_scalar_constant:
923-
; CHECK: // %bb.0:
924-
; CHECK-NEXT: mov x8, #1 // =0x1
925-
; CHECK-NEXT: ldr d0, [x0]
926-
; CHECK-NEXT: fmov d1, x8
927-
; CHECK-NEXT: sqrshl d0, d0, d1
928-
; CHECK-NEXT: fmov x0, d0
929-
; CHECK-NEXT: ret
939+
; CHECK-SD-LABEL: sqrshl_scalar_constant:
940+
; CHECK-SD: // %bb.0:
941+
; CHECK-SD-NEXT: ldr x9, [x0]
942+
; CHECK-SD-NEXT: mov w8, #1 // =0x1
943+
; CHECK-SD-NEXT: fmov d1, x8
944+
; CHECK-SD-NEXT: fmov d0, x9
945+
; CHECK-SD-NEXT: sqrshl d0, d0, d1
946+
; CHECK-SD-NEXT: fmov x0, d0
947+
; CHECK-SD-NEXT: ret
948+
;
949+
; CHECK-GI-LABEL: sqrshl_scalar_constant:
950+
; CHECK-GI: // %bb.0:
951+
; CHECK-GI-NEXT: mov w8, #1 // =0x1
952+
; CHECK-GI-NEXT: ldr d0, [x0]
953+
; CHECK-GI-NEXT: fmov d1, x8
954+
; CHECK-GI-NEXT: sqrshl d0, d0, d1
955+
; CHECK-GI-NEXT: fmov x0, d0
956+
; CHECK-GI-NEXT: ret
930957
%tmp1 = load i64, ptr %A
931958
%tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
932959
ret i64 %tmp3
@@ -1011,28 +1038,48 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
10111038
}
10121039

10131040
define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
1014-
; CHECK-LABEL: uqrshl_scalar:
1015-
; CHECK: // %bb.0:
1016-
; CHECK-NEXT: ldr d0, [x0]
1017-
; CHECK-NEXT: ldr d1, [x1]
1018-
; CHECK-NEXT: uqrshl d0, d0, d1
1019-
; CHECK-NEXT: fmov x0, d0
1020-
; CHECK-NEXT: ret
1041+
; CHECK-SD-LABEL: uqrshl_scalar:
1042+
; CHECK-SD: // %bb.0:
1043+
; CHECK-SD-NEXT: ldr x8, [x0]
1044+
; CHECK-SD-NEXT: ldr x9, [x1]
1045+
; CHECK-SD-NEXT: fmov d0, x8
1046+
; CHECK-SD-NEXT: fmov d1, x9
1047+
; CHECK-SD-NEXT: uqrshl d0, d0, d1
1048+
; CHECK-SD-NEXT: fmov x0, d0
1049+
; CHECK-SD-NEXT: ret
1050+
;
1051+
; CHECK-GI-LABEL: uqrshl_scalar:
1052+
; CHECK-GI: // %bb.0:
1053+
; CHECK-GI-NEXT: ldr d0, [x0]
1054+
; CHECK-GI-NEXT: ldr d1, [x1]
1055+
; CHECK-GI-NEXT: uqrshl d0, d0, d1
1056+
; CHECK-GI-NEXT: fmov x0, d0
1057+
; CHECK-GI-NEXT: ret
10211058
%tmp1 = load i64, ptr %A
10221059
%tmp2 = load i64, ptr %B
10231060
%tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
10241061
ret i64 %tmp3
10251062
}
10261063

10271064
define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
1028-
; CHECK-LABEL: uqrshl_scalar_constant:
1029-
; CHECK: // %bb.0:
1030-
; CHECK-NEXT: mov x8, #1 // =0x1
1031-
; CHECK-NEXT: ldr d0, [x0]
1032-
; CHECK-NEXT: fmov d1, x8
1033-
; CHECK-NEXT: uqrshl d0, d0, d1
1034-
; CHECK-NEXT: fmov x0, d0
1035-
; CHECK-NEXT: ret
1065+
; CHECK-SD-LABEL: uqrshl_scalar_constant:
1066+
; CHECK-SD: // %bb.0:
1067+
; CHECK-SD-NEXT: ldr x9, [x0]
1068+
; CHECK-SD-NEXT: mov w8, #1 // =0x1
1069+
; CHECK-SD-NEXT: fmov d1, x8
1070+
; CHECK-SD-NEXT: fmov d0, x9
1071+
; CHECK-SD-NEXT: uqrshl d0, d0, d1
1072+
; CHECK-SD-NEXT: fmov x0, d0
1073+
; CHECK-SD-NEXT: ret
1074+
;
1075+
; CHECK-GI-LABEL: uqrshl_scalar_constant:
1076+
; CHECK-GI: // %bb.0:
1077+
; CHECK-GI-NEXT: mov w8, #1 // =0x1
1078+
; CHECK-GI-NEXT: ldr d0, [x0]
1079+
; CHECK-GI-NEXT: fmov d1, x8
1080+
; CHECK-GI-NEXT: uqrshl d0, d0, d1
1081+
; CHECK-GI-NEXT: fmov x0, d0
1082+
; CHECK-GI-NEXT: ret
10361083
%tmp1 = load i64, ptr %A
10371084
%tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
10381085
ret i64 %tmp3

0 commit comments

Comments
 (0)