Skip to content

Commit df5a394

Browse files
committed
[AArch64][GlobalISel] Removed sqshl fallback occurring for <1 x i64> operands
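GlobalISel now assigns sqshl operands to the floating-point register bank, so the sqshl tests no longer fall back to SelectionDAG. For constant shift amounts the generated code is slightly less efficient than SDAG's: the constant is materialized in a register (mov + fmov) and the register form of sqshl is used instead of the immediate form.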
1 parent 68caa8b commit df5a394

File tree

2 files changed

+47
-24
lines changed

2 files changed

+47
-24
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
483483
case Intrinsic::aarch64_neon_sqadd:
484484
case Intrinsic::aarch64_neon_sqsub:
485485
case Intrinsic::aarch64_crypto_sha1h:
486+
case Intrinsic::aarch64_neon_sqshl:
486487
case Intrinsic::aarch64_crypto_sha1c:
487488
case Intrinsic::aarch64_crypto_sha1p:
488489
case Intrinsic::aarch64_crypto_sha1m:

llvm/test/CodeGen/AArch64/arm64-vshift.ll

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,7 @@
22
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5-
; CHECK-GI: warning: Instruction selection used fallback path for sqshl1d
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl1d_constant
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar_constant
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d
5+
; CHECK-GI: warning: Instruction selection used fallback path for uqshl1d
106
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant
117
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar
128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar_constant
@@ -155,37 +151,64 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
155151
}
156152

157153
define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
158-
; CHECK-LABEL: sqshl1d_constant:
159-
; CHECK: // %bb.0:
160-
; CHECK-NEXT: ldr d0, [x0]
161-
; CHECK-NEXT: sqshl d0, d0, #1
162-
; CHECK-NEXT: ret
154+
; CHECK-SD-LABEL: sqshl1d_constant:
155+
; CHECK-SD: // %bb.0:
156+
; CHECK-SD-NEXT: ldr d0, [x0]
157+
; CHECK-SD-NEXT: sqshl d0, d0, #1
158+
; CHECK-SD-NEXT: ret
159+
;
160+
; CHECK-GI-LABEL: sqshl1d_constant:
161+
; CHECK-GI: // %bb.0:
162+
; CHECK-GI-NEXT: mov w8, #1 // =0x1
163+
; CHECK-GI-NEXT: ldr d0, [x0]
164+
; CHECK-GI-NEXT: fmov d1, x8
165+
; CHECK-GI-NEXT: sqshl d0, d0, d1
166+
; CHECK-GI-NEXT: ret
163167
%tmp1 = load <1 x i64>, ptr %A
164168
%tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
165169
ret <1 x i64> %tmp3
166170
}
167171

168172
define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
169-
; CHECK-LABEL: sqshl_scalar:
170-
; CHECK: // %bb.0:
171-
; CHECK-NEXT: ldr d0, [x0]
172-
; CHECK-NEXT: ldr d1, [x1]
173-
; CHECK-NEXT: sqshl d0, d0, d1
174-
; CHECK-NEXT: fmov x0, d0
175-
; CHECK-NEXT: ret
173+
; CHECK-SD-LABEL: sqshl_scalar:
174+
; CHECK-SD: // %bb.0:
175+
; CHECK-SD-NEXT: ldr x8, [x0]
176+
; CHECK-SD-NEXT: ldr x9, [x1]
177+
; CHECK-SD-NEXT: fmov d0, x8
178+
; CHECK-SD-NEXT: fmov d1, x9
179+
; CHECK-SD-NEXT: sqshl d0, d0, d1
180+
; CHECK-SD-NEXT: fmov x0, d0
181+
; CHECK-SD-NEXT: ret
182+
;
183+
; CHECK-GI-LABEL: sqshl_scalar:
184+
; CHECK-GI: // %bb.0:
185+
; CHECK-GI-NEXT: ldr d0, [x0]
186+
; CHECK-GI-NEXT: ldr d1, [x1]
187+
; CHECK-GI-NEXT: sqshl d0, d0, d1
188+
; CHECK-GI-NEXT: fmov x0, d0
189+
; CHECK-GI-NEXT: ret
176190
%tmp1 = load i64, ptr %A
177191
%tmp2 = load i64, ptr %B
178192
%tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
179193
ret i64 %tmp3
180194
}
181195

182196
define i64 @sqshl_scalar_constant(ptr %A) nounwind {
183-
; CHECK-LABEL: sqshl_scalar_constant:
184-
; CHECK: // %bb.0:
185-
; CHECK-NEXT: ldr d0, [x0]
186-
; CHECK-NEXT: sqshl d0, d0, #1
187-
; CHECK-NEXT: fmov x0, d0
188-
; CHECK-NEXT: ret
197+
; CHECK-SD-LABEL: sqshl_scalar_constant:
198+
; CHECK-SD: // %bb.0:
199+
; CHECK-SD-NEXT: ldr d0, [x0]
200+
; CHECK-SD-NEXT: sqshl d0, d0, #1
201+
; CHECK-SD-NEXT: fmov x0, d0
202+
; CHECK-SD-NEXT: ret
203+
;
204+
; CHECK-GI-LABEL: sqshl_scalar_constant:
205+
; CHECK-GI: // %bb.0:
206+
; CHECK-GI-NEXT: mov w8, #1 // =0x1
207+
; CHECK-GI-NEXT: ldr d0, [x0]
208+
; CHECK-GI-NEXT: fmov d1, x8
209+
; CHECK-GI-NEXT: sqshl d0, d0, d1
210+
; CHECK-GI-NEXT: fmov x0, d0
211+
; CHECK-GI-NEXT: ret
189212
%tmp1 = load i64, ptr %A
190213
%tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
191214
ret i64 %tmp3
@@ -2616,7 +2639,6 @@ define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind {
26162639
; CHECK-GI-NEXT: ret
26172640
%tmp1 = load <4 x i8>, ptr %A
26182641
%tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
2619-
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
26202642
ret <4 x i32> %tmp3
26212643
}
26222644

0 commit comments

Comments
 (0)