Skip to content

Commit 8cae064

Browse files
committed
[ARM] Add early-clobber to MVE VCMLA.f32
This instruction (but not the f16 variant) cannot use the same register for the output as either of the inputs, so its output operand needs to be marked as early-clobber.
1 parent 1e9d068 commit 8cae064

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3583,10 +3583,10 @@ def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 ha
35833583
defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
35843584
defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
35853585

3586-
class MVE_VCMLA<string suffix, bits<2> size>
3586+
class MVE_VCMLA<string suffix, bits<2> size, string cstr>
35873587
: MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
35883588
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
3589-
"$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
3589+
"$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src"#cstr, size, []> {
35903590
bits<4> Qd;
35913591
bits<4> Qn;
35923592
bits<2> rot;
@@ -3603,8 +3603,8 @@ class MVE_VCMLA<string suffix, bits<2> size>
36033603
let Inst{4} = 0b0;
36043604
}
36053605

3606-
multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
3607-
def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
3606+
multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, string cstr=""> {
3607+
def "" : MVE_VCMLA<VTI.Suffix, VTI.Size, cstr>;
36083608
defvar Inst = !cast<Instruction>(NAME);
36093609

36103610
let Predicates = [HasMVEFloat] in {
@@ -3633,7 +3633,7 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
36333633
}
36343634

36353635
defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
3636-
defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
3636+
defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, ",@earlyclobber $Qd">;
36373637

36383638
class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
36393639
bit bit_8, bit bit_21, dag iops=(ins),

llvm/test/CodeGen/Thumb2/mve-vcmla.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,27 @@ entry:
121121
%res = fadd <4 x float> %d, %a
122122
ret <4 x float> %res
123123
}
124+
125+
define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
126+
; CHECK-LABEL: same_register_f16:
127+
; CHECK: @ %bb.0: @ %entry
128+
; CHECK-NEXT: vcmla.f16 q0, q0, q0, #0
129+
; CHECK-NEXT: bx lr
130+
entry:
131+
%d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
132+
%res = fadd fast <8 x half> %d, %a
133+
ret <8 x half> %res
134+
}
135+
136+
define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
137+
; CHECK-LABEL: same_register_f32:
138+
; CHECK: @ %bb.0: @ %entry
139+
; CHECK-NEXT: vmov q1, q0
140+
; CHECK-NEXT: vcmla.f32 q1, q0, q0, #0
141+
; CHECK-NEXT: vmov q0, q1
142+
; CHECK-NEXT: bx lr
143+
entry:
144+
%d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
145+
%res = fadd fast <4 x float> %d, %a
146+
ret <4 x float> %res
147+
}

0 commit comments

Comments
 (0)