Skip to content

Conversation

@ostannard
Copy link
Collaborator

This instruction (but not the f16 variant) cannot us the same register for the output as either of the inputs, so it needs to be marked as early-clobber.

This instruction (but not the f16 variant) cannot us the same register
for the output as either of the inputs, so it needs to be marked as
early-clobber.
@llvmbot
Copy link
Member

llvmbot commented Nov 5, 2024

@llvm/pr-subscribers-backend-arm

Author: Oliver Stannard (ostannard)

Changes

This instruction (but not the f16 variant) cannot us the same register for the output as either of the inputs, so it needs to be marked as early-clobber.


Full diff: https://github.com/llvm/llvm-project/pull/114995.diff

2 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+5-5)
  • (modified) llvm/test/CodeGen/Thumb2/mve-vcmla.ll (+24)
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 8c8403ac58b080..22af599f4f0859 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3583,10 +3583,10 @@ def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 ha
 defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
 defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
 
-class MVE_VCMLA<string suffix, bits<2> size>
+class MVE_VCMLA<string suffix, bits<2> size, string cstr>
   : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
                          (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
-                         "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
+                         "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src"#cstr, size, []> {
   bits<4> Qd;
   bits<4> Qn;
   bits<2> rot;
@@ -3603,8 +3603,8 @@ class MVE_VCMLA<string suffix, bits<2> size>
   let Inst{4} = 0b0;
 }
 
-multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
-  def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, string cstr=""> {
+  def "" : MVE_VCMLA<VTI.Suffix, VTI.Size, cstr>;
   defvar Inst = !cast<Instruction>(NAME);
 
   let Predicates = [HasMVEFloat] in {
@@ -3633,7 +3633,7 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
 }
 
 defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
-defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, ",@earlyclobber $Qd">;
 
 class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
                         bit bit_8, bit bit_21, dag iops=(ins),
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmla.ll b/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
index d1976472e39460..df542be73c58cb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
@@ -121,3 +121,27 @@ entry:
   %res = fadd <4 x float> %d, %a
   ret <4 x float> %res
 }
+
+define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
+; CHECK-LABEL: same_register_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmla.f16 q0, q0, q0, #0
+; CHECK-NEXT:    bx lr
+entry:
+  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
+  %res = fadd fast <8 x half> %d, %a
+  ret <8 x half> %res
+}
+
+define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
+; CHECK-LABEL: same_register_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q1, q0
+; CHECK-NEXT:    vcmla.f32 q1, q0, q0, #0
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
+  %res = fadd fast <4 x float> %d, %a
+  ret <4 x float> %res
+}

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, cheers

@ostannard ostannard merged commit 9b016e3 into llvm:main Nov 6, 2024
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants