Skip to content

Conversation

@davemgreen
Copy link
Collaborator

An llvm.vector.reduce.fadd(float, <1 x float>) will be translated to G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to the verifier. This makes sure we generate a fadd/fmul instead.

@llvmbot
Copy link
Member

llvmbot commented Aug 16, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

An llvm.vector.reduce.fadd(float, <1 x float>) will be translated to G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to the verifier. This makes sure we generate a fadd/fmul instead.


Patch is 21.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153966.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+3)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll (+161-136)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 64c19fab1a023..a3707fef6328f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2522,6 +2522,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
       Opc = ID == Intrinsic::vector_reduce_fadd
                 ? TargetOpcode::G_VECREDUCE_SEQ_FADD
                 : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+      if (!MRI->getType(VecSrc).isVector())
+        Opc = ID == Intrinsic::vector_reduce_fadd ? TargetOpcode::G_FADD
+                                                  : TargetOpcode::G_FMUL;
       MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
                             MachineInstr::copyFlagsFromInstruction(CI));
       return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
index 16762dc4fd3fe..c791e35946f72 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
@@ -1,34 +1,48 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -disable-expand-reductions -stop-after=irtranslator %s -o - | FileCheck %s
 
-declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
-declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
-
 define float @fadd_seq(float %start, <4 x float> %vec) {
   ; CHECK-LABEL: name: fadd_seq
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q1, $s0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q1, $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
   ret float %res
 }
 
+define float @fadd_seq_scalar(float %start, <1 x float> %vec) {
+  ; CHECK-LABEL: name: fadd_seq_scalar
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d1, $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+  ; CHECK-NEXT:   [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[UV]]
+  ; CHECK-NEXT:   $s0 = COPY [[FADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  %res = call float @llvm.vector.reduce.fadd.v1f32(float %start, <1 x float> %vec)
+  ret float %res
+}
+
 define float @fadd_fast(float %start, <4 x float> %vec) {
   ; CHECK-LABEL: name: fadd_fast
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q1, $s0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
-  ; CHECK:   [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
-  ; CHECK:   $s0 = COPY [[FADD]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q1, $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
+  ; CHECK-NEXT:   $s0 = COPY [[FADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
   ret float %res
 }
@@ -36,48 +50,60 @@ define float @fadd_fast(float %start, <4 x float> %vec) {
 define double @fmul_seq(double %start, <4 x double> %vec) {
   ; CHECK-LABEL: name: fmul_seq
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $d0, $q1, $q2
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
-  ; CHECK:   $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
-  ; CHECK:   RET_ReallyLR implicit $d0
+  ; CHECK-NEXT:   liveins: $d0, $q1, $q2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
+  ; CHECK-NEXT:   $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
   %res = call double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
   ret double %res
 }
 
+define double @fmul_seq_scalar(double %start, <1 x double> %vec) {
+  ; CHECK-LABEL: name: fmul_seq_scalar
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $d1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+  ; CHECK-NEXT:   [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $d0 = COPY [[FMUL]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  %res = call double @llvm.vector.reduce.fmul.v1f64(double %start, <1 x double> %vec)
+  ret double %res
+}
+
 define double @fmul_fast(double %start, <4 x double> %vec) {
   ; CHECK-LABEL: name: fmul_fast
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $d0, $q1, $q2
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
-  ; CHECK:   [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
-  ; CHECK:   $d0 = COPY [[FMUL]](s64)
-  ; CHECK:   RET_ReallyLR implicit $d0
+  ; CHECK-NEXT:   liveins: $d0, $q1, $q2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
+  ; CHECK-NEXT:   [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
+  ; CHECK-NEXT:   $d0 = COPY [[FMUL]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
   %res = call reassoc double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
   ret double %res
 }
 
-declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
-
 define float @fmax(<4 x float> %vec) {
   ; CHECK-LABEL: name: fmax
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %vec)
   ret float %res
 }
@@ -85,12 +111,13 @@ define float @fmax(<4 x float> %vec) {
 define float @fmin(<4 x float> %vec) {
   ; CHECK-LABEL: name: fmin
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
   ret float %res
 }
@@ -98,12 +125,13 @@ define float @fmin(<4 x float> %vec) {
 define float @fmin_nnan(<4 x float> %vec) {
   ; CHECK-LABEL: name: fmin_nnan
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
   ret float %res
 }
@@ -111,12 +139,13 @@ define float @fmin_nnan(<4 x float> %vec) {
 define float @fmaximum(<4 x float> %vec) {
   ; CHECK-LABEL: name: fmaximum
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMAXIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_FMAXIMUM]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec)
   ret float %res
 }
@@ -124,12 +153,13 @@ define float @fmaximum(<4 x float> %vec) {
 define float @fminimum(<4 x float> %vec) {
   ; CHECK-LABEL: name: fminimum
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_FMINIMUM]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
   ret float %res
 }
@@ -137,99 +167,91 @@ define float @fminimum(<4 x float> %vec) {
 define float @fminimum_nnan(<4 x float> %vec) {
   ; CHECK-LABEL: name: fminimum_nnan
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
-  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
-  ; CHECK:   RET_ReallyLR implicit $s0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
+  ; CHECK-NEXT:   $s0 = COPY [[VECREDUCE_FMINIMUM]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
   ret float %res
 }
 
-declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
-
 define i32 @add(<4 x i32> %vec) {
   ; CHECK-LABEL: name: add
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_ADD]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
 
-declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
-
 define i32 @mul(<4 x i32> %vec) {
   ; CHECK-LABEL: name: mul
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_MUL]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_MUL]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
 
-declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
-
 define i32 @and(<4 x i32> %vec) {
   ; CHECK-LABEL: name: and
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_AND]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_AND]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
 
-declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
-
 define i32 @or(<4 x i32> %vec) {
   ; CHECK-LABEL: name: or
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_OR]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_OR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
 
-declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
-
 define i32 @xor(<4 x i32> %vec) {
   ; CHECK-LABEL: name: xor
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_XOR]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_XOR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
 
-declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
-
 define i32 @smax(<4 x i32> %vec) {
   ; CHECK-LABEL: name: smax
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_SMAX]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_SMAX]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
@@ -237,11 +259,12 @@ define i32 @smax(<4 x i32> %vec) {
 define i32 @smin(<4 x i32> %vec) {
   ; CHECK-LABEL: name: smin
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 = COPY [[VECREDUCE_SMIN]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   $w0 = COPY [[VECREDUCE_SMIN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
   ret i32 %res
 }
@@ -249,11 +272,12 @@ define i32 @smin(<4 x i32> %vec) {
 define i32 @umax(<4 x i32> %vec) {
   ; CHECK-LABEL: name: umax
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-  ; CHECK:   [[VECREDUCE_UMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_UMAX [[COPY]](<4 x s32>)
-  ; CHECK:   $w0 ...
[truncated]

@davemgreen davemgreen force-pushed the gh-gi-vecreducescalar branch from 1f80d5b to e5645f3 Compare August 18, 2025 08:27
Comment on lines 53 to 60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you precommit the regeneration of the base checks to include the -NEXTs?

…fmul.

A llvm.vector.reduce.fadd(float, <1 x float>) will be translated to
G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to
the verifier. This makes sure we generate a fadd/fmul instead.
@davemgreen davemgreen force-pushed the gh-gi-vecreducescalar branch from e5645f3 to e73876f Compare August 18, 2025 14:32
@davemgreen davemgreen enabled auto-merge (squash) August 18, 2025 14:59
@davemgreen davemgreen merged commit 03912a1 into llvm:main Aug 18, 2025
9 checks passed
@davemgreen davemgreen deleted the gh-gi-vecreducescalar branch August 18, 2025 14:59
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants