-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[GlobalISel] Translate scalar sequential vecreduce.fadd/fmul as fadd/fmul. #153966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: David Green (davemgreen) Changes: A llvm.vector.reduce.fadd(float, <1 x float>) will be translated to G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to the verifier. This makes sure we generate a fadd/fmul instead. Patch is 21.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153966.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 64c19fab1a023..a3707fef6328f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2522,6 +2522,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Opc = ID == Intrinsic::vector_reduce_fadd
? TargetOpcode::G_VECREDUCE_SEQ_FADD
: TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+ if (!MRI->getType(VecSrc).isVector())
+ Opc = ID == Intrinsic::vector_reduce_fadd ? TargetOpcode::G_FADD
+ : TargetOpcode::G_FMUL;
MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
MachineInstr::copyFlagsFromInstruction(CI));
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
index 16762dc4fd3fe..c791e35946f72 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
@@ -1,34 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -disable-expand-reductions -stop-after=irtranslator %s -o - | FileCheck %s
-declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
-declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
-
define float @fadd_seq(float %start, <4 x float> %vec) {
; CHECK-LABEL: name: fadd_seq
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q1, $s0
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
- ; CHECK: [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q1, $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
ret float %res
}
+define float @fadd_seq_scalar(float %start, <1 x float> %vec) {
+ ; CHECK-LABEL: name: fadd_seq_scalar
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $d1, $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[UV]]
+ ; CHECK-NEXT: $s0 = COPY [[FADD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ %res = call float @llvm.vector.reduce.fadd.v1f32(float %start, <1 x float> %vec)
+ ret float %res
+}
+
define float @fadd_fast(float %start, <4 x float> %vec) {
; CHECK-LABEL: name: fadd_fast
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q1, $s0
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
- ; CHECK: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
- ; CHECK: $s0 = COPY [[FADD]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q1, $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
+ ; CHECK-NEXT: $s0 = COPY [[FADD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
ret float %res
}
@@ -36,48 +50,60 @@ define float @fadd_fast(float %start, <4 x float> %vec) {
define double @fmul_seq(double %start, <4 x double> %vec) {
; CHECK-LABEL: name: fmul_seq
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $d0, $q1, $q2
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
- ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
- ; CHECK: [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
- ; CHECK: $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: liveins: $d0, $q1, $q2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%res = call double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
ret double %res
}
+define double @fmul_seq_scalar(double %start, <1 x double> %vec) {
+ ; CHECK-LABEL: name: fmul_seq_scalar
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $d0 = COPY [[FMUL]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %res = call double @llvm.vector.reduce.fmul.v1f64(double %start, <1 x double> %vec)
+ ret double %res
+}
+
define double @fmul_fast(double %start, <4 x double> %vec) {
; CHECK-LABEL: name: fmul_fast
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $d0, $q1, $q2
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
- ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
- ; CHECK: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
- ; CHECK: $d0 = COPY [[FMUL]](s64)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: liveins: $d0, $q1, $q2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
+ ; CHECK-NEXT: $d0 = COPY [[FMUL]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%res = call reassoc double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
ret double %res
}
-declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
-
define float @fmax(<4 x float> %vec) {
; CHECK-LABEL: name: fmax
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_FMAX]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMAX]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %vec)
ret float %res
}
@@ -85,12 +111,13 @@ define float @fmax(<4 x float> %vec) {
define float @fmin(<4 x float> %vec) {
; CHECK-LABEL: name: fmin
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMIN]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
ret float %res
}
@@ -98,12 +125,13 @@ define float @fmin(<4 x float> %vec) {
define float @fmin_nnan(<4 x float> %vec) {
; CHECK-LABEL: name: fmin_nnan
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMIN]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
ret float %res
}
@@ -111,12 +139,13 @@ define float @fmin_nnan(<4 x float> %vec) {
define float @fmaximum(<4 x float> %vec) {
; CHECK-LABEL: name: fmaximum
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_FMAX]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMAXIMUM]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec)
ret float %res
}
@@ -124,12 +153,13 @@ define float @fmaximum(<4 x float> %vec) {
define float @fminimum(<4 x float> %vec) {
; CHECK-LABEL: name: fminimum
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMINIMUM]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
ret float %res
}
@@ -137,99 +167,91 @@ define float @fminimum(<4 x float> %vec) {
define float @fminimum_nnan(<4 x float> %vec) {
; CHECK-LABEL: name: fminimum_nnan
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
- ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMINIMUM]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%res = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
ret float %res
}
-declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
-
define i32 @add(<4 x i32> %vec) {
; CHECK-LABEL: name: add
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_ADD]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_ADD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
ret i32 %res
}
-declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
-
define i32 @mul(<4 x i32> %vec) {
; CHECK-LABEL: name: mul
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_MUL]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_MUL]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vec)
ret i32 %res
}
-declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
-
define i32 @and(<4 x i32> %vec) {
; CHECK-LABEL: name: and
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_AND]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vec)
ret i32 %res
}
-declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
-
define i32 @or(<4 x i32> %vec) {
; CHECK-LABEL: name: or
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_OR]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_OR]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vec)
ret i32 %res
}
-declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
-
define i32 @xor(<4 x i32> %vec) {
; CHECK-LABEL: name: xor
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_XOR]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_XOR]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vec)
ret i32 %res
}
-declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
-
define i32 @smax(<4 x i32> %vec) {
; CHECK-LABEL: name: smax
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_SMAX]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_SMAX]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
ret i32 %res
}
@@ -237,11 +259,12 @@ define i32 @smax(<4 x i32> %vec) {
define i32 @smin(<4 x i32> %vec) {
; CHECK-LABEL: name: smin
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
- ; CHECK: $w0 = COPY [[VECREDUCE_SMIN]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_SMIN]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
ret i32 %res
}
@@ -249,11 +272,12 @@ define i32 @smin(<4 x i32> %vec) {
define i32 @umax(<4 x i32> %vec) {
; CHECK-LABEL: name: umax
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[VECREDUCE_UMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_UMAX [[COPY]](<4 x s32>)
- ; CHECK: $w0 ...
[truncated]
|
1f80d5b to
e5645f3
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you precommit the regeneration of the base checks to include the -NEXTs
[GlobalISel] Translate scalar sequential vecreduce.fadd/fmul as fadd/fmul. A llvm.vector.reduce.fadd(float, <1 x float>) will be translated to G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to the verifier. This makes sure we generate a fadd/fmul instead.
e5645f3 to
e73876f
Compare
A llvm.vector.reduce.fadd(float, <1 x float>) will be translated to G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to the verifier. This makes sure we generate a fadd/fmul instead.