Skip to content

Commit 03912a1

Browse files
authored
[GlobalISel] Translate scalar sequential vecreduce.fadd/fmul as fadd/fmul. (llvm#153966)
An llvm.vector.reduce.fadd(float, <1 x float>) call was previously translated to G_VECREDUCE_SEQ_FADD with two scalar operands, which is illegal according to the verifier. This change makes sure that, when the vector source does not have a vector type, we generate a G_FADD/G_FMUL instead.
1 parent f4b5c24 commit 03912a1

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2522,6 +2522,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
25222522
Opc = ID == Intrinsic::vector_reduce_fadd
25232523
? TargetOpcode::G_VECREDUCE_SEQ_FADD
25242524
: TargetOpcode::G_VECREDUCE_SEQ_FMUL;
2525+
if (!MRI->getType(VecSrc).isVector())
2526+
Opc = ID == Intrinsic::vector_reduce_fadd ? TargetOpcode::G_FADD
2527+
: TargetOpcode::G_FMUL;
25252528
MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
25262529
MachineInstr::copyFlagsFromInstruction(CI));
25272530
return true;

llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,21 @@ define float @fadd_seq(float %start, <4 x float> %vec) {
1616
ret float %res
1717
}
1818

19+
define float @fadd_seq_scalar(float %start, <1 x float> %vec) {
20+
; CHECK-LABEL: name: fadd_seq_scalar
21+
; CHECK: bb.1 (%ir-block.0):
22+
; CHECK-NEXT: liveins: $d1, $s0
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
25+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
26+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
27+
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[UV]]
28+
; CHECK-NEXT: $s0 = COPY [[FADD]](s32)
29+
; CHECK-NEXT: RET_ReallyLR implicit $s0
30+
%res = call float @llvm.vector.reduce.fadd.v1f32(float %start, <1 x float> %vec)
31+
ret float %res
32+
}
33+
1934
define float @fadd_fast(float %start, <4 x float> %vec) {
2035
; CHECK-LABEL: name: fadd_fast
2136
; CHECK: bb.1 (%ir-block.0):
@@ -48,6 +63,20 @@ define double @fmul_seq(double %start, <4 x double> %vec) {
4863
ret double %res
4964
}
5065

66+
define double @fmul_seq_scalar(double %start, <1 x double> %vec) {
67+
; CHECK-LABEL: name: fmul_seq_scalar
68+
; CHECK: bb.1 (%ir-block.0):
69+
; CHECK-NEXT: liveins: $d0, $d1
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
72+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
73+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]]
74+
; CHECK-NEXT: $d0 = COPY [[FMUL]](s64)
75+
; CHECK-NEXT: RET_ReallyLR implicit $d0
76+
%res = call double @llvm.vector.reduce.fmul.v1f64(double %start, <1 x double> %vec)
77+
ret double %res
78+
}
79+
5180
define double @fmul_fast(double %start, <4 x double> %vec) {
5281
; CHECK-LABEL: name: fmul_fast
5382
; CHECK: bb.1 (%ir-block.0):

0 commit comments

Comments
 (0)