44target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-ni:1-p2:32:8:8:32-ni:2"
55target triple = "aarch64-arm-none-linux"
66
7- ; Ensure that a second reduction-like pattern doesn't override the first
8- ; We don't care what this IR produces, just that it produces something and doesn't cause a crash
7+ ; Ensure that a second reduction-like pattern doesn't override the first.
98define void @reprocessing_crash () #0 {
10- ; CHECK-LABEL: define void @reprocessing_crash
9+ ; CHECK-LABEL: define void @reprocessing_crash(
10+ ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
11+ ; CHECK-NEXT: [[ENTRY:.*]]:
12+ ; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer)
13+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
14+ ; CHECK: [[VECTOR_BODY]]:
15+ ; CHECK-NEXT: [[TMP1:%.*]] = phi <vscale x 4 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
16+ ; CHECK-NEXT: [[TMP2]] = fsub <vscale x 4 x double> [[TMP1]], zeroinitializer
17+ ; CHECK-NEXT: br i1 false, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
18+ ; CHECK: [[MIDDLE_BLOCK]]:
19+ ; CHECK-NEXT: [[TMP3:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> [[TMP2]])
20+ ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
21+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 2 x double> [[TMP4]], zeroinitializer
22+ ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
23+ ; CHECK-NEXT: [[BIN_RDX23:%.*]] = fadd <vscale x 2 x double> [[TMP5]], zeroinitializer
24+ ; CHECK-NEXT: ret void
1125;
1226entry:
1327 br label %vector.body
@@ -28,8 +42,58 @@ middle.block: ; preds = %vector.body
2842 ret void
2943}
3044
45+ ; Make sure we don't crash on floating point single reductions. For now, a
46+ ; single reduction (%vec.phi218 below) should be left as-is.
47+ define double @test_fp_single_reduction(i1 %c) #2 {
48+ ; CHECK-LABEL: define double @test_fp_single_reduction(
49+ ; CHECK-SAME: i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
50+ ; CHECK-NEXT: [[ENTRY:.*]]:
51+ ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x double> @llvm.vector.interleave2.v8f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer)
52+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
53+ ; CHECK: [[VECTOR_BODY]]:
54+ ; CHECK-NEXT: [[VEC_PHI218:%.*]] = phi <4 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
55+ ; CHECK-NEXT: [[TMP1:%.*]] = phi <8 x double> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
56+ ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
57+ ; CHECK-NEXT: [[TMP2]] = fadd <4 x double> [[VEC_PHI218]], [[STRIDED_VEC]]
58+ ; CHECK-NEXT: [[TMP3]] = fadd <8 x double> [[TMP1]], zeroinitializer
59+ ; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
60+ ; CHECK: [[EXIT]]:
61+ ; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x double>, <4 x double> } @llvm.vector.deinterleave2.v8f64(<8 x double> [[TMP3]])
62+ ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x double>, <4 x double> } [[TMP4]], 0
63+ ; CHECK-NEXT: [[TMP6:%.*]] = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
64+ ; CHECK-NEXT: [[TMP7:%.*]] = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP2]])
65+ ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x double>, <4 x double> } [[TMP4]], 1
66+ ; CHECK-NEXT: [[TMP9:%.*]] = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP8]])
67+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd double [[TMP6]], [[TMP7]]
68+ ; CHECK-NEXT: [[ADD_2:%.*]] = fadd double [[ADD_1]], [[TMP9]]
69+ ; CHECK-NEXT: ret double [[ADD_2]]
70+ ;
71+ entry:
72+   br label %vector.body
73+
74+ vector.body:
75+   %vec.phi216 = phi <4 x double> [ zeroinitializer, %entry ], [ %2, %vector.body ] ; merged with %vec.phi222 into one <8 x double> phi in the expected output
76+   %vec.phi218 = phi <4 x double> [ zeroinitializer, %entry ], [ %1, %vector.body ] ; the single reduction: stays a <4 x double> phi in the expected output
77+   %vec.phi222 = phi <4 x double> [ zeroinitializer, %entry ], [ %3, %vector.body ]
78+   %strided.vec = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even lanes
79+   %strided.vec223 = shufflevector <8 x double> zeroinitializer, <8 x double> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd lanes
80+   %1 = fadd <4 x double> %vec.phi218, %strided.vec
81+   %2 = fadd <4 x double> %vec.phi216, %strided.vec
82+   %3 = fadd <4 x double> %vec.phi222, %strided.vec223
83+   br i1 %c, label %exit, label %vector.body
84+
85+ exit:
86+   %4 = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %2)
87+   %5 = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %1)
88+   %6 = tail call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %3)
89+   %add.1 = fadd double %4, %5
90+   %add.2 = fadd double %add.1, %6
91+   ret double %add.2
92+ }
93+
3194; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
3295declare { <vscale x 2 x double >, <vscale x 2 x double > } @llvm.vector.deinterleave2.nxv4f64 (<vscale x 4 x double >) #1
3396
3497attributes #0 = { "target-cpu" ="neoverse-v1" } ; attribute group referenced by @reprocessing_crash
3598attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } ; attribute group referenced by the deinterleave2 declaration above
99+ attributes #2 = { "target-cpu" ="apple-m1" } ; attribute group referenced by @test_fp_single_reduction
0 commit comments