@@ -344,6 +344,70 @@ for.end:
344344 ret float %.sroa.speculated
345345}
346346
; fmin reduction over half elements, written as fcmp olt + select and seeded with 0.0.
; Uses attribute group #1 (no-NaNs / no-signed-zeros plus +zfhmin,+zvfhmin).
; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
; CHECK-LABEL: @fmin_fast_half_zvfhmin(
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half>
; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x half> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x half> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x half> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x half> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x half> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x half> %[[SEL1]], <vscale x 8 x half> %[[SEL2]]
; CHECK-NEXT: call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> %[[SEL]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Running minimum carried around the loop.
  %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
  ; Elements are 2 bytes apart, so 2 is the strongest alignment that holds on every iteration.
  %0 = load half, ptr %arrayidx, align 2
  %cmp.i = fcmp olt half %0, %sum.07
  %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret half %.sroa.speculated
}
378+
; fmin reduction over bfloat elements, written as fcmp olt + select and seeded with 0.0.
; Uses attribute group #2 (no-NaNs / no-signed-zeros plus +zfbfmin,+zvfbfmin).
; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
; CHECK-LABEL: @fmin_fast_bfloat_zvfbfmin(
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x bfloat>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x bfloat>
; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x bfloat> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x bfloat> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x bfloat> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x bfloat> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x bfloat> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x bfloat> %[[SEL1]], <vscale x 8 x bfloat> %[[SEL2]]
; CHECK-NEXT: call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> %[[SEL]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Running minimum carried around the loop.
  %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
  ; Elements are 2 bytes apart, so 2 is the strongest alignment that holds on every iteration.
  %0 = load bfloat, ptr %arrayidx, align 2
  %cmp.i = fcmp olt bfloat %0, %sum.07
  %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret bfloat %.sroa.speculated
}
410+
; FMAX (FAST)
348412
349413; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
@@ -378,6 +442,70 @@ for.end:
378442 ret float %.sroa.speculated
379443}
380444
; fmax reduction over half elements, written as fcmp fast ogt + select and seeded with 0.0.
; Uses attribute group #1 (no-NaNs / no-signed-zeros plus +zfhmin,+zvfhmin).
; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
; CHECK-LABEL: @fmax_fast_half_zvfhmin(
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half>
; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x half> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x half> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x half> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x half> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x half> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x half> %[[SEL1]], <vscale x 8 x half> %[[SEL2]]
; CHECK-NEXT: call fast half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> %[[SEL]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Running maximum carried around the loop.
  %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
  ; Elements are 2 bytes apart, so 2 is the strongest alignment that holds on every iteration.
  %0 = load half, ptr %arrayidx, align 2
  %cmp.i = fcmp fast ogt half %0, %sum.07
  %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret half %.sroa.speculated
}
476+
; fmax reduction over bfloat elements, written as fcmp fast ogt + select and seeded with 0.0.
; Uses attribute group #2 (no-NaNs / no-signed-zeros plus +zfbfmin,+zvfbfmin).
; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
; CHECK-LABEL: @fmax_fast_bfloat_zvfbfmin(
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x bfloat>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x bfloat>
; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x bfloat> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x bfloat> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x bfloat> %[[SEL1]], <vscale x 8 x bfloat> %[[SEL2]]
; CHECK-NEXT: call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16(<vscale x 8 x bfloat> %[[SEL]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Running maximum carried around the loop.
  %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
  ; Elements are 2 bytes apart, so 2 is the strongest alignment that holds on every iteration.
  %0 = load bfloat, ptr %arrayidx, align 2
  %cmp.i = fcmp fast ogt bfloat %0, %sum.07
  %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret bfloat %.sroa.speculated
}
508+
; Reduction cannot be vectorized
382510
; MUL
@@ -591,6 +719,8 @@ for.end:
591719declare float @llvm.fmuladd.f32 (float , float , float )
592720
593721attributes #0 = { "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" }
; #1 and #2 carry the same no-NaNs / no-signed-zeros function attributes as #0 and add
; per-function target features: #1 is used by the half tests, #2 by the bfloat tests.
attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfhmin,+zvfhmin" }
attributes #2 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfbfmin,+zvfbfmin" }
594724
595725!0 = distinct !{!0 , !1 , !2 , !3 , !4 }
596726!1 = !{!"llvm.loop.vectorize.width" , i32 8 }
0 commit comments