1+ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
2+ ; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s
3+
4+ ; REQUIRES: asserts
5+
6+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
7+
; Input loop for the loop-vectorize cost-model debug output asserted below.
;
; The loop counts %iv from 0 in steps of 1 and exits once %iv.next equals 32.
; Each iteration loads an i64 from %src[%iv]; only when the loaded value is
; unsigned-less-or-equal to 128 does it store %iv + 4 to %dst[(%iv + 4) | 1].
; %dst is marked noalias.
;
; The assertions cover VF 2 and VF 4. For both, the scalar `or disjoint` in
; loop.then is expected to be printed as `EMIT ir<%or> = add ir<%iv.4>, ir<1>`
; with a cost of 1 -- presumably the replacement the function name refers to.
8+ define void @wide_or_replaced_with_add_vpinstruction (ptr %src , ptr noalias %dst ) {
9+ ; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
10+ ; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
11+ ; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
12+ ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
13+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
14+ ; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
15+ ; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
16+ ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
17+ ; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
18+ ; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
19+ ; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
20+ ; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
21+ ; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
22+ ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
23+ ; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
24+ ; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
25+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
26+ ; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
27+ ; CHECK: Cost of 0 for VF 2: vector loop backedge
28+ ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
29+ ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
30+ ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
31+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
32+ ; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
33+ ; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
34+ ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
35+ ; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
36+ ; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
37+ ; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
38+ ; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
39+ ; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
40+ ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
41+ ; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
42+ ; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
43+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
44+ ; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
45+ ; CHECK: Cost of 0 for VF 4: vector loop backedge
46+ ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
47+ ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
48+ ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
49+ ;
50+ entry:
51+ br label %loop.header
52+
53+ loop.header:
; %iv is 0 on entry from %entry and %iv.next on the backedge from %loop.latch.
54+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop.latch ]
; Load the i64 element src[iv].
55+ %g.src = getelementptr inbounds i64 , ptr %src , i64 %iv
56+ %l = load i64 , ptr %g.src
; %iv.4 is both the value stored in loop.then and the input to the store index.
57+ %iv.4 = add nuw nsw i64 %iv , 4
; Unsigned compare: loop.then runs only when %l <= 128, otherwise the
; iteration skips directly to the latch.
58+ %c = icmp ule i64 %l , 128
59+ br i1 %c , label %loop.then , label %loop.latch
60+
61+ loop.then:
; Conditionally executed block. This is the `or` that the expected output
; above lists as an `add` of ir<%iv.4> and ir<1>.
; NOTE(review): %iv.4 = %iv + 4 is odd on every other iteration, so its low
; bit overlaps the constant 1 there even though the `or` carries `disjoint`
; -- confirm this is intentional for the test input.
62+ %or = or disjoint i64 %iv.4 , 1
; The scalar GEP is `inbounds`; the expected output above prints the cloned
; %g.dst GEP without that flag, unlike the %g.src GEP.
63+ %g.dst = getelementptr inbounds i64 , ptr %dst , i64 %or
; The i64 store is explicitly `align 4`, while the datalayout string lists
; i64:64. NOTE(review): presumably deliberate; verify if the alignment matters
; for the costs being asserted.
64+ store i64 %iv.4 , ptr %g.dst , align 4
65+ br label %loop.latch
66+
67+ loop.latch:
; Advance the induction variable; the loop body runs for %iv = 0..31 and
; exits when %iv.next reaches 32.
68+ %iv.next = add nuw nsw i64 %iv , 1
69+ %exitcond = icmp eq i64 %iv.next , 32
70+ br i1 %exitcond , label %exit , label %loop.header
71+
72+ exit:
73+ ret void
74+ }
0 commit comments