Skip to content

Commit ff2a895

Browse files
Vasileios Porpodas authored and memfrob committed
[SLP][NFC] Precommit a lit test for a followup patch that improves tree reordering for external users.
Differential Revision: https://reviews.llvm.org/D125110
1 parent d4d4d2e commit ff2a895

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
3+
4+
; Make sure that we rotate the graph to help avoid the shuffle to
5+
; the external vectorizable stores.
6+
;
7+
; SLP starts vectorizing from the operands of the `fcmp` in bb2, then crosses
8+
; into bb1, vectorizing all the way to the broadcast load at the top.
9+
; The stores in bb1 are external to this tree, but they are vectorizable and are
10+
; in reverse order.
11+
define void @rotate_with_external_users(double *%A, double *%ptr) {
12+
; CHECK-LABEL: @rotate_with_external_users(
13+
; CHECK-NEXT: bb1:
14+
; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8
15+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
16+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1
17+
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 1.100000e+00, double 2.200000e+00>
18+
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 1.100000e+00, double 2.200000e+00>
19+
; CHECK-NEXT: [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
20+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
21+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PTRA1]] to <2 x double>*
22+
; CHECK-NEXT: store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP4]], align 8
23+
; CHECK-NEXT: br label [[BB2:%.*]]
24+
; CHECK: bb2:
25+
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], <double 3.300000e+00, double 4.400000e+00>
26+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
27+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
28+
; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP6]], [[TMP7]]
29+
; CHECK-NEXT: ret void
30+
;
31+
bb1:
; A single scalar load; its value is used by both fadd instructions below,
; which is why the expected output inserts it into both vector lanes.
32+
%ld = load double, double* undef
33+
34+
%add1 = fadd double %ld, 1.1
35+
%add2 = fadd double %ld, 2.2
36+
37+
%mul1 = fmul double %add1, 1.1
38+
%mul2 = fmul double %add2, 2.2
39+
40+
; These are external vectorizable stores with operands in reverse order:
; %mul2 is stored to A[0] and %mul1 to A[1]. With the current tree order the
; expected output above needs a shufflevector <1, 0> before the vector store.
41+
%ptrA1 = getelementptr inbounds double, double* %A, i64 0
42+
%ptrA2 = getelementptr inbounds double, double* %A, i64 1
43+
store double %mul2, double *%ptrA1
44+
store double %mul1, double *%ptrA2
45+
br label %bb2
46+
47+
bb2:
; %mul1/%mul2 are used again here, in a different block from the stores.
; The operands of the fcmp are where SLP starts building the tree (see the
; comment at the top of the test); %seed itself has no users.
48+
%add3 = fadd double %mul1, 3.3
49+
%add4 = fadd double %mul2, 4.4
50+
%seed = fcmp ogt double %add3, %add4
51+
ret void
52+
}

0 commit comments

Comments
 (0)