1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
2
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- | FileCheck %s
3
3
4
- define <4 x i8 > @pr52275 (<4 x i8 > %v , i8* %x ) {
5
- ; CHECK-LABEL: @pr52275(
6
- ; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
7
- ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
8
- ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
9
- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
10
- ; CHECK-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
11
- ; CHECK-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
12
- ; CHECK-NEXT: ret <4 x i8> [[V2]]
13
- ;
4
+ define <4 x i8 > @test (<4 x i8 > %v , i8* %x ) {
14
5
%x0 = load i8 , i8* %x , align 4
15
6
%g1 = getelementptr inbounds i8 , i8* %x , i64 1
16
7
%x1 = load i8 , i8* %g1 , align 4
@@ -19,3 +10,139 @@ define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) {
19
10
%v2 = add <4 x i8 > %v0 , %v1
20
11
ret <4 x i8 > %v2
21
12
}
13
+
14
; test2: builds a <2 x i8> vector from two adjacent i32 loads.
; Loads the i32 at %t1 and the i32 at element offset 1 from %t1, truncates
; each to i8, and inserts them into %t6 at lane 0 and then lane 1. The result
; adds the fully built vector (%t10) to the partially built one (%t8), so the
; intermediate insertelement has a use outside the insert chain.
; The expected output listed below repeats the input instructions one for one.
; NOTE(review): the only run line visible at the top of this file does not
; enable the FORCE_SLP prefix - confirm another invocation uses it, otherwise
; the FORCE_SLP assertions below are never matched.
+ define <2 x i8 > @test2 (<2 x i8 > %t6 , i32* %t1 ) {
15
+ ; CHECK-LABEL: @test2(
16
+ ; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
17
+ ; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
18
+ ; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
19
+ ; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
20
+ ; CHECK-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
21
+ ; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
22
+ ; CHECK-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
23
+ ; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
24
+ ; CHECK-NEXT: ret <2 x i8> [[T11]]
25
+ ;
26
+ ; FORCE_SLP-LABEL: @test2(
27
+ ; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
28
+ ; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
29
+ ; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
30
+ ; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
31
+ ; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
32
+ ; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
33
+ ; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
34
+ ; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
35
+ ; FORCE_SLP-NEXT: ret <2 x i8> [[T11]]
36
+ ;
37
+ %t3 = load i32 , i32* %t1 , align 4
38
+ %t4 = getelementptr inbounds i32 , i32* %t1 , i64 1
39
+ %t5 = load i32 , i32* %t4 , align 4
40
+ %t7 = trunc i32 %t3 to i8
41
+ %t8 = insertelement <2 x i8 > %t6 , i8 %t7 , i64 0
42
+ %t9 = trunc i32 %t5 to i8
43
+ %t10 = insertelement <2 x i8 > %t8 , i8 %t9 , i64 1
44
+ %t11 = add <2 x i8 > %t10 , %t8
45
+ ret <2 x i8 > %t11
46
+ }
47
+
48
; test_reorder: same shape as a two-lane buildvector from adjacent i32 loads,
; but the lanes are filled in reverse order. The value loaded from %t1 goes to
; lane 1 first, then the value loaded from element offset 1 goes to lane 0.
; The result adds the fully built vector (%t10) to the partially built one
; (%t8), so the first insertelement has a use outside the insert chain.
; The expected output listed below repeats the input instructions one for one.
+ define <2 x i8 > @test_reorder (<2 x i8 > %t6 , i32* %t1 ) {
49
+ ; CHECK-LABEL: @test_reorder(
50
+ ; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
51
+ ; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
52
+ ; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
53
+ ; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
54
+ ; CHECK-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
55
+ ; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
56
+ ; CHECK-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
57
+ ; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
58
+ ; CHECK-NEXT: ret <2 x i8> [[T11]]
59
+ ;
60
+ ; FORCE_SLP-LABEL: @test_reorder(
61
+ ; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
62
+ ; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
63
+ ; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
64
+ ; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
65
+ ; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
66
+ ; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
67
+ ; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
68
+ ; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
69
+ ; FORCE_SLP-NEXT: ret <2 x i8> [[T11]]
70
+ ;
71
+ %t3 = load i32 , i32* %t1 , align 4
72
+ %t4 = getelementptr inbounds i32 , i32* %t1 , i64 1
73
+ %t5 = load i32 , i32* %t4 , align 4
74
+ %t7 = trunc i32 %t3 to i8
75
+ %t8 = insertelement <2 x i8 > %t6 , i8 %t7 , i64 1
76
+ %t9 = trunc i32 %t5 to i8
77
+ %t10 = insertelement <2 x i8 > %t8 , i8 %t9 , i64 0
78
+ %t11 = add <2 x i8 > %t10 , %t8
79
+ ret <2 x i8 > %t11
80
+ }
81
+
82
; test_subvector: the insert chain covers only part of a wider vector.
; Two adjacent i32 values are loaded from %t1, truncated to i8, and inserted
; into lanes 0 and 1 of the <4 x i8> argument %t6; lanes 2 and 3 are not
; written by this function. The result adds the vector after both inserts
; (%t10) to the vector after only the first insert (%t8).
; The expected output listed below repeats the input instructions one for one.
+ define <4 x i8 > @test_subvector (<4 x i8 > %t6 , i32* %t1 ) {
83
+ ; CHECK-LABEL: @test_subvector(
84
+ ; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
85
+ ; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
86
+ ; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
87
+ ; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
88
+ ; CHECK-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
89
+ ; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
90
+ ; CHECK-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
91
+ ; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
92
+ ; CHECK-NEXT: ret <4 x i8> [[T11]]
93
+ ;
94
+ ; FORCE_SLP-LABEL: @test_subvector(
95
+ ; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
96
+ ; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
97
+ ; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
98
+ ; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
99
+ ; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
100
+ ; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
101
+ ; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
102
+ ; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
103
+ ; FORCE_SLP-NEXT: ret <4 x i8> [[T11]]
104
+ ;
105
+ %t3 = load i32 , i32* %t1 , align 4
106
+ %t4 = getelementptr inbounds i32 , i32* %t1 , i64 1
107
+ %t5 = load i32 , i32* %t4 , align 4
108
+ %t7 = trunc i32 %t3 to i8
109
+ %t8 = insertelement <4 x i8 > %t6 , i8 %t7 , i64 0
110
+ %t9 = trunc i32 %t5 to i8
111
+ %t10 = insertelement <4 x i8 > %t8 , i8 %t9 , i64 1
112
+ %t11 = add <4 x i8 > %t10 , %t8
113
+ ret <4 x i8 > %t11
114
+ }
115
+
116
; test_subvector_reorder: partial insert chain into a wider vector, with the
; lanes filled in descending order. Two adjacent i32 values are loaded from
; %t1 and truncated to i8; the first goes to lane 3 of the <4 x i8> argument
; %t6 and the second to lane 2. Lanes 0 and 1 are not written by this
; function. The result adds the vector after both inserts (%t10) to the vector
; after only the first insert (%t8).
; The expected output listed below repeats the input instructions one for one.
+ define <4 x i8 > @test_subvector_reorder (<4 x i8 > %t6 , i32* %t1 ) {
117
+ ; CHECK-LABEL: @test_subvector_reorder(
118
+ ; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
119
+ ; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
120
+ ; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
121
+ ; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
122
+ ; CHECK-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
123
+ ; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
124
+ ; CHECK-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
125
+ ; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
126
+ ; CHECK-NEXT: ret <4 x i8> [[T11]]
127
+ ;
128
+ ; FORCE_SLP-LABEL: @test_subvector_reorder(
129
+ ; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
130
+ ; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
131
+ ; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
132
+ ; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
133
+ ; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
134
+ ; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
135
+ ; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
136
+ ; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
137
+ ; FORCE_SLP-NEXT: ret <4 x i8> [[T11]]
138
+ ;
139
+ %t3 = load i32 , i32* %t1 , align 4
140
+ %t4 = getelementptr inbounds i32 , i32* %t1 , i64 1
141
+ %t5 = load i32 , i32* %t4 , align 4
142
+ %t7 = trunc i32 %t3 to i8
143
+ %t8 = insertelement <4 x i8 > %t6 , i8 %t7 , i64 3
144
+ %t9 = trunc i32 %t5 to i8
145
+ %t10 = insertelement <4 x i8 > %t8 , i8 %t9 , i64 2
146
+ %t11 = add <4 x i8 > %t10 , %t8
147
+ ret <4 x i8 > %t11
148
+ }
0 commit comments