1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2
2
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3
3
4
+ ; Allocas should be placed in the entry block.
5
+ ; Allocas should also be reused across multiple insertelement and extractelement instructions that operate on the same vector.
6
+ define void @alloca_placement_and_reuse (<3 x i32 > %v1 , <3 x i32 > %v2 , i32 %a , i32 %i , i32 %j ) {
7
+ ; CHECK-LABEL: define void @alloca_placement_and_reuse(
8
+ ; CHECK-SAME: <3 x i32> [[V1:%.*]], <3 x i32> [[V2:%.*]], i32 [[A:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
9
+ ; CHECK-NEXT: [[AL:%.*]] = alloca [3 x i32], align 4
10
+ ; CHECK-NEXT: [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
11
+ ; CHECK-NEXT: [[EE2_ALLOCA:%.*]] = alloca [3 x i32], align 4
12
+ ; CHECK-NEXT: [[EE2_EXTRACT:%.*]] = extractelement <3 x i32> [[V2]], i64 0
13
+ ; CHECK-NEXT: [[EE2_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 0
14
+ ; CHECK-NEXT: store i32 [[EE2_EXTRACT]], ptr [[EE2_INDEX]], align 4
15
+ ; CHECK-NEXT: [[EE2_EXTRACT10:%.*]] = extractelement <3 x i32> [[V2]], i64 1
16
+ ; CHECK-NEXT: [[EE2_INDEX11:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 1
17
+ ; CHECK-NEXT: store i32 [[EE2_EXTRACT10]], ptr [[EE2_INDEX11]], align 4
18
+ ; CHECK-NEXT: [[EE2_EXTRACT12:%.*]] = extractelement <3 x i32> [[V2]], i64 2
19
+ ; CHECK-NEXT: [[EE2_INDEX13:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 2
20
+ ; CHECK-NEXT: store i32 [[EE2_EXTRACT12]], ptr [[EE2_INDEX13]], align 4
21
+ ; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V1]], i64 0
22
+ ; CHECK-NEXT: [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
23
+ ; CHECK-NEXT: store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
24
+ ; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V1]], i64 1
25
+ ; CHECK-NEXT: [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
26
+ ; CHECK-NEXT: store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
27
+ ; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V1]], i64 2
28
+ ; CHECK-NEXT: [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
29
+ ; CHECK-NEXT: store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
30
+ ; CHECK-NEXT: br label %[[BODY:.*]]
31
+ ; CHECK: [[BODY]]:
32
+ ; CHECK-NEXT: [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
33
+ ; CHECK-NEXT: [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
34
+ ; CHECK-NEXT: [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
35
+ ; CHECK-NEXT: store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
36
+ ; CHECK-NEXT: [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
37
+ ; CHECK-NEXT: [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
38
+ ; CHECK-NEXT: [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
39
+ ; CHECK-NEXT: [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
40
+ ; CHECK-NEXT: [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
41
+ ; CHECK-NEXT: [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
42
+ ; CHECK-NEXT: [[EE2_INDEX14:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 [[J]]
43
+ ; CHECK-NEXT: [[EE2_LOAD:%.*]] = load i32, ptr [[EE2_INDEX14]], align 4
44
+ ; CHECK-NEXT: ret void
45
+ ;
46
+ ; Entry block: %al is an alloca already present in the input. The CHECK lines above
+ ; expect the two allocas created by the pass to follow it, before the branch to %body.
+ %al = alloca [3 x i32 ], align 4
47
+ br label %body
48
+ body:
49
+ ; %ee1 and %ie1 both index %v1 dynamically with %i; the CHECK lines above expect both
+ ; to address the same alloca (EE1_ALLOCA). %ee2 indexes %v2 with %j and is expected to
+ ; use a separate alloca (EE2_ALLOCA).
+ %ee1 = extractelement <3 x i32 > %v1 , i32 %i
50
+ %ie1 = insertelement <3 x i32 > %v1 , i32 %a , i32 %i
51
+ %ee2 = extractelement <3 x i32 > %v2 , i32 %j
52
+ ret void
53
+ }
54
+
4
55
define float @extract_float_vec_dynamic (<4 x float > %v , i32 %i ) {
5
56
; CHECK-LABEL: define float @extract_float_vec_dynamic(
6
57
; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
7
- ; CHECK-NEXT: [[TMP1 :%.*]] = alloca [4 x float], align 4
8
- ; CHECK-NEXT: [[TMP2 :%.*]] = extractelement <4 x float> [[V]], i64 0
9
- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 0
10
- ; CHECK-NEXT: store float [[TMP2 ]], ptr [[TMP3 ]], align 4
11
- ; CHECK-NEXT: [[TMP4 :%.*]] = extractelement <4 x float> [[V]], i64 1
12
- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 1
13
- ; CHECK-NEXT: store float [[TMP4 ]], ptr [[TMP5 ]], align 4
14
- ; CHECK-NEXT: [[TMP6 :%.*]] = extractelement <4 x float> [[V]], i64 2
15
- ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 2
16
- ; CHECK-NEXT: store float [[TMP6 ]], ptr [[TMP7 ]], align 4
17
- ; CHECK-NEXT: [[TMP8 :%.*]] = extractelement <4 x float> [[V]], i64 3
18
- ; CHECK-NEXT: [[TMP9 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 3
19
- ; CHECK-NEXT: store float [[TMP8 ]], ptr [[TMP9 ]], align 4
20
- ; CHECK-NEXT: [[TMP10 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 [[I]]
21
- ; CHECK-NEXT: [[TMP11 :%.*]] = load float, ptr [[TMP10 ]], align 4
22
- ; CHECK-NEXT: ret float [[TMP11 ]]
58
+ ; CHECK-NEXT: [[EE_ALLOCA :%.*]] = alloca [4 x float], align 4
59
+ ; CHECK-NEXT: [[EE_EXTRACT :%.*]] = extractelement <4 x float> [[V]], i64 0
60
+ ; CHECK-NEXT: [[EE_INDEX :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 0
61
+ ; CHECK-NEXT: store float [[EE_EXTRACT ]], ptr [[EE_INDEX ]], align 4
62
+ ; CHECK-NEXT: [[EE_EXTRACT1 :%.*]] = extractelement <4 x float> [[V]], i64 1
63
+ ; CHECK-NEXT: [[EE_INDEX2 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 1
64
+ ; CHECK-NEXT: store float [[EE_EXTRACT1 ]], ptr [[EE_INDEX2 ]], align 4
65
+ ; CHECK-NEXT: [[EE_EXTRACT3 :%.*]] = extractelement <4 x float> [[V]], i64 2
66
+ ; CHECK-NEXT: [[EE_INDEX4 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 2
67
+ ; CHECK-NEXT: store float [[EE_EXTRACT3 ]], ptr [[EE_INDEX4 ]], align 4
68
+ ; CHECK-NEXT: [[EE_EXTRACT5 :%.*]] = extractelement <4 x float> [[V]], i64 3
69
+ ; CHECK-NEXT: [[EE_INDEX6 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 3
70
+ ; CHECK-NEXT: store float [[EE_EXTRACT5 ]], ptr [[EE_INDEX6 ]], align 4
71
+ ; CHECK-NEXT: [[EE_INDEX7 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 [[I]]
72
+ ; CHECK-NEXT: [[EE_LOAD :%.*]] = load float, ptr [[EE_INDEX7 ]], align 4
73
+ ; CHECK-NEXT: ret float [[EE_LOAD ]]
23
74
;
24
75
%ee = extractelement <4 x float > %v , i32 %i
25
76
ret float %ee
@@ -28,25 +79,25 @@ define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
28
79
define <3 x i32 > @insert_i32_vec_dynamic (<3 x i32 > %v , i32 %a , i32 %i ) {
29
80
; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
30
81
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
31
- ; CHECK-NEXT: [[TMP1 :%.*]] = alloca [3 x i32], align 4
32
- ; CHECK-NEXT: [[TMP2 :%.*]] = extractelement <3 x i32> [[V]], i64 0
33
- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 0
34
- ; CHECK-NEXT: store i32 [[TMP2 ]], ptr [[TMP3 ]], align 4
35
- ; CHECK-NEXT: [[TMP4 :%.*]] = extractelement <3 x i32> [[V]], i64 1
36
- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 1
37
- ; CHECK-NEXT: store i32 [[TMP4 ]], ptr [[TMP5 ]], align 4
38
- ; CHECK-NEXT: [[TMP6 :%.*]] = extractelement <3 x i32> [[V]], i64 2
39
- ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 2
40
- ; CHECK-NEXT: store i32 [[TMP6 ]], ptr [[TMP7 ]], align 4
41
- ; CHECK-NEXT: [[TMP8 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 [[I]]
42
- ; CHECK-NEXT: store i32 [[A]], ptr [[TMP8 ]], align 4
43
- ; CHECK-NEXT: [[TMP9 :%.*]] = load i32, ptr [[TMP3 ]], align 4
44
- ; CHECK-NEXT: [[TMP10 :%.*]] = insertelement <3 x i32> poison, i32 [[TMP9 ]], i32 0
45
- ; CHECK-NEXT: [[TMP11 :%.*]] = load i32, ptr [[TMP5 ]], align 4
46
- ; CHECK-NEXT: [[TMP12 :%.*]] = insertelement <3 x i32> [[TMP10 ]], i32 [[TMP11 ]], i32 1
47
- ; CHECK-NEXT: [[TMP13 :%.*]] = load i32, ptr [[TMP7 ]], align 4
48
- ; CHECK-NEXT: [[TMP14 :%.*]] = insertelement <3 x i32> [[TMP12 ]], i32 [[TMP13 ]], i32 2
49
- ; CHECK-NEXT: ret <3 x i32> [[TMP14 ]]
82
+ ; CHECK-NEXT: [[IE_ALLOCA :%.*]] = alloca [3 x i32], align 4
83
+ ; CHECK-NEXT: [[IE_EXTRACT :%.*]] = extractelement <3 x i32> [[V]], i64 0
84
+ ; CHECK-NEXT: [[IE_INDEX :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 0
85
+ ; CHECK-NEXT: store i32 [[IE_EXTRACT ]], ptr [[IE_INDEX ]], align 4
86
+ ; CHECK-NEXT: [[IE_EXTRACT1 :%.*]] = extractelement <3 x i32> [[V]], i64 1
87
+ ; CHECK-NEXT: [[IE_INDEX2 :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 1
88
+ ; CHECK-NEXT: store i32 [[IE_EXTRACT1 ]], ptr [[IE_INDEX2 ]], align 4
89
+ ; CHECK-NEXT: [[IE_EXTRACT3 :%.*]] = extractelement <3 x i32> [[V]], i64 2
90
+ ; CHECK-NEXT: [[IE_INDEX4 :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 2
91
+ ; CHECK-NEXT: store i32 [[IE_EXTRACT3 ]], ptr [[IE_INDEX4 ]], align 4
92
+ ; CHECK-NEXT: [[IE_DYNINDEX :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 [[I]]
93
+ ; CHECK-NEXT: store i32 [[A]], ptr [[IE_DYNINDEX ]], align 4
94
+ ; CHECK-NEXT: [[IE_LOAD :%.*]] = load i32, ptr [[IE_INDEX ]], align 4
95
+ ; CHECK-NEXT: [[IE_INSERT :%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD ]], i32 0
96
+ ; CHECK-NEXT: [[IE_LOAD5 :%.*]] = load i32, ptr [[IE_INDEX2 ]], align 4
97
+ ; CHECK-NEXT: [[IE_INSERT6 :%.*]] = insertelement <3 x i32> [[IE_INSERT ]], i32 [[IE_LOAD5 ]], i32 1
98
+ ; CHECK-NEXT: [[IE_LOAD7 :%.*]] = load i32, ptr [[IE_INDEX4 ]], align 4
99
+ ; CHECK-NEXT: [[IE_INSERT8 :%.*]] = insertelement <3 x i32> [[IE_INSERT6 ]], i32 [[IE_LOAD7 ]], i32 2
100
+ ; CHECK-NEXT: ret <3 x i32> [[IE_INSERT8 ]]
50
101
;
51
102
%ie = insertelement <3 x i32 > %v , i32 %a , i32 %i
52
103
ret <3 x i32 > %ie
@@ -67,8 +118,8 @@ define i16 @extract_i16_vec_constant(<4 x i16> %v) {
67
118
define <2 x half > @insert_half_vec_constant (<2 x half > %v , half %a ) {
68
119
; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
69
120
; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
70
- ; CHECK-NEXT: [[TMP1 :%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
71
- ; CHECK-NEXT: ret <2 x half> [[TMP1 ]]
121
+ ; CHECK-NEXT: [[IE :%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
122
+ ; CHECK-NEXT: ret <2 x half> [[IE ]]
72
123
;
73
124
%ie = insertelement <2 x half > %v , half %a , i32 1
74
125
ret <2 x half > %ie
0 commit comments