11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
33
4+ ; Allocas should be placed in the entry block.
5+ ; Allocas should also be reused across multiple insertelement and extractelement instructions that operate on the same vector.
6+ define void @alloca_placement_and_reuse (<3 x i32 > %v1 , <3 x i32 > %v2 , i32 %a , i32 %i , i32 %j ) {
7+ ; CHECK-LABEL: define void @alloca_placement_and_reuse(
8+ ; CHECK-SAME: <3 x i32> [[V1:%.*]], <3 x i32> [[V2:%.*]], i32 [[A:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
9+ ; CHECK-NEXT: [[AL:%.*]] = alloca [3 x i32], align 4
10+ ; CHECK-NEXT: [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
11+ ; CHECK-NEXT: [[EE2_ALLOCA:%.*]] = alloca [3 x i32], align 4
12+ ; CHECK-NEXT: [[EE2_EXTRACT:%.*]] = extractelement <3 x i32> [[V2]], i64 0
13+ ; CHECK-NEXT: [[EE2_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 0
14+ ; CHECK-NEXT: store i32 [[EE2_EXTRACT]], ptr [[EE2_INDEX]], align 4
15+ ; CHECK-NEXT: [[EE2_EXTRACT10:%.*]] = extractelement <3 x i32> [[V2]], i64 1
16+ ; CHECK-NEXT: [[EE2_INDEX11:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 1
17+ ; CHECK-NEXT: store i32 [[EE2_EXTRACT10]], ptr [[EE2_INDEX11]], align 4
18+ ; CHECK-NEXT: [[EE2_EXTRACT12:%.*]] = extractelement <3 x i32> [[V2]], i64 2
19+ ; CHECK-NEXT: [[EE2_INDEX13:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 2
20+ ; CHECK-NEXT: store i32 [[EE2_EXTRACT12]], ptr [[EE2_INDEX13]], align 4
21+ ; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V1]], i64 0
22+ ; CHECK-NEXT: [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
23+ ; CHECK-NEXT: store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
24+ ; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V1]], i64 1
25+ ; CHECK-NEXT: [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
26+ ; CHECK-NEXT: store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
27+ ; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V1]], i64 2
28+ ; CHECK-NEXT: [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
29+ ; CHECK-NEXT: store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
30+ ; CHECK-NEXT: br label %[[BODY:.*]]
31+ ; CHECK: [[BODY]]:
32+ ; CHECK-NEXT: [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
33+ ; CHECK-NEXT: [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
34+ ; CHECK-NEXT: [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
35+ ; CHECK-NEXT: store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
36+ ; CHECK-NEXT: [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
37+ ; CHECK-NEXT: [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
38+ ; CHECK-NEXT: [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
39+ ; CHECK-NEXT: [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
40+ ; CHECK-NEXT: [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
41+ ; CHECK-NEXT: [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
42+ ; CHECK-NEXT: [[EE2_INDEX14:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 [[J]]
43+ ; CHECK-NEXT: [[EE2_LOAD:%.*]] = load i32, ptr [[EE2_INDEX14]], align 4
44+ ; CHECK-NEXT: ret void
45+ ;
; An alloca that already exists in the entry block. The assertions expect it
; to stay first, with the two pass-created allocas emitted directly after it
; and the per-element stores that fill them emitted before the branch.
46+ %al = alloca [3 x i32 ], align 4
47+ br label %body
; The dynamically indexed vector operations are deliberately placed in a
; non-entry block, so the test can tell whether their backing allocas were
; placed in the entry block rather than next to the instruction being lowered.
48+ body:
; %ee1 and %ie1 both index %v1 with the dynamic index %i. The assertions
; expect both to address the same backing array (EE1_ALLOCA), not one each.
49+ %ee1 = extractelement <3 x i32 > %v1 , i32 %i
50+ %ie1 = insertelement <3 x i32 > %v1 , i32 %a , i32 %i
; %ee2 indexes a different vector (%v2, index %j), so the assertions expect a
; separate backing array (EE2_ALLOCA) for it.
51+ %ee2 = extractelement <3 x i32 > %v2 , i32 %j
; None of the results above are used; the function only exists to exercise
; how the pass places and reuses its allocas.
52+ ret void
53+ }
54+
455define float @extract_float_vec_dynamic (<4 x float > %v , i32 %i ) {
556; CHECK-LABEL: define float @extract_float_vec_dynamic(
657; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
7- ; CHECK-NEXT: [[TMP1 :%.*]] = alloca [4 x float], align 4
8- ; CHECK-NEXT: [[TMP2 :%.*]] = extractelement <4 x float> [[V]], i64 0
9- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 0
10- ; CHECK-NEXT: store float [[TMP2 ]], ptr [[TMP3 ]], align 4
11- ; CHECK-NEXT: [[TMP4 :%.*]] = extractelement <4 x float> [[V]], i64 1
12- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 1
13- ; CHECK-NEXT: store float [[TMP4 ]], ptr [[TMP5 ]], align 4
14- ; CHECK-NEXT: [[TMP6 :%.*]] = extractelement <4 x float> [[V]], i64 2
15- ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 2
16- ; CHECK-NEXT: store float [[TMP6 ]], ptr [[TMP7 ]], align 4
17- ; CHECK-NEXT: [[TMP8 :%.*]] = extractelement <4 x float> [[V]], i64 3
18- ; CHECK-NEXT: [[TMP9 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 3
19- ; CHECK-NEXT: store float [[TMP8 ]], ptr [[TMP9 ]], align 4
20- ; CHECK-NEXT: [[TMP10 :%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1 ]], i32 0, i32 [[I]]
21- ; CHECK-NEXT: [[TMP11 :%.*]] = load float, ptr [[TMP10 ]], align 4
22- ; CHECK-NEXT: ret float [[TMP11 ]]
58+ ; CHECK-NEXT: [[EE_ALLOCA :%.*]] = alloca [4 x float], align 4
59+ ; CHECK-NEXT: [[EE_EXTRACT :%.*]] = extractelement <4 x float> [[V]], i64 0
60+ ; CHECK-NEXT: [[EE_INDEX :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 0
61+ ; CHECK-NEXT: store float [[EE_EXTRACT ]], ptr [[EE_INDEX ]], align 4
62+ ; CHECK-NEXT: [[EE_EXTRACT1 :%.*]] = extractelement <4 x float> [[V]], i64 1
63+ ; CHECK-NEXT: [[EE_INDEX2 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 1
64+ ; CHECK-NEXT: store float [[EE_EXTRACT1 ]], ptr [[EE_INDEX2 ]], align 4
65+ ; CHECK-NEXT: [[EE_EXTRACT3 :%.*]] = extractelement <4 x float> [[V]], i64 2
66+ ; CHECK-NEXT: [[EE_INDEX4 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 2
67+ ; CHECK-NEXT: store float [[EE_EXTRACT3 ]], ptr [[EE_INDEX4 ]], align 4
68+ ; CHECK-NEXT: [[EE_EXTRACT5 :%.*]] = extractelement <4 x float> [[V]], i64 3
69+ ; CHECK-NEXT: [[EE_INDEX6 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 3
70+ ; CHECK-NEXT: store float [[EE_EXTRACT5 ]], ptr [[EE_INDEX6 ]], align 4
71+ ; CHECK-NEXT: [[EE_INDEX7 :%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA ]], i32 0, i32 [[I]]
72+ ; CHECK-NEXT: [[EE_LOAD :%.*]] = load float, ptr [[EE_INDEX7 ]], align 4
73+ ; CHECK-NEXT: ret float [[EE_LOAD ]]
2374;
2475 %ee = extractelement <4 x float > %v , i32 %i
2576 ret float %ee
@@ -28,25 +79,25 @@ define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
2879define <3 x i32 > @insert_i32_vec_dynamic (<3 x i32 > %v , i32 %a , i32 %i ) {
2980; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
3081; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
31- ; CHECK-NEXT: [[TMP1 :%.*]] = alloca [3 x i32], align 4
32- ; CHECK-NEXT: [[TMP2 :%.*]] = extractelement <3 x i32> [[V]], i64 0
33- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 0
34- ; CHECK-NEXT: store i32 [[TMP2 ]], ptr [[TMP3 ]], align 4
35- ; CHECK-NEXT: [[TMP4 :%.*]] = extractelement <3 x i32> [[V]], i64 1
36- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 1
37- ; CHECK-NEXT: store i32 [[TMP4 ]], ptr [[TMP5 ]], align 4
38- ; CHECK-NEXT: [[TMP6 :%.*]] = extractelement <3 x i32> [[V]], i64 2
39- ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 2
40- ; CHECK-NEXT: store i32 [[TMP6 ]], ptr [[TMP7 ]], align 4
41- ; CHECK-NEXT: [[TMP8 :%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1 ]], i32 0, i32 [[I]]
42- ; CHECK-NEXT: store i32 [[A]], ptr [[TMP8 ]], align 4
43- ; CHECK-NEXT: [[TMP9 :%.*]] = load i32, ptr [[TMP3 ]], align 4
44- ; CHECK-NEXT: [[TMP10 :%.*]] = insertelement <3 x i32> poison, i32 [[TMP9 ]], i32 0
45- ; CHECK-NEXT: [[TMP11 :%.*]] = load i32, ptr [[TMP5 ]], align 4
46- ; CHECK-NEXT: [[TMP12 :%.*]] = insertelement <3 x i32> [[TMP10 ]], i32 [[TMP11 ]], i32 1
47- ; CHECK-NEXT: [[TMP13 :%.*]] = load i32, ptr [[TMP7 ]], align 4
48- ; CHECK-NEXT: [[TMP14 :%.*]] = insertelement <3 x i32> [[TMP12 ]], i32 [[TMP13 ]], i32 2
49- ; CHECK-NEXT: ret <3 x i32> [[TMP14 ]]
82+ ; CHECK-NEXT: [[IE_ALLOCA :%.*]] = alloca [3 x i32], align 4
83+ ; CHECK-NEXT: [[IE_EXTRACT :%.*]] = extractelement <3 x i32> [[V]], i64 0
84+ ; CHECK-NEXT: [[IE_INDEX :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 0
85+ ; CHECK-NEXT: store i32 [[IE_EXTRACT ]], ptr [[IE_INDEX ]], align 4
86+ ; CHECK-NEXT: [[IE_EXTRACT1 :%.*]] = extractelement <3 x i32> [[V]], i64 1
87+ ; CHECK-NEXT: [[IE_INDEX2 :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 1
88+ ; CHECK-NEXT: store i32 [[IE_EXTRACT1 ]], ptr [[IE_INDEX2 ]], align 4
89+ ; CHECK-NEXT: [[IE_EXTRACT3 :%.*]] = extractelement <3 x i32> [[V]], i64 2
90+ ; CHECK-NEXT: [[IE_INDEX4 :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 2
91+ ; CHECK-NEXT: store i32 [[IE_EXTRACT3 ]], ptr [[IE_INDEX4 ]], align 4
92+ ; CHECK-NEXT: [[IE_DYNINDEX :%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA ]], i32 0, i32 [[I]]
93+ ; CHECK-NEXT: store i32 [[A]], ptr [[IE_DYNINDEX ]], align 4
94+ ; CHECK-NEXT: [[IE_LOAD :%.*]] = load i32, ptr [[IE_INDEX ]], align 4
95+ ; CHECK-NEXT: [[IE_INSERT :%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD ]], i32 0
96+ ; CHECK-NEXT: [[IE_LOAD5 :%.*]] = load i32, ptr [[IE_INDEX2 ]], align 4
97+ ; CHECK-NEXT: [[IE_INSERT6 :%.*]] = insertelement <3 x i32> [[IE_INSERT ]], i32 [[IE_LOAD5 ]], i32 1
98+ ; CHECK-NEXT: [[IE_LOAD7 :%.*]] = load i32, ptr [[IE_INDEX4 ]], align 4
99+ ; CHECK-NEXT: [[IE_INSERT8 :%.*]] = insertelement <3 x i32> [[IE_INSERT6 ]], i32 [[IE_LOAD7 ]], i32 2
100+ ; CHECK-NEXT: ret <3 x i32> [[IE_INSERT8 ]]
50101;
51102 %ie = insertelement <3 x i32 > %v , i32 %a , i32 %i
52103 ret <3 x i32 > %ie
@@ -67,8 +118,8 @@ define i16 @extract_i16_vec_constant(<4 x i16> %v) {
67118define <2 x half > @insert_half_vec_constant (<2 x half > %v , half %a ) {
68119; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
69120; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
70- ; CHECK-NEXT: [[TMP1 :%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
71- ; CHECK-NEXT: ret <2 x half> [[TMP1 ]]
121+ ; CHECK-NEXT: [[IE :%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
122+ ; CHECK-NEXT: ret <2 x half> [[IE ]]
72123;
73124 %ie = insertelement <2 x half > %v , half %a , i32 1
74125 ret <2 x half > %ie
0 commit comments