|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
2 | 2 | ; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
3 | 3 |
|
4 |
| -; Allocas should be placed in the entry block. |
5 |
| -; Allocas should also be reused across multiple insertelement and extractelement instructions for the same vector |
6 |
| -define void @alloca_placement_and_reuse(<3 x i32> %v1, <3 x i32> %v2, i32 %a, i32 %i, i32 %j) { |
7 |
| -; CHECK-LABEL: define void @alloca_placement_and_reuse( |
8 |
| -; CHECK-SAME: <3 x i32> [[V1:%.*]], <3 x i32> [[V2:%.*]], i32 [[A:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) { |
9 |
| -; CHECK-NEXT: [[AL:%.*]] = alloca [3 x i32], align 4 |
| 4 | +; Ensure that insertelement instructions have no side effects on each other |
| 5 | +; even in the presence of control flow |
| 6 | +define void @test_multiple_insert(i32 %c, i32 %i, i32 %j) { |
| 7 | +; CHECK-LABEL: define void @test_multiple_insert( |
| 8 | +; CHECK-SAME: i32 [[C:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) { |
| 9 | +; CHECK-NEXT: [[V0_ALLOCA:%.*]] = alloca [2 x i32], align 4 |
| 10 | +; CHECK-NEXT: [[V_ALLOCA:%.*]] = alloca [2 x i32], align 4 |
| 11 | +; CHECK-NEXT: [[V0_0:%.*]] = insertelement <2 x i32> poison, i32 0, i32 0 |
| 12 | +; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i32> [[V0_0]], i32 0, i32 1 |
| 13 | +; CHECK-NEXT: [[V0_EXTRACT0:%.*]] = extractelement <2 x i32> [[V0]], i64 0 |
| 14 | +; CHECK-NEXT: [[V0_INDEX0:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 0 |
| 15 | +; CHECK-NEXT: store i32 [[V0_EXTRACT0]], ptr [[V0_INDEX0]], align 4 |
| 16 | +; CHECK-NEXT: [[V0_EXTRACT1:%.*]] = extractelement <2 x i32> [[V0]], i64 1 |
| 17 | +; CHECK-NEXT: [[V0_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 1 |
| 18 | +; CHECK-NEXT: store i32 [[V0_EXTRACT1]], ptr [[V0_INDEX1]], align 4 |
| 19 | +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[C]], 1 |
| 20 | +; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[ELSE:.*]] |
| 21 | +; CHECK: [[IF]]: |
| 22 | +; CHECK-NEXT: [[V1_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]] |
| 23 | +; CHECK-NEXT: [[V1_LOAD:%.*]] = load i32, ptr [[V1_INDEX]], align 4 |
| 24 | +; CHECK-NEXT: store i32 1, ptr [[V1_INDEX]], align 4 |
| 25 | +; CHECK-NEXT: [[V1_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4 |
| 26 | +; CHECK-NEXT: [[V1_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V1_LOAD0]], i32 0 |
| 27 | +; CHECK-NEXT: [[V1_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4 |
| 28 | +; CHECK-NEXT: [[V1_INSERT1:%.*]] = insertelement <2 x i32> [[V1_INSERT0]], i32 [[V1_LOAD1]], i32 1 |
| 29 | +; CHECK-NEXT: store i32 [[V1_LOAD]], ptr [[V1_INDEX]], align 4 |
| 30 | +; CHECK-NEXT: br label %[[EXIT:.*]] |
| 31 | +; CHECK: [[ELSE]]: |
| 32 | +; CHECK-NEXT: [[V2_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]] |
| 33 | +; CHECK-NEXT: [[V2_LOAD:%.*]] = load i32, ptr [[V2_INDEX]], align 4 |
| 34 | +; CHECK-NEXT: store i32 2, ptr [[V2_INDEX]], align 4 |
| 35 | +; CHECK-NEXT: [[V2_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4 |
| 36 | +; CHECK-NEXT: [[V2_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V2_LOAD0]], i32 0 |
| 37 | +; CHECK-NEXT: [[V2_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4 |
| 38 | +; CHECK-NEXT: [[V2_INSERT1:%.*]] = insertelement <2 x i32> [[V2_INSERT0]], i32 [[V2_LOAD1]], i32 1 |
| 39 | +; CHECK-NEXT: store i32 [[V2_LOAD]], ptr [[V2_INDEX]], align 4 |
| 40 | +; CHECK-NEXT: br label %[[EXIT]] |
| 41 | +; CHECK: [[EXIT]]: |
| 42 | +; CHECK-NEXT: [[V:%.*]] = phi <2 x i32> [ [[V1_INSERT1]], %[[IF]] ], [ [[V2_INSERT1]], %[[ELSE]] ] |
| 43 | +; CHECK-NEXT: [[V_EXTRACT:%.*]] = extractelement <2 x i32> [[V]], i64 0 |
| 44 | +; CHECK-NEXT: [[V_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 0 |
| 45 | +; CHECK-NEXT: store i32 [[V_EXTRACT]], ptr [[V_INDEX]], align 4 |
| 46 | +; CHECK-NEXT: [[V_EXTRACT10:%.*]] = extractelement <2 x i32> [[V]], i64 1 |
| 47 | +; CHECK-NEXT: [[V_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 1 |
| 48 | +; CHECK-NEXT: store i32 [[V_EXTRACT10]], ptr [[V_INDEX1]], align 4 |
| 49 | +; CHECK-NEXT: [[V3_INDEXJ:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 [[J]] |
| 50 | +; CHECK-NEXT: [[V3_LOAD:%.*]] = load i32, ptr [[V3_INDEXJ]], align 4 |
| 51 | +; CHECK-NEXT: store i32 3, ptr [[V3_INDEXJ]], align 4 |
| 52 | +; CHECK-NEXT: [[V3_LOAD0:%.*]] = load i32, ptr [[V_INDEX]], align 4 |
| 53 | +; CHECK-NEXT: [[V3_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V3_LOAD0]], i32 0 |
| 54 | +; CHECK-NEXT: [[V3_LOAD1:%.*]] = load i32, ptr [[V_INDEX1]], align 4 |
| 55 | +; CHECK-NEXT: [[V3_INSERT1:%.*]] = insertelement <2 x i32> [[V3_INSERT0]], i32 [[V3_LOAD1]], i32 1 |
| 56 | +; CHECK-NEXT: store i32 [[V3_LOAD]], ptr [[V3_INDEXJ]], align 4 |
| 57 | +; CHECK-NEXT: ret void |
| 58 | +; |
| 59 | + %v0_0 = insertelement <2 x i32> poison, i32 0, i32 0 |
| 60 | + %v0 = insertelement <2 x i32> %v0_0, i32 0, i32 1 |
| 61 | + %cond = icmp eq i32 %c, 1 |
| 62 | + br i1 %cond, label %if, label %else |
| 63 | +if: |
| 64 | + %v1 = insertelement <2 x i32> %v0, i32 1, i32 %i |
| 65 | + br label %exit |
| 66 | +else: |
| 67 | + %v2 = insertelement <2 x i32> %v0, i32 2, i32 %i |
| 68 | + br label %exit |
| 69 | +exit: |
| 70 | + %v = phi <2 x i32> [ %v1, %if ], [ %v2, %else ] |
| 71 | + %v3 = insertelement <2 x i32> %v, i32 3, i32 %j |
| 72 | + ret void |
| 73 | +} |
| 74 | + |
| 75 | +; Allocas can be reused across insert/extractelement instructions on the same vector |
| 76 | +define void @test_alloca_reuse(<3 x i32> %v, i32 %a, i32 %i) { |
| 77 | +; CHECK-LABEL: define void @test_alloca_reuse( |
| 78 | +; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) { |
10 | 79 | ; CHECK-NEXT: [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
|
11 |
| -; CHECK-NEXT: [[EE2_ALLOCA:%.*]] = alloca [3 x i32], align 4 |
12 |
| -; CHECK-NEXT: [[EE2_EXTRACT:%.*]] = extractelement <3 x i32> [[V2]], i64 0 |
13 |
| -; CHECK-NEXT: [[EE2_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 0 |
14 |
| -; CHECK-NEXT: store i32 [[EE2_EXTRACT]], ptr [[EE2_INDEX]], align 4 |
15 |
| -; CHECK-NEXT: [[EE2_EXTRACT10:%.*]] = extractelement <3 x i32> [[V2]], i64 1 |
16 |
| -; CHECK-NEXT: [[EE2_INDEX11:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 1 |
17 |
| -; CHECK-NEXT: store i32 [[EE2_EXTRACT10]], ptr [[EE2_INDEX11]], align 4 |
18 |
| -; CHECK-NEXT: [[EE2_EXTRACT12:%.*]] = extractelement <3 x i32> [[V2]], i64 2 |
19 |
| -; CHECK-NEXT: [[EE2_INDEX13:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 2 |
20 |
| -; CHECK-NEXT: store i32 [[EE2_EXTRACT12]], ptr [[EE2_INDEX13]], align 4 |
21 |
| -; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V1]], i64 0 |
| 80 | +; CHECK-NEXT: [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0 |
22 | 81 | ; CHECK-NEXT: [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
|
23 | 82 | ; CHECK-NEXT: store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
|
24 |
| -; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V1]], i64 1 |
| 83 | +; CHECK-NEXT: [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1 |
25 | 84 | ; CHECK-NEXT: [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
|
26 | 85 | ; CHECK-NEXT: store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
|
27 |
| -; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V1]], i64 2 |
| 86 | +; CHECK-NEXT: [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2 |
28 | 87 | ; CHECK-NEXT: [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
|
29 | 88 | ; CHECK-NEXT: store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
|
30 |
| -; CHECK-NEXT: br label %[[BODY:.*]] |
31 |
| -; CHECK: [[BODY]]: |
32 | 89 | ; CHECK-NEXT: [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
|
33 | 90 | ; CHECK-NEXT: [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
|
34 | 91 | ; CHECK-NEXT: [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
|
| 92 | +; CHECK-NEXT: [[IE1_LOAD1:%.*]] = load i32, ptr [[IE1_DYNINDEX]], align 4 |
35 | 93 | ; CHECK-NEXT: store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
|
36 | 94 | ; CHECK-NEXT: [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
|
37 | 95 | ; CHECK-NEXT: [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
|
38 | 96 | ; CHECK-NEXT: [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
|
39 | 97 | ; CHECK-NEXT: [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
|
40 | 98 | ; CHECK-NEXT: [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
|
41 | 99 | ; CHECK-NEXT: [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
|
42 |
| -; CHECK-NEXT: [[EE2_INDEX14:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE2_ALLOCA]], i32 0, i32 [[J]] |
43 |
| -; CHECK-NEXT: [[EE2_LOAD:%.*]] = load i32, ptr [[EE2_INDEX14]], align 4 |
| 100 | +; CHECK-NEXT: store i32 [[IE1_LOAD1]], ptr [[IE1_DYNINDEX]], align 4 |
44 | 101 | ; CHECK-NEXT: ret void
|
45 | 102 | ;
|
46 |
| - %al = alloca [3 x i32], align 4 |
47 |
| - br label %body |
48 |
| -body: |
49 |
| - %ee1 = extractelement <3 x i32> %v1, i32 %i |
50 |
| - %ie1 = insertelement <3 x i32> %v1, i32 %a, i32 %i |
51 |
| - %ee2 = extractelement <3 x i32> %v2, i32 %j |
| 103 | + %ee1 = extractelement <3 x i32> %v, i32 %i |
| 104 | + %ie1 = insertelement <3 x i32> %v, i32 %a, i32 %i |
52 | 105 | ret void
|
53 | 106 | }
|
54 | 107 |
|
@@ -90,13 +143,15 @@ define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
|
90 | 143 | ; CHECK-NEXT: [[IE_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 2
|
91 | 144 | ; CHECK-NEXT: store i32 [[IE_EXTRACT3]], ptr [[IE_INDEX4]], align 4
|
92 | 145 | ; CHECK-NEXT: [[IE_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 [[I]]
|
| 146 | +; CHECK-NEXT: [[IE_LOAD1:%.*]] = load i32, ptr [[IE_DYNINDEX]], align 4 |
93 | 147 | ; CHECK-NEXT: store i32 [[A]], ptr [[IE_DYNINDEX]], align 4
|
94 | 148 | ; CHECK-NEXT: [[IE_LOAD:%.*]] = load i32, ptr [[IE_INDEX]], align 4
|
95 | 149 | ; CHECK-NEXT: [[IE_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD]], i32 0
|
96 | 150 | ; CHECK-NEXT: [[IE_LOAD5:%.*]] = load i32, ptr [[IE_INDEX2]], align 4
|
97 | 151 | ; CHECK-NEXT: [[IE_INSERT6:%.*]] = insertelement <3 x i32> [[IE_INSERT]], i32 [[IE_LOAD5]], i32 1
|
98 | 152 | ; CHECK-NEXT: [[IE_LOAD7:%.*]] = load i32, ptr [[IE_INDEX4]], align 4
|
99 | 153 | ; CHECK-NEXT: [[IE_INSERT8:%.*]] = insertelement <3 x i32> [[IE_INSERT6]], i32 [[IE_LOAD7]], i32 2
|
| 154 | +; CHECK-NEXT: store i32 [[IE_LOAD1]], ptr [[IE_DYNINDEX]], align 4 |
100 | 155 | ; CHECK-NEXT: ret <3 x i32> [[IE_INSERT8]]
|
101 | 156 | ;
|
102 | 157 | %ie = insertelement <3 x i32> %v, i32 %a, i32 %i
|
|
0 commit comments