Skip to content

Commit 264b1b5

Browse files
ViacheslavRbigcbot
authored and committed
Fixing ConstantCoalescing optimization
Fixing ConstantCoalescing optimization. The fix removes an unreasonable limitation on base address calculations for OCL shaders.
1 parent a9e1ef2 commit 264b1b5

File tree

3 files changed

+35
-41
lines changed

3 files changed

+35
-41
lines changed

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1596,7 +1596,7 @@ bool ConstantCoalescing::DecomposePtrExp(
15961596
int64_t Offset = 0;
15971597
auto *Ptr = getPointerBaseWithConstantOffset(ptr_val, Offset, *dataLayout);
15981598

1599-
if (Ptr == ptr_val || Offset < 0)
1599+
if ((m_ctx->type != ShaderType::OPENCL_SHADER && Ptr == ptr_val) || Offset < 0)
16001600
return false;
16011601

16021602
buf_idxv = Ptr;

IGC/Compiler/tests/ConstantCoalescing/get-base-const-offset-typed-pointers.ll

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
; or a getelementptr instruction.
1818

1919
; Merge
20-
define void @test_merge(float addrspace(2)* %src) {
20+
define void @test_merge(i32 addrspace(2)* %src) {
2121
; CHECK-LABEL: define void @test_merge(
22-
; CHECK-SAME: float addrspace(2)* [[SRC:%.*]]) {
23-
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint float addrspace(2)* [[SRC]] to i64
22+
; CHECK-SAME: i32 addrspace(2)* [[SRC:%.*]]) {
23+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i32 addrspace(2)* [[SRC]] to i64
2424
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP1]] to <2 x i32> addrspace(2)*
2525
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32> addrspace(2)* [[CHUNKPTR]], align 4
2626
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
@@ -29,12 +29,11 @@ define void @test_merge(float addrspace(2)* %src) {
2929
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
3030
; CHECK-NEXT: ret void
3131
;
32-
%1 = bitcast float addrspace(2)* %src to i32 addrspace(2)*
33-
%2 = getelementptr i32, i32 addrspace(2)* %1, i32 1
32+
%1 = getelementptr i32, i32 addrspace(2)* %src, i32 1
33+
%2 = load i32, i32 addrspace(2)* %src
3434
%3 = load i32, i32 addrspace(2)* %1
35-
%4 = load i32, i32 addrspace(2)* %2
35+
call void @use.i32(i32 %2)
3636
call void @use.i32(i32 %3)
37-
call void @use.i32(i32 %4)
3837
ret void
3938
}
4039

@@ -66,14 +65,13 @@ define void @test_nonconst_gep(i32 addrspace(2)* %src, i32 %off) {
6665
; CHECK-LABEL: define void @test_nonconst_gep(
6766
; CHECK-SAME: i32 addrspace(2)* [[SRC:%.*]], i32 [[OFF:%.*]]) {
6867
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32 addrspace(2)* [[SRC]], i32 [[OFF]]
69-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(2)* [[TMP1]], align 4
70-
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32 addrspace(2)* [[TMP1]] to i64
71-
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
72-
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP4]] to <1 x i32> addrspace(2)*
73-
; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i32>, <1 x i32> addrspace(2)* [[CHUNKPTR]], align 4
74-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i32> [[TMP5]], i32 0
75-
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
76-
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
68+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i32 addrspace(2)* [[TMP1]] to i64
69+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP2]] to <2 x i32> addrspace(2)*
70+
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32> addrspace(2)* [[CHUNKPTR]], align 4
71+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
72+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
73+
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
74+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
7775
; CHECK-NEXT: ret void
7876
;
7977
%1 = getelementptr i32, i32 addrspace(2)* %src, i32 %off
@@ -89,7 +87,7 @@ declare void @use.i32(i32)
8987

9088
!igc.functions = !{!0, !4, !5}
9189

92-
!0 = !{void (float addrspace(2)*)* @test_merge, !1}
90+
!0 = !{void (i32 addrspace(2)*)* @test_merge, !1}
9391
!1 = !{!2, !3}
9492
!2 = !{!"function_type", i32 0}
9593
!3 = !{!"implicit_arg_desc"}

IGC/Compiler/tests/ConstantCoalescing/get-base-const-offset.ll

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,16 @@
1616
; which is part of DecomposePtrExp, used to calculate base from bitcast
1717
; or a getelementptr instruction.
1818

19-
; TODO: check, with opaque pointers on, pass doesn't optimize test_merge case
20-
2119
define void @test_merge(ptr addrspace(2) %src) {
2220
; CHECK-LABEL: define void @test_merge(
2321
; CHECK-SAME: ptr addrspace(2) [[SRC:%.*]]) {
24-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[SRC]], align 4
25-
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[SRC]] to i64
26-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 4
27-
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
28-
; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
29-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[TMP4]], i32 0
30-
; CHECK-NEXT: call void @use.i32(i32 [[TMP1]])
31-
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
22+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[SRC]] to i64
23+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(2)
24+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
25+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
26+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
27+
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
28+
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
3229
; CHECK-NEXT: ret void
3330
;
3431
%1 = getelementptr i32, ptr addrspace(2) %src, i32 1
@@ -42,13 +39,13 @@ define void @test_merge(ptr addrspace(2) %src) {
4239
define void @test_vectorize(ptr addrspace(2) %src) {
4340
; CHECK-LABEL: define void @test_vectorize(
4441
; CHECK-SAME: ptr addrspace(2) [[SRC:%.*]]) {
45-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[SRC]], align 4
42+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(2) [[SRC]], i32 2
4643
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[SRC]] to i64
47-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 8
48-
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
49-
; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
50-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[TMP4]], i32 0
51-
; CHECK-NEXT: call void @use.i32(i32 [[TMP1]])
44+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(2)
45+
; CHECK-NEXT: [[TMP3:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
46+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x i32> [[TMP3]], i32 0
47+
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(2) [[TMP1]], align 4
48+
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
5249
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
5350
; CHECK-NEXT: ret void
5451
;
@@ -64,14 +61,13 @@ define void @test_nonconst_gep(ptr addrspace(2) %src, i32 %off) {
6461
; CHECK-LABEL: define void @test_nonconst_gep(
6562
; CHECK-SAME: ptr addrspace(2) [[SRC:%.*]], i32 [[OFF:%.*]]) {
6663
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(2) [[SRC]], i32 [[OFF]]
67-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(2) [[TMP1]], align 4
68-
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(2) [[TMP1]] to i64
69-
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
70-
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(2)
71-
; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
72-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i32> [[TMP5]], i32 0
73-
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
74-
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
64+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[TMP1]] to i64
65+
; CHECK-NEXT: [[CHUNKPTR:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(2)
66+
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(2) [[CHUNKPTR]], align 4
67+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
68+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
69+
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
70+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
7571
; CHECK-NEXT: ret void
7672
;
7773
%1 = getelementptr i32, ptr addrspace(2) %src, i32 %off

0 commit comments

Comments
 (0)