Skip to content

Commit 46c11fc

Browse files
esukhov authored and igcbot committed
Rematerialization pass now supports CMP instructions
Rematerialization pass now supports CMP instructions
1 parent 391a1da commit 46c11fc

File tree

3 files changed

+151
-2
lines changed

3 files changed

+151
-2
lines changed

IGC/Compiler/CISACodeGen/RematAddressArithmetic.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ bool CloneAddressArithmetic::rematerialize(RematSet &ToProcess, unsigned int Flo
408408
for (auto El : ToProcess) {
409409

410410
PRINT_LOG("rematerialize: ");
411-
PRINT_INST(El);
411+
PRINT_INST_NL(El);
412412

413413
Value *V = El;
414414
llvm::SmallVector<llvm::Use *, 8> VectorOfUses;
@@ -560,9 +560,14 @@ void CloneAddressArithmetic::collectInstToProcess(RematSet &ToProcess, Function
560560
bool IsLoad = llvm::isa<LoadInst>(I);
561561
bool IsStore = llvm::isa<StoreInst>(I);
562562
bool IsCall = llvm::isa<CallInst>(I);
563+
bool IsCmp = llvm::isa<CmpInst>(I);
563564

564-
if (!IsLoad && !IsStore && !IsCall)
565+
if (!IsLoad && !IsStore && !IsCall && !IsCmp)
565566
continue;
567+
if (IsCmp && IGC_IS_FLAG_ENABLED(RematDataAllowCMP)) {
568+
ToProcess.insert(static_cast<Instruction *>(&I));
569+
continue;
570+
}
566571

567572
llvm::Value *V =
568573
IsLoad ? static_cast<LoadInst *>(&I)->getPointerOperand() : static_cast<StoreInst *>(&I)->getPointerOperand();
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys
10+
; RUN: igc_opt --typed-pointers %s -S -o - -igc-clone-address-arithmetic --regkey=RematFlowThreshold=20 --regkey=RematRPELimit=0 --dce | FileCheck %s
11+
12+
; Function Attrs: convergent nounwind null_pointer_is_valid
13+
define spir_kernel void @widget(half addrspace(1)* align 2 %arg, half addrspace(1)* align 2 %arg1, half addrspace(1)* align 2 %arg2, float addrspace(1)* nocapture align 4 %arg3, i8 addrspace(1)* nocapture readonly align 1 %arg4, i32 addrspace(1)* nocapture readonly align 4 %arg5, i8 addrspace(1)* nocapture readonly align 1 %arg6, i32 addrspace(1)* nocapture readonly align 4 %arg7, half addrspace(1)* align 2 %arg8, <8 x i32> %arg9, i16 %arg10) #0 {
14+
bb:
15+
%tmp = extractelement <8 x i32> %arg9, i64 1
16+
%tmp11 = extractelement <8 x i32> %arg9, i64 6
17+
%tmp12 = icmp slt i32 %tmp11, 0
18+
%tmp14 = add i32 %tmp11, 127
19+
%spec.select = select i1 %tmp12, i32 %tmp14, i32 %tmp11
20+
%tmp17 = ashr i32 %spec.select, 7
21+
%tmp18 = shl i32 %tmp17, 7
22+
%tmp19 = sub i32 %tmp11, %tmp18
23+
%tmp20 = mul i32 %tmp17, 25165824
24+
%tmp21 = mul nsw i32 %tmp19, 196608
25+
%tmp22 = add i32 %tmp20, %tmp21
26+
%tmp23 = shl nsw i32 %tmp19, 17
27+
%tmp24 = sext i32 %tmp22 to i64
28+
%tmp25 = ptrtoint half addrspace(1)* %arg to i64
29+
%tmp26 = shl nsw i64 %tmp24, 1
30+
%tmp27 = add i64 %tmp26, %tmp25
31+
%tmp28 = sext i32 %tmp21 to i64
32+
%tmp29 = ptrtoint half addrspace(1)* %arg1 to i64
33+
%tmp30 = shl nsw i64 %tmp28, 1
34+
%tmp31 = add i64 %tmp30, %tmp29
35+
%tmp32 = sext i32 %tmp23 to i64
36+
%tmp33 = ptrtoint half addrspace(1)* %arg2 to i64
37+
%tmp34 = shl nsw i64 %tmp32, 1
38+
%tmp35 = add i64 %tmp34, %tmp33
39+
%tmp36 = shl i32 %tmp, 7
40+
%tmp37 = zext i16 %arg10 to i32
41+
%tmp38 = and i32 %tmp37, 112
42+
%tmp39 = or i32 %tmp38, %tmp36
43+
%tmp40 = or i32 %tmp39, 1
44+
%tmp41 = or i32 %tmp39, 2
45+
%tmp42 = or i32 %tmp39, 4
46+
%tmp43 = or i32 %tmp39, 5
47+
%tmp44 = or i32 %tmp39, 6
48+
%tmp45 = or i32 %tmp39, 7
49+
%tmp48 = and i64 %tmp27, -64
50+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp48, i32 0, i32 1023, i32 383, i32 0, i32 %tmp36, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4)
51+
%tmp51 = and i64 %tmp31, -64
52+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp51, i32 0, i32 1023, i32 383, i32 0, i32 0, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4)
53+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp51, i32 0, i32 1023, i32 383, i32 0, i32 32, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4)
54+
%tmp54 = and i64 %tmp35, -64
55+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp54, i32 255, i32 1023, i32 255, i32 0, i32 0, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4)
56+
br label %bb57
57+
58+
bb57: ; preds = %bb57, %bb
59+
call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 6, i32 0, i1 false)
60+
%tmp58 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0)
61+
%tmp59 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0)
62+
call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 5, i32 0, i1 false)
63+
call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 6, i32 0, i1 false)
64+
%tmp60 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0)
65+
%tmp61 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0)
66+
call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 5, i32 0, i1 false)
67+
call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 6, i32 0, i1 false)
68+
%tmp62 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0)
69+
%tmp63 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0)
70+
call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 5, i32 0, i1 false)
71+
%tmp84 = icmp slt i32 %tmp40, 0
72+
%tmp85 = icmp slt i32 %tmp41, 0
73+
%tmp87 = icmp slt i32 %tmp42, 0
74+
%tmp88 = icmp slt i32 %tmp43, 0
75+
%tmp89 = icmp slt i32 %tmp44, 0
76+
%tmp90 = icmp slt i32 %tmp45, 0
77+
78+
; CHECK: [[REMAT_1:%remat.*]] = or i32 %tmp39, 1
79+
; CHECK: [[CLONED_1:%cloned_.*]] = icmp slt i32 [[REMAT_1]]
80+
; CHECK: = select i1 [[CLONED_1]]
81+
; CHECK: [[REMAT_2:%remat.*]] = or i32 %tmp39, 2
82+
; CHECK: [[CLONED_2:%cloned_.*]] = icmp slt i32 [[REMAT_2]]
83+
; CHECK: = select i1 [[CLONED_2]]
84+
85+
%tmp95 = select i1 %tmp84, float 0xFFF0000000000000, float 0.000000e+00
86+
%tmp96 = select i1 %tmp85, float 0xFFF0000000000000, float 0.000000e+00
87+
%tmp98 = select i1 %tmp87, float 0xFFF0000000000000, float 0.000000e+00
88+
%tmp99 = select i1 %tmp88, float 0xFFF0000000000000, float 0.000000e+00
89+
%tmp100 = select i1 %tmp89, float 0xFFF0000000000000, float 0.000000e+00
90+
%tmp101 = select i1 %tmp90, float 0xFFF0000000000000, float 0.000000e+00
91+
%tmp103 = insertelement <8 x float> <float 0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, float %tmp95, i64 1
92+
%tmp104 = insertelement <8 x float> %tmp103, float %tmp96, i64 2
93+
%tmp105 = insertelement <8 x float> %tmp104, float 0.000000e+00, i64 3
94+
%tmp106 = insertelement <8 x float> %tmp105, float %tmp98, i64 4
95+
%tmp107 = insertelement <8 x float> %tmp106, float %tmp99, i64 5
96+
%tmp108 = insertelement <8 x float> %tmp107, float %tmp100, i64 6
97+
%tmp109 = insertelement <8 x float> %tmp108, float %tmp101, i64 7
98+
%tmp110 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> zeroinitializer, <8 x float> %tmp109)
99+
%tmp111 = extractelement <8 x float> %tmp110, i64 0
100+
%tmp118 = call float @llvm.genx.GenISA.WaveAll.f32(float %tmp111, i8 12, i32 0)
101+
br label %bb57
102+
}
103+
104+
; Function Attrs: convergent inaccessiblememonly nounwind
105+
declare float @llvm.genx.GenISA.WaveAll.f32(float, i8, i32) #1
106+
107+
; Function Attrs: convergent nounwind readnone willreturn
108+
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) #2
109+
110+
; Function Attrs: nounwind
111+
declare void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32) #3
112+
113+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
114+
declare i32 @llvm.umin.i32(i32, i32) #4
115+
116+
; Function Attrs: nounwind readnone speculatable willreturn
117+
declare i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64, i32, i32, i32, i32, i32, i32, i32, i32) #5
118+
119+
; Function Attrs: argmemonly nounwind speculatable willreturn writeonly
120+
declare void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32*, i32, i32, i1) #6
121+
122+
; Function Attrs: nounwind willreturn
123+
declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32) #7
124+
125+
; Function Attrs: nounwind willreturn
126+
declare <32 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v32i32.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32) #7
127+
128+
; Function Attrs: nounwind willreturn
129+
declare <32 x i16> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v32i16.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32) #7
130+
131+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
132+
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #4
133+
134+
attributes #0 = { convergent nounwind null_pointer_is_valid }
135+
attributes #1 = { convergent inaccessiblememonly nounwind }
136+
attributes #2 = { convergent nounwind readnone willreturn }
137+
attributes #3 = { nounwind }
138+
attributes #4 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
139+
attributes #5 = { nounwind readnone speculatable willreturn }
140+
attributes #6 = { argmemonly nounwind speculatable willreturn writeonly }
141+
attributes #7 = { nounwind willreturn }
142+
143+
!igc.functions = !{}

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1545,6 +1545,7 @@ DECLARE_IGC_REGKEY(
15451545
"Confine rematerialization only to variables within the same BB, we won't pull down values from predeccors", false)
15461546
DECLARE_IGC_REGKEY(bool, RematRespectUniformity, false, "Cutoff computation chain on uniform values", false)
15471547
DECLARE_IGC_REGKEY(bool, RematAllowExtractElement, true, "Allow Extract Element to computation chain", false)
1548+
DECLARE_IGC_REGKEY(bool, RematDataAllowCMP, true, "Allow rematerialization of cmp instructions", true)
15481549
DECLARE_IGC_REGKEY(bool, RematReassocBefore, false,
15491550
"Enable short sequence of passes before clone address arithmetic pass to potentially decrese amount "
15501551
"of operations that will be rematerialized",

0 commit comments

Comments
 (0)