Skip to content

Commit 3a468ec

Browse files
vmustya authored and igcbot committed
Emit null register sources for typed lsc operations in VC
When some of the texture coordinates of a typed LSC message are equal to zero, the register sources for those coordinates can be omitted to reduce register pressure.
1 parent a034a5a commit 3a468ec

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ class GenXPatternMatch : public FunctionPass,
202202
bool simplifyCmp(CmpInst *Cmp);
203203
CmpInst *reduceCmpWidth(CmpInst *Cmp);
204204
bool simplifyNullDst(CallInst *Inst);
205+
bool simplifyNullSrc(CallInst *Inst);
205206
bool simplifyDpasNullSrc(CallInst *Inst);
206207
// Transform logic operation with a mask from <N x iM> to <N/(32/M) x i32>
207208
bool extendMask(BinaryOperator *BO);
@@ -819,7 +820,7 @@ void GenXPatternMatch::visitBinaryOperator(BinaryOperator &I) {
819820
}
820821

821822
void GenXPatternMatch::visitCallInst(CallInst &I) {
822-
if (I.use_empty())
823+
if (I.use_empty() && !I.getType()->isVoidTy())
823824
return;
824825

825826
auto IID = vc::getAnyIntrinsicID(&I);
@@ -864,6 +865,15 @@ void GenXPatternMatch::visitCallInst(CallInst &I) {
864865
case GenXIntrinsic::genx_uutrunc_sat:
865866
Changed |= simplifyTruncSat(&I);
866867
break;
868+
869+
case vc::InternalIntrinsic::lsc_load_quad_tgm:
870+
Changed |= simplifyNullDst(&I);
871+
LLVM_FALLTHROUGH;
872+
case vc::InternalIntrinsic::lsc_prefetch_quad_tgm:
873+
case vc::InternalIntrinsic::lsc_store_quad_tgm:
874+
Changed |= simplifyNullSrc(&I);
875+
break;
876+
867877
case vc::InternalIntrinsic::lsc_atomic_ugm:
868878
case vc::InternalIntrinsic::lsc_load_ugm:
869879
case vc::InternalIntrinsic::lsc_load_quad_ugm:
@@ -883,7 +893,6 @@ void GenXPatternMatch::visitCallInst(CallInst &I) {
883893
case vc::InternalIntrinsic::lsc_load_quad_slm:
884894
case vc::InternalIntrinsic::lsc_store_slm:
885895
case vc::InternalIntrinsic::lsc_store_quad_slm:
886-
case vc::InternalIntrinsic::lsc_load_quad_tgm:
887896
case GenXIntrinsic::genx_dword_atomic_fadd:
888897
case GenXIntrinsic::genx_dword_atomic_fsub:
889898
case GenXIntrinsic::genx_dword_atomic_add:
@@ -4285,6 +4294,31 @@ bool GenXPatternMatch::simplifyNullDst(CallInst *Inst) {
42854294
return false;
42864295
}
42874296

4297+
// Replaces null-valued constant operands of the call with undef wherever the
// intrinsic's argument descriptor reports the operand as raw and null-allowed.
// Per the commit description, this lets the register sources of zero texture
// coordinates in typed LSC messages be omitted to reduce register pressure.
//
// Inst: the call to inspect; its operands are modified in place.
// Returns true if at least one operand was replaced, false otherwise
// (including when isInternalMemoryIntrinsic(Inst) is false).
bool GenXPatternMatch::simplifyNullSrc(CallInst *Inst) {
4298+
if (!vc::InternalIntrinsic::isInternalMemoryIntrinsic(Inst))
4299+
return false;
4300+
4301+
bool Changed = false;
4302+
4303+
auto IID = vc::getAnyIntrinsicID(Inst);
4304+
GenXIntrinsicInfo Info(IID);
4305+
4306+
for (unsigned I = 0; I < Inst->arg_size(); ++I) {
4307+
// Only operands that are constants with a null value are candidates.
auto *Arg = dyn_cast<Constant>(Inst->getArgOperand(I));
4308+
if (!Arg || !Arg->isNullValue())
4309+
continue;
4310+
4311+
// Skip operands the intrinsic descriptor does not mark as both raw and
// null-allowed.
auto ArgInfo = Info.getArgInfo(I);
4312+
if (!ArgInfo.isRaw() || !ArgInfo.isNullAllowed())
4313+
continue;
4314+
4315+
// Swap the null constant for undef of the same type.
// NOTE(review): presumably a later codegen stage emits a null register for
// the undef operand - confirm against the code generator.
Inst->setArgOperand(I, UndefValue::get(Arg->getType()));
4316+
Changed = true;
4317+
}
4318+
4319+
return Changed;
4320+
}
4321+
42884322
bool GenXPatternMatch::simplifyDpasNullSrc(CallInst *Inst) {
42894323
if (ST->hasFusedEU())
42904324
return false;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: %opt %use_old_pass_manager% -GenXPatternMatch -march=genx64 -mcpu=Xe2 -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s
10+
11+
declare <64 x float> @llvm.vc.internal.lsc.load.quad.tgm.v64f32.v32i1.v2i8.v32i32(<32 x i1>, <2 x i8>, i8, i32, <32 x i32>, <32 x i32>, <32 x i32>, <32 x i32>, <64 x float>)
12+
declare void @llvm.vc.internal.lsc.store.quad.tgm.v32i1.v2i8.v32i32.v64f32(<32 x i1>, <2 x i8>, i8, i32, <32 x i32>, <32 x i32>, <32 x i32>, <32 x i32>, <64 x float>)
13+
declare void @llvm.vc.internal.lsc.prefetch.quad.tgm.v32i1.v2i8.v32i32(<32 x i1>, <2 x i8>, i8, i32, <32 x i32>, <32 x i32>, <32 x i32>, <32 x i32>)
14+
15+
; Exercises GenXPatternMatch on typed LSC quad prefetch, load and store calls.
; In each call the two <32 x i32> zeroinitializer operands are expected to
; become undef, while the %u and %v operands and the <2 x i8> zeroinitializer
; operand are expected to stay unchanged.
define void @test(i32 %bti, <32 x i32> %u, <32 x i32> %v) {
16+
; CHECK: tail call void @llvm.vc.internal.lsc.prefetch.quad.tgm.v32i1.v2i8.v32i32(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 3, i32 %bti, <32 x i32> %u, <32 x i32> %v, <32 x i32> undef, <32 x i32> undef)
17+
tail call void @llvm.vc.internal.lsc.prefetch.quad.tgm.v32i1.v2i8.v32i32(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 3, i32 %bti, <32 x i32> %u, <32 x i32> %v, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
18+
19+
; CHECK: %load = tail call <64 x float> @llvm.vc.internal.lsc.load.quad.tgm.v64f32.v32i1.v2i8.v32i32(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 3, i32 %bti, <32 x i32> %u, <32 x i32> %v, <32 x i32> undef, <32 x i32> undef, <64 x float> undef)
20+
%load = tail call <64 x float> @llvm.vc.internal.lsc.load.quad.tgm.v64f32.v32i1.v2i8.v32i32(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 3, i32 %bti, <32 x i32> %u, <32 x i32> %v, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <64 x float> undef)
21+
22+
; CHECK: tail call void @llvm.vc.internal.lsc.store.quad.tgm.v32i1.v2i8.v32i32.v64f32(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 3, i32 %bti, <32 x i32> %u, <32 x i32> %v, <32 x i32> undef, <32 x i32> undef, <64 x float> %load)
23+
tail call void @llvm.vc.internal.lsc.store.quad.tgm.v32i1.v2i8.v32i32.v64f32(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 3, i32 %bti, <32 x i32> %u, <32 x i32> %v, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <64 x float> %load)
24+
25+
ret void
26+
}

0 commit comments

Comments
 (0)