Skip to content

Commit 09dd7ec

Browse files
igorban-inteligcbot
authored andcommitted
llvm-16: Implement lost functionality from instcombine
Add two additional peepholes in GenXSimplify:
  %1 = bitcast <32 x i1> %in1 to i32
  %2 = bitcast <32 x i1> %in2 to i32
  %binop = binop i32 %1, %2
  %3 = bitcast i32 %binop to <32 x i1>
And
  %1 = trunc <32 x i16> %in1 to <32 x i1>
  %2 = trunc <32 x i16> %in2 to <32 x i1>
  %binop = binop <32 x i1> %1, %2
  %3 = zext <32 x i1> %binop to <32 x i16>
To:
  %binop = binop <32 x i16> %in1, %in2
1 parent 3bae383 commit 09dd7ec

File tree

2 files changed

+120
-3
lines changed

2 files changed

+120
-3
lines changed

IGC/VectorCompiler/lib/GenXOpts/CMAnalysis/InstructionSimplifyGenX.cpp

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -183,13 +183,70 @@ static Value *simplifyMulDDQ(BinaryOperator &Mul) {
183183
return Result;
184184
}
185185

186+
/// Checks whether \p BC is a bitcast that the binary-operator peephole may
/// look through: the source must be a vector with integer lanes and the
/// destination must be a scalar integer.
///
/// Integer lanes are required because the peephole re-creates the binary
/// operation directly on the bitcast's source type, and the bitwise
/// operations are only defined for integers and vectors of integers.
///
/// \param BC bitcast instruction to inspect (must not be null).
/// \returns true when \p BC converts an integer vector to a scalar integer.
static inline bool isBitcastFits(BitCastInst *BC) {
  Type *SrcTy = BC->getSrcTy();
  return SrcTy->isVectorTy() && SrcTy->getScalarType()->isIntegerTy() &&
         BC->getDestTy()->isIntegerTy();
}
189+
190+
/// Checks whether \p TC is a vector-to-vector truncation whose result lanes
/// are single-bit integers (i1).
///
/// \param TC trunc instruction to inspect (must not be null).
/// \returns true when both the source and destination types are vectors and
///          the destination element type is i1.
static inline bool isTrunkFits(TruncInst *TC) {
  Type *From = TC->getSrcTy();
  Type *To = TC->getDestTy();
  if (!From->isVectorTy() || !To->isVectorTy())
    return false;
  return To->getScalarType()->isIntegerTy(1);
}
195+
196+
/// Peephole that removes cast round-trips around a bitwise binary operation.
///
/// Two patterns are handled, one after the other:
///
///  1. bitcast(vec -> int), bitcast(vec -> int), binop, bitcast(int -> vec)
///     The users of the final bitcast are redirected to a new binop built on
///     the original vectors.
///  2. trunc(vec -> <N x i1>), trunc(vec -> <N x i1>), binop, zext(-> vec)
///     The users of the zext are redirected to a new binop built on the
///     original wide vectors.
///
/// When the first pattern fires, the second one is tried on the newly created
/// instruction, so a bitcast chain stacked on top of a trunc/zext chain is
/// collapsed in a single call.
///
/// The replaced casts and the old binop are not erased here; they are left
/// for a later dead-code cleanup.
///
/// \param BinOp binary operator to inspect (must not be null).
/// \returns true if any uses were rewritten.
static inline bool simplifyBinOp(BinaryOperator *BinOp) {
  // Only and/or/xor act on every bit independently, so only they can be moved
  // across a bitcast or a trunc without changing per-lane results. Arithmetic
  // and shift opcodes propagate information between bits (carries, shifted-in
  // bits) and must not be rewritten.
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    break;
  default:
    return false;
  }

  bool Changed = false;

  auto *Bitcast1 = dyn_cast<BitCastInst>(BinOp->getOperand(0));
  auto *Bitcast2 = dyn_cast<BitCastInst>(BinOp->getOperand(1));
  if (Bitcast1 && Bitcast2 && Bitcast1->getSrcTy() == Bitcast2->getSrcTy() &&
      isBitcastFits(Bitcast1) && isBitcastFits(Bitcast2)) {
    Type *VecTy = Bitcast1->getSrcTy();
    for (auto *User : BinOp->users()) {
      auto *BitcastBack = dyn_cast<BitCastInst>(User);
      // The bitcast back must restore exactly the original vector type;
      // otherwise the new value cannot legally replace it.
      if (!BitcastBack || BitcastBack->getDestTy() != VecTy)
        continue;

      IRBuilder<> Builder(BinOp);
      Value *NewOp =
          Builder.CreateBinOp(BinOp->getOpcode(), Bitcast1->getOperand(0),
                              Bitcast2->getOperand(0));
      BitcastBack->replaceAllUsesWith(NewOp);
      Changed = true;

      // IRBuilder may fold the operation to a constant; in that case there is
      // no instruction left to examine for the trunc/zext pattern.
      auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp);
      if (!NewBinOp)
        return Changed;
      BinOp = NewBinOp;
      break;
    }
  }

  auto *Trunc1 = dyn_cast<TruncInst>(BinOp->getOperand(0));
  auto *Trunc2 = dyn_cast<TruncInst>(BinOp->getOperand(1));
  if (Trunc1 && Trunc2 && Trunc1->getSrcTy() == Trunc2->getSrcTy() &&
      isTrunkFits(Trunc1) && isTrunkFits(Trunc2)) {
    Type *WideTy = Trunc1->getSrcTy();
    for (auto *User : BinOp->users()) {
      auto *ZextInst = dyn_cast<ZExtInst>(User);
      // The zext must widen back to exactly the pre-trunc type; otherwise the
      // new value cannot legally replace it.
      if (!ZextInst || ZextInst->getDestTy() != WideTy)
        continue;

      // NOTE(review): zext(trunc(a) op trunc(b)) keeps only bit 0 of every
      // lane, while the widened operation keeps all bits. The two agree only
      // when the upper bits of both inputs are zero, which is not checked
      // here - confirm the producers guarantee it, or mask the result.
      IRBuilder<> Builder(BinOp);
      Value *NewOp =
          Builder.CreateBinOp(BinOp->getOpcode(), Trunc1->getOperand(0),
                              Trunc2->getOperand(0));
      ZextInst->replaceAllUsesWith(NewOp);
      Changed = true;
      break;
    }
  }
  return Changed;
}
242+
186243
/// Entry point for GenX-specific single-instruction simplifications.
///
/// \param Inst instruction to simplify (asserted to be non-null).
/// \returns the result of simplifyMulDDQ for Mul instructions, or nullptr when
///          peepholes are disabled via GenXEnablePeepholes or the instruction
///          is not a Mul.
static Value *GenXSimplifyInstruction(llvm::Instruction *Inst) {
  IGC_ASSERT(Inst);
  if (!GenXEnablePeepholes)
    return nullptr;

  // Mul is the only opcode with a dedicated simplification here.
  if (Inst->getOpcode() != Instruction::Mul)
    return nullptr;

  return simplifyMulDDQ(*cast<BinaryOperator>(Inst));
}
195252

@@ -506,6 +563,8 @@ bool GenXSimplify::runOnFunction(Function &F) {
506563
Changed |= replaceWithNewValue(*Inst, *V);
507564
continue;
508565
}
566+
if (auto *BO = dyn_cast<BinaryOperator>(Inst))
567+
Changed |= simplifyBinOp(BO);
509568
}
510569
}
511570

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: %opt_new_pm_typed -passes=GenXSimplify,dce -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Gen9 -S < %s | FileCheck %s
10+
; RUN: %opt_new_pm_opaque -passes=GenXSimplify,dce -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Gen9 -S < %s | FileCheck %s
11+
;
12+
; ------------------------------------------------
13+
; GenXSimplify
14+
; ------------------------------------------------
15+
; The test verifies that GenXSimplify implements the functionality from
16+
; LLVM-14 instcombine (the functionality was lost in LLVM-16).
17+
; ------------------------------------------------
18+
19+
declare <32 x i16> @llvm.genx.wrregioni.v32i16.v17i16.i16.i1(<32 x i16>, <17 x i16>, i32, i32, i32, i16, i32, i1)
20+
21+
declare <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16>, i32, i32, i32, i16, i32)
22+
23+
define spir_kernel void @test_binops(<17 x i16> %0) {
24+
entry:
; Build two <32 x i16> inputs with wrregion, truncate each to a <32 x i1>
; mask and reinterpret every mask as an i32.
25+
%wrregion.i.i.i116 = call <32 x i16> @llvm.genx.wrregioni.v32i16.v17i16.i16.i1(<32 x i16> zeroinitializer, <17 x i16> %0, i32 0, i32 17, i32 1, i16 0, i32 undef, i1 true)
26+
%1 = trunc <32 x i16> %wrregion.i.i.i116 to <32 x i1>
27+
%call1.i.i.i.i117 = bitcast <32 x i1> %1 to i32
28+
29+
%wrregion.i.i.i = call <32 x i16> @llvm.genx.wrregioni.v32i16.v17i16.i16.i1(<32 x i16> zeroinitializer, <17 x i16> %0, i32 0, i32 17, i32 1, i16 0, i32 undef, i1 true)
30+
%2 = trunc <32 x i16> %wrregion.i.i.i to <32 x i1>
31+
%call1.i.i.i.i = bitcast <32 x i1> %2 to i32
32+
; Bitwise operations performed on the i32 views of the two masks.
33+
%and.i.i = and i32 %call1.i.i.i.i, %call1.i.i.i.i117
34+
%or.i.i = or i32 %call1.i.i.i.i, %call1.i.i.i.i117
35+
%xor.i.i = xor i32 %call1.i.i.i.i, %call1.i.i.i.i117
36+
; Convert each result back to <32 x i1>, zero-extend it to <32 x i16> and
; pass it to rdregion.
37+
%3 = bitcast i32 %and.i.i to <32 x i1>
38+
%call.i.i73 = zext <32 x i1> %3 to <32 x i16>
39+
%rdr.i.i74 = call <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16> %call.i.i73, i32 0, i32 17, i32 1, i16 0, i32 undef)
40+
%4 = bitcast i32 %or.i.i to <32 x i1>
41+
%call.i.i66 = zext <32 x i1> %4 to <32 x i16>
42+
%rdr.i.i67 = call <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16> %call.i.i66, i32 0, i32 17, i32 1, i16 0, i32 undef)
43+
; Expected output: each operation acts directly on the two <32 x i16>
; wrregion results and its value is what rdregion consumes.
44+
; CHECK: %[[WRREG1:[^ ]+]] = call <32 x i16> @llvm.genx.wrregioni.v32i16.v17i16.i16.i1(<32 x i16> zeroinitializer, <17 x i16> {{.*}}, i32 0, i32 17, i32 1, i16 0, i32 undef, i1 true)
45+
; CHECK: %[[WRREG2:[^ ]+]] = call <32 x i16> @llvm.genx.wrregioni.v32i16.v17i16.i16.i1(<32 x i16> zeroinitializer, <17 x i16> {{.*}}, i32 0, i32 17, i32 1, i16 0, i32 undef, i1 true)
46+
; CHECK: %[[AND:[^ ]+]] = and <32 x i16> %[[WRREG2]], %[[WRREG1]]
47+
; CHECK: %[[OR:[^ ]+]] = or <32 x i16> %[[WRREG2]], %[[WRREG1]]
48+
; CHECK: %[[XOR:[^ ]+]] = xor <32 x i16> %[[WRREG2]], %[[WRREG1]]
49+
; CHECK: {{.*}} = call <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16> %[[AND]], i32 0, i32 17, i32 1, i16 0, i32 undef)
50+
; CHECK: {{.*}} = call <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16> %[[OR]], i32 0, i32 17, i32 1, i16 0, i32 undef)
51+
; CHECK: {{.*}} = call <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16> %[[XOR]], i32 0, i32 17, i32 1, i16 0, i32 undef)
52+
53+
; The xor result goes through the same bitcast/zext/rdregion chain.
54+
%5 = bitcast i32 %xor.i.i to <32 x i1>
55+
%call.i.i59 = zext <32 x i1> %5 to <32 x i16>
56+
%rdr.i.i60 = call <17 x i16> @llvm.genx.rdregioni.v17i16.v32i16.i16(<32 x i16> %call.i.i59, i32 0, i32 17, i32 1, i16 0, i32 undef)
57+
ret void
58+
}

0 commit comments

Comments
 (0)