Skip to content

Commit f72683a

Browse files
aratajew and igcbot
authored and committed
Legalize vector fneg instructions
IGC's legalization phase was not able to properly legalize `fneg` instructions that operate on a vector type. This issue has been exposed while merging commit f8f17a0 which has disabled scalarization of vector `fneg` instructions in `ScalarizeFunction` pass for `optnone` functions. `ScalarizeFunction` pass is not intended to be a legalization pass. It's rather treated as an optimization pass, so whatever gets disabled in its implementation, shouldn't affect the final kernel correctness. `ScalarizerCodeGen` pass is a pass that runs during legalization phase and it's specifically designed to scalarize instructions that cannot be properly handled by `EmitVISAPass` if they are in a vector form. This change extends the capabilities of `ScalarizerCodeGen`, so that it can properly scalarize `fneg` vector instructions. This change also fixes a bug introduced in f8f17a0 which sometimes resulted in creating an `fneg` instruction with `undef` values.
1 parent 4823ae5 commit f72683a

File tree

5 files changed

+98
-1
lines changed

5 files changed

+98
-1
lines changed

IGC/Compiler/CISACodeGen/ScalarizerCodeGen.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,29 @@ void ScalarizerCodeGen::visitCastInst(llvm::CastInst& I)
152152
}
153153
}
154154
}
155+
156+
#if LLVM_VERSION_MAJOR >= 10
157+
// Scalarizes a vector `fneg`: every lane of the operand is extracted, negated
// as a scalar, and inserted into a result vector that then replaces the
// original instruction. Instructions whose result is not a fixed vector are
// left untouched.
//
// I - the visited fneg instruction; it is erased when it gets scalarized.
void ScalarizerCodeGen::visitFNeg(llvm::UnaryOperator& I)
{
    // dyn_cast doubles as the "is this a vector?" test and avoids asserting
    // on a vector type that is not a FixedVectorType.
    IGCLLVM::FixedVectorType* InstType = dyn_cast<IGCLLVM::FixedVectorType>(I.getType());
    if (!InstType)
        return;

    const unsigned NumElements = int_cast<unsigned>(InstType->getNumElements());
    Value* Src = I.getOperand(0);
    // Emit the scalar sequence right before the vector instruction it replaces.
    m_builder->SetInsertPoint(&I);

    // Build the result lane by lane, starting from an undef vector.
    Value* LastOp = UndefValue::get(InstType);
    for (unsigned Idx = 0; Idx < NumElements; Idx++)
    {
        Value* ConstIndex = ConstantInt::get(m_builder->getInt32Ty(), Idx);
        Value* EESrc0 = m_builder->CreateExtractElement(Src, ConstIndex);
        // Copy the fast-math flags of the original vector fneg onto each
        // scalar fneg instead of silently dropping them.
        Value* NewFNegInst = m_builder->CreateFNegFMF(EESrc0, &I);
        LastOp = m_builder->CreateInsertElement(LastOp, NewFNegInst, ConstIndex);
    }

    // Now replace all the instruction users with the newly created instruction
    I.replaceAllUsesWith(LastOp);
    I.eraseFromParent();
}
180+
#endif

IGC/Compiler/CISACodeGen/ScalarizerCodeGen.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ namespace IGC
3131
virtual bool runOnFunction(llvm::Function& F);
3232
void visitBinaryOperator(llvm::BinaryOperator& I);
3333
void visitCastInst(llvm::CastInst& I);
34+
#if LLVM_VERSION_MAJOR >= 10
35+
void visitFNeg(llvm::UnaryOperator& I);
36+
#endif
3437

3538
private:
3639
llvm::IRBuilder<>* m_builder = nullptr;

IGC/Compiler/Optimizer/Scalarizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ void ScalarizeFunction::scalarizeInstruction(UnaryOperator* UI)
426426
IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(UI->getType());
427427
// Do not apply if optnone function
428428
if (UI->getFunction()->hasOptNone())
429-
return;
429+
return recoverNonScalarizableInst(UI);
430430

431431
// Only need handling for vector binary ops
432432
if (!instType) return;
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt %s -S -o - --igc-scalarize | FileCheck %s
11+
12+
; Function Attrs: noinline optnone
13+
; Test body for the optnone case: the expected output expands the shufflevector
; into extractelement/insertelement instructions, while the fneg stays a
; <3 x float> vector instruction that consumes the rebuilt vector (not undef)
; and feeds the store.
; Note: the expected output extracts all four lanes of %src but only inserts
; the first three into the <3 x float> value.
define void @test_fneg_optnone(<4 x float> %src, <3 x float> addrspace(1)* %out) #0 {
14+
15+
; CHECK-LABEL: @test_fneg_optnone(
16+
;
17+
; CHECK: [[EE0:%.*]] = extractelement <4 x float> %src, i32 0
18+
; CHECK: [[EE1:%.*]] = extractelement <4 x float> %src, i32 1
19+
; CHECK: [[EE2:%.*]] = extractelement <4 x float> %src, i32 2
20+
; CHECK: [[EE3:%.*]] = extractelement <4 x float> %src, i32 3
21+
; CHECK: [[IE0:%.*]] = insertelement <3 x float> undef, float [[EE0]], i32 0
22+
; CHECK: [[IE1:%.*]] = insertelement <3 x float> [[IE0]], float [[EE1]], i32 1
23+
; CHECK: [[IE2:%.*]] = insertelement <3 x float> [[IE1]], float [[EE2]], i32 2
24+
; CHECK: [[FNEG:%.*]] = fneg <3 x float> [[IE2]]
25+
; CHECK: store <3 x float> [[FNEG]], <3 x float> addrspace(1)* %out, align 4
26+
27+
; CHECK-NOT: fneg <3 x float> undef
28+
29+
%1 = shufflevector <4 x float> %src, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
30+
%2 = fneg <3 x float> %1
31+
store <3 x float> %2, <3 x float> addrspace(1)* %out, align 4
32+
ret void
33+
}
34+
35+
attributes #0 = { noinline optnone }
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt %s -S -o - --igc-scalarizer-in-codegen | FileCheck %s
11+
; ------------------------------------------------
12+
; ScalarizerCodeGen
13+
; ------------------------------------------------
14+
15+
; Test body for vector fneg scalarization: the expected output replaces the
; <3 x float> fneg with, per lane, an extractelement from %src, a scalar fneg,
; and an insertelement into the result chain; the last insertelement is the
; value that gets stored.
define void @test_fneg_scalarization(<3 x float> %src, <3 x float> addrspace(1)* %out) {
16+
17+
; CHECK-LABEL: @test_fneg_scalarization(
18+
;
19+
; CHECK: [[EE0:%.*]] = extractelement <3 x float> %src, i32 0
20+
; CHECK: [[FNEG0:%.*]] = fneg float [[EE0]]
21+
; CHECK: [[IE0:%.*]] = insertelement <3 x float> undef, float [[FNEG0]], i32 0
22+
; CHECK: [[EE1:%.*]] = extractelement <3 x float> %src, i32 1
23+
; CHECK: [[FNEG1:%.*]] = fneg float [[EE1]]
24+
; CHECK: [[IE1:%.*]] = insertelement <3 x float> [[IE0]], float [[FNEG1]], i32 1
25+
; CHECK: [[EE2:%.*]] = extractelement <3 x float> %src, i32 2
26+
; CHECK: [[FNEG2:%.*]] = fneg float [[EE2]]
27+
; CHECK: [[IE2:%.*]] = insertelement <3 x float> [[IE1]], float [[FNEG2]], i32 2
28+
; CHECK: store <3 x float> [[IE2]], <3 x float> addrspace(1)* %out, align 4
29+
30+
%1 = fneg <3 x float> %src
31+
store <3 x float> %1, <3 x float> addrspace(1)* %out, align 4
32+
ret void
33+
}

0 commit comments

Comments
 (0)