Skip to content

Commit a8f65b1

Browse files
vmustya authored and igcbot committed
Enable denormal support for systolic operations in VC
When the target device supports denormals for DPAS instructions, the compiler should retain them. This change enables denormal support for the following data types:
* bf16 - for Xe2
* tf32 - for Xe2
1 parent ed9da95 commit a8f65b1

File tree

2 files changed

+81
-23
lines changed

2 files changed

+81
-23
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -374,10 +374,11 @@ class GenXKernelBuilder final {
374374
// The default float control from kernel attribute. Each subroutine may
375375
// override this control mask, but it should revert back to the default float
376376
// control mask before exiting from the subroutine.
377-
uint32_t DefaultFloatControl = 0;
377+
uint32_t FloatControlKernel = 0;
378+
uint32_t FloatControlMask = 0;
378379

379-
380-
uint32_t CRMask = 0;
380+
// The hardware-initialization value for the float control register.
381+
static constexpr uint32_t FloatControlDefault = 0x0;
381382

382383
// normally false, set to true if there is any SIMD CF in the func or this is
383384
// (indirectly) called inside any SIMD CF.
@@ -1096,11 +1097,15 @@ bool GenXKernelBuilder::run() {
10961097
StackSurf = Subtarget->stackSurface();
10971098

10981099
using namespace visa;
1099-
CRMask = CRBits::RoundingBitMask | CRBits::DoublePrecisionDenorm |
1100-
CRBits::SinglePrecisionDenorm | CRBits::HalfPrecisionDenorm;
1100+
FloatControlMask = CRBits::DoublePrecisionDenorm |
1101+
CRBits::SinglePrecisionDenorm |
1102+
CRBits::HalfPrecisionDenorm | CRBits::RoundingBitMask;
1103+
FloatControlKernel = CRBits::RTNE;
11011104

1105+
// If the subtarget supports systolic denorm control, retain denormals for the
1106+
// systolic operations.
11021107
if (Subtarget->hasSystolicDenormControl())
1103-
CRMask |= CRBits::SystolicDenorm;
1108+
FloatControlKernel |= CRBits::SystolicDenorm;
11041109

11051110
StackCallExecSize =
11061111
getExecSizeFromValue(BackendConfig->getInteropSubgroupSize());
@@ -1294,27 +1299,32 @@ void GenXKernelBuilder::buildInstructions() {
12941299
beginFunctionLight(Func);
12951300

12961301
// If a float control is specified, emit code to make that happen.
1297-
// Float control contains rounding mode, denorm behaviour and single
1298-
// precision float mode (ALT or IEEE) Relevant bits are already set as
1299-
// defined for VISA control reg in header definition on enums
1302+
// Float control contains rounding mode and denorm behaviour. Relevant bits
1303+
// are already set as defined for VISA control reg in header definition on
1304+
// enums.
1305+
uint32_t FloatControl = FloatControlKernel;
1306+
13001307
if (Func->hasFnAttribute(genx::FunctionMD::CMFloatControl)) {
1301-
uint32_t FloatControl = 0;
13021308
Func->getFnAttribute(genx::FunctionMD::CMFloatControl)
13031309
.getValueAsString()
13041310
.getAsInteger(0, FloatControl);
13051311

1306-
// Clear current float control bits to known zero state
1307-
buildControlRegUpdate(CRMask, true);
1308-
13091312
// Set rounding mode to required state if that isn't zero
1310-
FloatControl &= CRMask;
1311-
if (FloatControl) {
1312-
if (FG->getHead() == Func)
1313-
DefaultFloatControl = FloatControl;
1314-
buildControlRegUpdate(FloatControl, false);
1313+
FloatControl &= FloatControlMask;
1314+
FloatControl |= FloatControlKernel & ~FloatControlMask;
1315+
if (FloatControl != (FloatControlKernel & FloatControlMask) &&
1316+
vc::isKernel(Func)) {
1317+
FloatControlKernel &= ~FloatControlMask;
1318+
FloatControlKernel |= FloatControl;
13151319
}
13161320
}
13171321

1322+
if ((vc::isKernel(Func) && FloatControlKernel != 0) ||
1323+
FloatControl != (FloatControlKernel & FloatControlMask)) {
1324+
buildControlRegUpdate(FloatControlMask, true);
1325+
buildControlRegUpdate(FloatControl, false);
1326+
}
1327+
13181328
// Only output a label for the initial basic block if it is used from
13191329
// somewhere else.
13201330
bool NeedsLabel = !Func->front().use_empty();
@@ -5475,11 +5485,11 @@ void GenXKernelBuilder::buildRet(ReturnInst *RI) {
54755485
F->getFnAttribute(genx::FunctionMD::CMFloatControl)
54765486
.getValueAsString()
54775487
.getAsInteger(0, FloatControl);
5478-
FloatControl &= CRMask;
5479-
if (FloatControl != DefaultFloatControl) {
5480-
buildControlRegUpdate(CRMask, true);
5481-
if (DefaultFloatControl)
5482-
buildControlRegUpdate(DefaultFloatControl, false);
5488+
FloatControl &= FloatControlMask;
5489+
if (FloatControl != (FloatControlKernel & FloatControlMask)) {
5490+
buildControlRegUpdate(FloatControlMask, true);
5491+
if (FloatControlKernel & FloatControlMask)
5492+
buildControlRegUpdate(FloatControlKernel, false);
54835493
}
54845494
if (vc::requiresStackCall(Func)) {
54855495
appendVISACFFunctionRetInst(nullptr, vISA_EMASK_M1, StackCallExecSize);
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
target triple = "genx64-unknown-unknown"
10+
11+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXCategoryWrapper -GenXCisaBuilderPass -GenXFinalizer \
12+
; RUN: -march=genx64 -mtriple=spir64-unknown-unknown -finalizer-opts="-dumpcommonisa -isaasmToConsole" \
13+
; RUN: -mcpu=XeHPC < %s | FileCheck %s --check-prefix=FLUSH
14+
15+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXCategoryWrapper -GenXCisaBuilderPass -GenXFinalizer \
16+
; RUN: -march=genx64 -mtriple=spir64-unknown-unknown -finalizer-opts="-dumpcommonisa -isaasmToConsole" \
17+
; RUN: -mcpu=Xe2 < %s | FileCheck %s --check-prefix=RETAIN
18+
19+
; FLUSH-NOT: and (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0>
20+
; FLUSH-NOT: or (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0>
21+
22+
; RETAIN: and (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0> 0xfffffb0f:ud
23+
; RETAIN: or (M1, 1) %cr0(0,0)<1> %cr0(0,0)<0;1,0> 0x40000000:ud
24+
25+
define dllexport spir_kernel void @the_test(i32 %0, i32 %1) #0 {
26+
ret void
27+
}
28+
29+
attributes #0 = { "CMGenxMain" "CMFloatControl"="0" }
30+
31+
!spirv.Source = !{!0}
32+
!opencl.spir.version = !{!1}
33+
!opencl.ocl.version = !{!0}
34+
!opencl.used.extensions = !{!2}
35+
!opencl.used.optional.core.features = !{!2}
36+
!spirv.Generator = !{!3}
37+
!genx.kernels = !{!4}
38+
!genx.kernel.internal = !{!8}
39+
40+
!0 = !{i32 0, i32 0}
41+
!1 = !{i32 1, i32 2}
42+
!2 = !{}
43+
!3 = !{i16 6, i16 14}
44+
!4 = !{void (i32, i32)* @the_test, !"the_test", !5, i32 0, !6, !0, !7, i32 0}
45+
!5 = !{i32 2, i32 2}
46+
!6 = !{i32 64, i32 68}
47+
!7 = !{!"buffer_t", !"buffer_t"}
48+
!8 = !{void (i32, i32)* @the_test, null, null, null, null}

0 commit comments

Comments
 (0)