Skip to content

Commit 9c9f2d4

Browse files
pkwasnie-intel authored and pszymich committed
disable OpenCL prefetch to L1 cache
Disables prefetch into L1 for the standard builtin prefetch, because an out-of-bounds address generates a page fault when prefetching into L1 (only prefetch to L3 tolerates out-of-bounds addresses).
1 parent e1eab91 commit 9c9f2d4

File tree

3 files changed

+33
-28
lines changed

3 files changed

+33
-28
lines changed

IGC/BiFModule/Implementation/prefetch.cl

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,40 @@ SPDX-License-Identifier: MIT
99
#include "IGCBiF_Intrinsics_Lsc.cl"
1010

1111
extern __constant int __UseLSC;
12+
extern __constant int __ForceL1Prefetch;
1213

1314
// Mapping from OpenCL prefetch to LSC prefetch. Uniform immediate offset can
1415
// be used to save on base pointer arithmetics, but offset can't be variable.
15-
#define LSC_PREFETCH(type, p, num_elements) \
16-
if (num_elements == 1) \
17-
{ \
18-
__builtin_IB_lsc_prefetch_global_##type(p, 0, LSC_LDCC_L1C_L3C); \
19-
} \
20-
else if (num_elements == 2) \
21-
{ \
22-
__builtin_IB_lsc_prefetch_global_##type(p, 0, LSC_LDCC_L1C_L3C); \
23-
__builtin_IB_lsc_prefetch_global_##type(p, 1, LSC_LDCC_L1C_L3C); \
24-
} \
25-
else if (num_elements == 3) \
26-
{ \
27-
__builtin_IB_lsc_prefetch_global_##type(p, 0, LSC_LDCC_L1C_L3C); \
28-
__builtin_IB_lsc_prefetch_global_##type(p, 1, LSC_LDCC_L1C_L3C); \
29-
__builtin_IB_lsc_prefetch_global_##type(p, 2, LSC_LDCC_L1C_L3C); \
30-
} \
31-
else if (num_elements == 4) \
32-
{ \
33-
__builtin_IB_lsc_prefetch_global_##type(p, 0, LSC_LDCC_L1C_L3C); \
34-
__builtin_IB_lsc_prefetch_global_##type(p, 1, LSC_LDCC_L1C_L3C); \
35-
__builtin_IB_lsc_prefetch_global_##type(p, 2, LSC_LDCC_L1C_L3C); \
36-
__builtin_IB_lsc_prefetch_global_##type(p, 3, LSC_LDCC_L1C_L3C); \
37-
} \
38-
else \
39-
{ \
40-
for (int i = 0; i < num_elements; ++i) \
41-
__builtin_IB_lsc_prefetch_global_##type(p + i, 0, LSC_LDCC_L1C_L3C); \
42-
} \
16+
// LSC_PREFETCH(type, p, num_elements)
//   type         - suffix pasted onto __builtin_IB_lsc_prefetch_global_ to pick the builtin.
//   p            - base pointer handed to the builtin.
//   num_elements - number of elements to prefetch starting at p.
// Counts 1..4 are unrolled using immediate element offsets 0..3 on the same
// base pointer; any other count falls back to a loop that advances the
// pointer (p + i) and always passes offset 0.
// The cache option is LSC_LDCC_L1C_L3C when __ForceL1Prefetch is non-zero and
// LSC_LDCC_L1UC_L3C otherwise.
// NOTE: the macro expands to bare statements and declares `cacheOpt` in the
// scope where it is expanded, so it can be expanded at most once per scope.
#define LSC_PREFETCH(type, p, num_elements)                                  \
17+
/* Warning: an out-of-bounds L1 prefetch generates a page fault, so L1 is used only when __ForceL1Prefetch is set */ \
18+
enum LSC_LDCC cacheOpt = __ForceL1Prefetch ? LSC_LDCC_L1C_L3C : LSC_LDCC_L1UC_L3C; \
19+
if (num_elements == 1)                                                       \
20+
{                                                                            \
21+
__builtin_IB_lsc_prefetch_global_##type(p, 0, cacheOpt);                 \
22+
}                                                                            \
23+
else if (num_elements == 2)                                                  \
24+
{                                                                            \
25+
__builtin_IB_lsc_prefetch_global_##type(p, 0, cacheOpt);                 \
26+
__builtin_IB_lsc_prefetch_global_##type(p, 1, cacheOpt);                 \
27+
}                                                                            \
28+
else if (num_elements == 3)                                                  \
29+
{                                                                            \
30+
__builtin_IB_lsc_prefetch_global_##type(p, 0, cacheOpt);                 \
31+
__builtin_IB_lsc_prefetch_global_##type(p, 1, cacheOpt);                 \
32+
__builtin_IB_lsc_prefetch_global_##type(p, 2, cacheOpt);                 \
33+
}                                                                            \
34+
else if (num_elements == 4)                                                  \
35+
{                                                                            \
36+
__builtin_IB_lsc_prefetch_global_##type(p, 0, cacheOpt);                 \
37+
__builtin_IB_lsc_prefetch_global_##type(p, 1, cacheOpt);                 \
38+
__builtin_IB_lsc_prefetch_global_##type(p, 2, cacheOpt);                 \
39+
__builtin_IB_lsc_prefetch_global_##type(p, 3, cacheOpt);                 \
40+
}                                                                            \
41+
else                                                                         \
42+
{                                                                            \
43+
for (int i = 0; i < num_elements; ++i)                                   \
44+
__builtin_IB_lsc_prefetch_global_##type(p + i, 0, cacheOpt);         \
45+
}                                                                            \
4346

4447
//Prefetch function
4548

IGC/Compiler/Optimizer/BuiltInFuncImport.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ void BIImport::InitializeBIFlags(Module& M)
10471047
useHighAccuracyMathFuncs = OCLContext->m_InternalOptions.UseHighAccuracyMathFuncs;
10481048

10491049
initializeVarWithValue("__UseLSC", pCtx->platform.hasLSC());
1050+
initializeVarWithValue("__ForceL1Prefetch", IGC_IS_FLAG_ENABLED(ForcePrefetchToL1Cache) ? 1 : 0);
10501051
}
10511052

10521053
initializeVarWithValue("__EnableSWSrgbWrites", IGC_GET_FLAG_VALUE(cl_khr_srgb_image_writes));

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ DECLARE_IGC_REGKEY(bool, LateInlineUnmaskedFunc, false, "Postpone inlinin
526526
DECLARE_IGC_REGKEY(bool, ForceFormatConversionDG2Plus, false,
527527
"Forces SW image format conversion for R10G10B10A2_UNORM, R11G11B10_FLOAT, R10G10B10A2_UINT image formats on DG2+ platforms", true)
528528
DECLARE_IGC_REGKEY(bool, EnableDivergentBarrierWA, false, "Generate continuation code to handle shaders that places barriers in divergent control flow", false)
529+
DECLARE_IGC_REGKEY(bool, ForcePrefetchToL1Cache, false, "Forces standard builtin prefetch to use L1 cache", false)
529530

530531
DECLARE_IGC_GROUP("Performance experiments")
531532
DECLARE_IGC_REGKEY(bool, ForceNonCoherentStatelessBTI, false, "Enable gneeration of non cache coherent stateless messages", false)

0 commit comments

Comments
 (0)