Skip to content

Commit c3e6c9d

Browse files
MiloszSkobejkoigcbot
authored andcommitted
Don't cache volatile load store instructions
On platforms whose default cache policy is set to L1 and L3 cached, such as DG2 or BMG, volatile instructions are also cached. Since CUDA doesn't cache volatile pointers, there is code that is not supported on Intel GPUs, as caching volatile accesses can lead to hangs.
1 parent b98a2ba commit c3e6c9d

File tree

2 files changed

+101
-1
lines changed

2 files changed

+101
-1
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23173,6 +23173,33 @@ bool EmitPass::tryOverrideCacheOpts(
2317323173
return l1l3CacheVal != 0;
2317423174
}
2317523175

23176+
// Reports whether `inst` is a volatile memory access.
//
// A null `inst` yields false. When `inst` is a GenIntrinsicInst, only the
// raw-buffer load/store intrinsics (ldraw_indexed, ldrawvector_indexed,
// storeraw_indexed, storerawvector_indexed) and PredicatedLoad /
// PredicatedStore are inspected, through isVolatile() of their wrapper
// classes; every other Gen intrinsic yields false. Any remaining instruction
// defers to Instruction::isVolatile().
//
// TODO: Instead of this helper function inst can be casted to either
// ALoadInst/AStoreInst class or AbstractLoadInst/AbstractStoreInst
// to call inst->isVolatile() only once, after they're refactored.
static bool isVolatileInst(Instruction *inst) {
  if (inst == nullptr)
    return false;

  auto *genIntrinsic = dyn_cast<GenIntrinsicInst>(inst);
  if (genIntrinsic == nullptr) {
    // Not a Gen intrinsic: rely on the generic instruction query.
    return inst->isVolatile();
  }

  switch (genIntrinsic->getIntrinsicID()) {
  case GenISAIntrinsic::GenISA_ldraw_indexed:
  case GenISAIntrinsic::GenISA_ldrawvector_indexed:
    return cast<LdRawIntrinsic>(inst)->isVolatile();

  case GenISAIntrinsic::GenISA_storeraw_indexed:
  case GenISAIntrinsic::GenISA_storerawvector_indexed:
    return cast<StoreRawIntrinsic>(inst)->isVolatile();

  case GenISAIntrinsic::GenISA_PredicatedLoad:
    return cast<PredicatedLoadIntrinsic>(inst)->isVolatile();

  case GenISAIntrinsic::GenISA_PredicatedStore:
    return cast<PredicatedStoreIntrinsic>(inst)->isVolatile();

  default:
    // Gen intrinsics outside the list above are never reported as volatile.
    return false;
  }
}
23202+
2317623203
LSC_CACHE_OPTS
2317723204
EmitPass::translateLSCCacheControlsFromMetadata(Instruction *inst, bool isLoad,
2317823205
bool isTGM) const {
@@ -23261,7 +23288,7 @@ EmitPass::translateLSCCacheControlsFromMetadata(Instruction *inst, bool isLoad,
2326123288
return translateLSCCacheControlsFromValue(MD->getValue(), isLoad);
2326223289
}
2326323290
node = inst ? inst->getMetadata(LLVMContext::MD_nontemporal) : nullptr;
23264-
if (node) {
23291+
if (node || isVolatileInst(inst)) {
2326523292
return {LSC_CACHING_UNCACHED, LSC_CACHING_UNCACHED};
2326623293
}
2326723294

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; REQUIRES: regkeys, llvm-14-plus
9+
10+
; RUN: igc_opt -ocl -platformdg2 -igc-emit-visa -regkey DumpVISAASMToConsole < %s | FileCheck %s
11+
; ------------------------------------------------
12+
; EmitVISAPass
13+
; ------------------------------------------------
14+
15+
; Test checks if volatile load/store instructions emit uncached LSC instructions
16+
17+
; Kernel under test. It performs two pairs of memory accesses and the FileCheck
; patterns expect each pair to be emitted as lsc_load/lsc_store with the
; ".uc.uc" cache suffix:
;   1. a plain LLVM volatile load and volatile store through %dst;
;   2. GenISA ldraw.indexed / storeraw.indexed calls on a pointer built from
;      %bindlessOffset.
; A third pair (PredicatedLoad / PredicatedStore) is present only as disabled
; comment lines.
define spir_kernel void @test(i32 addrspace(1)* %dst, i32 %bindlessOffset) {
18+
entry:
19+
; CHECK: lsc_load.ugm.uc.uc {{.*}}
20+
; CHECK: lsc_store.ugm.uc.uc {{.*}}
21+
%0 = load volatile i32, i32 addrspace(1)* %dst
22+
store volatile i32 %0, i32 addrspace(1)* %dst
23+
24+
; NOTE(review): the trailing "i1 true" operand of the two raw intrinsic calls
; below is presumably their volatile flag - confirm against the intrinsic
; definitions.
; CHECK: lsc_load.ugm.uc.uc {{.*}}
25+
; CHECK: lsc_store.ugm.uc.uc {{.*}}
26+
%1 = inttoptr i32 %bindlessOffset to float addrspace(2490368)*
27+
%2 = call float @llvm.genx.GenISA.ldraw.indexed.f32.p2490368f32(float addrspace(2490368)* %1, i32 %0, i32 4, i1 true)
28+
call void @llvm.genx.GenISA.storeraw.indexed.p2490368f32.f32(float addrspace(2490368)* %1, i32 4, float %2, i32 4, i1 true)
29+
30+
; COM: Checks below can be enabled when PredicatedLoad and PredicatedStore class will
31+
; COM: start supporting volatile instructions.
32+
; COM: lsc_load.ugm.uc.uc {{.*}}
33+
; COM: lsc_store.ugm.uc.uc {{.*}}
34+
; COM: %3 = call i32 @llvm.genx.GenISA.PredicatedLoad.i32.p1i32.i32(i32 addrspace(1)* %dst, i64 4, i1 true, i32 4)
35+
; COM: call void @llvm.genx.GenISA.PredicatedStore.p1i32.i32(i32 addrspace(1)* %dst, i32 1, i64 2, i1 true)
36+
ret void
37+
}
38+
39+
declare float @llvm.genx.GenISA.ldraw.indexed.f32.p2490368f32(float addrspace(2490368)*, i32, i32, i1) #0
40+
declare void @llvm.genx.GenISA.storeraw.indexed.p2490368f32.f32(float addrspace(2490368)*, i32, float, i32, i1) #1
41+
; declare i32 @llvm.genx.GenISA.PredicatedLoad.i32.p1i32.i32(i32 addrspace(1)*, i64, i1, i32)
42+
; declare void @llvm.genx.GenISA.PredicatedStore.p1i32.i32(i32 addrspace(1)*, i32, i64, i1)
43+
44+
attributes #0 = { argmemonly nounwind readonly willreturn }
45+
attributes #1 = { argmemonly nounwind writeonly }
46+
47+
!IGCMetadata = !{!0}
48+
!igc.functions = !{!21}
49+
50+
!0 = !{!"ModuleMD", !1}
51+
!1 = !{!"FuncMD", !2, !3}
52+
!2 = !{!"FuncMDMap[0]", void (i32 addrspace(1)*, i32)* @test}
53+
!3 = !{!"FuncMDValue[0]", !4, !17}
54+
!4 = !{!"resAllocMD", !5}
55+
!5 = !{!"argAllocMDList", !6, !10, !11, !14, !15, !16}
56+
!6 = !{!"argAllocMDListVec[0]", !7, !8, !9}
57+
!7 = !{!"type", i32 0}
58+
!8 = !{!"extensionType", i32 -1}
59+
!9 = !{!"indexType", i32 -1}
60+
!10 = !{!"argAllocMDListVec[1]", !7, !8, !9}
61+
!11 = !{!"argAllocMDListVec[2]", !12, !8, !13}
62+
!12 = !{!"type", i32 1}
63+
!13 = !{!"indexType", i32 0}
64+
!14 = !{!"argAllocMDListVec[3]", !7, !8, !9}
65+
!15 = !{!"argAllocMDListVec[4]", !7, !8, !9}
66+
!16 = !{!"argAllocMDListVec[5]", !7, !8, !9}
67+
!17 = !{!"m_OpenCLArgTypeQualifiers", !18, !19, !20}
68+
!18 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
69+
!19 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
70+
!20 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
71+
!21 = !{void (i32 addrspace(1)*, i32)* @test, !22}
72+
!22 = !{!23}
73+
!23 = !{!"function_type", i32 0}

0 commit comments

Comments
 (0)