Skip to content

Commit 5ca0045

Browse files
pkwasnie-inteligcbot
authored andcommitted
don't remove implicit kernel args in case of subroutines
Don't remove implicit kernel arguments, as they might be used by subroutines.
1 parent fccfff3 commit 5ca0045

File tree

5 files changed

+150
-14
lines changed

5 files changed

+150
-14
lines changed

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2450,20 +2450,7 @@ namespace IGC
24502450
prevOffset = offset;
24512451

24522452
// skip unused arguments
2453-
bool IsUnusedArg =
2454-
(arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET ||
2455-
arg.getArgType() == KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET ||
2456-
arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_SIZE) &&
2457-
arg.getArg()->use_empty();
2458-
2459-
if (m_Context->platform.allowRemovingUnusedImplicitArguments())
2460-
{
2461-
IsUnusedArg |=
2462-
(arg.getArgType() == KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER || // contains global_id_offset
2463-
arg.getArgType() == KernelArg::ArgType::IMPLICIT_GLOBAL_OFFSET ||
2464-
arg.getArgType() == KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE) &&
2465-
arg.getArg()->use_empty();
2466-
}
2453+
bool IsUnusedArg = isUnusedArg(arg);
24672454

24682455
// Runtime Values should not be processed any further. No annotations shall be created for them.
24692456
// Only added to KernelArgs to enforce correct allocation order.
@@ -2692,6 +2679,28 @@ namespace IGC
26922679
CreatePrintfStringAnnotations();
26932680
}
26942681

2682+
bool COpenCLKernel::isUnusedArg(KernelArg& arg) const
2683+
{
2684+
bool canRemoveArg =
2685+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET ||
2686+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET ||
2687+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_SIZE;
2688+
2689+
if (m_Context->platform.allowRemovingUnusedImplicitArguments())
2690+
{
2691+
// Assume subroutine calls can use implicit arguments.
2692+
if (!HasSubroutines())
2693+
{
2694+
canRemoveArg |=
2695+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER || // contains global_id_offset
2696+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_GLOBAL_OFFSET ||
2697+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE;
2698+
}
2699+
}
2700+
2701+
return canRemoveArg && arg.getArg()->use_empty();
2702+
}
2703+
26952704
bool COpenCLKernel::passNOSInlineData()
26962705
{
26972706
if (IGC_GET_FLAG_VALUE(EnablePassInlineData) == -1) {

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ namespace IGC
268268
// Helper function to get SIMD size specified in intel_reqd_sub_group_size attribute
269269
uint32_t getReqdSubGroupSize(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const;
270270
uint32_t getMaxPressure(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const;
271+
bool isUnusedArg(KernelArg& arg) const;
271272
};
272273

273274
void CodeGen(OpenCLProgramContext* ctx);

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ class CShader
319319
FunctionGroup* FG = (FGA && entry) ? FGA->getGroupForHead(entry) : nullptr;
320320
if (FG)
321321
{
322+
m_HasSubroutine = FG->hasSubroutine();
322323
m_HasStackCall = FG->hasStackCall();
323324
m_HasIndirectCall = FG->hasIndirectCall();
324325
m_HasNestedCall = FG->hasNestedCall();
@@ -331,6 +332,7 @@ class CShader
331332
}
332333

333334
GenXFunctionGroupAnalysis* GetFGA() { return m_FGA; }
335+
bool HasSubroutines() const { return m_HasSubroutine; }
334336
bool HasStackCalls() const { return m_HasStackCall; }
335337
void SetHasStackCalls(bool hasStackCall) { m_HasStackCall = hasStackCall; }
336338
bool HasNestedCalls() const { return m_HasNestedCall; }
@@ -744,6 +746,7 @@ class CShader
744746
DebugInfoData diData;
745747

746748
// Program function attributes
749+
bool m_HasSubroutine = false;
747750
bool m_HasStackCall = false;
748751
bool m_HasNestedCall = false;
749752
bool m_HasIndirectCall = false;
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: regkeys,pvc-supported,llvm-14-plus
10+
11+
// Verify that implicit kernel arguments are not removed for kernels with subroutines.
12+
// Arguments can be removed for stackcalls.
13+
14+
// RUN: ocloc compile -file %s -device pvc -options "-cl-std=CL2.0 -igc_opts 'EnableStackCallFuncCall=0, RemoveUnusedIdImplicitArguments=1, ShortImplicitPayloadHeader=1, DumpZEInfoToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-SUBROUTINE
15+
// RUN: ocloc compile -file %s -device pvc -options "-cl-std=CL2.0 -igc_opts 'EnableStackCallFuncCall=1, RemoveUnusedIdImplicitArguments=1, ShortImplicitPayloadHeader=1, DumpZEInfoToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-STACKCALL
16+
17+
// CHECK-SUBROUTINE: name: kernel_that_must_have_args
18+
// CHECK-SUBROUTINE: payload_arguments:
19+
// CHECK-SUBROUTINE-NEXT: - arg_type: global_id_offset
20+
// CHECK-SUBROUTINE-NEXT: offset: 0
21+
// CHECK-SUBROUTINE-NEXT: size: 12
22+
// CHECK-SUBROUTINE-NEXT: - arg_type: arg_bypointer
23+
// CHECK-SUBROUTINE-NEXT: offset: 16
24+
// CHECK-SUBROUTINE-NEXT: size: 8
25+
// CHECK-SUBROUTINE-NEXT: arg_index: 0
26+
// CHECK-SUBROUTINE-NEXT: addrmode: stateless
27+
// CHECK-SUBROUTINE-NEXT: addrspace: global
28+
// CHECK-SUBROUTINE-NEXT: access_type: readwrite
29+
// CHECK-SUBROUTINE-NEXT: - arg_type: enqueued_local_size
30+
// CHECK-SUBROUTINE-NEXT: offset: 24
31+
// CHECK-SUBROUTINE-NEXT: size: 12
32+
// CHECK-SUBROUTINE-NEXT: per_thread_payload_arguments:
33+
// CHECK-SUBROUTINE-NEXT: - arg_type: local_id
34+
// CHECK-SUBROUTINE-NEXT: offset: 0
35+
// CHECK-SUBROUTINE-NEXT: size: 192
36+
//
37+
// CHECK-SUBROUTINE: name: kernel_that_can_skip_args
38+
// CHECK-SUBROUTINE: payload_arguments:
39+
// CHECK-SUBROUTINE-NEXT: - arg_type: arg_bypointer
40+
// CHECK-SUBROUTINE-NEXT: offset: 0
41+
// CHECK-SUBROUTINE-NEXT: size: 8
42+
// CHECK-SUBROUTINE-NEXT: arg_index: 0
43+
// CHECK-SUBROUTINE-NEXT: addrmode: stateless
44+
// CHECK-SUBROUTINE-NEXT: addrspace: global
45+
// CHECK-SUBROUTINE-NEXT: access_type: readwrite
46+
47+
// CHECK-STACKCALL: name: kernel_that_must_have_args
48+
// CHECK-STACKCALL: payload_arguments:
49+
// CHECK-STACKCALL-NEXT: - arg_type: arg_bypointer
50+
// CHECK-STACKCALL-NEXT: offset: 0
51+
// CHECK-STACKCALL-NEXT: size: 8
52+
// CHECK-STACKCALL-NEXT: arg_index: 0
53+
// CHECK-STACKCALL-NEXT: addrmode: stateless
54+
// CHECK-STACKCALL-NEXT: addrspace: global
55+
// CHECK-STACKCALL-NEXT: access_type: readwrite
56+
// CHECK-STACKCALL-NEXT: - arg_type: private_base_stateless
57+
// CHECK-STACKCALL-NEXT: offset: 8
58+
// CHECK-STACKCALL-NEXT: size: 8
59+
// CHECK-STACKCALL-NEXT: per_thread_payload_arguments:
60+
// CHECK-STACKCALL-NEXT: - arg_type: local_id
61+
// CHECK-STACKCALL-NEXT: offset: 0
62+
// CHECK-STACKCALL-NEXT: size: 192
63+
//
64+
// CHECK-STACKCALL: name: kernel_that_can_skip_args
65+
// CHECK-STACKCALL: payload_arguments:
66+
// CHECK-STACKCALL-NEXT: - arg_type: arg_bypointer
67+
// CHECK-STACKCALL-NEXT: offset: 0
68+
// CHECK-STACKCALL-NEXT: size: 8
69+
// CHECK-STACKCALL-NEXT: arg_index: 0
70+
// CHECK-STACKCALL-NEXT: addrmode: stateless
71+
// CHECK-STACKCALL-NEXT: addrspace: global
72+
// CHECK-STACKCALL-NEXT: access_type: readwrite
73+
74+
__attribute__((noinline))
75+
void noinline_function(global float* ptr)
76+
{
77+
int i = get_global_id(0);
78+
ptr[i] = get_enqueued_local_size(0);
79+
}
80+
81+
kernel void kernel_that_must_have_args(global float *ptr)
82+
{
83+
noinline_function(ptr);
84+
}
85+
86+
kernel void kernel_that_can_skip_args(global float *ptr) {}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: regkeys,pvc-supported,llvm-14-plus
10+
11+
// Verify that implicit kernel arguments are removed when unused.
12+
13+
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'RemoveUnusedIdImplicitArguments=0, ShortImplicitPayloadHeader=1, DumpZEInfoToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-DONT-REMOVE
14+
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'RemoveUnusedIdImplicitArguments=1, ShortImplicitPayloadHeader=1, DumpZEInfoToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-REMOVE
15+
16+
// CHECK-DONT-REMOVE: payload_arguments:
17+
// CHECK-DONT-REMOVE-NEXT: - arg_type: global_id_offset
18+
// CHECK-DONT-REMOVE-NEXT: offset: 0
19+
// CHECK-DONT-REMOVE-NEXT: size: 12
20+
// CHECK-DONT-REMOVE-NEXT: - arg_type: arg_bypointer
21+
// CHECK-DONT-REMOVE-NEXT: offset: 16
22+
// CHECK-DONT-REMOVE-NEXT: size: 8
23+
// CHECK-DONT-REMOVE-NEXT: arg_index: 0
24+
// CHECK-DONT-REMOVE-NEXT: addrmode: stateless
25+
// CHECK-DONT-REMOVE-NEXT: addrspace: global
26+
// CHECK-DONT-REMOVE-NEXT: access_type: readwrite
27+
28+
// CHECK-REMOVE: payload_arguments:
29+
// CHECK-REMOVE-NEXT: - arg_type: arg_bypointer
30+
// CHECK-REMOVE-NEXT: offset: 0
31+
// CHECK-REMOVE-NEXT: size: 8
32+
// CHECK-REMOVE-NEXT: arg_index: 0
33+
// CHECK-REMOVE-NEXT: addrmode: stateless
34+
// CHECK-REMOVE-NEXT: addrspace: global
35+
// CHECK-REMOVE-NEXT: access_type: readwrite
36+
37+
kernel void test(global float* ptr) {}

0 commit comments

Comments
 (0)