Skip to content

Commit 3fbae08

Browse files
igorban-intel authored and igcbot committed
CMABI: fix generation of byval copies
Do not copy byval arguments in cases where nothing is stored to them.
1 parent 90da14a commit 3fbae08

File tree

3 files changed

+154
-8
lines changed

3 files changed

+154
-8
lines changed

IGC/VectorCompiler/lib/GenXOpts/CMTrans/CMABI.cpp

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -421,6 +421,43 @@ template <class NodeType> Function *getFunction(NodeType i) {
421421
return nullptr;
422422
}
423423

424+
// Analyzes the uses of a ByVal argument to determine if it is written to.
425+
static inline bool isReadOnlyArg(Argument &Arg) {
426+
SmallVector<Use *, 8> Uses;
427+
for (auto &U : Arg.uses())
428+
Uses.push_back(&U);
429+
430+
while (!Uses.empty()) {
431+
auto *U = Uses.back();
432+
Uses.pop_back();
433+
if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
434+
if (SI->getPointerOperand() == U->get())
435+
return false;
436+
continue; // sutable usage
437+
}
438+
439+
if (auto *LI = dyn_cast<LoadInst>(U->getUser()))
440+
continue; // sutable usage
441+
442+
if (auto *GEP = dyn_cast<GetElementPtrInst>(U->getUser())) {
443+
if (GEP->getPointerOperand() == U->get())
444+
for (auto &GU : GEP->uses())
445+
Uses.push_back(&GU);
446+
447+
continue;
448+
}
449+
450+
if (auto *Cast = dyn_cast<CastInst>(U->getUser())) {
451+
for (auto &CU : Cast->uses())
452+
Uses.push_back(&CU);
453+
continue;
454+
}
455+
456+
return false;
457+
}
458+
return true;
459+
}
460+
424461
template <class CallGraphImpl>
425462
bool CMABIBase<CallGraphImpl>::runOnCallGraphImpl(CallGraphImpl &SCC) {
426463
bool Changed = false;
@@ -451,7 +488,6 @@ bool CMABIBase<CallGraphImpl>::runOnCallGraphImpl(CallGraphImpl &SCC) {
451488
if (auto *CGN = ProcessNode(&*I)) {
452489
LocalChange = true;
453490
auto *NewF = getFunction(CGN);
454-
;
455491
SCC.getOuterRefSCC().replaceNodeFunction(*I, *NewF);
456492
}
457493
}
@@ -466,7 +502,8 @@ bool CMABIBase<CallGraphImpl>::runOnCallGraphImpl(CallGraphImpl &SCC) {
466502
continue;
467503
for (auto &Arg : F->args()) {
468504
auto *ArgTy = Arg.getType();
469-
if (!Arg.hasAttribute(Attribute::ByVal) || !ArgTy->isPointerTy())
505+
if (!Arg.hasAttribute(Attribute::ByVal) || !ArgTy->isPointerTy() ||
506+
isReadOnlyArg(Arg))
470507
continue;
471508
auto *M = F->getParent();
472509
auto *InsertBefore = F->getEntryBlock().getFirstNonPHI();

IGC/VectorCompiler/test/CMABI/byval_sret.ll

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ define internal spir_func void @store_float_internal_byval(%struct4* byval(%stru
7878
%s = load %struct4, %struct4* %s_ptr
7979
%f = extractvalue %struct4 %s, 0
8080
store float %f, float* %f_ptr
81+
store %struct4 %s, %struct4* %s_ptr
8182
ret void
8283
; SMALL-OFF-NEXT: %[[COPY_STR:[^ ]+]] = alloca %struct4, align 8
8384
; SMALL-OFF-TYPED-PTRS-NEXT: %[[MEMCPY_DST:[^ ]+]] = bitcast %struct4* %[[COPY_STR]] to i8*
@@ -98,10 +99,10 @@ define internal spir_func void @store_float_internal_byval(%struct4* byval(%stru
9899
; SMALL-ON-OPAQUE-PTRS-NEXT: %s = load %struct4, ptr %s_ptr
99100
; COMMON-NEXT: %f = extractvalue %struct4 %s, 0
100101
; COMMON-TYPED-PTRS-NEXT: store float %f, float* %f_ptr
101-
; COMMON-TYPED-PTRS-NEXT: %[[FLOAT_VAL:[^ ]+]] = load float, float* %f_ptr
102+
; COMMON-TYPED-PTRS: %[[FLOAT_VAL:[^ ]+]] = load float, float* %f_ptr
102103
; COMMON-OPAQUE-PTRS-NEXT: store float %f, ptr %f_ptr
103-
; COMMON-OPAQUE-PTRS-NEXT: %[[FLOAT_VAL:[^ ]+]] = load float, ptr %f_ptr
104-
; COMMON-NEXT: ret float %[[FLOAT_VAL]]
104+
; COMMON-OPAQUE-PTRS: %[[FLOAT_VAL:[^ ]+]] = load float, ptr %f_ptr
105+
; COMMON: ret float %[[FLOAT_VAL]]
105106
}
106107

107108
; COM: not internal function with byval attribute:
@@ -113,6 +114,7 @@ define spir_func void @store_float(%struct4* byval(%struct4) %s_ptr, float* %f_p
113114
%s = load %struct4, %struct4* %s_ptr
114115
%f = extractvalue %struct4 %s, 0
115116
store float %f, float* %f_ptr
117+
store %struct4 %s, %struct4* %s_ptr
116118
ret void
117119
; COMMON-NEXT: %[[COPY_STR:[^ ]+]] = alloca %struct4, align 8
118120
; COMMON-TYPED-PTRS-NEXT: %[[MEMCPY_DST:[^ ]+]] = bitcast %struct4* %[[COPY_STR]] to i8*
@@ -126,7 +128,7 @@ define spir_func void @store_float(%struct4* byval(%struct4) %s_ptr, float* %f_p
126128
; COMMON-NEXT: %f = extractvalue %struct4 %s, 0
127129
; COMMON-TYPED-PTRS-NEXT: store float %f, float* %f_ptr
128130
; COMMON-OPAQUE-PTRS-NEXT: store float %f, ptr %f_ptr
129-
; COMMON-NEXT: ret void
131+
; COMMON: ret void
130132
}
131133

132134
; COM: internal function with array access on ptr:
@@ -224,6 +226,8 @@ define spir_func void @store_sum(%struct24* byval(%struct24) %s_ptr) #0 {
224226
%fv_ptr = load <2 x float>*, <2 x float>** %ptr3
225227
%sum = fadd <2 x float> %fv1, %fv2
226228
store <2 x float> %sum, <2 x float>* %fv_ptr
229+
%tmp = load %struct24, %struct24* %s_ptr
230+
store %struct24 %tmp, %struct24* %s_ptr
227231
ret void
228232
; COMMON-NEXT: %[[COPY_STR:[^ ]+]] = alloca %struct24, align 8
229233
; COMMON-TYPED-PTRS-NEXT: %[[MEMCPY_DST:[^ ]+]] = bitcast %struct24* %[[COPY_STR]] to i8*
@@ -247,7 +251,7 @@ define spir_func void @store_sum(%struct24* byval(%struct24) %s_ptr) #0 {
247251
; COMMON-NEXT: %sum = fadd <2 x float> %fv1, %fv2
248252
; COMMON-TYPED-PTRS-NEXT: store <2 x float> %sum, <2 x float>* %fv_ptr
249253
; COMMON-OPAQUE-PTRS-NEXT: store <2 x float> %sum, ptr %fv_ptr
250-
; COMMON-NEXT: ret void
254+
; COMMON: ret void
251255
}
252256

253257
; COM: struct4s tests structure with structure inside.
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: %opt_typed_ptrs %use_old_pass_manager% -CMABILegacy -march=genx64 -mcpu=XeHPG -S < %s | FileCheck %s --check-prefixes=CHECK
10+
; RUN: %opt_opaque_ptrs %use_old_pass_manager% -CMABILegacy -march=genx64 -mcpu=XeHPG -S < %s | FileCheck %s --check-prefixes=CHECK
11+
12+
; RUN: %opt_new_pm_typed -passes=CMABI -march=genx64 -mcpu=XeHPG -S < %s | FileCheck %s --check-prefixes=CHECK
13+
; RUN: %opt_new_pm_opaque -passes=CMABI -march=genx64 -mcpu=XeHPG -S < %s | FileCheck %s --check-prefixes=CHECK
14+
15+
target datalayout = "e-p:64:64-i64:64-n8:16:32:64"
16+
target triple = "spir64-unknown-unknown"
17+
18+
; check that the test did not abort
19+
%test.struct = type <{ i8 addrspace(4)*, i32, [4 x i8] }>
20+
%struct4 = type { float }
21+
22+
; CHECK-LABEL: test0
23+
define internal spir_func <3 x i32> @test0(%test.struct addrspace(4)* byval(%test.struct) %0) {
24+
; CHECK-NOT: alloca
25+
ret <3 x i32> zeroinitializer
26+
}
27+
28+
; CHECK-LABEL: test1
29+
define spir_func void @test1(%struct4* byval(%struct4) %s_ptr, float* %f_ptr) #0 {
30+
; CHECK: alloca
31+
; Alloca -> store
32+
%s = load %struct4, %struct4* %s_ptr
33+
store %struct4 %s, %struct4* %s_ptr
34+
ret void
35+
}
36+
37+
; CHECK-LABEL: test2
38+
define spir_func void @test2(%struct4* byval(%struct4) %s_ptr, float* %f_ptr) #0 {
39+
; CHECK: alloca
40+
; Alloca -> cast -> store
41+
%s = load %struct4, %struct4* %s_ptr
42+
%cast1 = bitcast %struct4* %s_ptr to i8*
43+
%cast2 = bitcast i8* %cast1 to i16*
44+
%cast3 = bitcast i16* %cast2 to i32*
45+
%cast4 = bitcast i32* %cast3 to %struct4*
46+
store %struct4 %s, %struct4* %cast4
47+
ret void
48+
}
49+
50+
; CHECK-LABEL: test3
51+
define spir_func void @test3(%struct4* byval(%struct4) %s_ptr, float* %f_ptr) #0 {
52+
; CHECK: alloca
53+
; Alloca -> GEP -> store
54+
%s = load %struct4, %struct4* %s_ptr
55+
%load_ptr = getelementptr %struct4, %struct4* %s_ptr, i32 1, i32 0
56+
%f = load float, float* %load_ptr
57+
store float %f, float* %load_ptr
58+
ret void
59+
}
60+
61+
; CHECK-LABEL: test4
62+
define spir_func void @test4(%struct4* byval(%struct4) %s_ptr, float* %f_ptr) #0 {
63+
; CHECK-NOT: alloca
64+
; No Alloca -> GEP, cast, load
65+
%load_ptr = getelementptr %struct4, %struct4* %s_ptr, i32 1, i32 0
66+
%f = load float, float* %load_ptr
67+
%cast1 = bitcast %struct4* %s_ptr to i8*
68+
%cast2 = bitcast i8* %cast1 to i16*
69+
%cast3 = bitcast i16* %cast2 to i32*
70+
%cast4 = bitcast i32* %cast3 to %struct4*
71+
%f2 = load %struct4, %struct4* %cast4
72+
ret void
73+
}
74+
75+
; CHECK-LABEL: kern
76+
define spir_kernel void @kern(float* %RET, float* %aFOO, i64 %privBase) #1 {
77+
%str4_ptr = alloca %struct4, i32 2
78+
%f_value = load float, float* %aFOO
79+
%1 = call spir_func <3 x i32> @test0(%test.struct addrspace(4)* null)
80+
call spir_func void @test1(%struct4* %str4_ptr, float* %RET)
81+
call spir_func void @test2(%struct4* %str4_ptr, float* %RET)
82+
call spir_func void @test3(%struct4* %str4_ptr, float* %RET)
83+
call spir_func void @test4(%struct4* %str4_ptr, float* %RET)
84+
ret void
85+
}
86+
87+
attributes #0 = { noinline nounwind "CMStackCall" }
88+
attributes #1 = { noinline nounwind "CMGenxMain" "oclrt"="1" }
89+
90+
!llvm.module.flags = !{!0}
91+
!opencl.ocl.version = !{!1}
92+
93+
!0 = !{i32 1, !"genx.useGlobalMem", i32 1}
94+
!1 = !{i32 0, i32 0}
95+
!genx.kernels = !{!2}
96+
!genx.kernel.internal = !{!7}
97+
!2 = !{void (float*, float*, i64)* @kern, !"kern", !3, i32 0, !4, !5, !6, i32 0}
98+
!3 = !{i32 0, i32 0, i32 96}
99+
!4 = !{i32 72, i32 80, i32 64}
100+
!5 = !{i32 0, i32 0}
101+
!6 = !{!"", !""}
102+
!7 = !{void (float*, float*, i64)* @kern, !8, !9, !10, null}
103+
!8 = !{i32 0, i32 0, i32 0}
104+
!9 = !{i32 0, i32 1, i32 2}
105+
!10 = !{}

0 commit comments

Comments
 (0)