Skip to content

Commit 36a58d7

Browse files
igorban-inteligcbot
authored and committed
SIMDCF: support for no-return or no-arguments
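SIMD-CF conformance pass: support functions whose SIMD-CF predicate argument or return value is no longer present. LLVM 16 may optimize such an argument or return value out; therefore, functions that have a predicate-vector argument, or a predicate vector in their return value, are labeled with the SimdCFArg/SimdCFRet attributes so that the conformance pass can still recognize them.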
1 parent 6eb5d95 commit 36a58d7

File tree

4 files changed

+226
-53
lines changed

4 files changed

+226
-53
lines changed

IGC/VectorCompiler/include/vc/Utils/GenX/InternalMetadata.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2021-2024 Intel Corporation
3+
Copyright (C) 2021-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -25,6 +25,9 @@ inline constexpr const char VCDisableMidThreadPreemption[] =
2525
// no attribute means that GenXStackUsage pass failed (recursion, etc)
2626
// attribute created by GenXStackUsage and used to create patch token
2727
inline constexpr const char VCStackAmount[] = "VC.Stack.Amount";
28+
29+
inline constexpr const char VCSimdCFArg[] = "VC.SimdCFArg";
30+
inline constexpr const char VCSimdCFRet[] = "VC.SimdCFRet";
2831
} // namespace FunctionMD
2932

3033
namespace InstMD {

IGC/VectorCompiler/lib/GenXCodeGen/GenXSimdCFConformance.cpp

Lines changed: 98 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2023 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -2172,7 +2172,8 @@ bool GenXSimdCFConformance::checkEMVal(SimpleValue EMVal) {
21722172
return false; // uses constant that is not all ones, invalid
21732173
}
21742174
} else if (!EMVals.count(ConnectedVal)) {
2175-
LLVM_DEBUG(dbgs() << "checkEMVal: ConnectedVal is not in EMVals\n");
2175+
LLVM_DEBUG(dbgs() << "checkEMVal: ConnectedVal is not in EMVals : ");
2176+
LLVM_DEBUG(dbgs() << "val == " << *ConnectedVal.getValue() << "\n");
21762177
return false; // connected value is not in EMVals
21772178
}
21782179
LLVM_DEBUG(dbgs() << "checkEMVal: ConnectedVal checked "
@@ -2558,6 +2559,34 @@ static bool checkAllUsesAreSelectOrWrRegion(Value *V) {
25582559
return true;
25592560
}
25602561

2562+
// Drops the SIMD-CF marker attributes from CalledFunc when its signature
// still contains the value the attribute stands in for:
//
//  * VCSimdCFArg is removed as soon as one formal argument has exactly the
//    type ValTy.
//  * VCSimdCFRet is removed when the return type is an i1 vector, or is a
//    struct that has an i1-vector element (elements are enumerated through
//    IndexFlattener).
//
// An attribute that is absent on entry is never added here; callers test the
// attributes afterwards to decide whether the argument / return-value lookup
// should be performed.
static inline void PrepareFunctionAttributes(Function *CalledFunc,
                                             Type *ValTy) {
  if (CalledFunc->hasFnAttribute(vc::FunctionMD::VCSimdCFArg)) {
    for (const Argument &FormalArg : CalledFunc->args()) {
      if (FormalArg.getType() != ValTy)
        continue;
      CalledFunc->removeFnAttr(vc::FunctionMD::VCSimdCFArg);
      LLVM_DEBUG(dbgs() << "Remove VCSimdCFArg attr from "
                        << CalledFunc->getName() << "\n");
      break;
    }
  }

  // Nothing to do for the return value unless the marker is present.
  if (!CalledFunc->hasFnAttribute(vc::FunctionMD::VCSimdCFRet))
    return;

  // A "predicate" here is any vector whose scalar type is i1.
  const auto IsPredicateVector = [](Type *Ty) {
    return Ty->isVectorTy() && Ty->getScalarType()->isIntegerTy(1);
  };

  Type *const ResultTy = CalledFunc->getReturnType();
  bool HasPredicateResult = IsPredicateVector(ResultTy);
  if (auto *StructTy = dyn_cast<StructType>(ResultTy)) {
    const unsigned NumFlat = IndexFlattener::getNumElements(StructTy);
    // Removing the attribute is idempotent, so one match is sufficient.
    for (unsigned FlatIdx = 0; FlatIdx != NumFlat && !HasPredicateResult;
         ++FlatIdx)
      HasPredicateResult =
          IsPredicateVector(IndexFlattener::getElementType(StructTy, FlatIdx));
  }

  if (HasPredicateResult)
    CalledFunc->removeFnAttr(vc::FunctionMD::VCSimdCFRet);
}
2589+
25612590
/***********************************************************************
25622591
* getConnectedVals : for a SimpleValue, get other SimpleValues connected to
25632592
* it through phi nodes, insertvalue, extractvalue, goto/join, and maybe
@@ -2622,7 +2651,8 @@ bool GenXSimdCFConformance::getConnectedVals(
26222651
break;
26232652
}
26242653
} else if (RetTy != ValTy && !RetTy->isVoidTy())
2625-
return false; // no predicate ret value found
2654+
return F->hasFnAttribute(
2655+
vc::FunctionMD::VCSimdCFRet); // no predicate ret value found
26262656
if (!RetTy->isVoidTy())
26272657
for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi)
26282658
if (auto *Ret = dyn_cast<ReturnInst>(fi->getTerminator()))
@@ -2732,14 +2762,18 @@ bool GenXSimdCFConformance::getConnectedVals(
27322762
// about that.
27332763
auto ValTy =
27342764
IndexFlattener::getElementType(Val.getType(), Val.getIndex());
2735-
for (unsigned Idx = 0, End = IGCLLVM::getNumArgOperands(CI);; ++Idx) {
2736-
if (Idx == End)
2737-
return false; // no corresponding call arg found
2738-
if (CI->getArgOperand(Idx)->getType() == ValTy) {
2739-
ConnectedVals->push_back(SimpleValue(CI->getArgOperand(Idx), 0));
2740-
break;
2765+
2766+
PrepareFunctionAttributes(CalledFunc, ValTy);
2767+
2768+
if (!CalledFunc->hasFnAttribute(vc::FunctionMD::VCSimdCFArg))
2769+
for (unsigned Idx = 0, End = IGCLLVM::getNumArgOperands(CI);; ++Idx) {
2770+
if (Idx == End)
2771+
return false; // no corresponding call arg found
2772+
if (CI->getArgOperand(Idx)->getType() == ValTy) {
2773+
ConnectedVals->push_back(SimpleValue(CI->getArgOperand(Idx), 0));
2774+
break;
2775+
}
27412776
}
2742-
}
27432777
break;
27442778
}
27452779
default:
@@ -2809,22 +2843,25 @@ bool GenXSimdCFConformance::getConnectedVals(
28092843
auto ValTy =
28102844
IndexFlattener::getElementType(Val.getType(), Val.getIndex());
28112845
auto F = User->getFunction();
2846+
28122847
bool Lower = false;
2813-
for (auto ai = F->arg_begin(), ae = F->arg_end();; ++ai) {
2814-
if (ai == ae) {
2815-
// no arg of the right type found
2816-
Lower = true;
2817-
UsersToLower.push_back(SimpleValue(User, ui->getOperandNo()));
2818-
LLVM_DEBUG(dbgs() << "getConnectedVals: ai == ae push_back " << *User
2819-
<< " No=" << ui->getOperandNo() << "\n");
2820-
break;
2821-
}
2822-
auto Arg = &*ai;
2823-
if (Arg->getType() == ValTy) {
2824-
ConnectedVals->push_back(SimpleValue(Arg, 0));
2825-
break;
2848+
PrepareFunctionAttributes(F, ValTy);
2849+
if (!F->hasFnAttribute(vc::FunctionMD::VCSimdCFArg))
2850+
for (auto ai = F->arg_begin(), ae = F->arg_end();; ++ai) {
2851+
if (ai == ae) {
2852+
// no arg of the right type found
2853+
Lower = true;
2854+
UsersToLower.push_back(SimpleValue(User, ui->getOperandNo()));
2855+
LLVM_DEBUG(dbgs() << "getConnectedVals: ai == ae push_back "
2856+
<< *User << " No=" << ui->getOperandNo() << "\n");
2857+
break;
2858+
}
2859+
auto Arg = &*ai;
2860+
if (Arg->getType() == ValTy) {
2861+
ConnectedVals->push_back(SimpleValue(Arg, 0));
2862+
break;
2863+
}
28262864
}
2827-
}
28282865
if (IncludeOptional && !Lower) {
28292866
// With IncludeOptional, also add the values connected by being the
28302867
// return value at each call site.
@@ -2946,41 +2983,50 @@ bool GenXSimdCFConformance::getConnectedVals(
29462983
// Use in subroutine call. Add the corresponding function arg.
29472984
Function *CalledFunc = CI->getCalledFunction();
29482985
IGC_ASSERT(CalledFunc);
2949-
auto ai = CalledFunc->arg_begin();
2950-
for (unsigned Count = ui->getOperandNo(); Count; --Count, ++ai)
2951-
;
2952-
Argument *Arg = &*ai;
2953-
ConnectedVals->push_back(SimpleValue(Arg, Val.getIndex()));
2986+
2987+
auto ValTy = IndexFlattener::getElementType(Val.getValue()->getType(),
2988+
Val.getIndex());
2989+
2990+
PrepareFunctionAttributes(CalledFunc, ValTy);
2991+
// If Attribute setted - do not check
2992+
if (!CalledFunc->hasFnAttribute(vc::FunctionMD::VCSimdCFArg)) {
2993+
2994+
auto ai = CalledFunc->arg_begin();
2995+
for (unsigned Count = ui->getOperandNo(); Count; --Count, ++ai)
2996+
;
2997+
Argument *Arg = &*ai;
2998+
ConnectedVals->push_back(SimpleValue(Arg, Val.getIndex()));
2999+
}
29543000
// Connected to some return value from the call. There is a problem
29553001
// here in that it might find another predicate return value that is
29563002
// nothing to do with SIMD CF, and thus stop SIMD CF being optimized.
29573003
// But passing a predicate in and out of a function is rare outside
29583004
// of SIMD CF, so we do not worry about that.
2959-
unsigned RetIdx = 0;
2960-
auto ValTy = IndexFlattener::getElementType(Val.getValue()->getType(),
2961-
Val.getIndex());
2962-
if (auto *ST = dyn_cast<StructType>(CI->getType())) {
2963-
LLVM_DEBUG(dbgs()
2964-
<< "getConnectedVals: StructType get" << *ST << "\n");
2965-
for (unsigned End = IndexFlattener::getNumElements(ST);; ++RetIdx) {
2966-
if (RetIdx == End) {
2967-
UsersToLower.push_back(SimpleValue(
2968-
User, ui->getOperandNo())); // no predicate ret value found
2969-
LLVM_DEBUG(dbgs() << "getConnectedVals: push_back " << *CI
2970-
<< " No=" << ui->getOperandNo() << "\n");
2971-
}
2972-
if (IndexFlattener::getElementType(ST, RetIdx) == ValTy) {
2973-
ConnectedVals->push_back(SimpleValue(CI, RetIdx));
2974-
break;
3005+
if (!CalledFunc->hasFnAttribute(vc::FunctionMD::VCSimdCFRet)) {
3006+
unsigned RetIdx = 0;
3007+
if (auto *ST = dyn_cast<StructType>(CI->getType())) {
3008+
LLVM_DEBUG(dbgs()
3009+
<< "getConnectedVals: StructType get" << *ST << "\n");
3010+
for (unsigned End = IndexFlattener::getNumElements(ST);; ++RetIdx) {
3011+
if (RetIdx == End) {
3012+
UsersToLower.push_back(SimpleValue(
3013+
User, ui->getOperandNo())); // no predicate ret value found
3014+
LLVM_DEBUG(dbgs() << "getConnectedVals: push_back " << *CI
3015+
<< " No=" << ui->getOperandNo() << "\n");
3016+
}
3017+
if (IndexFlattener::getElementType(ST, RetIdx) == ValTy) {
3018+
ConnectedVals->push_back(SimpleValue(CI, RetIdx));
3019+
break;
3020+
}
29753021
}
3022+
} else if (CI->getType() == ValTy)
3023+
ConnectedVals->push_back(SimpleValue(CI, 0));
3024+
else if (!CI->getType()->isVoidTy()) {
3025+
UsersToLower.push_back(SimpleValue(
3026+
User, ui->getOperandNo())); // no predicate ret value found
3027+
LLVM_DEBUG(dbgs() << "getConnectedVals: push_back " << *CI
3028+
<< " No=" << ui->getOperandNo() << "\n");
29763029
}
2977-
} else if (CI->getType() == ValTy)
2978-
ConnectedVals->push_back(SimpleValue(CI, 0));
2979-
else if (!CI->getType()->isVoidTy()) {
2980-
UsersToLower.push_back(SimpleValue(
2981-
User, ui->getOperandNo())); // no predicate ret value found
2982-
LLVM_DEBUG(dbgs() << "getConnectedVals: push_back " << *CI
2983-
<< " No=" << ui->getOperandNo() << "\n");
29843030
}
29853031
break;
29863032
}

IGC/VectorCompiler/lib/GenXOpts/CMTrans/CMABI.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ SPDX-License-Identifier: MIT
4040
#include "vc/Utils/GenX/TypeSize.h"
4141
#include "vc/Utils/General/DebugInfo.h"
4242
#include "vc/Utils/General/FunctionAttrs.h"
43+
#include "vc/Utils/General/IndexFlattener.h"
4344
#include "vc/Utils/General/InstRebuilder.h"
4445
#include "vc/Utils/General/STLExtras.h"
4546
#include "vc/Utils/General/Types.h"
@@ -458,6 +459,34 @@ static inline bool isReadOnlyArg(Argument &Arg) {
458459
return true;
459460
}
460461

462+
// Mark SIMD-CF argument+ret functions
463+
static inline void CheckRMEM(Function *F) {
464+
if (F->isDeclaration())
465+
return;
466+
for (Argument &Arg : F->args()) {
467+
// Find SIMD_CF EM/RM in arguments
468+
if (Arg.getType()->isVectorTy() &&
469+
Arg.getType()->getScalarType()->isIntegerTy(1)) {
470+
// Add a dummy attribute so that the conformance-pass can recognize a call
471+
// with simd-sf
472+
F->addFnAttr(vc::FunctionMD::VCSimdCFArg);
473+
break;
474+
}
475+
}
476+
auto *RetTy = F->getReturnType();
477+
if (auto *ST = dyn_cast<StructType>(RetTy)) {
478+
unsigned RetIdx = 0;
479+
for (unsigned End = IndexFlattener::getNumElements(ST); RetIdx < End;
480+
++RetIdx) {
481+
auto *Ty = IndexFlattener::getElementType(ST, RetIdx);
482+
if (Ty->isVectorTy() && Ty->getScalarType()->isIntegerTy(1)) {
483+
F->addFnAttr(vc::FunctionMD::VCSimdCFRet);
484+
break;
485+
}
486+
}
487+
}
488+
}
489+
461490
template <class CallGraphImpl>
462491
bool CMABIBase<CallGraphImpl>::runOnCallGraphImpl(CallGraphImpl &SCC) {
463492
bool Changed = false;
@@ -500,6 +529,7 @@ bool CMABIBase<CallGraphImpl>::runOnCallGraphImpl(CallGraphImpl &SCC) {
500529
Function *F = getFunction(&*i);
501530
if (!F || F->empty())
502531
continue;
532+
CheckRMEM(F);
503533
for (auto &Arg : F->args()) {
504534
auto *ArgTy = Arg.getType();
505535
if (!Arg.hasAttribute(Attribute::ByVal) || !ArgTy->isPointerTy() ||
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm_16_or_greater
10+
; RUN: %llc_opaque_ptrs %s -march=genx64 -mcpu=XeHPC -vc-skip-ocl-runtime-info -finalizer-opts='-dumpcommonisa -isaasmToConsole' -o /dev/null | FileCheck %s
11+
12+
; CHECK-NOT: Error
13+
%intel.buffer_rw_t.1 = type opaque
14+
15+
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
16+
declare <64 x i8> @llvm.genx.rdregioni.v64i8.v192i8.i16(<192 x i8>, i32, i32, i32, i16, i32) #0
17+
18+
; Function Attrs: mustprogress nounwind willreturn
19+
declare void @llvm.genx.media.st.v64i8(i32, i32, i32, i32, i32, i32, <64 x i8>) #1
20+
21+
; Function Attrs: mustprogress noinline nounwind willreturn
22+
; @foo takes no arguments and returns a { <192 x i8>, <32 x i1> } struct.
; It runs one simdcf.goto / simdcf.join pair and returns the EM produced by
; the join in struct element 1; element 0 is a zeroinitializer data vector.
define internal spir_func { <192 x i8>, <32 x i1> } @foo() unnamed_addr #2 {
23+
entry:
24+
; The goto is fed an all-true <32 x i1> constant as its first operand.
%goto_foo = tail call { <32 x i1>, <16 x i1>, i1 } @llvm.genx.simdcf.goto.v32i1.v16i1(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i1> zeroinitializer, <16 x i1> zeroinitializer)
25+
%em_foo = extractvalue { <32 x i1>, <16 x i1>, i1 } %goto_foo, 0
26+
%rm_foo = extractvalue { <32 x i1>, <16 x i1>, i1 } %goto_foo, 1
27+
; Both goto results are passed straight into the join.
%join_foo = tail call { <32 x i1>, i1 } @llvm.genx.simdcf.join.v32i1.v16i1(<32 x i1> %em_foo, <16 x i1> %rm_foo)
28+
%join_em_foo = extractvalue { <32 x i1>, i1 } %join_foo, 0
29+
; Pack the joined EM into element 1 of the returned struct.
%foo_res = insertvalue { <192 x i8>, <32 x i1> } { <192 x i8> zeroinitializer, <32 x i1> undef }, <32 x i1> %join_em_foo, 1
30+
ret { <192 x i8>, <32 x i1> } %foo_res
31+
}
32+
33+
; Function Attrs: mustprogress noinline nounwind willreturn
34+
; @bar receives a <192 x i8> data vector and a <32 x i1> mask (%EM.in) and
; returns void. The incoming mask feeds a simdcf.goto / simdcf.join pair; the
; data vector is read through rdregioni and written out with media.st.
define internal spir_func void @bar(<192 x i8> %bar_in, <32 x i1> %EM.in) unnamed_addr #2 {
35+
entry:
36+
; The goto takes the mask argument as its first operand.
%goto_bar = tail call { <32 x i1>, <16 x i1>, i1 } @llvm.genx.simdcf.goto.v32i1.v16i1(<32 x i1> %EM.in, <16 x i1> zeroinitializer, <16 x i1> undef)
37+
%em_bar = extractvalue { <32 x i1>, <16 x i1>, i1 } %goto_bar, 0
38+
%rm_bar = extractvalue { <32 x i1>, <16 x i1>, i1 } %goto_bar, 1
39+
; The join result is not used by any later instruction in this function.
%join_bar = tail call { <32 x i1>, i1 } @llvm.genx.simdcf.join.v32i1.v16i1(<32 x i1> %em_bar, <16 x i1> %rm_bar)
40+
; Read a <64 x i8> region out of the input data and store it.
%bar_out = tail call <64 x i8> @llvm.genx.rdregioni.v64i8.v192i8.i16(<192 x i8> %bar_in, i32 8, i32 8, i32 1, i16 0, i32 8)
41+
tail call void @llvm.genx.media.st.v64i8(i32 0, i32 0, i32 0, i32 8, i32 1, i32 2, <64 x i8> %bar_out)
42+
ret void
43+
}
44+
45+
; Function Attrs: mustprogress noinline nounwind willreturn
46+
; Kernel @Gexp: calls @foo, splits the returned struct into its data part
; (element 0) and its mask part (element 1), and forwards both to @bar.
; The FileCheck directives inside the body expect a call to foo followed by a
; call to bar after the Gexp label in the tool output.
define dllexport spir_kernel void @Gexp(%intel.buffer_rw_t.1 addrspace(1)* nocapture readnone %A, %intel.buffer_rw_t.1 addrspace(1)* nocapture readnone %B, %intel.buffer_rw_t.1 addrspace(1)* nocapture readnone %C, %intel.buffer_rw_t.1 addrspace(1)* nocapture readnone %D, %intel.buffer_rw_t.1 addrspace(1)* nocapture readnone %E, %intel.buffer_rw_t.1 addrspace(1)* nocapture readnone %F, i64 %impl.arg.private.base) local_unnamed_addr #3 {
47+
; CHECK-LABEL: Gexp
48+
; CHECK: call {{.*}} foo
49+
; CHECK: call {{.*}} bar
50+
entry:
51+
; @foo is called without arguments; the mask comes back inside the struct.
%foo_res = tail call spir_func { <192 x i8>, <32 x i1> } @foo() #4
52+
%data_foo = extractvalue { <192 x i8>, <32 x i1> } %foo_res, 0
53+
%em_foo = extractvalue { <192 x i8>, <32 x i1> } %foo_res, 1
54+
; @bar returns void; the mask is passed in as its second argument.
tail call spir_func void @bar(<192 x i8> %data_foo, <32 x i1> %em_foo) #4
55+
ret void
56+
}
57+
58+
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
59+
declare { <32 x i1>, <16 x i1>, i1 } @llvm.genx.simdcf.goto.v32i1.v16i1(<32 x i1>, <16 x i1>, <16 x i1>) #0
60+
61+
; Function Attrs: mustprogress nounwind willreturn
62+
declare { <32 x i1>, i1 } @llvm.genx.simdcf.join.v32i1.v16i1(<32 x i1>, <16 x i1>) #1
63+
64+
attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(none) }
65+
attributes #1 = { mustprogress nounwind willreturn }
66+
attributes #2 = { mustprogress noinline nounwind willreturn "VC.SimdCFArg" }
67+
attributes #3 = { mustprogress noinline nounwind willreturn "CMGenxMain" "oclrt"="1" }
68+
attributes #4 = { noinline nounwind }
69+
70+
!spirv.MemoryModel = !{!0}
71+
!opencl.enable.FP_CONTRACT = !{}
72+
!spirv.Source = !{!1}
73+
!opencl.spir.version = !{!2}
74+
!opencl.ocl.version = !{!1}
75+
!opencl.used.extensions = !{!3}
76+
!opencl.used.optional.core.features = !{!3}
77+
!spirv.Generator = !{!4}
78+
!genx.kernels = !{!5}
79+
!genx.kernel.internal = !{!10}
80+
81+
!0 = !{i32 2, i32 2}
82+
!1 = !{i32 0, i32 0}
83+
!2 = !{i32 1, i32 2}
84+
!3 = !{}
85+
!4 = !{i16 6, i16 14}
86+
!5 = !{void (%intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, i64)* @Gexp, !"Gexp", !6, i32 0, !7, !8, !9, i32 0}
87+
!6 = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 96}
88+
!7 = !{i32 72, i32 80, i32 88, i32 96, i32 104, i32 112, i32 64}
89+
!8 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
90+
!9 = !{!"buffer_t read_write", !"buffer_t read_write", !"buffer_t read_write", !"buffer_t read_write", !"buffer_t read_write", !"buffer_t read_write"}
91+
!10 = !{void (%intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, %intel.buffer_rw_t.1 addrspace(1)*, i64)* @Gexp, !11, !12, !3, !13, i32 0}
92+
!11 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
93+
!12 = !{i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6}
94+
!13 = !{i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 255}

0 commit comments

Comments
 (0)