Skip to content

Commit 4458a3b

Browse files
esukhovigcbot
authored and committed
Stub vectorization for IGCVectorizer
Allow certain instructions to be "stub-vectorized". New tests are added to cover the additional flexibility of vectorization.
1 parent d19cdc5 commit 4458a3b

12 files changed

+1470
-45
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

Lines changed: 252 additions & 42 deletions
Large diffs are not rendered by default.

IGC/Compiler/CISACodeGen/IGCVectorizer.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ class IGCVectorizer : public llvm::FunctionPass {
5757
// basically every element inside scalarSlice should point to the same
5858
// vectorized element which contains all of them
5959
std::unordered_map<Value *, Value *> ScalarToVector;
60-
std::unordered_map<Value *, Value *> ReplacedDictionary;
61-
InstructionToSliceMap InstructionToSlice;
60+
61+
// contains information about instruction position inside BB
62+
// with relation to other instructions
63+
std::unordered_map<Value *, unsigned> PositionMap;
6264
// all vector instructions that were produced for chain will be stored
6365
// in this array, used for clean up if we bail
6466
VecArr CreatedVectorInstructions;
@@ -79,14 +81,25 @@ class IGCVectorizer : public llvm::FunctionPass {
7981
void collectInstructionToProcess(VecArr &ToProcess, Function &F);
8082
void buildTree(VecArr &V, VecOfSlices &Chain);
8183
void printSlice(Slice *S);
84+
bool checkDependencyAndTryToEliminate(VecArr &Slice);
85+
86+
unsigned getPositionInsideBB(llvm::Instruction *Inst);
87+
void collectPositionInsideBB(llvm::Instruction *Inst);
88+
89+
Instruction *getMaxPoint(VecArr &Slice);
90+
Instruction *getMinPoint(VecArr &Slice);
91+
Instruction *getInsertPointForVector(VecArr &Arr);
92+
Instruction *getInsertPointForCreatedInstruction(VecVal &Arr, VecArr &Slice);
8293

8394
bool checkPHI(Instruction *Compare, VecArr &Slice);
95+
bool handleStub(VecArr &Slice);
8496
bool handlePHI(VecArr &Slice);
8597
bool checkInsertElement(Instruction *First, VecArr &Slice);
8698
bool handleInsertElement(VecArr &Slice, Instruction *Final);
8799
bool checkExtractElement(Instruction *Compare, VecArr &Slice);
88100
bool handleExtractElement(VecArr &Slice);
89101
bool handleCastInstruction(VecArr &Slice);
102+
bool handleSelectInstruction(VecArr &Slice);
90103
bool handleBinaryInstruction(VecArr &Slice);
91104
bool handleIntrinsic(VecArr &Slice);
92105
bool checkBinaryOperator(VecArr &Slice);
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: igc_opt -S --igc-vectorizer -dce < %s 2>&1 | FileCheck %s
2+
3+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
4+
target triple = "spir64-unknown-unknown"
5+
6+
define spir_kernel void @triton_tem_fused_0() {
7+
.lr.ph:
8+
br label %._crit_edge333
9+
10+
._crit_edge333: ; preds = %._crit_edge333, %.lr.ph
11+
%0 = phi float [ 1.000000e+00, %.lr.ph ], [ 0.000000e+00, %._crit_edge333 ]
12+
%1 = phi float [ 0.000000e+00, %.lr.ph ], [ %8, %._crit_edge333 ]
13+
%2 = phi float [ 0.000000e+00, %.lr.ph ], [ %9, %._crit_edge333 ]
14+
%3 = phi float [ 0.000000e+00, %.lr.ph ], [ %10, %._crit_edge333 ]
15+
%4 = phi float [ 0.000000e+00, %.lr.ph ], [ 0x7FF8000000000000, %._crit_edge333 ]
16+
%5 = phi float [ 0.000000e+00, %.lr.ph ], [ 1.000000e+00, %._crit_edge333 ]
17+
%6 = phi float [ 0.000000e+00, %.lr.ph ], [ %11, %._crit_edge333 ]
18+
%7 = phi float [ 0.000000e+00, %.lr.ph ], [ %12, %._crit_edge333 ]
19+
; CHECK-NOT: vectorized_phi
20+
%8 = call float @llvm.maxnum.f32(float %1, float 0.000000e+00)
21+
%9 = call float @llvm.maxnum.f32(float %2, float 0.000000e+00)
22+
%10 = call float @llvm.maxnum.f32(float %3, float 0.000000e+00)
23+
%11 = call float @llvm.maxnum.f32(float %6, float 0.000000e+00)
24+
%12 = call float @llvm.maxnum.f32(float %7, float 0.000000e+00)
25+
%13 = fptrunc float %0 to half
26+
%14 = fptrunc float %1 to half
27+
%15 = fptrunc float %2 to half
28+
%16 = fptrunc float %3 to half
29+
%17 = fptrunc float %4 to half
30+
%18 = fptrunc float %5 to half
31+
%19 = fptrunc float %6 to half
32+
%20 = fptrunc float %7 to half
33+
%21 = insertelement <8 x half> zeroinitializer, half %13, i64 0
34+
%22 = insertelement <8 x half> %21, half %14, i64 1
35+
%23 = insertelement <8 x half> %22, half %15, i64 2
36+
%24 = insertelement <8 x half> %23, half %16, i64 3
37+
%25 = insertelement <8 x half> %24, half %17, i64 4
38+
%26 = insertelement <8 x half> %25, half %18, i64 5
39+
%27 = insertelement <8 x half> %26, half %19, i64 6
40+
%28 = insertelement <8 x half> %27, half %20, i64 7
41+
%29 = bitcast <8 x half> %28 to <8 x i16>
42+
%30 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> %29, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
43+
br label %._crit_edge333
44+
}
45+
46+
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
47+
48+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
49+
declare float @llvm.maxnum.f32(float, float) #0
50+
51+
; uselistorder directives
52+
uselistorder float (float, float)* @llvm.maxnum.f32, { 4, 3, 2, 1, 0 }
53+
54+
attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
55+
56+
!igc.functions = !{!0}
57+
58+
!0 = distinct !{void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, half addrspace(1)*, <8 x i32>, <8 x i32>, i16, i16, i16, i8 addrspace(2)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32)* bitcast (void ()* @triton_tem_fused_0 to void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, half addrspace(1)*, <8 x i32>, <8 x i32>, i16, i16, i16, i8 addrspace(2)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32)*), !1}
59+
!1 = distinct !{!2}
60+
!2 = distinct !{!"sub_group_size", i32 16}
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
; REQUIRES: regkeys, llvm-15-or-older
2+
; RUN: igc_opt -S --igc-vectorizer -dce --regkey=VectorizerAllowSelect=1 --regkey=VectorizerAllowCMP=1 --regkey=VectorizerAllowMAXNUM=1 --regkey=VectorizerAllowWAVEALL=1 --regkey=VectorizerDepWindowMultiplier=6 < %s 2>&1 | FileCheck %s
3+
4+
; CHECK: [[dpas_0:%.*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32
5+
; CHECK: [[extract_0_0:%.*]] = extractelement <8 x float> [[dpas_0]], i64 0
6+
; CHECK: [[extract_0_1:%.*]] = extractelement <8 x float> [[dpas_0]], i64 1
7+
8+
; CHECK: [[fmul_0_0:%.*]] = fmul float {{.*}}, 1.250000e-01
9+
; CHECK: [[fmul_0_1:%.*]] = fmul float {{.*}}, 1.250000e-01
10+
; CHECK: [[fmul_0_2:%.*]] = fmul float {{.*}}, 1.250000e-01
11+
; CHECK: [[fmul_0_3:%.*]] = fmul float {{.*}}, 1.250000e-01
12+
; CHECK: [[fmul_0_4:%.*]] = fmul float {{.*}}, 1.250000e-01
13+
; CHECK: [[fmul_0_5:%.*]] = fmul float {{.*}}, 1.250000e-01
14+
15+
; CHECK: [[vector_0_0:%.*]] = insertelement <8 x float> undef, float [[extract_0_0]], i32 0
16+
; CHECK: [[vector_0_1:%.*]] = insertelement <8 x float> [[vector_0_0]], float [[extract_0_1]], i32 1
17+
; CHECK: [[vector_0_2:%.*]] = insertelement <8 x float> [[vector_0_1]], float [[fmul_0_0]], i32 2
18+
; CHECK: [[vector_0_3:%.*]] = insertelement <8 x float> [[vector_0_2]], float [[fmul_0_1]], i32 3
19+
; CHECK: [[vector_0_4:%.*]] = insertelement <8 x float> [[vector_0_3]], float [[fmul_0_2]], i32 4
20+
; CHECK: [[vector_0_5:%.*]] = insertelement <8 x float> [[vector_0_4]], float [[fmul_0_3]], i32 5
21+
; CHECK: [[vector_0_6:%.*]] = insertelement <8 x float> [[vector_0_5]], float [[fmul_0_4]], i32 6
22+
; CHECK: [[vector_0_7:%.*]] = insertelement <8 x float> [[vector_0_6]], float [[fmul_0_5]], i32 7
23+
24+
; CHECK: [[vec_bin_0:%.*]] = fmul <8 x float> [[vector_0_7]], <float 1.250000e-01, float 1.250000e-01, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>
25+
26+
; CHECK: [[vec_extract_0_0:%.*]] = extractelement <8 x float> %vectorized_binary, i32 0
27+
; CHECK: [[vec_extract_0_1:%.*]] = extractelement <8 x float> %vectorized_binary, i32 1
28+
; CHECK: [[vec_extract_0_2:%.*]] = extractelement <8 x float> %vectorized_binary, i32 2
29+
; CHECK: [[vec_extract_0_3:%.*]] = extractelement <8 x float> %vectorized_binary, i32 3
30+
; CHECK: [[vec_extract_0_4:%.*]] = extractelement <8 x float> %vectorized_binary, i32 4
31+
; CHECK: [[vec_extract_0_5:%.*]] = extractelement <8 x float> %vectorized_binary, i32 5
32+
; CHECK: [[vec_extract_0_6:%.*]] = extractelement <8 x float> %vectorized_binary, i32 6
33+
; CHECK: [[vec_extract_0_7:%.*]] = extractelement <8 x float> %vectorized_binary, i32 7
34+
35+
; CHECK: store float [[vec_extract_0_1]], float* null, align 4
36+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_0]]
37+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_1]]
38+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_2]]
39+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_3]]
40+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_4]]
41+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_5]]
42+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_6]]
43+
; CHECK: select i1 {{.*}}, float 0xFFF0000000000000, float [[vec_extract_0_7]]
44+
45+
; ModuleID = 'WINDOW_RESCHED.ll'
46+
source_filename = "initial.ll"
47+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
48+
target triple = "spir64-unknown-unknown"
49+
50+
; Function Attrs: convergent nounwind
51+
define spir_kernel void @snork(i16 %arg) #0 {
52+
bb:
53+
%tmp = zext i16 0 to i32
54+
%tmp1 = and i32 0, 0
55+
%tmp2 = and i32 0, 0
56+
%tmp3 = or i32 0, 0
57+
%tmp4 = or i32 0, 0
58+
%tmp5 = or i32 0, 0
59+
br label %bb6
60+
61+
bb6: ; preds = %bb6, %bb
62+
%tmp7 = phi i32 [ %tmp1, %bb ], [ 0, %bb6 ]
63+
%tmp8 = phi float [ 0.000000e+00, %bb ], [ %tmp18, %bb6 ]
64+
%tmp9 = phi float [ 0.000000e+00, %bb ], [ %tmp27, %bb6 ]
65+
%tmp10 = phi float [ 0.000000e+00, %bb ], [ %tmp20, %bb6 ]
66+
%tmp11 = phi float [ 0.000000e+00, %bb ], [ %tmp21, %bb6 ]
67+
%tmp12 = phi float [ 0.000000e+00, %bb ], [ %tmp22, %bb6 ]
68+
%tmp13 = phi float [ 0.000000e+00, %bb ], [ %tmp23, %bb6 ]
69+
%tmp14 = phi float [ 0.000000e+00, %bb ], [ %tmp24, %bb6 ]
70+
%tmp15 = phi float [ 0.000000e+00, %bb ], [ %tmp25, %bb6 ]
71+
%tmp16 = phi float [ 0.000000e+00, %bb ], [ %tmp112, %bb6 ]
72+
%tmp17 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
73+
%tmp18 = extractelement <8 x float> %tmp17, i64 0
74+
%tmp19 = extractelement <8 x float> %tmp17, i64 1
75+
%tmp20 = extractelement <8 x float> %tmp17, i64 2
76+
%tmp21 = extractelement <8 x float> %tmp17, i64 3
77+
%tmp22 = extractelement <8 x float> %tmp17, i64 4
78+
%tmp23 = extractelement <8 x float> %tmp17, i64 5
79+
%tmp24 = extractelement <8 x float> %tmp17, i64 6
80+
%tmp25 = extractelement <8 x float> %tmp17, i64 7
81+
%tmp26 = fmul float %tmp18, 1.250000e-01
82+
%tmp27 = fmul float %tmp19, 1.250000e-01
83+
store float %tmp27, float* null, align 4
84+
%tmp28 = fmul float %tmp20, 1.250000e-01
85+
%tmp29 = fmul float %tmp21, 1.250000e-01
86+
%tmp30 = fmul float %tmp22, 1.250000e-01
87+
%tmp31 = fmul float %tmp23, 1.250000e-01
88+
%tmp32 = fmul float %tmp24, 1.250000e-01
89+
%tmp33 = fmul float %tmp25, 1.250000e-01
90+
%tmp34 = icmp slt i32 %tmp2, %tmp7
91+
%tmp35 = icmp slt i32 %tmp3, %tmp7
92+
%tmp36 = icmp slt i32 %tmp4, %tmp7
93+
%tmp37 = icmp slt i32 %tmp5, %tmp1
94+
%tmp38 = icmp slt i32 %tmp, 1
95+
%tmp39 = select i1 %tmp34, float 0xFFF0000000000000, float %tmp26
96+
%tmp40 = select i1 %tmp35, float 0xFFF0000000000000, float %tmp27
97+
%tmp41 = fmul float %tmp28, 0x3FF7154760000000
98+
%tmp42 = select i1 %tmp36, float 0xFFF0000000000000, float %tmp41
99+
%tmp43 = fmul float %tmp29, 0x3FF7154760000000
100+
%tmp44 = select i1 %tmp37, float 0xFFF0000000000000, float %tmp43
101+
%tmp45 = fmul float %tmp30, 0x3FF7154760000000
102+
%tmp46 = select i1 %tmp38, float 0xFFF0000000000000, float %tmp45
103+
%tmp47 = fmul float %tmp31, 0x3FF7154760000000
104+
%tmp48 = select i1 %tmp38, float 0xFFF0000000000000, float %tmp47
105+
%tmp49 = fmul float %tmp32, 0x3FF7154760000000
106+
%tmp50 = select i1 %tmp38, float 0xFFF0000000000000, float %tmp49
107+
%tmp51 = fmul float %tmp33, 0x3FF7154760000000
108+
%tmp52 = select i1 %tmp38, float 0xFFF0000000000000, float %tmp51
109+
%tmp53 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
110+
%tmp54 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
111+
%tmp55 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
112+
%tmp56 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
113+
%tmp57 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
114+
%tmp58 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
115+
%tmp59 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
116+
%tmp60 = call float @llvm.genx.GenISA.WaveAll.f32(float 0.000000e+00, i8 0, i32 0)
117+
%tmp61 = call float @llvm.maxnum.f32(float %tmp8, float %tmp53)
118+
%tmp62 = call float @llvm.maxnum.f32(float %tmp9, float %tmp54)
119+
%tmp63 = call float @llvm.maxnum.f32(float %tmp10, float %tmp55)
120+
%tmp64 = call float @llvm.maxnum.f32(float %tmp11, float %tmp56)
121+
%tmp65 = call float @llvm.maxnum.f32(float %tmp12, float %tmp57)
122+
%tmp66 = call float @llvm.maxnum.f32(float %tmp13, float %tmp58)
123+
%tmp67 = call float @llvm.maxnum.f32(float %tmp14, float %tmp59)
124+
%tmp68 = call float @llvm.maxnum.f32(float %tmp15, float %tmp60)
125+
%tmp69 = select i1 false, float 0.000000e+00, float %tmp61
126+
%tmp70 = select i1 false, float 0.000000e+00, float %tmp62
127+
%tmp71 = select i1 false, float 0.000000e+00, float %tmp63
128+
%tmp72 = select i1 false, float 0.000000e+00, float %tmp64
129+
%tmp73 = select i1 false, float 0.000000e+00, float %tmp65
130+
%tmp74 = select i1 false, float 0.000000e+00, float %tmp66
131+
%tmp75 = select i1 false, float 0.000000e+00, float %tmp67
132+
%tmp76 = select i1 false, float 0.000000e+00, float %tmp68
133+
%tmp77 = fsub float %tmp39, %tmp69
134+
%tmp78 = fsub float %tmp40, %tmp70
135+
%tmp79 = fsub float %tmp42, %tmp71
136+
%tmp80 = fsub float %tmp44, %tmp72
137+
%tmp81 = fsub float %tmp46, %tmp73
138+
%tmp82 = fsub float %tmp48, %tmp74
139+
%tmp83 = fsub float %tmp50, %tmp75
140+
%tmp84 = fsub float %tmp52, %tmp76
141+
%tmp85 = call float @llvm.exp2.f32(float %tmp77)
142+
%tmp86 = call float @llvm.exp2.f32(float %tmp78)
143+
%tmp87 = call float @llvm.exp2.f32(float %tmp79)
144+
%tmp88 = call float @llvm.exp2.f32(float %tmp80)
145+
%tmp89 = call float @llvm.exp2.f32(float %tmp81)
146+
%tmp90 = call float @llvm.exp2.f32(float %tmp82)
147+
%tmp91 = call float @llvm.exp2.f32(float %tmp83)
148+
%tmp92 = call float @llvm.exp2.f32(float %tmp84)
149+
%tmp93 = fptrunc float %tmp85 to half
150+
%tmp94 = fptrunc float %tmp86 to half
151+
%tmp95 = fptrunc float %tmp87 to half
152+
%tmp96 = fptrunc float %tmp88 to half
153+
%tmp97 = fptrunc float %tmp89 to half
154+
%tmp98 = fptrunc float %tmp90 to half
155+
%tmp99 = fptrunc float %tmp91 to half
156+
%tmp100 = fptrunc float %tmp92 to half
157+
%tmp101 = insertelement <8 x float> zeroinitializer, float %tmp16, i64 0
158+
%tmp102 = insertelement <8 x half> zeroinitializer, half %tmp93, i64 0
159+
%tmp103 = insertelement <8 x half> %tmp102, half %tmp94, i64 1
160+
%tmp104 = insertelement <8 x half> %tmp103, half %tmp95, i64 2
161+
%tmp105 = insertelement <8 x half> %tmp104, half %tmp96, i64 3
162+
%tmp106 = insertelement <8 x half> %tmp105, half %tmp97, i64 4
163+
%tmp107 = insertelement <8 x half> %tmp106, half %tmp98, i64 5
164+
%tmp108 = insertelement <8 x half> %tmp107, half %tmp99, i64 6
165+
%tmp109 = insertelement <8 x half> %tmp108, half %tmp100, i64 7
166+
%tmp110 = bitcast <8 x half> %tmp109 to <8 x i16>
167+
%tmp111 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %tmp101, <8 x i16> %tmp110, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
168+
%tmp112 = extractelement <8 x float> %tmp111, i64 0
169+
br label %bb6
170+
}
171+
172+
; Function Attrs: convergent inaccessiblememonly nounwind
173+
declare float @llvm.genx.GenISA.WaveAll.f32(float, i8, i32) #1
174+
175+
; Function Attrs: convergent nounwind readnone willreturn
176+
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) #2
177+
178+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
179+
declare float @llvm.maxnum.f32(float, float) #3
180+
181+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
182+
declare float @llvm.exp2.f32(float) #3
183+
184+
; uselistorder directives
185+
uselistorder float (float, i8, i32)* @llvm.genx.GenISA.WaveAll.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
186+
uselistorder <8 x float> (<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)* @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32, { 1, 0 }
187+
uselistorder float (float, float)* @llvm.maxnum.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
188+
uselistorder float (float)* @llvm.exp2.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
189+
190+
attributes #0 = { convergent nounwind }
191+
attributes #1 = { convergent inaccessiblememonly nounwind }
192+
attributes #2 = { convergent nounwind readnone willreturn }
193+
attributes #3 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
194+
195+
!igc.functions = !{!0}
196+
197+
!0 = !{void (i16)* @snork, !1}
198+
!1 = !{!2, !29}
199+
!2 = !{!"function_type", i32 0}
200+
!29 = !{!"sub_group_size", i32 16}

0 commit comments

Comments
 (0)