|
1 | | -; REQUIRES: pvc-supported, regkeys |
| 1 | +; UNSUPPORTED: system-windows |
| 2 | +; REQUIRES: regkeys |
2 | 3 |
|
3 | | -; RUN: igc_opt -S -dce -platformpvc -rev-id B -has-emulated-64-bit-insts -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s |
| 4 | +; RUN: igc_opt -S -dce -platformpvc -rev-id B -has-emulated-64-bit-insts -igc-emit-visa --regkey=DumpVISAASMToConsole=1 --regkey=VectorizerUniformValueVectorizationEnabled=0 -simd-mode 16 < %s | FileCheck %s |
4 | 5 |
|
5 | | -; CHECK: .decl vectorized_binary378 v_type=G type=f num_elts=8 align=dword |
6 | | -; CHECK: .decl V0035 v_type=G type=f num_elts=8 align=wordx32 |
7 | | -; CHECK: .decl vectorized_binary402 v_type=G type=f num_elts=128 align=wordx32 |
8 | | -; CHECK: .decl V0036 v_type=G type=f num_elts=8 align=wordx32 |
| 6 | +; CHECK: .decl vectorized_phi1095 v_type=G type=f num_elts=8 align=dword |
| 7 | +; CHECK: .decl vectorized_phi1116 v_type=G type=f num_elts=8 align=dword |
| 8 | +; CHECK: .decl vector1029 v_type=G type=f num_elts=8 align=dword |
| 9 | +; CHECK: .decl vector1052 v_type=G type=f num_elts=8 align=dword |
9 | 10 |
|
10 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,0)<1> V0035(0,0)<0;1,0> |
11 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,1)<1> V0035(0,1)<0;1,0> |
12 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,2)<1> V0035(0,2)<0;1,0> |
13 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,3)<1> V0035(0,3)<0;1,0> |
14 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,4)<1> V0035(0,4)<0;1,0> |
15 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,5)<1> V0035(0,5)<0;1,0> |
16 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,6)<1> V0035(0,6)<0;1,0> |
17 | | -; CHECK: inv (M1_NM, 1) vectorized_binary378(0,7)<1> V0035(0,7)<0;1,0> |
18 | | -; CHECK: div (M1, 16) vectorized_binary402(0,0)<1> V0032(0,0)<1;1,0> V0036(0,0)<0;1,0> |
19 | | -; CHECK: div (M1, 16) vectorized_binary402(1,0)<1> V0032(1,0)<1;1,0> V0036(0,1)<0;1,0> |
20 | | -; CHECK: div (M1, 16) vectorized_binary402(2,0)<1> V0032(2,0)<1;1,0> V0036(0,2)<0;1,0> |
21 | | -; CHECK: div (M1, 16) vectorized_binary402(3,0)<1> V0032(3,0)<1;1,0> V0036(0,3)<0;1,0> |
22 | | -; CHECK: div (M1, 16) vectorized_binary402(4,0)<1> V0032(4,0)<1;1,0> V0036(0,4)<0;1,0> |
23 | | -; CHECK: div (M1, 16) vectorized_binary402(5,0)<1> V0032(5,0)<1;1,0> V0036(0,5)<0;1,0> |
24 | | -; CHECK: div (M1, 16) vectorized_binary402(6,0)<1> V0032(6,0)<1;1,0> V0036(0,6)<0;1,0> |
25 | | -; CHECK: div (M1, 16) vectorized_binary402(7,0)<1> V0032(7,0)<1;1,0> V0036(0,7)<0;1,0> |
26 | 11 |
|
27 | | -define spir_kernel void @_attn_fwd(half addrspace(1)* %0, half addrspace(1)* %1, half addrspace(1)* %2, float %3, i8 addrspace(1)* %4, float addrspace(1)* %5, <8 x i32> %r0) { |
| 12 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,0)<1> vectorized_phi1095(0,0)<0;1,0> vector1029(0,0)<0;1,0> |
| 13 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,1)<1> vectorized_phi1095(0,1)<0;1,0> vector1029(0,1)<0;1,0> |
| 14 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,2)<1> vectorized_phi1095(0,2)<0;1,0> vector1029(0,2)<0;1,0> |
| 15 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,3)<1> vectorized_phi1095(0,3)<0;1,0> vector1029(0,3)<0;1,0> |
| 16 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,4)<1> vectorized_phi1095(0,4)<0;1,0> vector1029(0,4)<0;1,0> |
| 17 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,5)<1> vectorized_phi1095(0,5)<0;1,0> vector1029(0,5)<0;1,0> |
| 18 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,6)<1> vectorized_phi1095(0,6)<0;1,0> vector1029(0,6)<0;1,0> |
| 19 | +; CHECK: div (M1_NM, 1) vectorized_binary1096(0,7)<1> vectorized_phi1095(0,7)<0;1,0> vector1029(0,7)<0;1,0> |
| 20 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,0)<1> vectorized_phi1116(0,0)<0;1,0> vector1052(0,0)<0;1,0> |
| 21 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,1)<1> vectorized_phi1116(0,1)<0;1,0> vector1052(0,1)<0;1,0> |
| 22 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,2)<1> vectorized_phi1116(0,2)<0;1,0> vector1052(0,2)<0;1,0> |
| 23 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,3)<1> vectorized_phi1116(0,3)<0;1,0> vector1052(0,3)<0;1,0> |
| 24 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,4)<1> vectorized_phi1116(0,4)<0;1,0> vector1052(0,4)<0;1,0> |
| 25 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,5)<1> vectorized_phi1116(0,5)<0;1,0> vector1052(0,5)<0;1,0> |
| 26 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,6)<1> vectorized_phi1116(0,6)<0;1,0> vector1052(0,6)<0;1,0> |
| 27 | +; CHECK: div (M1_NM, 1) vectorized_binary1117(0,7)<1> vectorized_phi1116(0,7)<0;1,0> vector1052(0,7)<0;1,0> |
| 28 | + |
| 29 | +source_filename = "reduced.ll" |
| 30 | +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32" |
| 31 | +target triple = "spir64-unknown-unknown" |
| 32 | + |
| 33 | +define spir_kernel void @_attn_fwd(half addrspace(1)* %0, half addrspace(1)* %1, half addrspace(1)* %2, float %3, i8 addrspace(1)* %4, float addrspace(1)* %5, <8 x i32> %r0, <8 x i32> %payloadHeader, i32 %bufferOffset, i32 %bufferOffset1, i32 %bufferOffset2, i32 %bufferOffset3, i32 %bufferOffset4) { |
28 | 34 | br label %._crit_edge |
29 | 35 |
|
30 | | -._crit_edge: ; preds = %._crit_edge, %6 |
31 | | - %7 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) |
32 | | - br i1 false, label %._crit_edge, label %8 |
| 36 | +._crit_edge: ; preds = %._crit_edge.._crit_edge_crit_edge, %6 |
| 37 | + %vectorized_phi1095 = phi <8 x float> [ zeroinitializer, %6 ], [ %vectorized_binary1105, %._crit_edge.._crit_edge_crit_edge ] |
| 38 | + %vectorized_phi1116 = phi <8 x float> [ zeroinitializer, %6 ], [ %vectorized_binary1126, %._crit_edge.._crit_edge_crit_edge ] |
| 39 | + %vector1029 = insertelement <8 x float> zeroinitializer, float 0.000000e+00, i64 0 |
| 40 | + %vector1052 = insertelement <8 x float> zeroinitializer, float 0.000000e+00, i64 0 |
| 41 | + %vectorized_binary1096 = fdiv <8 x float> %vectorized_phi1095, %vector1029 |
| 42 | + %vectorized_binary1117 = fdiv <8 x float> %vectorized_phi1116, %vector1052 |
| 43 | + %vectorized_binary1105 = fadd <8 x float> %vectorized_binary1096, zeroinitializer |
| 44 | + %vectorized_binary1126 = fadd <8 x float> %vectorized_binary1117, zeroinitializer |
| 45 | + br i1 false, label %._crit_edge.._crit_edge_crit_edge, label %7 |
33 | 46 |
|
34 | | -8: ; preds = %._crit_edge |
35 | | - %vectorized_binary378 = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, zeroinitializer |
36 | | - %vectorized_binary402 = fdiv <8 x float> %7, zeroinitializer |
37 | | - %9 = bitcast <8 x float> %vectorized_binary378 to <8 x i32> |
38 | | - call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %9) |
39 | | - %10 = bitcast <8 x float> %vectorized_binary402 to <8 x i32> |
40 | | - call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %10) |
| 47 | +._crit_edge.._crit_edge_crit_edge: ; preds = %._crit_edge |
| 48 | + br label %._crit_edge |
| 49 | + |
| 50 | +7: ; preds = %._crit_edge |
| 51 | + %.assembled.vect934 = bitcast <8 x float> %vectorized_binary1126 to <8 x i32> |
| 52 | + call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 32, i32 1, i32 1, i32 1, i1 false, i1 false, i32 0, <8 x i32> %.assembled.vect934) |
| 53 | + %.assembled.vect950 = bitcast <8 x float> %vectorized_binary1105 to <8 x i32> |
| 54 | + call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 32, i32 1, i32 1, i32 1, i1 false, i1 false, i32 0, <8 x i32> %.assembled.vect950) |
41 | 55 | ret void |
42 | 56 | } |
43 | 57 |
|
44 | | -declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) |
45 | | - |
46 | 58 | declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>) |
47 | 59 |
|
48 | 60 | ; uselistorder directives |
49 | 61 | uselistorder void (i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)* @llvm.genx.GenISA.LSC2DBlockWrite.v8i32, { 1, 0 } |
50 | 62 |
|
51 | 63 | !igc.functions = !{!0} |
| 64 | +!IGCMetadata = !{!4} |
52 | 65 |
|
53 | | -!0 = !{void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i32, i32, i32, i32, i32)* bitcast (void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>)* @_attn_fwd to void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i32, i32, i32, i32, i32)*), !1} |
54 | | -!1 = !{!2, !3, !16} |
55 | | -!2 = !{!"function_type", i32 0} |
56 | | -!3 = !{!"implicit_arg_desc", !4, !5, !6, !8, !10, !12, !14} |
57 | | -!4 = !{i32 0} |
58 | | -!5 = !{i32 1} |
59 | | -!6 = !{i32 15, !7} |
60 | | -!7 = !{!"explicit_arg_num", i32 0} |
61 | | -!8 = !{i32 15, !9} |
62 | | -!9 = !{!"explicit_arg_num", i32 1} |
63 | | -!10 = !{i32 15, !11} |
64 | | -!11 = !{!"explicit_arg_num", i32 2} |
65 | | -!12 = !{i32 15, !13} |
66 | | -!13 = !{!"explicit_arg_num", i32 4} |
67 | | -!14 = !{i32 15, !15} |
68 | | -!15 = !{!"explicit_arg_num", i32 5} |
69 | | -!16 = !{!"sub_group_size", i32 16} |
| 66 | +!0 = distinct !{void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i32, i32, i32, i32, i32)* @_attn_fwd, !1} |
| 67 | +!1 = distinct !{!2, !3} |
| 68 | +!2 = distinct !{!"function_type", i32 0} |
| 69 | +!3 = distinct !{!"sub_group_size", i32 16} |
| 70 | +!4 = distinct !{!"ModuleMD", !5} |
| 71 | +!5 = distinct !{!"FuncMD", !6, !7} |
| 72 | +!6 = distinct !{!"FuncMDMap[0]", void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i32, i32, i32, i32, i32)* @_attn_fwd} |
| 73 | +!7 = distinct !{!"FuncMDValue[0]", !8} |
| 74 | +!8 = distinct !{!"resAllocMD", !9} |
| 75 | +!9 = distinct !{!"argAllocMDList", !10} |
| 76 | +!10 = distinct !{!"argAllocMDListVec[0]"} |
0 commit comments