|
9 | 9 | !DummyDDRT = memref<32000x1x1x1xf16, @DDR> |
10 | 10 | !DummyCMX0T = memref<32000x1x1x1xf16, [@CMX_NN, 0]> |
11 | 11 | !DummyCMX1T = memref<32000x1x1x1xf16, [@CMX_NN, 1]> |
12 | | -!DummyCMX0TopK = memref<16000x1x1x1xsi32, [@CMX_NN, 0]> |
13 | | -!DummyCMX1TopK = memref<16000x1x1x1xsi32, [@CMX_NN, 1]> |
| 12 | +!DummyCMX0Convert = memref<32000x1x1x1xf32, [@CMX_NN, 0]> |
| 13 | +!DummyCMX1Convert = memref<32000x1x1x1xf32, [@CMX_NN, 1]> |
14 | 14 |
|
15 | 15 | // This test checks the following schedule |
16 | | -// Barriers : 0 1 2 3 4 5 |
17 | | -// Cluster 0: | [ DMA ] | [ DMA ] | [ Softmax] | [ TopK ] | [ DMA ] | [ Softmax ] |
18 | | -// Cluster 1: | [ DMA ] | [ Softmax] | [ TopK ] |
| 16 | +// Barriers : 0 1 2 3 4 5 |
| 17 | +// Cluster 0: | [ DMA ] | [ DMA ] | [ Softmax] | [ Convert ] | [ DMA ] | [ Softmax ] |
| 18 | +// Cluster 1: | [ DMA ] | [ Softmax] | [ Convert ] |
19 | 19 | // Other : [ SyncDMA ] | |
20 | 20 | // |
21 | 21 |
|
22 | 22 | module @subgraph attributes {config.arch = #config.arch_kind<NPU40XX>, config.compilationMode = #config.compilation_mode<DefaultHW>} { |
23 | 23 | VPURT.SW.Runtime entryPoint : @VPU.SW::@runtime stack_configuration : [4096, 4096, 4096, 4096, 4096, 4096] |
24 | 24 | module @VPU.SW { |
25 | 25 | func.func private @builtin_SoftMax(memref<*xf16, @CMX_NN>, memref<*xf16, @CMX_NN>, i64, i64) attributes {VPU.kernel_code = "softmax.cpp", VPU.kernel_entry = "softmax", VPU.task_type = @COMPUTE} |
26 | | - func.func private @builtin_TopK(memref<*xf16, @CMX_NN>, memref<*xf16, @CMX_NN>, memref<*xsi32, @CMX_NN>, i64, i64, i64, i64) attributes {VPU.kernel_code = "topk.cpp", VPU.kernel_entry = "topk", VPU.task_type = @COMPUTE} |
| 26 | + func.func private @builtin_Convert(memref<*xf16, @CMX_NN>, memref<*xf32, @CMX_NN>) attributes {VPU.kernel_code = "convert.cpp", VPU.kernel_entry = "convert", VPU.kernel_name = "convert", VPU.task_type = @COMPUTE} |
27 | 27 | func.func private @runtime() attributes {VPU.kernel_code = "nnActEntry"} |
28 | 28 | } |
29 | 29 | config.Resources {activity_factor = 0.078934384661980161 : f64} 2 of @NCE at 1.700000e+03 MHz { |
@@ -105,18 +105,18 @@ module @subgraph attributes {config.arch = #config.arch_kind<NPU40XX>, config.co |
105 | 105 | } |
106 | 106 | } |
107 | 107 |
|
108 | | - %cmx0_top_k = VPURT.DeclareBuffer <CMX_NN> [0] <0> -> !DummyCMX0TopK |
| 108 | + %cmx0_convert = VPURT.DeclareBuffer <CMX_NN> [0] <0> -> !DummyCMX0Convert |
109 | 109 | VPURT.Task waits(%5: !VPURT.Barrier) updates(%6 : !VPURT.Barrier) { |
110 | | - %results:2 = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 2, 0, 0>} @VPU.SW::@builtin_TopK inputs(%cmx_0 as %arg3: !DummyCMX0T) outputs(%cmx_0 as %arg4: !DummyCMX0T, %cmx0_top_k as %arg5: !DummyCMX0TopK) on tile 0 -> (!DummyCMX0T, !DummyCMX0TopK) { |
111 | | - VPUIP.SW.Kernel.run {attrs = [1, 0, 0, 1]}(%arg3, %arg4, %arg5) : !DummyCMX0T, !DummyCMX0T, !DummyCMX0TopK |
112 | | - } |
| 110 | + %results = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 1, 0, 0>} @VPU.SW::@builtin_Convert inputs(%cmx_0 as %arg3: !DummyCMX0T) outputs(%cmx0_convert as %arg4: !DummyCMX0Convert) on tile 0 -> (!DummyCMX0Convert) { |
| 111 | + VPUIP.SW.Kernel.run {attrs = [[]]}(%arg3, %arg4) : !DummyCMX0T, !DummyCMX0Convert |
| 112 | + } |
113 | 113 | } |
114 | 114 |
|
115 | | - %cmx1_top_k = VPURT.DeclareBuffer <CMX_NN> [1] <0> -> !DummyCMX1TopK |
| 115 | + %cmx1_convert = VPURT.DeclareBuffer <CMX_NN> [1] <0> -> !DummyCMX1Convert |
116 | 116 | VPURT.Task waits(%5: !VPURT.Barrier) updates(%6 : !VPURT.Barrier) { |
117 | | - %results:2 = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 2, 0, 0>} @VPU.SW::@builtin_TopK inputs(%cmx_1 as %arg3: !DummyCMX1T) outputs(%cmx_1 as %arg4: !DummyCMX1T, %cmx1_top_k as %arg5: !DummyCMX1TopK) on tile 1 -> (!DummyCMX1T, !DummyCMX1TopK) { |
118 | | - VPUIP.SW.Kernel.run {attrs = [1, 0, 0, 1]}(%arg3, %arg4, %arg5) : !DummyCMX1T, !DummyCMX1T, !DummyCMX1TopK |
119 | | - } |
| 117 | + %results = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 1, 0, 0>} @VPU.SW::@builtin_Convert inputs(%cmx_1 as %arg3: !DummyCMX1T) outputs(%cmx1_convert as %arg4: !DummyCMX1Convert) on tile 1 -> (!DummyCMX1Convert) { |
| 118 | + VPUIP.SW.Kernel.run {attrs = [[]]}(%arg3, %arg4) : !DummyCMX1T, !DummyCMX1Convert |
| 119 | + } |
120 | 120 | } |
121 | 121 |
|
122 | 122 | VPURT.Task waits(%6: !VPURT.Barrier) updates(%7 : !VPURT.Barrier) { |
@@ -158,15 +158,15 @@ module @subgraph attributes {config.arch = #config.arch_kind<NPU40XX>, config.co |
158 | 158 | // CHECK: VPURT.Task { |
159 | 159 | // CHECK-NEXT: VPUIP.SW.Kernel |
160 | 160 | // CHECK-SAME: skipProfiling |
161 | | - // CHECK-SAME: @VPU.SW::@builtin_TopK |
| 161 | + // CHECK-SAME: @VPU.SW::@builtin_Convert |
162 | 162 |
|
163 | 163 | // CHECK: VPURT.Task waits([[BARRIER_5]] : !VPURT.Barrier) updates([[BARRIER_6]] : !VPURT.Barrier) { |
164 | 164 | // CHECK: VPUIP.SW.Kernel |
165 | | - // CHECK-SAME: @VPU.SW::@builtin_TopK |
| 165 | + // CHECK-SAME: @VPU.SW::@builtin_Convert |
166 | 166 |
|
167 | 167 | // CHECK: VPURT.Task waits([[BARRIER_5]] : !VPURT.Barrier) updates([[BARRIER_6]] : !VPURT.Barrier) { |
168 | 168 | // CHECK: VPUIP.SW.Kernel |
169 | | - // CHECK-SAME: @VPU.SW::@builtin_TopK |
| 169 | + // CHECK-SAME: @VPU.SW::@builtin_Convert |
170 | 170 |
|
171 | 171 | // CHECK: VPURT.Task waits([[BARRIER_6]] : !VPURT.Barrier) updates([[BARRIER_7]] : !VPURT.Barrier) { |
172 | 172 | // CHECK-NEXT: VPUIP.NNDMA |
|
0 commit comments