Skip to content

Commit e96f0f8

Browse files
committed
Fix mid-execution MLIR test by replacing the TopK kernel with Convert
1 parent 51c2ac3 commit e96f0f8

File tree

1 file changed

+17
-17
lines changed

1 file changed

+17
-17
lines changed

tests/lit/NPU/dialect/VPUIP/passes/add_sw_kernel_instruction_prefetch_mid_execution_40XX.mlir

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,21 @@
99
!DummyDDRT = memref<32000x1x1x1xf16, @DDR>
1010
!DummyCMX0T = memref<32000x1x1x1xf16, [@CMX_NN, 0]>
1111
!DummyCMX1T = memref<32000x1x1x1xf16, [@CMX_NN, 1]>
12-
!DummyCMX0TopK = memref<16000x1x1x1xsi32, [@CMX_NN, 0]>
13-
!DummyCMX1TopK = memref<16000x1x1x1xsi32, [@CMX_NN, 1]>
12+
!DummyCMX0Convert = memref<32000x1x1x1xf32, [@CMX_NN, 0]>
13+
!DummyCMX1Convert = memref<32000x1x1x1xf32, [@CMX_NN, 1]>
1414

1515
// This test checks following schedule
16-
// Barriers : 0 1 2 3 4 5
17-
// Cluster 0: | [ DMA ] | [ DMA ] | [ Softmax] | [ TopK ] | [ DMA ] | [ Softmax ]
18-
// Cluster 1: | [ DMA ] | [ Softmax] | [ TopK ]
16+
// Barriers : 0 1 2 3 4 5
17+
// Cluster 0: | [ DMA ] | [ DMA ] | [ Softmax] | [ Convert ] | [ DMA ] | [ Softmax ]
18+
// Cluster 1: | [ DMA ] | [ Softmax] | [ Convert ]
1919
// Other : [ SyncDMA ] |
2020
//
2121

2222
module @subgraph attributes {config.arch = #config.arch_kind<NPU40XX>, config.compilationMode = #config.compilation_mode<DefaultHW>} {
2323
VPURT.SW.Runtime entryPoint : @VPU.SW::@runtime stack_configuration : [4096, 4096, 4096, 4096, 4096, 4096]
2424
module @VPU.SW {
2525
func.func private @builtin_SoftMax(memref<*xf16, @CMX_NN>, memref<*xf16, @CMX_NN>, i64, i64) attributes {VPU.kernel_code = "softmax.cpp", VPU.kernel_entry = "softmax", VPU.task_type = @COMPUTE}
26-
func.func private @builtin_TopK(memref<*xf16, @CMX_NN>, memref<*xf16, @CMX_NN>, memref<*xsi32, @CMX_NN>, i64, i64, i64, i64) attributes {VPU.kernel_code = "topk.cpp", VPU.kernel_entry = "topk", VPU.task_type = @COMPUTE}
26+
func.func private @builtin_Convert(memref<*xf16, @CMX_NN>, memref<*xf32, @CMX_NN>) attributes {VPU.kernel_code = "convert.cpp", VPU.kernel_entry = "convert", VPU.kernel_name = "convert", VPU.task_type = @COMPUTE}
2727
func.func private @runtime() attributes {VPU.kernel_code = "nnActEntry"}
2828
}
2929
config.Resources {activity_factor = 0.078934384661980161 : f64} 2 of @NCE at 1.700000e+03 MHz {
@@ -105,18 +105,18 @@ module @subgraph attributes {config.arch = #config.arch_kind<NPU40XX>, config.co
105105
}
106106
}
107107

108-
%cmx0_top_k = VPURT.DeclareBuffer <CMX_NN> [0] <0> -> !DummyCMX0TopK
108+
%cmx0_convert = VPURT.DeclareBuffer <CMX_NN> [0] <0> -> !DummyCMX0Convert
109109
VPURT.Task waits(%5: !VPURT.Barrier) updates(%6 : !VPURT.Barrier) {
110-
%results:2 = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 2, 0, 0>} @VPU.SW::@builtin_TopK inputs(%cmx_0 as %arg3: !DummyCMX0T) outputs(%cmx_0 as %arg4: !DummyCMX0T, %cmx0_top_k as %arg5: !DummyCMX0TopK) on tile 0 -> (!DummyCMX0T, !DummyCMX0TopK) {
111-
VPUIP.SW.Kernel.run {attrs = [1, 0, 0, 1]}(%arg3, %arg4, %arg5) : !DummyCMX0T, !DummyCMX0T, !DummyCMX0TopK
112-
}
110+
%results = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 1, 0, 0>} @VPU.SW::@builtin_Convert inputs(%cmx_0 as %arg3: !DummyCMX0T) outputs(%cmx0_convert as %arg4: !DummyCMX0Convert) on tile 0 -> (!DummyCMX0Convert) {
111+
VPUIP.SW.Kernel.run {attrs = [[]]}(%arg3, %arg4) : !DummyCMX0T, !DummyCMX0Convert
112+
}
113113
}
114114

115-
%cmx1_top_k = VPURT.DeclareBuffer <CMX_NN> [1] <0> -> !DummyCMX1TopK
115+
%cmx1_convert = VPURT.DeclareBuffer <CMX_NN> [1] <0> -> !DummyCMX1Convert
116116
VPURT.Task waits(%5: !VPURT.Barrier) updates(%6 : !VPURT.Barrier) {
117-
%results:2 = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 2, 0, 0>} @VPU.SW::@builtin_TopK inputs(%cmx_1 as %arg3: !DummyCMX1T) outputs(%cmx_1 as %arg4: !DummyCMX1T, %cmx1_top_k as %arg5: !DummyCMX1TopK) on tile 1 -> (!DummyCMX1T, !DummyCMX1TopK) {
118-
VPUIP.SW.Kernel.run {attrs = [1, 0, 0, 1]}(%arg3, %arg4, %arg5) : !DummyCMX1T, !DummyCMX1T, !DummyCMX1TopK
119-
}
117+
%results = VPUIP.SW.Kernel {resultSegmentSizes = array<i32: 1, 0, 0>} @VPU.SW::@builtin_Convert inputs(%cmx_1 as %arg3: !DummyCMX1T) outputs(%cmx1_convert as %arg4: !DummyCMX1Convert) on tile 1 -> (!DummyCMX1Convert) {
118+
VPUIP.SW.Kernel.run {attrs = [[]]}(%arg3, %arg4) : !DummyCMX1T, !DummyCMX1Convert
119+
}
120120
}
121121

122122
VPURT.Task waits(%6: !VPURT.Barrier) updates(%7 : !VPURT.Barrier) {
@@ -158,15 +158,15 @@ module @subgraph attributes {config.arch = #config.arch_kind<NPU40XX>, config.co
158158
// CHECK: VPURT.Task {
159159
// CHECK-NEXT: VPUIP.SW.Kernel
160160
// CHECK-SAME: skipProfiling
161-
// CHECK-SAME: @VPU.SW::@builtin_TopK
161+
// CHECK-SAME: @VPU.SW::@builtin_Convert
162162

163163
// CHECK: VPURT.Task waits([[BARRIER_5]] : !VPURT.Barrier) updates([[BARRIER_6]] : !VPURT.Barrier) {
164164
// CHECK: VPUIP.SW.Kernel
165-
// CHECK-SAME: @VPU.SW::@builtin_TopK
165+
// CHECK-SAME: @VPU.SW::@builtin_Convert
166166

167167
// CHECK: VPURT.Task waits([[BARRIER_5]] : !VPURT.Barrier) updates([[BARRIER_6]] : !VPURT.Barrier) {
168168
// CHECK: VPUIP.SW.Kernel
169-
// CHECK-SAME: @VPU.SW::@builtin_TopK
169+
// CHECK-SAME: @VPU.SW::@builtin_Convert
170170

171171
// CHECK: VPURT.Task waits([[BARRIER_6]] : !VPURT.Barrier) updates([[BARRIER_7]] : !VPURT.Barrier) {
172172
// CHECK-NEXT: VPUIP.NNDMA

0 commit comments

Comments
 (0)