|
189 | 189 | "gemm_fp32_mlir_vector_kernel_32_sve": { |
190 | 190 | "fp32_3x1024_omp_2_mlir": { |
191 | 191 | "type": "IR-GEN", |
192 | | - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 192 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
193 | 193 | "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
194 | 194 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
195 | 195 | "extensions": [ "asimd" ] |
196 | 196 | }, |
197 | 197 | "fp32_3x1024_omp_4_mlir": { |
198 | 198 | "type": "IR-GEN", |
199 | | - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 199 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
200 | 200 | "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
201 | 201 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
202 | 202 | "extensions": [ "asimd" ] |
203 | 203 | }, |
204 | 204 | "fp32_3x1024_omp_8_mlir": { |
205 | 205 | "type": "IR-GEN", |
206 | | - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 206 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
207 | 207 | "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
208 | 208 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
209 | 209 | "extensions": [ "asimd" ] |
210 | 210 | }, |
211 | 211 | "fp32_3x1024_omp_16_mlir": { |
212 | 212 | "type": "IR-GEN", |
213 | | - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 213 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
214 | 214 | "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
215 | 215 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
216 | 216 | "extensions": [ "asimd" ] |
|
220 | 220 | "mlp_fp32_mlir_vector_kernel_32_sve": { |
221 | 221 | "fp32_3x1024_omp_2_mlir": { |
222 | 222 | "type": "IR-GEN", |
223 | | - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 223 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
224 | 224 | "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
225 | 225 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
226 | 226 | "extensions": [ "asimd" ] |
227 | 227 | }, |
228 | 228 | "fp32_3x1024_omp_4_mlir": { |
229 | 229 | "type": "IR-GEN", |
230 | | - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 230 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
231 | 231 | "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
232 | 232 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
233 | 233 | "extensions": [ "asimd" ] |
234 | 234 | }, |
235 | 235 | "fp32_3x1024_omp_8_mlir": { |
236 | 236 | "type": "IR-GEN", |
237 | | - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 237 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
238 | 238 | "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
239 | 239 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
240 | 240 | "extensions": [ "asimd" ] |
241 | 241 | }, |
242 | 242 | "fp32_3x1024_omp_16_mlir": { |
243 | 243 | "type": "IR-GEN", |
244 | | - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 244 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
245 | 245 | "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
246 | 246 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ], |
247 | 247 | "extensions": [ "asimd" ] |
|
0 commit comments