|
1 | 1 | // RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=inst" -split-input-file %s | FileCheck %s |
2 | 2 |
|
| 3 | + |
| 4 | +// CHECK-LABEL: func.func @load_store_no_array_len( |
| 5 | +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x32xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<8x32xf32>) { |
| 6 | +// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32> |
| 7 | +// CHECK: %[[TDESC_SRC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout<inst_data = [8, 16]>> |
| 8 | +// CHECK: %[[TDESC_DST:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout<inst_data = [8, 16]>> |
| 9 | +// CHECK: %[[LOADED:.*]] = xegpu.load_nd %[[TDESC_SRC]] {layout_result_0 = #xegpu.layout<inst_data = [8, 16]>} : |
| 10 | +// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout<inst_data = [8, 16]>> -> vector<8x32xf32> |
| 11 | +// CHECK: xegpu.store_nd %[[LOADED]], %[[TDESC_DST]] : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout<inst_data = [8, 16]>> |
| 12 | +gpu.module @test { |
| 13 | +// Although the uArch allows 8x32 inst data using block count (or array_len), |
| 14 | +// it is up to optimization passes to decide on the block count usage, so the expected inst_data here stays [8, 16]. |
| 15 | +func.func @load_store_no_array_len(%arg0: memref<8x32xf32>, %arg1: memref<8x32xf32>) { |
| 16 | + %cst = arith.constant dense<0.000000e+00> : vector<8x16xf32> |
| 17 | + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> |
| 18 | + %1 = xegpu.create_nd_tdesc %arg1 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> |
| 19 | + %2 = xegpu.load_nd %0 : !xegpu.tensor_desc<8x32xf32> -> vector<8x32xf32> |
| 20 | + xegpu.store_nd %2, %1 : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32> |
| 21 | + return |
| 22 | +} |
| 23 | +} |
| 24 | + |
| 25 | +// ----- |
| 26 | + |
3 | 27 | // CHECK-LABEL: func.func @dpas_f16( |
4 | 28 | // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { |
5 | 29 | // CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout<inst_data = [8, 16]>} dense<0.000000e+00> : vector<8x16xf32> |
|
0 commit comments