|
1 | | -// RUN: mlir-opt -xegpu-subgroup-distribute -canonicalize -cse -split-input-file %s | FileCheck %s |
| 1 | +// RUN: mlir-opt -xegpu-subgroup-distribute -allow-unregistered-dialect -canonicalize -cse -split-input-file %s | FileCheck %s |
2 | 2 |
|
3 | 3 | // CHECK-LABEL: gpu.func @store_nd_1d |
4 | 4 | // CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) { |
@@ -265,6 +265,28 @@ gpu.module @test { |
265 | 265 | } |
266 | 266 | } |
267 | 267 |
|
| 268 | +// ----- |
| 269 | +// Explicitly check that the tensor descriptor distributed through update_nd_offset retains the layouts.
| 270 | +// CHECK-LABEL: gpu.func @check_update_nd_offset_distributed_tensor_desc |
| 271 | +// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> |
| 272 | +// CHECK-SAME: (!xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>) { |
| 273 | +// CHECK: %[[T0:.*]] = "some_op"() : () -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> |
| 274 | +// CHECK: gpu.yield %[[T0]] : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> |
| 275 | +// CHECK: } |
| 276 | +// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]] : |
| 277 | +// CHECK-SAME: !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf32> {resolve_simt_type_mismatch} |
| 278 | +// CHECK: xegpu.update_nd_offset %[[T1]], [%{{.*}}] : !xegpu.tensor_desc<16x16xf32> |
| 279 | +gpu.module @test {
| 280 | + gpu.func @check_update_nd_offset_distributed_tensor_desc() {
| 281 | + %c32 = arith.constant 32 : index
|     | + // The stored value carries an explicit lane layout annotation so the
|     | + // store_nd below is eligible for subgroup distribution.
| 282 | + %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} dense<1.000000e+00> : vector<16x16xf32>
|     | + // "some_op" is an unregistered op (hence -allow-unregistered-dialect in
|     | + // the RUN line) standing in for any producer of a tensor_desc that is
|     | + // annotated with a lane layout.
| 283 | + %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
|     | + // Per the CHECK lines above: after distribution the layout-annotated
|     | + // descriptor yielded by the warp op is resolved through a
|     | + // builtin.unrealized_conversion_cast {resolve_simt_type_mismatch}, and
|     | + // update_nd_offset then operates on the layout-less SIMT descriptor —
|     | + // i.e. the layout is retained across the warp boundary, not dropped.
| 284 | + %1 = xegpu.update_nd_offset %0, [%c32, %c32] : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
| 285 | + xegpu.store_nd %cst, %1 : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
| 286 | + gpu.return
| 287 | + }
| 288 | +}
| 289 | + |
268 | 290 | // ----- |
269 | 291 | // CHECK-LABEL: gpu.func @prefetch_1d |
270 | 292 | // CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) { |
|
0 commit comments