Skip to content

Commit 4a6f72f

Browse files
committed
add test
1 parent 073bd22 commit 4a6f72f

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

mlir/test/Dialect/XeGPU/subgroup-distribute.mlir

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt -xegpu-subgroup-distribute -canonicalize -cse -split-input-file %s | FileCheck %s
1+
// RUN: mlir-opt -xegpu-subgroup-distribute -allow-unregistered-dialect -canonicalize -cse -split-input-file %s | FileCheck %s
22

33
// CHECK-LABEL: gpu.func @store_nd_1d
44
// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) {
@@ -265,6 +265,28 @@ gpu.module @test {
265265
}
266266
}
267267

268+
// -----
269+
// Explicitly check that the distributed tensor descriptor used by update_nd_offset retains its layout.
270+
// CHECK-LABEL: gpu.func @check_update_nd_offset_distributed_tensor_desc
271+
// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] ->
272+
// CHECK-SAME: (!xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>) {
273+
// CHECK: %[[T0:.*]] = "some_op"() : () -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
274+
// CHECK: gpu.yield %[[T0]] : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
275+
// CHECK: }
276+
// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]] :
277+
// CHECK-SAME: !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf32> {resolve_simt_type_mismatch}
278+
// CHECK: xegpu.update_nd_offset %[[T1]], [%{{.*}}] : !xegpu.tensor_desc<16x16xf32>
279+
gpu.module @test {
280+
gpu.func @check_update_nd_offset_distributed_tensor_desc() {
281+
%c32 = arith.constant 32 : index
282+
%cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} dense<1.000000e+00> : vector<16x16xf32>
283+
%0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
284+
%1 = xegpu.update_nd_offset %0, [%c32, %c32] : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
285+
xegpu.store_nd %cst, %1 : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
286+
gpu.return
287+
}
288+
}
289+
268290
// -----
269291
// CHECK-LABEL: gpu.func @prefetch_1d
270292
// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) {

0 commit comments

Comments
 (0)