@@ -561,3 +561,62 @@ func.func @test_vector_inner_reduction(%arg0: vector<16x16xf32>, %arg1: !xegpu.t
561561 xegpu.store_nd %0 , %arg1 : vector <16 xf32 >, !xegpu.tensor_desc <16 xf32 >
562562 return
563563}

// -----
// Checks that the lane layout assigned to the result of update_nd_offset
// is propagated from the tensor_desc it updates (1-D case: 16 lanes, 1 elt each).
// CHECK: function: update_nd_offset_1d:
// CHECK: op : %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16xf32>
// CHECK-NEXT: layout for result #0: lane_layout: [16], lane_data: [1]
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}}[%{{.*}}] : memref<256xf32> -> !xegpu.tensor_desc<16xf32>
// CHECK-NEXT: layout for result #0: lane_layout: [16], lane_data: [1]
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.update_nd_offset %[[T0]], [%{{.*}}] : !xegpu.tensor_desc<16xf32>
// CHECK-NEXT: layout for result #0: lane_layout: [16], lane_data: [1]
func.func @update_nd_offset_1d(%arg0: memref<256xf32>) {
  %c0 = arith.constant 0 : index
  %c32 = arith.constant 32 : index
  %1 = arith.constant dense<1.000000e+00> : vector<16xf32>
  %0 = xegpu.create_nd_tdesc %arg0[%c0] : memref<256xf32> -> !xegpu.tensor_desc<16xf32>
  %2 = xegpu.update_nd_offset %0, [%c32] : !xegpu.tensor_desc<16xf32>
  xegpu.store_nd %1, %2 : vector<16xf32>, !xegpu.tensor_desc<16xf32>
  return
}

// -----
// 2-D variant of the update_nd_offset layout-propagation check:
// a 16x16 store distributes as lane_layout [1, 16] with lane_data [1, 1].
// CHECK: function: update_nd_offset_2d:
// CHECK: op : %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16x16xf32>
// CHECK-NEXT: layout for result #0: lane_layout: [1, 16], lane_data: [1, 1]
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}}[%{{.*}}] : memref<256x256xf32> -> !xegpu.tensor_desc<16x16xf32>
// CHECK-NEXT: layout for result #0: lane_layout: [1, 16], lane_data: [1, 1]
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.update_nd_offset %[[T0]], [%{{.*}}] : !xegpu.tensor_desc<16x16xf32>
// CHECK-NEXT: layout for result #0: lane_layout: [1, 16], lane_data: [1, 1]
func.func @update_nd_offset_2d(%arg0: memref<256x256xf32>) {
  %c0 = arith.constant 0 : index
  %c32 = arith.constant 32 : index
  %1 = arith.constant dense<1.000000e+00> : vector<16x16xf32>
  %0 = xegpu.create_nd_tdesc %arg0[%c0, %c0] : memref<256x256xf32> -> !xegpu.tensor_desc<16x16xf32>
  %2 = xegpu.update_nd_offset %0, [%c32, %c32] : !xegpu.tensor_desc<16x16xf32>
  xegpu.store_nd %1, %2 : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32>
  return
}

// -----
// Checks that a tensor_desc consumed only by prefetch_nd still receives a
// 2-D lane layout (the index constant itself gets no layout assignment).
// CHECK: function: prefetch_2d:
// CHECK: layout for result #0: Not assigned.
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}}[%{{.*}}] : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16>
// CHECK-NEXT: layout for result #0: lane_layout: [1, 16], lane_data: [1, 1]
func.func @prefetch_2d(%arg0: memref<256x256xf16>) {
  %c0 = arith.constant 0 : index
  %0 = xegpu.create_nd_tdesc %arg0[%c0, %c0] : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16>
  xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16>
  return
}

// -----
// 1-D variant of the prefetch_nd layout check: the prefetched tensor_desc
// is assigned lane_layout [16] with lane_data [1].
// CHECK: function: prefetch_1d:
// CHECK: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}}[%{{.*}}] : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
// CHECK-NEXT: layout for result #0: lane_layout: [16], lane_data: [1]
func.func @prefetch_1d(%arg0: memref<256xf16>) {
  %c0 = arith.constant 0 : index
  %0 = xegpu.create_nd_tdesc %arg0[%c0] : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
  xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16>
  return
}