@@ -892,17 +892,25 @@ func.func @store_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>, %arg1: ve
892892}
893893
894894// -----
895- func.func @simt_store_matrix_vector_nonlinear (%arg0: !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ]>>, %arg1: vector <2 x16 xf32 >) {
896- // expected-error@+1 {{With subgroup_block_io, lane data must be linear }}
895+ func.func @simt_store_matrix_vector_nonlinear (%arg0: !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [32 , 1 ]>>, %arg1: vector <2 x16 xf32 >) {
896+ // expected-error@+1 {{With subgroup_block_io, accessed data must be contiguous and coalesced }}
897897 xegpu.store_matrix %arg1 , %arg0 [0 , 0 ] {subgroup_block_io , layout = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [2 , 1 ]>} :
898- vector <2 x16 xf32 >, !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ]>>
898+ vector <2 x16 xf32 >, !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [32 , 1 ]>>
899899 return
900900}
901901
902902// -----
903- func.func @simt_store_matrix_vector_noncoalesced (%arg0: !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ]>>, %arg1: vector <16 x2 xf32 >) {
904- // expected-error@+1 {{With subgroup_block_io, lane data must be coalesced }}
903+ func.func @simt_store_matrix_vector_noncoalesced (%arg0: !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ], block = [ 1 , 16 ] >>, %arg1: vector <16 x2 xf32 >) {
904+ // expected-error@+1 {{With subgroup_block_io, the distributed dimensions must be contiguous }}
905905 xegpu.store_matrix %arg1 , %arg0 [0 , 0 ] {subgroup_block_io , layout = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [1 , 2 ]>} :
906- vector <16 x2 xf32 >, !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ]>>
906+ vector <16 x2 xf32 >, !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ], block = [1 , 16 ]>>
907+ return
908+ }
909+
910+ // -----
911+ func.func @simt_store_matrix_vector_block_mismatch (%arg0: !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [32 , 1 ], block = [1 , 17 ]>>, %arg1: vector <16 x2 xf32 >) {
912+ // expected-error@+1 {{With subgroup_block_io, the block shape must match the lane layout}}
913+ xegpu.store_matrix %arg1 , %arg0 [0 , 0 ] {subgroup_block_io , layout = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>} :
914+ vector <16 x2 xf32 >, !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [32 , 1 ], block = [1 , 17 ]>>
907915 return
908916}
0 commit comments