@@ -330,10 +330,12 @@ gpu.module @test_distribution {
330330    //CHECK: [[l_off_x:%.+]] = index.mul [[id_x]], [[c32_1]] 
331331    //CHECK: [[c0:%.+]] = arith.constant 0 : index 
332332    //CHECK: [[c0_1:%.+]] = arith.constant 0 : index 
333+     //CHECK: [[l_off_y_0:%.+]] = arith.addi [[l_off_y]], [[c0]] : index 
334+     //CHECK: [[l_off_x_0:%.+]] = arith.addi [[l_off_x]], [[c0_1]] : index 
333335    //CHECK: [[c64:%.+]] = arith.constant 64 : index 
334-     //CHECK: [[off_y:%.+]] = index.remu [[l_off_y]], [[c64]] 
336+     //CHECK: [[off_y:%.+]] = index.remu [[l_off_y_0]], [[c64]] 
335337    //CHECK: [[c128:%.+]] = arith.constant 128 : index 
336-     //CHECK: [[off_x:%.+]] = index.remu [[l_off_x]], [[c128]] 
338+     //CHECK: [[off_x:%.+]] = index.remu [[l_off_x_0]], [[c128]] 
337339    //CHECK: xegpu.load_matrix [[mdesc]][[[off_y]], [[off_x]]] <{layout = #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>}>: !xegpu.mem_desc<64x128xf32>, index, index -> vector<32x32xf32> 
338340    %0  = xegpu.create_mem_desc  %arg0  : memref <32768 xi8 , 3 > -> !xegpu.mem_desc <64 x128 xf32 >
339341    %1  = xegpu.load_matrix  %0 [0 , 0 ] <{layout  = #xegpu.layout <sg_layout  = [2 , 4 ], sg_data  = [32 , 32 ], lane_layout  = [2 , 8 ], lane_data  = [1 , 1 ]>}>: !xegpu.mem_desc <64 x128 xf32 > -> vector <64 x128 xf32 >
@@ -352,11 +354,13 @@ gpu.module @test_distribution {
352354    //CHECK: [[id_y:%.+]] = affine.apply #map()[[[sgid]]] 
353355    //CHECK: [[id_x:%.+]] = affine.apply #map1()[[[sgid]]] 
354356    //CHECK: [[c32:%.+]] = arith.constant 32 : index 
355-     //CHECK: [[l_off_y:%.+]] = index.mul [[id_y]], [[c32]] 
357+     //CHECK: [[l_off_y_0:%.+]] = index.mul [[id_y]], [[c32]] 
356358    //CHECK: [[c32_1:%.+]] = arith.constant 32 : index 
357-     //CHECK: [[l_off_x:%.+]] = index.mul [[id_x]], [[c32_1]] 
359+     //CHECK: [[l_off_x_0:%.+]] = index.mul [[id_x]], [[c32_1]] 
358360    //CHECK: [[c0:%.+]] = arith.constant 0 : index 
359361    //CHECK: [[c0_2:%.+]] = arith.constant 0 : index 
362+     //CHECK: [[l_off_y:%.+]] = arith.addi [[l_off_y_0]], [[c0]] : index 
363+     //CHECK: [[l_off_x:%.+]] = arith.addi [[l_off_x_0]], [[c0_2]] : index 
360364    //CHECK: [[c64:%.+]] = arith.constant 64 : index 
361365    //CHECK: [[off_y:%.+]] = index.remu [[l_off_y]], [[c64]] 
362366    //CHECK: [[c128:%.+]] = arith.constant 128 : index 
@@ -413,10 +417,11 @@ gpu.module @test_distribution {
413417    //CHECK: [[sgId:%.+]] = gpu.subgroup_id : index 
414418    //CHECK-DAG: [[IDY:%.+]] = affine.apply #map2()[[[sgId]]] 
415419    //CHECK-DAG: [[c32:%.+]] = arith.constant 32 : index 
416-     //CHECK-DAG: [[LY:%.+]] = index.mul [[IDY]], [[c32]] 
420+     //CHECK-DAG: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]] 
417421    //CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index 
422+     //CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index 
418423    //CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index 
419-     //CHECK-DAG: [[MODY:%.+]] = index.remu [[LY]], [[c128]] 
424+     //CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]] 
420425    //CHECK-DAG: [[BASE:%.+]] = vector.step : vector<32xindex> 
421426    //CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex> 
422427    //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex> 
@@ -430,8 +435,9 @@ gpu.module @test_distribution {
430435    //CHECK-DAG: [[c8:%.+]] = arith.constant 8 : index 
431436    //CHECK-DAG: [[LOCALY:%.+]] = index.mul [[sgId]], [[c8]] 
432437    //CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index 
438+     //CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index 
433439    //CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index 
434-     //CHECK-DAG: [[MODY:%.+]] = index.remu [[LOCALY]], [[c128]] 
440+     //CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]] 
435441    //CHECK-DAG: [[BASE:%.+]] = vector.step : vector<8xindex> 
436442    //CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<8xindex> 
437443    //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<8xindex> 
0 commit comments