@@ -463,4 +463,68 @@ gpu.module @test_distribution {
463463 %broadcast = vector.broadcast %muli {layout_result_0 = #xegpu.layout <sg_layout = [4 , 2 , 6 , 1 ], sg_data = [1 , 1 , 1 , 32 ]>} : index to vector <4 x2 x6 x32 xindex >
464464 gpu.return
465465 }
466+
// Test: a non-splat 32x1 index constant whose rows step by 16 (0, 16, ..., 496),
// annotated with sg_layout = [32, 1] and sg_data = [1, 1].
// The expectations below look for a 1x1 base constant (dense<0>) plus a scalar
// offset derived from gpu.subgroup_id (affine.apply, index.remu, arith.muli,
// arith.addi), which is broadcast to 1x1 and added to the base constant.
// NOTE(review): captures such as C32, C1, C16 and C0 are wildcards; their names
// suggest constant values but the patterns do not enforce those values.
467+ // CHECK-LABEL: non_splat_constant_2D
468+ gpu.func @non_splat_constant_2D () {
469+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<0> : vector<1x1xindex>
470+ // CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
471+ // CHECK-DAG: affine.apply #map4()[%[[SGID]]]
472+ // CHECK-DAG: affine.apply #map5()[%[[SGID]]]
473+ // CHECK-DAG: %[[IDY:.*]] = index.remu %{{.*}}, %[[C32:.*]]
474+ // CHECK-DAG: %[[IDX:.*]] = index.remu %{{.*}}, %[[C1:.*]]
475+ // CHECK-DAG: %[[STRIDECOL:.*]] = arith.muli %[[IDY]], %[[C16:.*]] : index
476+ // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[C0:.*]], %[[STRIDECOL]] : index
477+ // CHECK-DAG: %[[STRIDEROW:.*]] = arith.muli %[[IDX]], %[[C0:.*]] : index
478+ // CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[ADD]], %[[STRIDEROW]] : index
479+ // CHECK-DAG: %[[BCAST:.*]] = vector.broadcast %[[ADDSTRIDES]] : index to vector<1x1xindex>
480+ // CHECK-DAG: arith.addi %[[CST]], %[[BCAST]] : vector<1x1xindex>
// Input under test: 32 rows, one column, consecutive rows differ by 16.
481+ %cst = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [32 , 1 ], sg_data = [1 , 1 ]>} dense <[[0 ], [16 ], [32 ], [48 ], [64 ], [80 ], [96 ], [112 ], [128 ], [144 ], [160 ], [176 ], [192 ], [208 ], [224 ], [240 ], [256 ], [272 ], [288 ], [304 ], [320 ], [336 ], [352 ], [368 ], [384 ], [400 ], [416 ], [432 ], [448 ], [464 ], [480 ], [496 ]]> : vector <32 x1 xindex >
482+ gpu.return
483+ }
484+
// Test: a non-splat 8x8 index constant where element (r, c) equals 8*r + 16*c,
// annotated with sg_layout = [4, 4] and sg_data = [2, 2].
// The expectations below look for a 2x2 base constant plus a scalar offset
// derived from gpu.subgroup_id (affine.apply, index.mul, index.remu,
// arith.muli, arith.addi), which is broadcast to 2x2 and added to the base.
// The base-constant pattern closes its dense<...> bracket explicitly so the
// wildcard is confined to the attribute payload.
// NOTE(review): captures such as C2, C8, C16 and C0 are wildcards that are
// re-bound on each use; C2_2 and C8_2 are bound but never referenced again.
485+ // CHECK-LABEL: non_splat_constant_2D_non_unit_dim
486+ gpu.func @non_splat_constant_2D_non_unit_dim () {
487+ // CHECK-DAG: %[[BASECST:.*]] = arith.constant dense<{{.*}}> : vector<2x2xindex>
488+ // CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
489+ // CHECK-DAG: %[[IDY:.*]] = affine.apply #map()[%[[SGID]]]
490+ // CHECK-DAG: %[[IDX:.*]] = affine.apply #map1()[%[[SGID]]]
491+ // CHECK-DAG: %[[MULY:.*]] = index.mul %[[IDY]], %[[C2:.*]]
492+ // CHECK-DAG: %[[C2_2:.*]] = arith.constant 2 : index
493+ // CHECK-DAG: %[[MULX:.*]] = index.mul %[[IDX]], %[[C2:.*]]
494+ // CHECK-DAG: %[[REMU_Y:.*]] = index.remu %[[MULY]], %[[C8:.*]]
495+ // CHECK-DAG: %[[C8_2:.*]] = arith.constant 8 : index
496+ // CHECK-DAG: %[[REMU_X:.*]] = index.remu %[[MULX]], %[[C8:.*]]
497+ // CHECK-DAG: %[[MUL5:.*]] = arith.muli %[[REMU_Y]], %[[C8:.*]] : index
498+ // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[C0:.*]], %[[MUL5]] : index
499+ // CHECK-DAG: %[[MUL6:.*]] = arith.muli %[[REMU_X]], %[[C16:.*]] : index
500+ // CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[ADD]], %[[MUL6]] : index
501+ // CHECK-DAG: %[[BCAST:.*]] = vector.broadcast %[[ADDSTRIDES]] : index to vector<2x2xindex>
502+ // CHECK-DAG: %[[ADDCST:.*]] = arith.addi %[[BASECST]], %[[BCAST]] : vector<2x2xindex>
// Input under test: moving down one row adds 8, moving right one column adds 16.
503+ %cst_8x8 = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [4 , 4 ], sg_data = [2 , 2 ]>} dense <[
504+ [0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ],
505+ [8 , 24 , 40 , 56 , 72 , 88 , 104 , 120 ],
506+ [16 , 32 , 48 , 64 , 80 , 96 , 112 , 128 ],
507+ [24 , 40 , 56 , 72 , 88 , 104 , 120 , 136 ],
508+ [32 , 48 , 64 , 80 , 96 , 112 , 128 , 144 ],
509+ [40 , 56 , 72 , 88 , 104 , 120 , 136 , 152 ],
510+ [48 , 64 , 80 , 96 , 112 , 128 , 144 , 160 ],
511+ [56 , 72 , 88 , 104 , 120 , 136 , 152 , 168 ]
512+ ]> : vector <8 x8 xindex >
513+ gpu.return
514+ }
515+
// Test: two non-splat index constants in one function.
//  1. A 1-D vector<32xindex> stepping by 16 (0, 16, ..., 496) with
//     sg_layout = [32] and sg_data = [1]. The expectations look for a
//     one-element base constant (dense<0>) plus an offset computed from
//     gpu.subgroup_id via index.remu and arith.muli, broadcast and added.
//  2. A vector<1x16xindex> holding 0..15 with sg_layout = [32, 1] and
//     sg_data = [1, 16]. The expectation is a dense constant with the same
//     sixteen values and the same 1x16 shape.
// NOTE(review): captures such as C32, C16 and C0 are wildcards; their names
// suggest constant values but the patterns do not enforce those values.
516+ // CHECK-LABEL: non_splat_constant
517+ gpu.func @non_splat_constant () {
518+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<0> : vector<1xindex>
519+ // CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
520+ // CHECK-DAG: %[[REMU:.*]] = index.remu %[[SGID]], %[[C32:.*]]
521+ // CHECK-DAG: %[[MUL:.*]] = arith.muli %[[REMU]], %[[C16:.*]] : index
522+ // CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[C0:.*]], %[[MUL]] : index
523+ // CHECK-DAG: %[[BCAST:.*]] = vector.broadcast %[[ADDSTRIDES]] : index to vector<1xindex>
524+ // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[CST]], %[[BCAST]] : vector<1xindex>
// First input: 32 elements, consecutive elements differ by 16.
525+ %cst = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [32 ], sg_data = [1 ]>} dense <[0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 , 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 , 256 , 272 , 288 , 304 , 320 , 336 , 352 , 368 , 384 , 400 , 416 , 432 , 448 , 464 , 480 , 496 ]> : vector <32 xindex >
526+ // CHECK: arith.constant dense<{{\[}}[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]{{\]}}> : vector<1x16xindex>
// Second input: a single row of 16 elements; sg_data already equals the full shape.
527+ %cst_1 = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [32 , 1 ], sg_data = [1 , 16 ]>} dense <[[0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ]]> : vector <1 x16 xindex >
528+ gpu.return
529+ }
466530}
0 commit comments