@@ -463,4 +463,68 @@ gpu.module @test_distribution {
463
463
%broadcast = vector.broadcast %muli {layout_result_0 = #xegpu.layout <sg_layout = [4 , 2 , 6 , 1 ], sg_data = [1 , 1 , 1 , 32 ]>} : index to vector <4 x2 x6 x32 xindex >
464
464
gpu.return
465
465
}
466
+
467
// Test case: a non-splat 2-D index constant of type vector<32x1xindex>
// (values 0, 16, ..., 496) annotated with sg_layout = [32, 1] and
// sg_data = [1, 1].
// The unordered (DAG) directives below expect the output to contain:
//   - a dense<0> constant of type vector<1x1xindex>,
//   - a gpu.subgroup_id and two affine.apply ops that take it as a symbol,
//   - two index.remu ops (captured as IDY and IDX),
//   - an arith.muli / arith.addi chain that yields one scalar index,
//   - a vector.broadcast of that scalar to vector<1x1xindex>,
//   - a final arith.addi of the dense<0> constant and the broadcast.
// NOTE(review): captures such as C32, C16, C1 and C0 use a `.*` pattern, so
// they bind to whatever operand text is present and do not pin the numeric
// value of the operand; C0 is bound in two different directives.
+ // CHECK-LABEL: non_splat_constant_2D
468
+ gpu.func @non_splat_constant_2D () {
469
+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<0> : vector<1x1xindex>
470
+ // CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
471
+ // CHECK-DAG: affine.apply #map4()[%[[SGID]]]
472
+ // CHECK-DAG: affine.apply #map5()[%[[SGID]]]
473
+ // CHECK-DAG: %[[IDY:.*]] = index.remu %{{.*}}, %[[C32:.*]]
474
+ // CHECK-DAG: %[[IDX:.*]] = index.remu %{{.*}}, %[[C1:.*]]
475
+ // CHECK-DAG: %[[STRIDECOL:.*]] = arith.muli %[[IDY]], %[[C16:.*]] : index
476
+ // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[C0:.*]], %[[STRIDECOL]] : index
477
+ // CHECK-DAG: %[[STRIDEROW:.*]] = arith.muli %[[IDX]], %[[C0:.*]] : index
478
+ // CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[ADD]], %[[STRIDEROW]] : index
479
+ // CHECK-DAG: %[[BCAST:.*]] = vector.broadcast %[[ADDSTRIDES]] : index to vector<1x1xindex>
480
+ // CHECK-DAG: arith.addi %[[CST]], %[[BCAST]] : vector<1x1xindex>
481
+ %cst = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [32 , 1 ], sg_data = [1 , 1 ]>} dense <[[0 ], [16 ], [32 ], [48 ], [64 ], [80 ], [96 ], [112 ], [128 ], [144 ], [160 ], [176 ], [192 ], [208 ], [224 ], [240 ], [256 ], [272 ], [288 ], [304 ], [320 ], [336 ], [352 ], [368 ], [384 ], [400 ], [416 ], [432 ], [448 ], [464 ], [480 ], [496 ]]> : vector <32 x1 xindex >
482
+ gpu.return
483
+ }
484
+
485
// Test case: a non-splat 8x8 index constant whose element at (row, col) is
// 8 * row + 16 * col, annotated with sg_layout = [4, 4] and sg_data = [2, 2],
// so neither dimension of the per-subgroup tile is 1.
// The unordered (DAG) directives below expect the output to contain:
//   - a base constant of type vector<2x2xindex> (its contents are not pinned),
//   - a gpu.subgroup_id and two affine.apply ops (#map, #map1) using it,
//   - an index.mul and an index.remu for each of the two ids,
//   - arith.muli / arith.addi ops combining both results into one scalar,
//   - a vector.broadcast of that scalar to vector<2x2xindex>,
//   - a final arith.addi of the base constant and the broadcast.
// NOTE(review): C2 and C8 are `.*` captures that are bound in several
// directives, so they do not pin operand values; C2_2 and C8_2 are captured
// but never referenced afterwards.
+ // CHECK-LABEL: non_splat_constant_2D_non_unit_dim
486
+ gpu.func @non_splat_constant_2D_non_unit_dim () {
487
+ // CHECK-DAG: %[[BASECST:.*]] = arith.constant dense<{{.*}} : vector<2x2xindex>
488
+ // CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
489
+ // CHECK-DAG: %[[IDY:.*]] = affine.apply #map()[%[[SGID]]]
490
+ // CHECK-DAG: %[[IDX:.*]] = affine.apply #map1()[%[[SGID]]]
491
+ // CHECK-DAG: %[[MULY:.*]] = index.mul %[[IDY]], %[[C2:.*]]
492
+ // CHECK-DAG: %[[C2_2:.*]] = arith.constant 2 : index
493
+ // CHECK-DAG: %[[MULX:.*]] = index.mul %[[IDX]], %[[C2:.*]]
494
+ // CHECK-DAG: %[[REMU_Y:.*]] = index.remu %[[MULY]], %[[C8:.*]]
495
+ // CHECK-DAG: %[[C8_2:.*]] = arith.constant 8 : index
496
+ // CHECK-DAG: %[[REMU_X:.*]] = index.remu %[[MULX]], %[[C8:.*]]
497
+ // CHECK-DAG: %[[MUL5:.*]] = arith.muli %[[REMU_Y]], %[[C8:.*]] : index
498
+ // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[C0:.*]], %[[MUL5]] : index
499
+ // CHECK-DAG: %[[MUL6:.*]] = arith.muli %[[REMU_X]], %[[C16:.*]] : index
500
+ // CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[ADD]], %[[MUL6]] : index
501
+ // CHECK-DAG: %[[BCAST:.*]] = vector.broadcast %[[ADDSTRIDES]] : index to vector<2x2xindex>
502
+ // CHECK-DAG: %[[ADDCST:.*]] = arith.addi %[[BASECST]], %[[BCAST]] : vector<2x2xindex>
503
+ %cst_8x8 = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [4 , 4 ], sg_data = [2 , 2 ]>} dense <[
504
+ [0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ],
505
+ [8 , 24 , 40 , 56 , 72 , 88 , 104 , 120 ],
506
+ [16 , 32 , 48 , 64 , 80 , 96 , 112 , 128 ],
507
+ [24 , 40 , 56 , 72 , 88 , 104 , 120 , 136 ],
508
+ [32 , 48 , 64 , 80 , 96 , 112 , 128 , 144 ],
509
+ [40 , 56 , 72 , 88 , 104 , 120 , 136 , 152 ],
510
+ [48 , 64 , 80 , 96 , 112 , 128 , 144 , 160 ],
511
+ [56 , 72 , 88 , 104 , 120 , 136 , 152 , 168 ]
512
+ ]> : vector <8 x8 xindex >
513
+ gpu.return
514
+ }
515
+
516
// Test case with two non-splat index constants in one function:
//   1. %cst: a 1-D vector<32xindex> (values 0, 16, ..., 496) annotated with
//      sg_layout = [32] and sg_data = [1]. The unordered (DAG) directives
//      expect a dense<0> vector<1xindex> constant, a gpu.subgroup_id, an
//      index.remu of that id, an arith.muli and arith.addi producing a
//      scalar, a vector.broadcast of it to vector<1xindex>, and a final
//      arith.addi of the constant and the broadcast.
//   2. %cst_1: a vector<1x16xindex> (values 0..15) annotated with
//      sg_layout = [32, 1] and sg_data = [1, 16]. The single ordered
//      directive expects a constant with the same 16 values and the same
//      vector<1x16xindex> type in the output.
// NOTE(review): C32, C16 and C0 are `.*` captures and therefore do not pin
// the numeric value of the operands they match.
+ // CHECK-LABEL: non_splat_constant
517
+ gpu.func @non_splat_constant () {
518
+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<0> : vector<1xindex>
519
+ // CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
520
+ // CHECK-DAG: %[[REMU:.*]] = index.remu %[[SGID]], %[[C32:.*]]
521
+ // CHECK-DAG: %[[MUL:.*]] = arith.muli %[[REMU]], %[[C16:.*]] : index
522
+ // CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[C0:.*]], %[[MUL]] : index
523
+ // CHECK-DAG: %[[BCAST:.*]] = vector.broadcast %[[ADDSTRIDES]] : index to vector<1xindex>
524
+ // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[CST]], %[[BCAST]] : vector<1xindex>
525
+ %cst = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [32 ], sg_data = [1 ]>} dense <[0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 , 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 , 256 , 272 , 288 , 304 , 320 , 336 , 352 , 368 , 384 , 400 , 416 , 432 , 448 , 464 , 480 , 496 ]> : vector <32 xindex >
526
+ // CHECK: arith.constant dense<{{\[}}[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]{{\]}}> : vector<1x16xindex>
527
+ %cst_1 = arith.constant {layout_result_0 = #xegpu.layout <sg_layout = [32 , 1 ], sg_data = [1 , 16 ]>} dense <[[0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ]]> : vector <1 x16 xindex >
528
+ gpu.return
529
+ }
466
530
}
0 commit comments