@@ -230,18 +230,17 @@ func.func @vectorize_nd_tensor_extract_index_from_tensor(%arg0: tensor<3x3xf32>,
230
230
// CHECK-SAME: %[[ARG4:.*]]: tensor<4x7x3x2xf32>
231
231
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
232
232
// CHECK-DAG: %[[PV:.*]] = ub.poison : i32
233
- // CHECK-DAG: %[[CST:.*]] = arith.constant dense<3> : vector<7x2x4x3xindex>
233
+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<3> : vector<4x3xindex>
234
234
// CHECK-DAG: %[[CST_1:.*]] = arith.constant dense<true> : vector<4x7x3x2xi1>
235
235
// CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<4x7x3x2xf32>
236
236
// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32>
237
237
// CHECK: %[[V1:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32>
238
238
// CHECK: %[[CAST:.*]] = arith.index_cast %[[V0]] : vector<4x3xi32> to vector<4x3xindex>
239
- // CHECK: %[[B1:.*]] = vector.broadcast %[[CAST]] : vector<4x3xindex> to vector<7x2x4x3xindex>
240
239
// CHECK: %[[CAST_1:.*]] = arith.index_cast %[[V1]] : vector<4x3xi32> to vector<4x3xindex>
241
- // CHECK: %[[B2:.*]] = vector.broadcast %[[CAST_1]] : vector<4x3xindex> to vector<7x2x4x3xindex>
242
- // CHECK: %[[MULI:.*]] = arith.muli %[[B1]], %[[CST]] : vector<7x2x4x3xindex>
243
- // CHECK: %[[ADDI:.*]] = arith.addi %[[B2]], %[[MULI]] : vector<7x2x4x3xindex>
244
- // CHECK: %[[T:.*]] = vector.transpose %[[ADDI]], [2, 0, 3, 1] : vector<7x2x4x3xindex> to vector<4x7x3x2xindex>
240
+ // CHECK: %[[MULI:.*]] = arith.muli %[[CAST]], %[[CST]] : vector<4x3xindex>
241
+ // CHECK: %[[ADDI:.*]] = arith.addi %[[CAST_1]], %[[MULI]] : vector<4x3xindex>
242
+ // CHECK: %[[B:.*]] = vector.broadcast %[[ADDI]] : vector<4x3xindex> to vector<7x2x4x3xindex>
243
+ // CHECK: %[[T:.*]] = vector.transpose %[[B]], [2, 0, 3, 1] : vector<7x2x4x3xindex> to vector<4x7x3x2xindex>
245
244
// CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]]] [%[[T]]], %[[CST_1]], %[[PASSTHRU]] : tensor<3x3xf32>, vector<4x7x3x2xindex>, vector<4x7x3x2xi1>, vector<4x7x3x2xf32> into vector<4x7x3x2xf32>
246
245
// CHECK: vector.transfer_write %[[GATHER]], %[[ARG4]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true, true]} : vector<4x7x3x2xf32>, tensor<4x7x3x2xf32>
247
246
@@ -270,20 +269,16 @@ func.func @vectorize_nd_tensor_extract_load_1d_column_vector_using_gather_load(%
270
269
// CHECK-SAME: %[[ARG0:.*]]: tensor<8x128x768xf32>
271
270
// CHECK-SAME: %[[ARG1:.*]]: index
272
271
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
273
- // CHECK-DAG: %[[CST:.*]] = arith.constant dense<768> : vector<1x8xindex>
274
- // CHECK-DAG: %[[CST_0:.*]] = arith.constant dense<128> : vector<1x8xindex>
275
272
// CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<8x1xf32>
276
- // CHECK-DAG: %[[CST_2:.*]] = arith.constant dense<true> : vector<8x1xi1>
277
- // CHECK-DAG: %[[CST_3:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
273
+ // CHECK-DAG: %[[CST_0:.*]] = arith.constant dense<true> : vector<8x1xi1>
274
+ // CHECK-DAG: %[[CST_1:.*]] = arith.constant dense<[0, 98304, 196608, 294912, 393216, 491520, 589824, 688128]> : vector<8xindex>
278
275
// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<8x1xf32>
279
- // CHECK: %[[B1:.*]] = vector.broadcast %[[CST_3]] : vector<8xindex> to vector<1x8xindex>
280
276
// CHECK: %[[ADDI_ARG1:.*]] = arith.addi %[[ARG1]], %[[ARG1]] : index
281
- // CHECK: %[[MULI_1:.*]] = arith.muli %[[B1]], %[[CST_0]] : vector<1x8xindex>
282
- // CHECK: %[[MULI_2:.*]] = arith.muli %[[MULI_1]], %[[CST]] : vector<1x8xindex>
283
- // CHECK: %[[T:.*]] = vector.transpose %[[MULI_2]], [1, 0] : vector<1x8xindex> to vector<8x1xindex>
277
+ // CHECK: %[[B1:.*]] = vector.broadcast %[[CST_1]] : vector<8xindex> to vector<1x8xindex>
278
+ // CHECK: %[[T:.*]] = vector.transpose %[[B1]], [1, 0] : vector<1x8xindex> to vector<8x1xindex>
284
279
// CHECK: %[[B2:.*]] = vector.broadcast %[[ADDI_ARG1]] : index to vector<8x1xindex>
285
280
// CHECK: %[[ADDI:.*]] = arith.addi %[[B2]], %[[T]] : vector<8x1xindex>
286
- // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[ADDI]]], %[[CST_2]], %[[PASSTHRU]] : tensor<8x128x768xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32>
281
+ // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[ADDI]]], %[[CST_0]], %[[PASSTHRU]] : tensor<8x128x768xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32>
287
282
// CHECK: vector.transfer_write %[[GATHER]], %[[EMPTY]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x1xf32>, tensor<8x1xf32>
288
283
289
284
// -----
@@ -309,15 +304,13 @@ func.func @index_from_output_column_vector_gather_load(%src: tensor<8x128xf32>)
309
304
310
305
// CHECK-LABEL: func.func @index_from_output_column_vector_gather_load(
311
306
// CHECK-SAME: %[[SRC:.*]]: tensor<8x128xf32>) -> tensor<8x1xf32> {
312
- // CHECK: %[[C128:.*]] = arith.constant dense<128> : vector<1x8xindex>
307
+ // CHECK: %[[IDX_VEC:.*]] = arith.constant dense<[0, 128, 256, 384, 512, 640, 768, 896]> : vector<8xindex>
313
308
// CHECK: %[[C0:.*]] = arith.constant 0 : index
314
309
// CHECK: %[[PASS_THRU:.*]] = arith.constant dense<0.000000e+00> : vector<8x1xf32>
315
310
// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<8x1xi1>
316
- // CHECK: %[[IDX_VEC:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
317
311
// CHECK: %[[OUT:.*]] = tensor.empty() : tensor<8x1xf32>
318
312
// CHECK: %[[B:.*]] = vector.broadcast %[[IDX_VEC]] : vector<8xindex> to vector<1x8xindex>
319
- // CHECK: %[[MUL:.*]] = arith.muli %[[B]], %[[C128]] : vector<1x8xindex>
320
- // CHECK: %[[TR:.*]] = vector.transpose %[[MUL]], [1, 0] : vector<1x8xindex> to vector<8x1xindex>
313
+ // CHECK: %[[TR:.*]] = vector.transpose %[[B]], [1, 0] : vector<1x8xindex> to vector<8x1xindex>
321
314
// CHECK: %[[GATHER:.*]] = vector.gather %[[SRC]]{{\[}}%[[C0]], %[[C0]]] {{\[}}%[[TR]]], %[[MASK]], %[[PASS_THRU]] : tensor<8x128xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32>
322
315
// CHECK: %[[RES:.*]] = vector.transfer_write %[[GATHER]], %[[OUT]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x1xf32>, tensor<8x1xf32>
323
316
// CHECK: return %[[RES]] : tensor<8x1xf32>
@@ -420,12 +413,12 @@ func.func @vectorize_nd_tensor_extract_with_affine_apply_gather(%6: tensor<80x16
420
413
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense<true> : vector<1x4xi1>
421
414
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant dense<0.000000e+00> : vector<1x4xf32>
422
415
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
423
- // CHECK-DAG: %[[VAL_7:.*]] = arith.constant dense<16> : vector<1x4xindex>
416
+ // CHECK-DAG: %[[VAL_7:.*]] = arith.constant dense<16> : vector<4xindex>
424
417
// CHECK: %[[VAL_8:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex>
425
418
// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_3]] : vector<4xindex>
426
- // CHECK: %[[VAL_10:.*]] = vector.broadcast %[[VAL_9]] : vector<4xindex> to vector<1x4xindex>
427
- // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_10]], %[[VAL_7]] : vector<1x4xindex>
428
- // CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_7]] : vector<1x4xindex>
419
+ // CHECK: %[[VAL_10:.*]] = arith.muli %[[VAL_9]], %[[VAL_7]] : vector<4xindex>
420
+ // CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_7]] : vector<4xindex>
421
+ // CHECK: %[[VAL_12:.*]] = vector.broadcast %[[VAL_11]] : vector<4xindex> to vector<1x4xindex>
429
422
// CHECK: %[[VAL_13:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {{\[}}%[[VAL_12]]], %[[VAL_4]], %[[VAL_5]] : tensor<80x16xf32>, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32>
430
423
// CHECK: %[[VAL_14:.*]] = vector.transfer_write %[[VAL_13]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
431
424
// CHECK: return %[[VAL_14]] : tensor<1x4xf32>
@@ -450,14 +443,12 @@ func.func @vectorize_nd_tensor_extract_with_maxsi_gather(%arg0: tensor<80x16xf32
450
443
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_with_maxsi_gather(
451
444
// CHECK-SAME: %[[VAL_0:.*]]: tensor<80x16xf32>,
452
445
// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> {
453
- // CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
454
- // CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<1264> : vector<1x4xindex>
446
+ // CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<[1264, 1265, 1266, 1267]> : vector<4xindex>
455
447
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense<true> : vector<1x4xi1>
456
448
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant dense<0.000000e+00> : vector<1x4xf32>
457
449
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
458
450
// CHECK: %[[VAL_7:.*]] = vector.broadcast %[[VAL_2]] : vector<4xindex> to vector<1x4xindex>
459
- // CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_3]] : vector<1x4xindex>
460
- // CHECK: %[[VAL_9:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {{\[}}%[[VAL_8]]], %[[VAL_4]], %[[VAL_5]] : tensor<80x16xf32>, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32>
451
+ // CHECK: %[[VAL_9:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {{\[}}%[[VAL_7]]], %[[VAL_4]], %[[VAL_5]] : tensor<80x16xf32>, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32>
461
452
// CHECK: %[[VAL_10:.*]] = vector.transfer_write %[[VAL_9]], %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
462
453
// CHECK: return %[[VAL_10]] : tensor<1x4xf32>
463
454
// CHECK: }
@@ -519,13 +510,13 @@ func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1
519
510
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]
520
511
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]
521
512
// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]
522
- // CHECK-DAG: %[[CST:.+]] = arith.constant dense<3> : vector<1x1x3xindex>
513
+ // CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
523
514
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
524
515
// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<true> : vector<1x1x3xi1>
525
516
// CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x3xf32>
526
517
// CHECK-DAG: %[[INIT_IDX:.+]] = arith.constant dense<[2, 1, 0]> : vector<3xindex>
527
- // CHECK: %[[T0:.+]] = vector.broadcast %[[ARG2]] : index to vector<1x1x3xindex>
528
- // CHECK: %[[T1:.+]] = arith.muli %[[T0]], %[[CST]] : vector<1x1x3xindex>
518
+ // CHECK: %[[T0:.+]] = arith.muli %[[ARG2]], %[[C3]] : index
519
+ // CHECK: %[[T1:.+]] = vector.broadcast %[[T0]] : index to vector<1x1x3xindex>
529
520
// CHECK: %[[T2:.+]] = vector.broadcast %[[INIT_IDX]]
530
521
// CHECK: %[[T3:.+]] = arith.addi %[[T2]], %[[T1]]
531
522
// CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[T3]]], %[[MASK]], %[[PASSTHRU]]
0 commit comments