|
1 | 1 | // RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8" --cse --split-input-file %s | FileCheck %s |
2 | 2 | // RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=32" --cse --split-input-file %s | FileCheck %s --check-prefix=CHECK32 |
3 | 3 |
|
| 4 | +///---------------------------------------------------------------------------------------- |
| 5 | +/// vector.load |
| 6 | +///---------------------------------------------------------------------------------------- |
| 7 | + |
4 | 8 | func.func @vector_load_i8(%arg1: index, %arg2: index) -> vector<4xi8> { |
5 | 9 | %0 = memref.alloc() : memref<3x4xi8> |
6 | 10 | %1 = vector.load %0[%arg1, %arg2] : memref<3x4xi8>, vector<4xi8> |
@@ -82,6 +86,10 @@ func.func @vector_load_i4_dynamic(%arg0 : index, %arg1 : index, %arg2 : index, % |
82 | 86 |
|
83 | 87 | // ----- |
84 | 88 |
|
| 89 | +///---------------------------------------------------------------------------------------- |
| 90 | +/// vector.transfer_read |
| 91 | +///---------------------------------------------------------------------------------------- |
| 92 | + |
85 | 93 | func.func @vector_transfer_read_i4(%arg1: index, %arg2: index) -> vector<8xi4> { |
86 | 94 | %c0 = arith.constant 0 : i4 |
87 | 95 | %0 = memref.alloc() : memref<3x8xi4> |
@@ -111,6 +119,10 @@ func.func @vector_transfer_read_i4(%arg1: index, %arg2: index) -> vector<8xi4> { |
111 | 119 |
|
112 | 120 | // ----- |
113 | 121 |
|
| 122 | +///---------------------------------------------------------------------------------------- |
| 123 | +/// vector.maskedload |
| 124 | +///---------------------------------------------------------------------------------------- |
| 125 | + |
114 | 126 | func.func @vector_maskedload_i8(%arg1: index, %arg2: index, %arg3: index, %passthru: vector<4xi8>) -> vector<4xi8> { |
115 | 127 | %0 = memref.alloc() : memref<3x4xi8> |
116 | 128 | %mask = vector.create_mask %arg3 : vector<4xi1> |
@@ -263,6 +275,10 @@ func.func @vector_cst_maskedload_i4(%arg1: index, %arg2: index, %passthru: vecto |
263 | 275 |
|
264 | 276 | // ----- |
265 | 277 |
|
| 278 | +///---------------------------------------------------------------------------------------- |
| 279 | +/// vector.extract -> vector.masked_load |
| 280 | +///---------------------------------------------------------------------------------------- |
| 281 | + |
266 | 282 | func.func @vector_extract_maskedload_i4(%arg1: index) -> vector<8x8x16xi4> { |
267 | 283 | %0 = memref.alloc() : memref<8x8x16xi4> |
268 | 284 | %c0 = arith.constant 0 : index |
@@ -353,6 +369,10 @@ func.func @vector_extract_cst_maskedload_i4() -> vector<8x8x16xi4> { |
353 | 369 |
|
354 | 370 | // ----- |
355 | 371 |
|
| 372 | +///---------------------------------------------------------------------------------------- |
| 373 | +/// vector.store |
| 374 | +///---------------------------------------------------------------------------------------- |
| 375 | + |
356 | 376 | func.func @vector_store_i8(%arg0: vector<8xi8>, %arg1: index, %arg2: index) { |
357 | 377 | %0 = memref.alloc() : memref<4x8xi8> |
358 | 378 | vector.store %arg0, %0[%arg1, %arg2] :memref<4x8xi8>, vector<8xi8> |
@@ -431,6 +451,10 @@ func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: ind |
431 | 451 |
|
432 | 452 | // ----- |
433 | 453 |
|
| 454 | +///---------------------------------------------------------------------------------------- |
| 455 | +/// vector.maskedstore |
| 456 | +///---------------------------------------------------------------------------------------- |
| 457 | + |
434 | 458 | func.func @vector_maskedstore_i8(%arg0: index, %arg1: index, %arg2: index, %value: vector<8xi8>) { |
435 | 459 | %0 = memref.alloc() : memref<3x8xi8> |
436 | 460 | %mask = vector.create_mask %arg2 : vector<8xi1> |
@@ -469,6 +493,61 @@ func.func @vector_maskedstore_i8(%arg0: index, %arg1: index, %arg2: index, %valu |
469 | 493 |
|
470 | 494 | // ----- |
471 | 495 |
|
| 496 | +func.func @vector_maskedstore_i4( |
| 497 | + %idx1: index, |
| 498 | + %idx2: index, |
| 499 | + %num_elements_to_store: index, |
| 500 | + %value: vector<8xi4>) { |
| 501 | + |
| 502 | + %0 = memref.alloc() : memref<3x8xi4> |
| 503 | + %cst = arith.constant dense<0> : vector<3x8xi4> |
| 504 | + %mask = vector.create_mask %num_elements_to_store : vector<8xi1> |
| 505 | + vector.maskedstore %0[%idx1, %idx2], %mask, %value : |
| 506 | + memref<3x8xi4>, vector<8xi1>, vector<8xi4> |
| 507 | + return |
| 508 | +} |
| 509 | +// CHECK: #[[$ATTR_10:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)> |
| 510 | +// CHECK: #[[$ATTR_11:.+]] = affine_map<()[s0] -> ((s0 + 1) floordiv 2)> |
| 511 | + |
| 512 | +// CHECK-LABEL: func.func @vector_maskedstore_i4( |
| 513 | +// CHECK-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index, |
| 514 | +// CHECK-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index, |
| 515 | +// CHECK-SAME: %[[NUM_EL_TO_STORE:[a-zA-Z0-9]+]]: index, |
| 516 | +// CHECK-SAME: %[[VAL_TO_STORE:[a-zA-Z0-9]+]]: vector<8xi4>) { |
| 517 | +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<12xi8> |
| 518 | +// CHECK: %[[ORIG_MASK:.+]] = vector.create_mask %[[NUM_EL_TO_STORE]] : vector<8xi1> |
| 519 | +// CHECK: %[[LIDX:.+]] = affine.apply #[[$ATTR_10]](){{\[}}%[[IDX_1]], %[[IDX_2]]] |
| 520 | +// CHECK: %[[MASK_IDX:.+]] = affine.apply #[[$ATTR_11]](){{\[}}%[[NUM_EL_TO_STORE]]] |
| 521 | +// CHECK: %[[NEW_MASK:.+]] = vector.create_mask %[[MASK_IDX]] : vector<4xi1> |
| 522 | +// CHECK: %[[PASS_THRU:.+]] = arith.constant dense<0> : vector<4xi8> |
| 523 | +// CHECK: %[[LOAD:.+]] = vector.maskedload %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[PASS_THRU]] : memref<12xi8>, vector<4xi1>, vector<4xi8> into vector<4xi8> |
| 524 | +// CHECK: %[[BITCAST:.+]] = vector.bitcast %[[LOAD]] : vector<4xi8> to vector<8xi4> |
| 525 | +// CHECK: %[[SELECT:.+]] = arith.select %[[ORIG_MASK]], %[[VAL_TO_STORE]], %[[BITCAST]] : vector<8xi1>, vector<8xi4> |
| 526 | +// CHECK: %[[NEW_VAL:.+]] = vector.bitcast %[[SELECT]] : vector<8xi4> to vector<4xi8> |
| 527 | +// CHECK: vector.maskedstore %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[NEW_VAL]] : memref<12xi8>, vector<4xi1>, vector<4xi8> |
| 528 | + |
| 529 | +// CHECK32: #[[$ATTR_17:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)> |
| 530 | +// CHECK32: #[[$ATTR_18:.+]] = affine_map<()[s0] -> ((s0 + 7) floordiv 8)> |
| 531 | + |
| 532 | +// CHECK32-LABEL: func.func @vector_maskedstore_i4( |
| 533 | +// CHECK32-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index, |
| 534 | +// CHECK32-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index, |
| 535 | +// CHECK32-SAME: %[[NUM_EL_TO_STORE:[a-zA-Z0-9]+]]: index, |
| 536 | +// CHECK32-SAME: %[[VAL_TO_STORE:[a-zA-Z0-9]+]]: vector<8xi4>) { |
| 537 | +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<3xi32> |
| 538 | +// CHECK32: %[[ORIG_MASK:.+]] = vector.create_mask %[[NUM_EL_TO_STORE]] : vector<8xi1> |
| 539 | +// CHECK32: %[[LIDX:.+]] = affine.apply #[[$ATTR_17]](){{\[}}%[[IDX_1]], %[[IDX_2]]] |
| 540 | +// CHECK32: %[[MASK_IDX:.+]] = affine.apply #[[$ATTR_18]](){{\[}}%[[NUM_EL_TO_STORE]]] |
| 541 | +// CHECK32: %[[NEW_MASK:.+]] = vector.create_mask %[[MASK_IDX]] : vector<1xi1> |
| 542 | +// CHECK32: %[[PASS_THRU:.+]] = arith.constant dense<0> : vector<1xi32> |
| 543 | +// CHECK32: %[[LOAD:.+]] = vector.maskedload %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[PASS_THRU]] : memref<3xi32>, vector<1xi1>, vector<1xi32> into vector<1xi32> |
| 544 | +// CHECK32: %[[BITCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi32> to vector<8xi4> |
| 545 | +// CHECK32: %[[SELECT:.+]] = arith.select %[[ORIG_MASK]], %[[VAL_TO_STORE]], %[[BITCAST]] : vector<8xi1>, vector<8xi4> |
| 546 | +// CHECK32: %[[NEW_VAL:.+]] = vector.bitcast %[[SELECT]] : vector<8xi4> to vector<1xi32> |
| 547 | +// CHECK32: vector.maskedstore %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[NEW_VAL]] : memref<3xi32>, vector<1xi1>, vector<1xi32> |
| 548 | + |
| 549 | +// ----- |
| 550 | + |
472 | 551 | func.func @vector_cst_maskedstore_i8(%arg0: index, %arg1: index, %value: vector<8xi8>) { |
473 | 552 | %0 = memref.alloc() : memref<3x8xi8> |
474 | 553 | %mask = vector.constant_mask [4] : vector<8xi1> |
@@ -500,3 +579,50 @@ func.func @vector_cst_maskedstore_i8(%arg0: index, %arg1: index, %value: vector< |
500 | 579 | // CHECK32: %[[SELECT:.+]] = arith.select %[[ORIG_MASK]], %[[VAL]], %[[BITCAST]] : vector<8xi1>, vector<8xi8> |
501 | 580 | // CHECK32: %[[NEW_VAL:.+]] = vector.bitcast %[[SELECT]] : vector<8xi8> to vector<2xi32> |
502 | 581 | // CHECK32: vector.maskedstore %[[ALLOC]][%[[LIDX]]], %[[NEW_MASK]], %[[NEW_VAL]] |
| 582 | + |
| 583 | +// ----- |
| 584 | + |
|  | +// Same as @vector_maskedstore_i4, but with a compile-time constant mask
|  | +// (`vector.constant_mask`), so the compressed mask on the wide container
|  | +// type is also a constant ([4] over 8 x i4 -> [2] over 4 x i8, resp.
|  | +// [1] over 1 x i32).
| 585 | +func.func @vector_cst_maskedstore_i4(
| 586 | + %idx_1: index,
| 587 | + %idx_2: index,
| 588 | + %val_to_store: vector<8xi4>) {
| 589 | +
| 590 | + %0 = memref.alloc() : memref<3x8xi4>
| 592 | + %mask = vector.constant_mask [4] : vector<8xi1>
| 593 | + vector.maskedstore %0[%idx_1, %idx_2], %mask, %val_to_store :
| 594 | + memref<3x8xi4>, vector<8xi1>, vector<8xi4>
| 595 | + return
| 596 | +}
| 597 | +
| 598 | +// CHECK: #[[$ATTR_12:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
| 599 | +// CHECK-LABEL: func.func @vector_cst_maskedstore_i4(
| 600 | +// CHECK-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index,
| 601 | +// CHECK-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index,
| 602 | +// CHECK-SAME: %[[VAL_TO_STORE:[a-zA-Z0-9]+]]: vector<8xi4>) {
| 603 | +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<12xi8>
| 604 | +// CHECK: %[[ORIG_MASK:.+]] = vector.constant_mask [4] : vector<8xi1>
| 605 | +// CHECK: %[[LIDX:.+]] = affine.apply #[[$ATTR_12]](){{\[}}%[[IDX_1]], %[[IDX_2]]]
| 606 | +// CHECK: %[[NEW_MASK:.+]] = vector.constant_mask [2] : vector<4xi1>
| 607 | +// CHECK: %[[PASS_THRU:.+]] = arith.constant dense<0> : vector<4xi8>
| 608 | +// CHECK: %[[LOAD:.+]] = vector.maskedload %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[PASS_THRU]] : memref<12xi8>, vector<4xi1>, vector<4xi8> into vector<4xi8>
| 609 | +// CHECK: %[[BITCAST:.+]] = vector.bitcast %[[LOAD]] : vector<4xi8> to vector<8xi4>
| 610 | +// CHECK: %[[SELECT:.+]] = arith.select %[[ORIG_MASK]], %[[VAL_TO_STORE]], %[[BITCAST]] : vector<8xi1>, vector<8xi4>
| 611 | +// CHECK: %[[NEW_VAL:.+]] = vector.bitcast %[[SELECT]] : vector<8xi4> to vector<4xi8>
| 612 | +// CHECK: vector.maskedstore %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[NEW_VAL]] : memref<12xi8>, vector<4xi1>, vector<4xi8>
| 613 | +
| 614 | +// CHECK32: #[[$ATTR_20:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
| 615 | +// CHECK32-LABEL: func.func @vector_cst_maskedstore_i4(
| 616 | +// CHECK32-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index,
| 617 | +// CHECK32-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index,
| 618 | +// CHECK32-SAME: %[[VAL_TO_STORE:[a-zA-Z0-9]+]]: vector<8xi4>) {
| 619 | +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<3xi32>
| 620 | +// CHECK32: %[[ORIG_MASK:.+]] = vector.constant_mask [4] : vector<8xi1>
| 621 | +// CHECK32: %[[LIDX:.+]] = affine.apply #[[$ATTR_20]](){{\[}}%[[IDX_1]], %[[IDX_2]]]
| 622 | +// CHECK32: %[[NEW_MASK:.+]] = vector.constant_mask [1] : vector<1xi1>
| 623 | +// CHECK32: %[[PASS_THRU:.+]] = arith.constant dense<0> : vector<1xi32>
| 624 | +// CHECK32: %[[LOAD:.+]] = vector.maskedload %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[PASS_THRU]] : memref<3xi32>, vector<1xi1>, vector<1xi32> into vector<1xi32>
| 625 | +// CHECK32: %[[BITCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi32> to vector<8xi4>
| 626 | +// CHECK32: %[[SELECT:.+]] = arith.select %[[ORIG_MASK]], %[[VAL_TO_STORE]], %[[BITCAST]] : vector<8xi1>, vector<8xi4>
| 627 | +// CHECK32: %[[NEW_VAL:.+]] = vector.bitcast %[[SELECT]] : vector<8xi4> to vector<1xi32>
| 628 | +// CHECK32: vector.maskedstore %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[NEW_VAL]] : memref<3xi32>, vector<1xi1>, vector<1xi32>
0 commit comments