|
1 | 1 | // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-generic-vectorization))" --split-input-file %s | FileCheck %s |
2 | 2 | // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-generic-vectorization{enable-vector-masking=true vectorize-padding=true}))" --split-input-file %s | FileCheck %s -check-prefix=CHECK-MASK |
3 | 3 | // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-generic-vectorization{fold-cast-into-contract=true}))" --split-input-file %s | FileCheck %s -check-prefix=CHECK-FOLD |
| 4 | +// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-generic-vectorization{vectorize-to-transfer-gather=true}))" --split-input-file %s | FileCheck %s --check-prefix=CHECK-GATHER |
4 | 5 |
|
5 | 6 | func.func @matmul(%lhs: tensor<3x4xf16>, %rhs: tensor<4x5xf16>, %acc: tensor<3x5xf32>) -> tensor<3x5xf32> { |
6 | 7 | %result = linalg.matmul ins(%lhs, %rhs: tensor<3x4xf16>, tensor<4x5xf16>) outs(%acc: tensor<3x5xf32>) -> tensor<3x5xf32> |
@@ -533,3 +534,172 @@ func.func @depthwise_conv_fold_away_masking(%arg0: tensor<1x68x120x96xf32>, %arg |
533 | 534 | // CHECK-MASK: vector.fma |
534 | 535 | // CHECK-MASK-NOT: vector.create_mask |
535 | 536 | // CHECK-MASK-NOT: vector.constant_mask |
| 537 | + |
// -----

!mem = tensor<8192x8xf16>
!indices = tensor<128xi64>
!result = tensor<128x8xf16>

// Row indices come from a tensor of i64 page ids while the inner dimension is
// read contiguously; vectorization should emit a transfer_gather with one
// index vector on dim 0 and `None` on dim 1.
#trait = {
  indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                   affine_map<(d0, d1) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel"]
}

func.func @paged_gather_read(%storage : !mem, %ind: !indices) -> !result {
  %init = tensor.empty() : !result
  %gathered = linalg.generic #trait
    ins(%ind : !indices)
    outs(%init : !result) {
  ^bb0(%page: i64, %out: f16):
    %row = arith.index_cast %page : i64 to index
    %col = linalg.index 1 : index
    %elem = tensor.extract %storage[%row, %col] : !mem
    linalg.yield %elem : f16
  } -> !result
  return %gathered : !result
}

// CHECK-GATHER-LABEL: @paged_gather_read
// CHECK-GATHER-SAME: %[[SRC:.+]]: tensor<8192x8xf16>, %[[IND:.+]]: tensor<128xi64>
// CHECK-GATHER: %[[LOAD:.+]] = vector.transfer_read %[[IND]]
// CHECK-GATHER: %[[CAST:.+]] = arith.index_cast %[[LOAD]] : vector<128xi64> to vector<128xindex>
// CHECK-GATHER: %[[RES:.+]] = iree_vector_ext.transfer_gather %[[SRC]]
// CHECK-GATHER-SAME: [%[[CAST]]: vector<128xindex>, None]
// CHECK-GATHER: vector.transfer_write %[[RES]], %{{.*}}
| 571 | + |
// -----

!mem = tensor<8192x8xf16>
!result = tensor<128x8xf16>

// Both extract indices are the loop induction values themselves, so the
// resulting transfer_gather needs no index vectors at all ([None, None]).
#trait = {
  indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel"]
}

func.func @contiguous_gather_read(%storage : !mem) -> !result {
  %init = tensor.empty() : !result
  %res = linalg.generic #trait
    outs(%init : !result) {
  ^bb0(%out: f16):
    %row = linalg.index 0 : index
    %col = linalg.index 1 : index
    %elem = tensor.extract %storage[%row, %col] : !mem
    linalg.yield %elem : f16
  } -> !result
  return %res : !result
}

// CHECK-GATHER-LABEL: @contiguous_gather_read
// CHECK-GATHER-SAME: %[[SRC:.+]]: tensor<8192x8xf16>
// CHECK-GATHER: %[[RES:.+]] = iree_vector_ext.transfer_gather %[[SRC]]
// CHECK-GATHER-SAME: [None, None]
// CHECK-GATHER: vector.transfer_write %[[RES]], %{{.*}}
| 600 | + |
// -----

!mem = tensor<8192x8xf16>
!indices = tensor<128xi64>
!result = tensor<128x8xf16>

#trait = {
  indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                   affine_map<(d0, d1) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel"]
}

// Negative test: the inner extract index goes through an arith.muli, so it is
// not a direct function of the iteration space.
func.func @negative_strided_paged_gather_read(%storage : !mem, %ind: !indices) -> !result {
  %init = tensor.empty() : !result
  %c2 = arith.constant 2 : index
  %res = linalg.generic #trait
    ins(%ind : !indices)
    outs(%init : !result) {
  ^bb0(%page: i64, %out: f16):
    %row = arith.index_cast %page : i64 to index
    %col = linalg.index 1 : index
    %strided = arith.muli %col, %c2 : index
    %elem = tensor.extract %storage[%row, %strided] : !mem
    linalg.yield %elem : f16
  } -> !result
  return %res : !result
}

// For now, the vectorizer does not walk back on binary ops to find a mapping
// from the iteration space to the memory space. This can be improved in the
// future.
// CHECK-GATHER-LABEL: @negative_strided_paged_gather_read
// CHECK-GATHER: linalg.generic
| 633 | + |
// -----

!mem = tensor<8192x8xf16>
!rows = tensor<128xi64>
!cols = tensor<8xi64>
!result = tensor<128x8xf16>

// Both dimensions are indexed indirectly, so the transfer_gather carries an
// index vector for every dimension.
#trait = {
  indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                   affine_map<(d0, d1) -> (d1)>,
                   affine_map<(d0, d1) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel"]
}

func.func @full_gather_read(%storage : !mem, %ind0: !rows, %ind1 : !cols) -> !result {
  %init = tensor.empty() : !result
  %res = linalg.generic #trait
    ins(%ind0, %ind1 : !rows, !cols)
    outs(%init : !result) {
  ^bb0(%id0: i64, %id1 : i64, %out: f16):
    %row = arith.index_cast %id0 : i64 to index
    %col = arith.index_cast %id1 : i64 to index
    %elem = tensor.extract %storage[%row, %col] : !mem
    linalg.yield %elem : f16
  } -> !result
  return %res : !result
}

// CHECK-GATHER-LABEL: @full_gather_read
// CHECK-GATHER-SAME: %[[SRC:.+]]: tensor<8192x8xf16>, %[[IND0:.+]]: tensor<128xi64>, %[[IND1:.+]]: tensor<8xi64>
// CHECK-GATHER-DAG: %[[LOAD0:.+]] = vector.transfer_read %[[IND0]]
// CHECK-GATHER-DAG: %[[LOAD1:.+]] = vector.transfer_read %[[IND1]]
// CHECK-GATHER-DAG: %[[CAST0:.+]] = arith.index_cast %[[LOAD0]] : vector<128xi64> to vector<128xindex>
// CHECK-GATHER-DAG: %[[CAST1:.+]] = arith.index_cast %[[LOAD1]] : vector<8xi64> to vector<8xindex>
// CHECK-GATHER-DAG: %[[RES:.+]] = iree_vector_ext.transfer_gather %[[SRC]]
// CHECK-GATHER-SAME: [%[[CAST0]]: vector<128xindex>, %[[CAST1]]: vector<8xindex>]
// CHECK-GATHER: vector.transfer_write %[[RES]], %{{.*}}
| 671 | + |
// -----

!mem = tensor<8192x8xf16>
!rows = tensor<128xi64>
!cols = tensor<8xi64>
!result = tensor<128x8xf16>

#trait = {
  indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                   affine_map<(d0, d1) -> (d1)>,
                   affine_map<(d0, d1) -> (d0, d1)>,
                   affine_map<(d0, d1) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel"]
}

// Two tensor.extract ops in one region, each with its own (deliberately
// duplicated) index_cast chain; each extract should become a transfer_gather.
func.func @multi_extract(%storage : !mem, %storage2: !mem, %ind0: !rows, %ind1 : !cols) -> ( !result, !result ) {
  %init = tensor.empty() : !result
  %res0, %res1 = linalg.generic #trait
    ins(%ind0, %ind1 : !rows, !cols)
    outs(%init, %init : !result, !result) {
  ^bb0(%id0: i64, %id1 : i64, %out: f16, %out2: f16):
    %row0 = arith.index_cast %id0 : i64 to index
    %col0 = arith.index_cast %id1 : i64 to index
    %elem0 = tensor.extract %storage[%row0, %col0] : !mem
    %row1 = arith.index_cast %id0 : i64 to index
    %col1 = arith.index_cast %id1 : i64 to index
    %elem1 = tensor.extract %storage2[%row1, %col1] : !mem
    linalg.yield %elem0, %elem1 : f16, f16
  } -> (!result, !result)
  return %res0, %res1 : !result, !result
}

// CHECK-GATHER-LABEL: @multi_extract
// CHECK-GATHER-COUNT-2: transfer_gather
0 commit comments