     'sequence_last_step',
     'dropout',
     'split',
+    'matmul',
 ]
 
 
@@ -1586,83 +1587,71 @@ def split(input, num_or_sections, dim=-1):
     return outs
 
 
-def matmul(x, y):
+def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
     """
-    Applies matrix multipication to two tensors.
+    Applies matrix multiplication to two tensors. Currently only rank 1 to rank
+    3 input tensors are supported.
 
-    This operator is used to perform (batched) matrix multiplication
-    over the last two dimensions of the input tensors `X` and `Y`.
+    The actual behavior depends on the shapes of :math:`x`, :math:`y` and the
+    flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically:
 
-    If a transpose flag is specified, the last two dimensions of the
-    tensor are transposed. If the tensor is rank-1 of shape [D], then
-    for `X` it is treated as [1, D] in nontransposed form and as [D, 1]
-    in transposed form, whereas for `Y` it is the opposite: It is treated
-    as [D, 1] in nontransposed form and as [1, D] in transposed form.
+    - If a transpose flag is specified, the last two dimensions of the tensor
+      are transposed. If the tensor is rank-1 of shape :math:`[D]`, then for
+      :math:`x` it is treated as :math:`[1, D]` in nontransposed form and as
+      :math:`[D, 1]` in transposed form, whereas for :math:`y` it is the
+      opposite: It is treated as :math:`[D, 1]` in nontransposed form and as
+      :math:`[1, D]` in transposed form.
 
-    Examples without transpose:
-    - X: [K], Y: [K] => Out: [1]
-    - X: [K], Y: [K, N] => Out: [N]
-    - X: [B, M, K], Y: [K] => Out: [B, M]
-    - X: [M, K], Y: [B, K, N] => Out: [B, M, N]
-    - X: [B, M, K], Y: [B, K, N] => Out: [B, M, N]
+    - After transpose, the two tensors are 2-D or 3-D and the matrix
+      multiplication is performed in the following way.
 
-    The behavior is designed to be similar to the `numpy.matmul` function.
-    The differences are:
-    - Currently only rank 1 to rank 3 input tensors are supported.
-    - We add `transpose_X` and `transpose_Y` flags.
+      - If both are 2-D, they are multiplied like conventional matrices.
+      - If either is 3-D, it is treated as a stack of matrices residing in the
+        last two dimensions and a batched matrix multiply supporting broadcast
+        applies on the two tensors.
 
-    Both the input `X` and `Y` can carry the LoD (Level of Details) information,
-    or not. But the output only shares the LoD information with input `X`.
+    Also note that if the raw tensor :math:`x` or :math:`y` is rank-1 and
+    nontransposed, the prepended or appended dimension :math:`1` will be
+    removed after matrix multiplication.
 
     Args:
         x (Variable): The input variable which is a Tensor or LoDTensor.
-        y (Variable): If :attr:`num_or_sections` is an integer,
-            then the integer indicates the number of equal sized sub-tensors
-            that the tensor will be divided into. If :attr:`num_or_sections`
-            is a list of integers, the length of list indicates the number of
-            sub-tensors and the integers indicate the sizes of sub-tensors'
-            :attr:`dim` dimension orderly.
-        dim (int): The dimension along which to split. If :math:`dim < 0`, the
-            dimension to split along is :math:`rank(input) + dim`.
+        y (Variable): The input variable which is a Tensor or LoDTensor.
+        transpose_x (bool): Whether to transpose :math:`x` before multiplication.
+        transpose_y (bool): Whether to transpose :math:`y` before multiplication.
+        name (str|None): A name for this layer (optional). If set None, the
+            layer will be named automatically.
 
     Returns:
-        List: The list of segmented tensor variables.
+        Variable: The product Tensor variable.
 
     Examples:
         .. code-block:: python
 
-            # x is a Tensor variable with shape [3, 9, 5]:
-            x0, x1, x2 = fluid.layers.split(x, num_or_sections=3, dim=1)
-            x0.shape  # [3, 3, 5]
-            x1.shape  # [3, 3, 5]
-            x2.shape  # [3, 3, 5]
-            x0, x1, x2 = fluid.layers.split(x, num_or_sections=[2, 3, 4], dim=1)
-            x0.shape  # [3, 2, 5]
-            x1.shape  # [3, 3, 5]
-            x2.shape  # [3, 4, 5]
+            # Examples to clarify shapes of the inputs and output
+            # x: [B, M, K], y: [B, K, N]
+            fluid.layers.matmul(x, y)  # out: [B, M, N]
+            # x: [B, M, K], y: [K, N]
+            fluid.layers.matmul(x, y)  # out: [B, M, N]
+            # x: [B, M, K], y: [K]
+            fluid.layers.matmul(x, y)  # out: [B, M]
+            # x: [M, K], y: [K, N]
+            fluid.layers.matmul(x, y)  # out: [M, N]
+            # x: [K], y: [K]
+            fluid.layers.matmul(x, y)  # out: [1]
+            # x: [M], y: [N]
+            fluid.layers.matmul(x, y, True, True)  # out: [M, N]
     """
-    helper = LayerHelper('split', **locals())
-    input_shape = input.shape
-    dim = (len(input_shape) + dim) if dim < 0 else dim
-    if isinstance(num_or_sections, int):
-        assert num_or_sections > 1, 'num_or_sections must be more than 1.'
-        num = num_or_sections
-    else:
-        assert len(num_or_sections) < input_shape[
-            dim], 'len(num_or_sections) must not be more than input.shape[dim].'
-        num = len(num_or_sections)
-    outs = [
-        helper.create_tmp_variable(dtype=helper.input_dtype())
-        for i in range(num)
-    ]
+    helper = LayerHelper('matmul', **locals())
+    assert max(
+        len(x.shape), len(y.shape)
+    ) <= 3, 'Currently only rank 1 to rank 3 input tensors are supported.'
+    out = helper.create_tmp_variable(dtype=helper.input_dtype())
     helper.append_op(
-        type='split',
-        inputs={'X': input},
-        outputs={'Out': outs},
-        attrs={
-            'num': num_or_sections if isinstance(num_or_sections, int) else 0,
-            'sections': num_or_sections
-            if isinstance(num_or_sections, list) else [],
-            'axis': dim
-        })
-    return outs
+        type='matmul',
+        inputs={'X': x,
+                'Y': y},
+        outputs={'Out': out},
+        attrs={'transpose_X': transpose_x,
+               'transpose_Y': transpose_y})
+    return out
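
The shape rules described in the new docstring mirror `numpy.matmul` for rank-1 to rank-3 operands, so they can be sanity-checked outside of Fluid. The sketch below is illustrative only and not part of the change; the one divergence is the rank-1 by rank-1 case, where NumPy returns a 0-d scalar while the layer documents a `[1]` output.

import numpy as np

# Shape-semantics sketch: numpy.matmul follows the same rank-1/2/3 rules as
# the docstring above. A rank-1 x is treated as [1, K], a rank-1 y as [K, 1],
# and a 3-D operand is a stack of matrices batched over the leading dimension.
B, M, K, N = 2, 3, 4, 5

print(np.matmul(np.ones([B, M, K]), np.ones([B, K, N])).shape)  # (2, 3, 5) -> [B, M, N]
print(np.matmul(np.ones([B, M, K]), np.ones([K, N])).shape)     # (2, 3, 5) -> [B, M, N]
print(np.matmul(np.ones([B, M, K]), np.ones([K])).shape)        # (2, 3)    -> [B, M]
print(np.matmul(np.ones([M, K]), np.ones([K, N])).shape)        # (3, 5)    -> [M, N]
print(np.matmul(np.ones([K]), np.ones([K])).shape)              # ()        -> the layer documents [1] here

# The transpose_x=True, transpose_y=True case with x: [M], y: [N] is an outer
# product: x is treated as [M, 1] and y as [1, N].
print(np.matmul(np.ones([M, 1]), np.ones([1, N])).shape)        # (3, 5)    -> [M, N]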