@@ -37,10 +37,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
37
37
PADDLE_ENFORCE (ctx->HasOutput (" Out" ),
38
38
" Output(Out) of SoftmaxOp should not be null." );
39
39
40
- auto x_dims = ctx->GetInputDim (" X" );
41
- PADDLE_ENFORCE (x_dims.size () == 2UL ,
42
- " The input of softmax op must be a matrix." );
43
- ctx->SetOutputDim (" Out" , x_dims);
40
+ ctx->SetOutputDim (" Out" , ctx->GetInputDim (" X" ));
44
41
ctx->ShareLoD (" X" , /* ->*/ " Out" );
45
42
}
46
43
@@ -81,8 +78,8 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
81
78
public:
82
79
void Make () override {
83
80
AddInput (" X" ,
84
- " The input tensor of softmax. "
85
- " 2-D with shape [batch_size, input_feature_dimensions] ." );
81
+ " The input tensor of softmax, "
82
+ " whose last dimension is the input_feature_dimensions." );
86
83
AddOutput (" Out" , " The normalized values with the same shape as X." )
87
84
.Reuse (" X" );
88
85
AddAttr<bool >(
@@ -105,20 +102,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
105
102
AddComment (R"DOC(
106
103
Softmax Operator.
107
104
108
- The input of the softmax operator is a 2-D tensor with shape N x K (N is the
109
- batch_size, K is the dimension of input feature). The output tensor has the
110
- same shape as the input tensor.
105
+ The input of the softmax operator is a tensor of any rank. The output tensor
106
+ has the same shape as the input.
111
107
112
- For each row of the input tensor, the softmax operator squashes the
113
- K-dimensional vector of arbitrary real values to a K-dimensional vector of real
114
- values in the range [0, 1] that add up to 1.
108
+ The input tensor will first be logically flattened to a 2-D matrix. The matrix's
109
+ second dimension(row length) is the same as the last dimension of the input
110
+ tensor, and the first dimension(column length) is the product of all other
111
+ dimensions of the input tensor. For each row of the matrix, the softmax operator
112
+ squashes the K-dimensional(K is the width of the matrix, which is also the size
113
+ of the input tensor's last dimension) vector of arbitrary real values to a
114
+ K-dimensional vector of real values in the range [0, 1] that add up to 1.
115
115
It computes the exponential of the given dimension and the sum of exponential
116
116
values of all the dimensions in the K-dimensional vector input.
117
117
Then the ratio of the exponential of the given dimension and the sum of
118
118
exponential values of all the dimensions is the output of the softmax
119
119
operator.
120
120
121
- For each row $i$ and each column $j$ in Input(X) , we have:
121
+ For each row $i$ and each column $j$ in the matrix , we have:
122
122
$$Out[i, j] = \frac{\exp(X[i, j])}{\sum_k \exp(X[i, k])}$$
123
123
124
124
)DOC" );
0 commit comments