PaddlePaddle
diff --git a/‎paddle/fluid/operators/math/blas.cc
Lines changed: 13 additions & 21 deletions b/‎paddle/fluid/operators/math/blas.cc
Lines changed: 13 additions & 21 deletions
diff --git a/‎paddle/fluid/operators/math/blas.h
Lines changed: 35 additions & 2 deletions b/‎paddle/fluid/operators/math/blas.h
Lines changed: 35 additions & 2 deletions
@@ -18,34 +18,26 @@
 namespace paddle {
 namespace operators {
 namespace math {
-MatDescriptor GetMatDim(const framework::DDim& dim, int num_flatten_cols,
-                        bool trans) {
+MatDescriptor CreateMatrixDescriptor(const framework::DDim &tensor_dim,
+                                     int num_flatten_cols, bool trans) {
+  PADDLE_ENFORCE_GT(tensor_dim.size(), 1);
   MatDescriptor retv;
   if (num_flatten_cols > 1) {
-    auto flatten_dim = framework::flatten_to_2d(dim, num_flatten_cols);
+    auto flatten_dim = framework::flatten_to_2d(tensor_dim, num_flatten_cols);
     retv.height_ = flatten_dim[0];
     retv.width_ = flatten_dim[1];
   } else {
-    if (dim.size() == 1) {
-      retv.height_ = 1;
-      retv.width_ = dim[0];
-    } else if (dim.size() == 2) {
-      retv.height_ = dim[0];
-      retv.width_ = dim[1];
+    if (tensor_dim.size() == 2) {
+      retv.height_ = tensor_dim[0];
+      retv.width_ = tensor_dim[1];
     } else {
-      if (dim.size() == 3) {
-        retv.batch_size_ = dim[0];
-        retv.height_ = dim[1];
-        retv.width_ = dim[2];
-      } else {
-        auto dim_vec = framework::vectorize(dim);
-        retv.batch_size_ = 1;
-        for (size_t i = 0; i < dim_vec.size() - 2; ++i) {
-          retv.batch_size_ *= dim_vec[i];
-          retv.height_ = dim_vec[dim_vec.size() - 2];
-          retv.width_ = dim_vec[dim_vec.size() - 1];
-        }
+      auto dim_vec = framework::vectorize(tensor_dim);
+      retv.batch_size_ = 1;
+      for (size_t i = 0; i < dim_vec.size() - 2; ++i) {
+        retv.batch_size_ *= dim_vec[i];
       }
+      retv.height_ = dim_vec[dim_vec.size() - 2];
+      retv.width_ = dim_vec[dim_vec.size() - 1];
       retv.stride_ = retv.height_ * retv.width_;
     }
   }
 
@@ -46,6 +46,25 @@ namespace paddle {
 namespace operators {
 namespace math {
 
+/**
+ * Matrix Descriptor of a memory buffer.
+ *
+ * It is used for Blas::MatMul. The MatMul operator can be batched.
+ * If Mat A is [BatchSize, H, W] and Mat B is [BatchSize, H, W], it will be
+ * `batch_size` GEMMs. The batched GEMM could be faster, depending on the
+ * implementation of the BLAS library. The batch size could be zero. If any
+ * matrix of `matmul` has a batch size, there will be a batched GEMM, too. E.g.,
+ * if Mat A is [BatchSize, H1, W1] and Mat B is [H2, W2], the result matrix
+ * will be [BatchSize, H1, W2].
+ *
+ * The boolean flag `trans` describes whether the memory holds the transpose of
+ * the matrix. If `trans` is true, the last two dims of the matrix are
+ * transposed, i.e. the memory layout of the matrix is [Width, Height] or
+ * [BatchSize, Width, Height].
+ *
+ * The MatDescriptor is not only the dimension or shape of a matrix; it also
+ * contains the layout and stride of the matrix. It is clearer to have a
+ * dedicated structure than to reuse `DDim`.
+ */
 struct MatDescriptor {
   int64_t height_;
   int64_t width_;
@@ -54,8 +73,22 @@ struct MatDescriptor {
   bool trans_;
 };
 
-extern MatDescriptor GetMatDim(const framework::DDim& tensor,
-                               int num_flatten_cols, bool trans);
+/**
+ * Create a matrix descriptor from a tensor dim, num_flatten_cols, and a
+ * transpose flag.
+ *
+ * @param tensor_dim: The dimension of the tensor. The rank of this dimension
+ * must be larger than 1.
+ *
+ * @param num_flatten_cols: Reshape a tensor to a matrix. The matrix's first
+ * dimension (column length) will be the product of the tensor's first
+ * `num_flatten_cols` dimensions. If num_flatten_cols is zero (the
+ * implementation takes this path for any value not greater than 1), the
+ * product of the first N-2 dimensions will be the batch_size of the
+ * descriptor.
+ *
+ * @param trans: True if the matrix is transposed.
+ */
+extern MatDescriptor CreateMatrixDescriptor(const framework::DDim& tensor_dim,
+                                            int num_flatten_cols, bool trans);
 
 template <typename DeviceContext>
 class Blas {