Remove Parameter layer of MatMul; support case: (A, B, ..., K) * (K, N) -> (A, B, ..., N)

minglu2019 · minglu2019 · commit da3611ba28ec · 2021-06-17T05:41:50.000+02:00
diff --git a/include/caffe/layers/matmul_layer.hpp b/include/caffe/layers/matmul_layer.hpp
@@ -10,7 +10,7 @@
 
 namespace caffe {
 /*
- * @brief Resize images to size using nearest neighbor interpolation. ////
+ * @brief MatMul. ////
  * Note: implementation of tf.linalg.matmul()
  * https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/linalg/matmul
  */
@@ -24,7 +24,7 @@ template <typename Dtype> class MatMulLayer : public Layer<Dtype> {
                        const vector<Blob<Dtype> *> &top);
 
   virtual inline const char *type() const { return "MatMul"; }
-  virtual inline int ExactNumBottomBlobs() const { return 2; }
+  virtual inline int MinNumBottomBlobs() const { return 1; }
   virtual inline int ExactNumTopBlobs() const { return 1; }
 
 protected:
@@ -48,6 +48,7 @@ template <typename Dtype> class MatMulLayer : public Layer<Dtype> {
   int K;
   bool transpose_a;
   bool transpose_b;
+  vector<int> blob_shape_;
 };
 
 } // namespace caffe
diff --git a/src/caffe/layers/matmul_layer.cpp b/src/caffe/layers/matmul_layer.cpp
@@ -13,51 +13,75 @@ void MatMulLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype> *> &bottom,
   const MatMulParameter &matmul_param = this->layer_param_.matmul_param();
   transpose_a = matmul_param.transpose_a();
   transpose_b = matmul_param.transpose_b();
+  blob_shape_.clear();
+  std::copy(matmul_param.blob_shape().begin(), matmul_param.blob_shape().end(),
+            std::back_inserter(blob_shape_));
 
-  CHECK_EQ(bottom[0]->num_axes(), bottom[1]->num_axes())
-      << "input a and input b should have same dimension!!";
+  if (bottom.size() == 1 && this->blobs_.size() != 1 &&
+      blob_shape_.size() != 0) {
+    this->blobs_.resize(1);
+    this->blobs_[0].reset(new Blob<Dtype>(blob_shape_));
+    // initialize blobs_ value with  0.
+    caffe_set(this->blobs_[0]->count(), Dtype(0),
+              this->blobs_[0]->mutable_cpu_data());
+  }
+  Blob<Dtype> *inputs1 =
+      (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
   num_axes = bottom[0]->num_axes();
-  M = transpose_a ? bottom[0]->shape(num_axes - 1)
-                  : bottom[0]->shape(num_axes - 2);
-  N = transpose_b ? bottom[1]->shape(num_axes - 2)
-                  : bottom[1]->shape(num_axes - 1);
-  K = transpose_a ? bottom[0]->shape(num_axes - 2)
-                  : bottom[0]->shape(num_axes - 1);
-  if (transpose_b) {
-    CHECK_EQ(K, bottom[1]->shape(num_axes - 1))
-        << "input a and input b have incompatible shapes! ";
+
+  CHECK_GE(bottom[0]->num_axes(), inputs1->num_axes())
+      << "input a and input b should have same dimension or dim(a) > dim(b)!!";
+
+  if (bottom[0]->num_axes() == inputs1->num_axes()) {
+    M = transpose_a ? bottom[0]->shape(num_axes - 1)
+                    : bottom[0]->shape(num_axes - 2);
+    N = transpose_b ? inputs1->shape(num_axes - 2)
+                    : inputs1->shape(num_axes - 1);
+    K = transpose_a ? bottom[0]->shape(num_axes - 2)
+                    : bottom[0]->shape(num_axes - 1);
+    if (transpose_b) {
+      CHECK_EQ(K, inputs1->shape(num_axes - 1))
+          << "input a and input b have incompatible shapes! ";
+    } else {
+      CHECK_EQ(K, inputs1->shape(num_axes - 2))
+          << "input a and input b have incompatible shapes! ";
+    }
+    for (int i = 0; i < num_axes - 2; i++) {
+      CHECK_EQ(bottom[0]->shape(i), inputs1->shape(i))
+          << "inputs should have same shape except in last two dimensions, but "
+             "in dimension "
+          << i << ", the two inputs have different shape!";
+    }
   } else {
-    CHECK_EQ(K, bottom[1]->shape(num_axes - 2))
+    int axes1 = bottom[0]->num_axes();
+    int axes2 = inputs1->num_axes();
+    K = bottom[0]->shape(axes1 - 1);
+    M = bottom[0]->count() / K;
+    N = inputs1->shape(axes2 - 1);
+    CHECK_GE(axes2, 2) << "If dim(a) > dim(b), dim(b) should be 2!!";
+    CHECK_EQ(K, inputs1->shape(axes2 - 2))
         << "input a and input b have incompatible shapes! ";
   }
-  for (int i = 0; i < num_axes - 2; i++) {
-    CHECK_EQ(bottom[0]->shape(i), bottom[1]->shape(i))
-        << "inputs should have same shape except in last two dimensions, but "
-           "in dimension "
-        << i << ", the two inputs have different shape!";
-  }
 }
 
 template <typename Dtype>
 void MatMulLayer<Dtype>::Reshape(const vector<Blob<Dtype> *> &bottom,
                                  const vector<Blob<Dtype> *> &top) {
-
   vector<int> top_shape = bottom[0]->shape();
-  top_shape[num_axes - 2] = M;
   top_shape[num_axes - 1] = N;
   top[0]->Reshape(top_shape);
 }
 
 template <typename Dtype>
 void MatMulLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *> &bottom,
                                      const vector<Blob<Dtype> *> &top) {
-
+  Blob<Dtype> *inputs1 =
+      (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
   const Dtype *bottom_data0 = bottom[0]->cpu_data();
-  const Dtype *bottom_data1 = bottom[1]->cpu_data();
+  const Dtype *bottom_data1 = inputs1->cpu_data();
   Dtype *top_data = top[0]->mutable_cpu_data();
 
-  const int batch_size = bottom[0]->count(0, num_axes - 2);
-
+  const int batch_size = bottom[0]->count() / (M * K);
   for (int i = 0; i < batch_size; ++i) {
     int b_idx0 = i * M * K;
     int b_idx1 = i * K * N;
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
@@ -3515,6 +3515,7 @@ message LpNormalizationParameter {
 message MatMulParameter {
   optional bool transpose_a = 1[default = false];
   optional bool transpose_b = 2[default = false];
+  repeated uint32 blob_shape = 3;
 }
 
 message GatherV2Parameter {

Original file line number	Diff line number	Diff line change
`@@ -3515,6 +3515,7 @@ message LpNormalizationParameter {`
`3515`	`3515`	`message MatMulParameter {`
`3516`	`3516`	`optional bool transpose_a = 1[default = false];`
`3517`	`3517`	`optional bool transpose_b = 2[default = false];`
	`3518`	`+ repeated uint32 blob_shape = 3;`
`3518`	`3519`	`}`
`3519`	`3520`
`3520`	`3521`	`message GatherV2Parameter {`