Merged
Changes from 1 commit
2 changes: 2 additions & 0 deletions src/nnet3/nnet-convolutional-component.h
@@ -553,6 +553,8 @@ class TdnnComponent: public UpdatableComponent {

  CuMatrixBase<BaseFloat> &LinearParams() { return linear_params_; }

  const CuMatrix<BaseFloat> &Linearparams() const { return linear_params_; }

  // This allows you to resize the vector in order to add a bias where
  // there previously was none-- obviously this should be done carefully.
  CuVector<BaseFloat> &BiasParams() { return bias_params_; }
7 changes: 4 additions & 3 deletions src/nnet3/nnet-normalize-component.h
@@ -224,9 +224,10 @@ class BatchNormComponent: public Component {
  const CuVector<BaseFloat> &Offset() const { return offset_; }
  const CuVector<BaseFloat> &Scale() const { return scale_; }

  virtual const CuVector<double> &StatsSum() const { return stats_sum_; }
  virtual const CuVector<double> &StatsSumsq() const { return stats_sumsq_; }
  virtual const double &Count() const { return count_; }
  CuVector<double> &StatsSum() { return stats_sum_; }
  CuVector<double> &StatsSumsq() { return stats_sumsq_; }
  double Count() { return count_; }
  BaseFloat Eps() { return epsilon_; }

 private:

1 change: 1 addition & 0 deletions src/nnet3/nnet-simple-component.h
@@ -971,6 +971,7 @@ class LinearComponent: public UpdatableComponent {
  BaseFloat OrthonormalConstraint() const { return orthonormal_constraint_; }
  CuMatrixBase<BaseFloat> &Params() { return params_; }
  const CuMatrixBase<BaseFloat> &Params() const { return params_; }
  const CuMatrix<BaseFloat> &Params2() const { return params_; }

 private:

  // disallow assignment operator.
6 changes: 2 additions & 4 deletions src/pybind/cudamatrix/cu_matrix_pybind.cc
@@ -38,8 +38,6 @@ void pybind_cu_matrix(py::module& m) {
.def("Set", &PyClass::Set, py::arg("value"))
.def("Add", &PyClass::Add, py::arg("value"))
.def("Scale", &PyClass::Scale, py::arg("value"))
.def("to_dlpack",
[](py::object obj) { return CuMatrixToDLPack(&obj); })
.def("__getitem__",
[](const PyClass& m, std::pair<ssize_t, ssize_t> i) {
return m(i.first, i.second);
@@ -57,8 +55,8 @@
py::arg("MatrixStrideType") = kDefaultStride)
.def(py::init<const MatrixBase<float>&, MatrixTransposeType>(),
py::arg("other"), py::arg("trans") = kNoTrans)
// .def("to_dlpack",
// [](py::object obj) { return CuMatrixToDLPack(&obj); })
.def("to_dlpack",
[](py::object obj) { return CuMatrixToDLPack(&obj); })
;
}
{
2 changes: 1 addition & 1 deletion src/pybind/dlpack/dlpack_pybind.cc
@@ -236,7 +236,7 @@ py::capsule CuVectorToDLPack(py::object* obj) {
}

py::capsule CuMatrixToDLPack(py::object* obj) {
  auto* m = obj->cast<CuMatrixBase<float>*>();
  auto* m = obj->cast<CuMatrix<float>*>();
#if HAVE_CUDA == 1
  KALDI_ASSERT(CuDevice::Instantiate().Enabled());

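The capsule now casts to the concrete CuMatrix<float> rather than CuMatrixBase<float>, which matches the new accessors above (TdnnComponent::Linearparams(), LinearComponent::Params2()) that return a concrete CuMatrix reference. A minimal round-trip sketch of what this enables, assuming a CUDA-enabled pybind build and that the matrix class is exposed as FloatCuMatrix (the binding name is an assumption; adjust to your build):

```python
# Zero-copy round trip between a Kaldi CuMatrix and a PyTorch tensor.
# 'FloatCuMatrix' is an assumed binding name for CuMatrix<float>.
import kaldi
from torch.utils.dlpack import from_dlpack

kaldi.SelectGpuDevice(device_id=0)  # Kaldi and PyTorch share this GPU

m = kaldi.FloatCuMatrix(2, 3)       # CuMatrix<float> in GPU memory
m.Set(1.0)

t = from_dlpack(m.to_dlpack())      # aliases m's memory, no copy made
t.add_(1.0)                         # the write is visible to Kaldi too
print(m[0, 0])                      # -> 2.0
```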
8 changes: 4 additions & 4 deletions src/pybind/kaldi/io_util.py
@@ -84,13 +84,13 @@ def read_transition_model(rxfilename):
    return trans_model


def read_nnet3_model(filename):
    '''Read nnet model from an filename.
def read_nnet3_model(rxfilename):
    '''Read nnet model from an rxfilename.
    '''
    ki = kaldi_pybind.Input()
    is_opened, is_binary = ki.Open(filename, read_header=True)
    is_opened, is_binary = ki.Open(rxfilename, read_header=True)
    if not is_opened:
        raise FileNotOpenException('Failed to open {}'.format(filename))
        raise FileNotOpenException('Failed to open {}'.format(rxfilename))

    nnet = kaldi_pybind.nnet3.Nnet()
    nnet.Read(ki.Stream(), is_binary)
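A short usage sketch for the renamed argument: any Kaldi rxfilename is accepted, not only a plain path (the model paths below are hypothetical):

```python
from kaldi import read_nnet3_model

# A plain path is the common case.
nnet = read_nnet3_model('exp/chain/tdnn/final.mdl')
print(nnet.NumComponents())

# An rxfilename can also describe a command pipe.
nnet = read_nnet3_model('gunzip -c exp/chain/tdnn/final.mdl.gz |')
```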
Binary file added src/pybind/nnet3/final.mdl
1 change: 1 addition & 0 deletions src/pybind/nnet3/nnet3_pybind.cc
@@ -2,6 +2,7 @@

// Copyright 2019 Mobvoi AI Lab, Beijing, China
// (author: Fangjun Kuang, Yaguang Hu, Jian Wang)
// Copyright 2020 JD AI, Beijing, China (author: Lu Fan)

// See ../../../COPYING for clarification regarding multiple authors
//
9 changes: 5 additions & 4 deletions src/pybind/nnet3/nnet_convolutional_component_pybind.cc
@@ -25,8 +25,9 @@ using namespace kaldi::nnet3;

void pybind_nnet_convolutional_component(py::module& m) {
  using TC = kaldi::nnet3::TdnnComponent;
  py::class_<TC>(m, "TdnnComponent")
      .def("Type", &TC::Type)
      .def("LinearParams", &TC::LinearParams, py::return_value_policy::reference)
      .def("BiasParams", &TC::BiasParams, py::return_value_policy::reference);
  py::class_<TC, Component>(m, "TdnnComponent")
      .def("LinearParams", &TC::Linearparams,
           py::return_value_policy::reference)
      .def("BiasParams", &TC::BiasParams,
           py::return_value_policy::reference);
}
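Registering the class with Component as its base lets Nnet::GetComponent() hand back a TdnnComponent directly, and the reference return policy keeps the parameters zero-copy. A hedged sketch against the checked-in final.mdl (component name and shape as in the test below):

```python
import kaldi
from torch.utils.dlpack import from_dlpack

kaldi.SelectGpuDevice(device_id=0)
nnet = kaldi.read_nnet3_model('src/pybind/nnet3/final.mdl')

for i in range(nnet.NumComponents()):
    if nnet.GetComponentName(i) == 'tdnnf2.linear':
        c = nnet.GetComponent(i)           # arrives as a TdnnComponent
        w = from_dlpack(c.LinearParams().to_dlpack())  # aliases GPU memory
        print(w.shape)                     # (2, 32) for this model
```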
155 changes: 136 additions & 19 deletions src/pybind/nnet3/nnet_nnet_pybind_test.py
@@ -3,38 +3,155 @@
# Copyright 2020 JD AI, Beijing, China (author: Lu Fan)
# Apache 2.0

import kaldi
import unittest
from torch.utils.dlpack import to_dlpack
from torch.utils.dlpack import from_dlpack
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir))

import unittest
try:
    import torch
except ImportError:
    print('This test needs PyTorch.')
    print('Please install PyTorch first.')
    print('PyTorch 1.3.0dev20191006 has been tested and is known to work.')
    sys.exit(0)


"""
input dim=40 name=input

# please note that it is important to have input layer with the name=input
# as the layer immediately preceding the fixed-affine-layer to enable
# the use of short notation for the descriptor
fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat

# the first splicing is moved before the lda layer, so no splicing here
relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=16
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=16 bottleneck-dim=2 time-stride=1
linear-component name=prefinal-l dim=4 $linear_opts

prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=16 small-dim=4
output-layer name=output include-log-softmax=false dim=$num_targets $output_opts

prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=16 small-dim=4
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
"""

import kaldi
from kaldi import read_nnet3_model
from torch.utils.dlpack import from_dlpack
from torch.utils.dlpack import to_dlpack

class TestNnetNnet(unittest.TestCase):

    def test_nnet_nnet(self):
        kaldi.SelectGpuId('yes')
        final_mdl = "/mnt/cfs1_alias1/asr/users/fanlu/task/kaldi_recipe/pybind/s10.1/exp/chain_cleaned_1c/tdnn1c_sp/final.mdl"
        if torch.cuda.is_available() == False:
            print('No GPU detected! Skip it')
            return

        if kaldi.CudaCompiled() == False:
            print('Kaldi is not compiled with CUDA! Skip it')
            return

        device_id = 0

        # Kaldi and PyTorch will use the same GPU
        kaldi.SelectGpuDevice(device_id=device_id)
        kaldi.CuDeviceAllowMultithreading()

        final_mdl = 'src/pybind/nnet3/final.mdl'
        nnet = kaldi.read_nnet3_model(final_mdl)
        for i in range(nnet.NumComponents()):
            component = nnet.GetComponent(i)
            comp_type = component.Type()
            if "Affine" in comp_type or "TdnnComponent" in comp_type:
                linear_params = from_dlpack(component.LinearParams().to_dlpack())
            if comp_type in ['RectifiedLinearComponent', 'GeneralDropoutComponent',
                             'NoOpComponent']:
                continue
            comp_name = nnet.GetComponentName(i)
            if comp_name == 'lda':
                self.assertEqual(comp_type, 'FixedAffineComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                self.assertEqual(linear_params.shape, (120, 120))
                self.assertEqual(bias_params.shape, (120,))
            elif comp_name == 'tdnn1.affine':
                self.assertEqual(comp_type, 'NaturalGradientAffineComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                self.assertEqual(linear_params.shape, (16, 120))
                self.assertEqual(bias_params.shape, (16,))
            elif comp_name == 'tdnn1.batchnorm':
                self.assertEqual(comp_type, 'BatchNormComponent')
                # TODO BN get mean and var
            elif comp_name == 'tdnnf2.linear':
                self.assertEqual(comp_type, 'TdnnComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                self.assertEqual(linear_params.shape, (2, 32))
            elif comp_name == 'tdnnf2.affine':
                self.assertEqual(comp_type, 'TdnnComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                self.assertEqual(linear_params.shape, (16, 4))
                self.assertEqual(bias_params.shape, (16,))
            elif comp_name == 'tdnnf2.batchnorm':
                self.assertEqual(comp_type, 'BatchNormComponent')
                # TODO BN get mean and var
            elif comp_name == 'prefinal-l':
                self.assertEqual(comp_type, 'LinearComponent')
                params = from_dlpack(component.Params().to_dlpack())
                self.assertEqual(params.shape, (4, 16))
            elif comp_name == 'prefinal-chain.affine':
                self.assertEqual(comp_type, 'NaturalGradientAffineComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                self.assertEqual(linear_params.shape, (16, 4))
                self.assertEqual(bias_params.shape, (16,))
            elif comp_name == 'prefinal-chain.batchnorm1':
                self.assertEqual(comp_type, 'BatchNormComponent')
                # TODO BN get mean and var
            elif comp_name == 'prefinal-chain.linear':
                self.assertEqual(comp_type, 'LinearComponent')
                params = from_dlpack(component.Params().to_dlpack())
                self.assertEqual(params.shape, (4, 16))
            elif comp_name == 'prefinal-chain.batchnorm2':
                self.assertEqual(comp_type, 'BatchNormComponent')
                # TODO BN get mean and var
            elif comp_name == 'output.affine':
                self.assertEqual(comp_type, 'NaturalGradientAffineComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                self.assertEqual(linear_params.shape, (3448, 4))
                self.assertEqual(bias_params.shape, (3448,))
            elif comp_name == 'prefinal-xent.affine':
                self.assertEqual(comp_type, 'NaturalGradientAffineComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                self.assertEqual(linear_params.shape, (16, 4))
                self.assertEqual(bias_params.shape, (16,))
            elif comp_name == 'prefinal-xent.batchnorm1':
                self.assertEqual(comp_type, 'BatchNormComponent')
                # TODO BN get mean and var
            elif comp_name == 'prefinal-xent.linear':
                self.assertEqual(comp_type, 'LinearComponent')
                params = from_dlpack(component.Params().to_dlpack())
                self.assertEqual(params.shape, (4, 16))
            elif comp_name == 'prefinal-xent.batchnorm2':
                self.assertEqual(comp_type, 'BatchNormComponent')
                # TODO BN get mean and var
            elif comp_name == 'output-xent.affine':
                self.assertEqual(comp_type, 'NaturalGradientAffineComponent')
                linear_params = from_dlpack(
                    component.LinearParams().to_dlpack())
                bias_params = from_dlpack(component.BiasParams().to_dlpack())
                print(linear_params.shape)
            elif "Batch" in comp_type:
                # stats_sum = from_dlpack(component.StatsSum().to_dlpack())
                # stats_sumsq = from_dlpack(component.StatsSumsq().to_dlpack())
                # print(stats_sum.shape)
                pass
            elif comp_type == 'LinearComponent':
                # Only Params() is bound for LinearComponent (see
                # nnet_simple_component_pybind.cc below).
                params = from_dlpack(component.Params().to_dlpack())
                print(params.shape)


if __name__ == '__main__':
    unittest.main()
13 changes: 7 additions & 6 deletions src/pybind/nnet3/nnet_normalize_component_pybind.cc
@@ -25,11 +25,12 @@ using namespace kaldi::nnet3;

void pybind_nnet_normalize_component(py::module& m) {
  using PyClass = kaldi::nnet3::BatchNormComponent;
  py::class_<PyClass>(m, "BatchNormComponent")
      .def("Type", &PyClass::Type)
      .def("StatsSum", &PyClass::StatsSum)
      .def("StatsSumsq", &PyClass::StatsSumsq)
  py::class_<PyClass, Component>(m, "BatchNormComponent")
      .def("StatsSum", &PyClass::StatsSum, py::return_value_policy::reference)
      .def("StatsSumsq", &PyClass::StatsSumsq,
           py::return_value_policy::reference)
      .def("Count", &PyClass::Count)
      .def("Offset", &PyClass::Offset)
      .def("Scale", overload_cast_<>()(&PyClass::Scale, py::const_));
      .def("Eps", &PyClass::Eps)
      .def("Offset", &PyClass::Offset, py::return_value_policy::reference)
      .def("Scale", overload_cast_<>()(&PyClass::Scale, py::const_),
           py::return_value_policy::reference);
}
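These accessors expose the running statistics that the "TODO BN get mean and var" comments in the test refer to. A hedged sketch of recovering them, assuming the component has accumulated stats and that to_dlpack is wired up for the double-precision CuVectors (the test still keeps that path commented out, so this may not work yet):

```python
from torch.utils.dlpack import from_dlpack

def batchnorm_mean_var(c):
    # c is a BatchNormComponent; requires c.Count() > 0.
    count = c.Count()
    stats_sum = from_dlpack(c.StatsSum().to_dlpack())
    stats_sumsq = from_dlpack(c.StatsSumsq().to_dlpack())
    mean = stats_sum / count
    var = stats_sumsq / count - mean * mean
    # Test-mode scale_/offset_ are derived from these, roughly:
    #   scale = (var + c.Eps()) ** -0.5, offset = -mean * scale.
    return mean, var
```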
6 changes: 3 additions & 3 deletions src/pybind/nnet3/nnet_normalize_component_pybind.h
@@ -15,11 +15,11 @@
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_PYBIND_NNET3_NNET_NORNALIZE_COMPONENT_PYBIND_H_
#define KALDI_PYBIND_NNET3_NNET_NORNALIZE_COMPONENT_PYBIND_H_
#ifndef KALDI_PYBIND_NNET3_NNET_NORMALIZE_COMPONENT_PYBIND_H_
#define KALDI_PYBIND_NNET3_NNET_NORMALIZE_COMPONENT_PYBIND_H_

#include "pybind/kaldi_pybind.h"

void pybind_nnet_normalize_component(py::module& m);

#endif // KALDI_PYBIND_NNET3_NNET_NORNALIZE_COMPONENT_PYBIND_H_
#endif // KALDI_PYBIND_NNET3_NNET_NORMALIZE_COMPONENT_PYBIND_H_
23 changes: 12 additions & 11 deletions src/pybind/nnet3/nnet_simple_component_pybind.cc
@@ -25,21 +25,22 @@ using namespace kaldi::nnet3;

void pybind_nnet_simple_component(py::module& m) {
  using FAC = FixedAffineComponent;
  py::class_<FAC>(m, "FixedAffineComponent")
      .def("Type", &FAC::Type)
      .def("LinearParams", &FAC::LinearParams)
      .def("BiasParams", &FAC::BiasParams);
  py::class_<FAC, Component>(m, "FixedAffineComponent")
      .def("LinearParams", &FAC::LinearParams,
           py::return_value_policy::reference)
      .def("BiasParams", &FAC::BiasParams, py::return_value_policy::reference);

  using LC = LinearComponent;
  py::class_<LC>(m, "LinearComponent")
      .def("Type", &LC::Type)
      .def("Params", overload_cast_<>()(&LC::Params, py::const_), py::return_value_policy::reference);
  py::class_<LC, Component>(m, "LinearComponent")
      .def("Params", overload_cast_<>()(&LC::Params2, py::const_),
           py::return_value_policy::reference);

  using AC = AffineComponent;
  py::class_<AC>(m, "AffineComponent")
      .def("Type", &AC::Type)
      .def("LinearParams", overload_cast_<>()(&AC::LinearParams, py::const_))
      .def("BiasParams", overload_cast_<>()(&AC::BiasParams, py::const_));
  py::class_<AC, Component>(m, "AffineComponent")
      .def("LinearParams", overload_cast_<>()(&AC::LinearParams, py::const_),
           py::return_value_policy::reference)
      .def("BiasParams", overload_cast_<>()(&AC::BiasParams, py::const_),
           py::return_value_policy::reference);

  using NGAC = NaturalGradientAffineComponent;
  py::class_<NGAC, AC>(m, "NaturalGradientAffineComponent");
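With every class now registered on top of Component, the lookup pattern the test above exercises condenses to a dispatch on Type(). A hedged sketch (a TdnnComponent's bias may be empty, so only affine components read it here):

```python
import kaldi
from torch.utils.dlpack import from_dlpack

kaldi.SelectGpuDevice(device_id=0)
nnet = kaldi.read_nnet3_model('src/pybind/nnet3/final.mdl')

for i in range(nnet.NumComponents()):
    c = nnet.GetComponent(i)
    t = c.Type()
    if 'Affine' in t:   # Fixed/NaturalGradient/plain AffineComponent
        w = from_dlpack(c.LinearParams().to_dlpack())
        b = from_dlpack(c.BiasParams().to_dlpack())
        print(nnet.GetComponentName(i), tuple(w.shape), tuple(b.shape))
    elif t == 'TdnnComponent':
        w = from_dlpack(c.LinearParams().to_dlpack())  # bias may be empty
        print(nnet.GetComponentName(i), tuple(w.shape))
    elif t == 'LinearComponent':
        w = from_dlpack(c.Params().to_dlpack())
        print(nnet.GetComponentName(i), tuple(w.shape))
```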