Skip to content

Commit 653595d

Browse files
csukuangfj authored and danpovey committed
[pybind] Support to construct CuSubMatrix/CuSubVector from DLPack without GPU support. (#3828)
1 parent aed291b commit 653595d

File tree

12 files changed

+526
-292
lines changed

12 files changed

+526
-292
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ addons:
2222
- liblapack-dev
2323
- clang-3.8
2424
- sox
25+
- python3
26+
- python3-dev
27+
- python3-pip
2528

2629
branches:
2730
only:

src/pybind/cudamatrix/cu_device_pybind.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,14 @@ void pybind_cu_device(py::module& m) {
4343
KALDI_LOG << "Kaldi is NOT compiled with GPU! Ignore it.";
4444
#endif
4545
});
46+
47+
m.def("CudaCompiled",
48+
[]() -> bool {
49+
#if HAVE_CUDA == 1
50+
return true;
51+
#else
52+
return false;
53+
#endif
54+
},
55+
"true if kaldi is compiled with GPU support; false otherwise");
4656
}

src/pybind/cudamatrix/cu_matrix_pybind.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ void pybind_cu_matrix(py::module& m) {
5555
py::arg("MatrixStrideType") = kDefaultStride)
5656
.def(py::init<const MatrixBase<float>&, MatrixTransposeType>(),
5757
py::arg("other"), py::arg("trans") = kNoTrans)
58-
.def("to_dlpack", [](py::object obj) { return CuMatrixToDLPack(obj); });
58+
.def("to_dlpack",
59+
[](py::object obj) { return CuMatrixToDLPack(&obj); });
5960
}
6061
{
6162
using PyClass = CuSubMatrix<float>;

src/pybind/cudamatrix/cu_vector_pybind.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ void pybind_cu_vector(py::module& m) {
4444
.def(py::init<MatrixIndexT, MatrixResizeType>(), py::arg("dim"),
4545
py::arg("MatrixResizeType") = kSetZero)
4646
.def(py::init<const VectorBase<float>&>(), py::arg("v"))
47-
.def("to_dlpack", [](py::object obj) { return CuVectorToDLPack(obj); });
47+
.def("to_dlpack",
48+
[](py::object obj) { return CuVectorToDLPack(&obj); });
4849
}
4950
{
5051
using PyClass = CuSubVector<float>;

src/pybind/dlpack/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11

22
test:
33
python3 ./dlpack_pybind_test.py
4+
python3 ./dlpack_pybind_test_gpu.py

src/pybind/dlpack/dlpack_pybind.cc

Lines changed: 71 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ const char* kDLPackTensorName = "dltensor";
4646
// PyTorch, TVM and CuPy name the used dltensor to be `used_dltensor`
4747
const char* kDLPackUsedTensorName = "used_dltensor";
4848

49-
DLManagedTensor* CreateDLManagedtensor(DLDeviceType device_type, int device_id,
49+
DLManagedTensor* CreateDLManagedTensor(DLDeviceType device_type, int device_id,
5050
void* data) {
51-
// As SubVector/SubMatrix/CuSubVector/CuSumMatrix
51+
// As SubVector/SubMatrix/CuSubVector/CuSubMatrix
5252
// all require a DLManagedTensor, we put the shared
5353
// code here to avoid duplicates
5454

@@ -79,7 +79,7 @@ DLManagedTensor* CreateDLManagedtensor(DLDeviceType device_type, int device_id,
7979
return managed_tensor;
8080
}
8181

82-
DLManagedTensor* ConsumeDLManagedtensor(py::capsule* capsule,
82+
DLManagedTensor* ConsumeDLManagedTensor(py::capsule* capsule,
8383
DLDeviceType device_type, int device_id,
8484
int ndim) {
8585
// check the name of the capsule
@@ -138,37 +138,10 @@ void DLPackCapsuleDestructor(PyObject* data) {
138138
}
139139
}
140140

141-
} // namespace
142-
143-
namespace kaldi {
144-
145-
py::capsule VectorToDLPack(py::object obj) {
146-
auto* v = obj.cast<Vector<float>*>();
147-
auto* managed_tensor = CreateDLManagedtensor(kDLCPU, 0, v->Data());
148-
auto* tensor = &managed_tensor->dl_tensor;
149-
150-
tensor->ndim = 1;
151-
152-
// `shape` and `strides` are freed in `DLManagedTensorDeleter`, so
153-
// no memory leak here .
154-
tensor->shape = new int64_t[1];
155-
tensor->shape[0] = v->Dim();
156-
157-
tensor->strides = new int64_t[1];
158-
tensor->strides[0] = 1;
159-
160-
managed_tensor->manager_ctx = obj.ptr();
161-
obj.inc_ref(); // increase it since the above line borrows it
162-
163-
PyObject* capsule =
164-
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
165-
bool is_borrowed = false;
166-
return py::object(capsule, is_borrowed);
167-
}
168-
169-
py::capsule MatrixToDLPack(py::object obj) {
170-
auto* m = obj.cast<Matrix<float>*>();
171-
auto* managed_tensor = CreateDLManagedtensor(kDLCPU, 0, m->Data());
141+
// Both Matrix and CuMatrix will share this template
142+
template <typename M>
143+
py::capsule MatrixToDLPackImpl(const M* m, py::object* obj,
144+
DLManagedTensor* managed_tensor) {
172145
auto* tensor = &managed_tensor->dl_tensor;
173146

174147
tensor->ndim = 2;
@@ -183,84 +156,89 @@ py::capsule MatrixToDLPack(py::object obj) {
183156
tensor->strides[0] = m->Stride();
184157
tensor->strides[1] = 1;
185158

186-
managed_tensor->manager_ctx = obj.ptr();
187-
obj.inc_ref(); // increase it since the above line borrows it
159+
managed_tensor->manager_ctx = obj->ptr();
160+
obj->inc_ref(); // increase it since the above line borrows it
188161

189162
PyObject* capsule =
190163
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
191164
bool is_borrowed = false;
192165
return py::object(capsule, is_borrowed);
193166
}
194167

195-
py::capsule CuVectorToDLPack(py::object obj) {
196-
#if HAVE_CUDA == 1
197-
auto* v = obj.cast<CuVector<float>*>();
198-
auto* managed_tensor =
199-
CreateDLManagedtensor(kDLGPU, CuDevice::GetCurrentDeviceId(), v->Data());
200-
168+
template <typename V>
169+
py::capsule VectorToDLPackImpl(const V* v, py::object* obj,
170+
DLManagedTensor* managed_tensor) {
201171
auto* tensor = &managed_tensor->dl_tensor;
202172

203173
tensor->ndim = 1;
204174

205-
// `shape` and `strides` are freed in `DLManagedTensorDeleter`,
206-
// so no memory leak here.
175+
// `shape` and `strides` are freed in `DLManagedTensorDeleter`, so
176+
// no memory leak here .
207177
tensor->shape = new int64_t[1];
208178
tensor->shape[0] = v->Dim();
209179

210180
tensor->strides = new int64_t[1];
211181
tensor->strides[0] = 1;
212182

213-
managed_tensor->manager_ctx = obj.ptr();
214-
obj.inc_ref(); // increase it since the above line borrows it
183+
managed_tensor->manager_ctx = obj->ptr();
184+
obj->inc_ref(); // increase it since the above line borrows it
215185

216186
PyObject* capsule =
217187
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
218188
bool is_borrowed = false;
219189
return py::object(capsule, is_borrowed);
220-
#else
221-
KALDI_ERR << "Kaldi is not compiled with GPU!";
222-
return py::none();
223-
#endif
224190
}
225191

226-
py::capsule CuMatrixToDLPack(py::object obj) {
227-
#if HAVE_CUDA == 1
228-
auto* m = obj.cast<CuMatrix<float>*>();
192+
} // namespace
229193

230-
auto* managed_tensor =
231-
CreateDLManagedtensor(kDLGPU, CuDevice::GetCurrentDeviceId(), m->Data());
194+
namespace kaldi {
232195

233-
auto* tensor = &managed_tensor->dl_tensor;
196+
py::capsule VectorToDLPack(py::object* obj) {
197+
auto* v = obj->cast<Vector<float>*>();
198+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, v->Data());
199+
return VectorToDLPackImpl(v, obj, managed_tensor);
200+
}
234201

235-
tensor->ndim = 2;
202+
py::capsule MatrixToDLPack(py::object* obj) {
203+
auto* m = obj->cast<Matrix<float>*>();
204+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, m->Data());
205+
return MatrixToDLPackImpl(m, obj, managed_tensor);
206+
}
236207

237-
// `shape` and `strides` are freed in `DLManagedTensorDeleter`,
238-
// so no memory leak here
239-
tensor->shape = new int64_t[2];
240-
tensor->shape[0] = m->NumRows();
241-
tensor->shape[1] = m->NumCols();
208+
py::capsule CuVectorToDLPack(py::object* obj) {
209+
auto* v = obj->cast<CuVector<float>*>();
210+
#if HAVE_CUDA == 1
211+
KALDI_ASSERT(CuDevice::Instantiate().Enabled());
242212

243-
tensor->strides = new int64_t[2];
244-
tensor->strides[0] = m->Stride();
245-
tensor->strides[1] = 1;
213+
auto* managed_tensor =
214+
CreateDLManagedTensor(kDLGPU, CuDevice::GetCurrentDeviceId(), v->Data());
215+
#else
216+
// kaldi is not compiled with GPU, return a CPU tensor
217+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, v->Data());
218+
#endif
246219

247-
managed_tensor->manager_ctx = obj.ptr();
248-
obj.inc_ref(); // increase it since the above line borrows it
220+
return VectorToDLPackImpl(v, obj, managed_tensor);
221+
}
249222

250-
PyObject* capsule =
251-
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
252-
bool is_borrowed = false;
253-
return py::object(capsule, is_borrowed);
223+
py::capsule CuMatrixToDLPack(py::object* obj) {
224+
auto* m = obj->cast<CuMatrix<float>*>();
225+
#if HAVE_CUDA == 1
226+
KALDI_ASSERT(CuDevice::Instantiate().Enabled());
227+
228+
auto* managed_tensor =
229+
CreateDLManagedTensor(kDLGPU, CuDevice::GetCurrentDeviceId(), m->Data());
254230
#else
255-
KALDI_ERR << "Kaldi is not compiled with GPU!";
256-
return py::none();
231+
// kaldi is not compiled with GPU, return a CPU tensor
232+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, m->Data());
257233
#endif
234+
235+
return MatrixToDLPackImpl(m, obj, managed_tensor);
258236
}
259237

260238
// As the destructor of `VectorBase<float>` is not `virtual`
261239
// we cannot return a `VectorBase<float>*` or `SubVector<float>*`.
262240
DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule) {
263-
auto* managed_tensor = ConsumeDLManagedtensor(capsule, kDLCPU, 0, 1);
241+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 1);
264242
auto* tensor = &managed_tensor->dl_tensor;
265243

266244
// we use `py::return_value_policy::take_ownership`
@@ -277,7 +255,7 @@ DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule) {
277255
}
278256

279257
DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule) {
280-
auto* managed_tensor = ConsumeDLManagedtensor(capsule, kDLCPU, 0, 2);
258+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 2);
281259
auto* tensor = &managed_tensor->dl_tensor;
282260

283261
// DLPack assumes row major, so we use strides[0]
@@ -288,32 +266,40 @@ DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule) {
288266

289267
DLPackCuSubVector<float>* CuSubVectorFromDLPack(py::capsule* capsule) {
290268
#if HAVE_CUDA == 1
291-
auto* managed_tensor = ConsumeDLManagedtensor(
269+
// no need to check CuDevice::Instantiate().Enabled()
270+
// since `ConsumeDLManagedTensor` will check the device id
271+
auto* managed_tensor = ConsumeDLManagedTensor(
292272
capsule, kDLGPU, CuDevice::GetCurrentDeviceId(), 1);
273+
#else
274+
// Kaldi is not compiled with GPU, so we expect the passed capsule
275+
// to be a CPU tensor; if not, `ConsumeDLManagedTensor` will throw
276+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 1);
277+
#endif
278+
293279
auto* tensor = &managed_tensor->dl_tensor;
294280

295281
return new DLPackCuSubVector<float>(reinterpret_cast<float*>(tensor->data),
296282
tensor->shape[0], managed_tensor);
297-
#else
298-
KALDI_ERR << "Kaldi is not compiled with GPU!";
299-
return nullptr;
300-
#endif
301283
}
302284

303285
DLPackCuSubMatrix<float>* CuSubMatrixFromDLPack(py::capsule* capsule) {
304286
#if HAVE_CUDA == 1
305-
auto* managed_tensor = ConsumeDLManagedtensor(
287+
// no need to check CuDevice::Instantiate().Enabled()
288+
// since `ConsumeDLManagedTensor` will check the device id
289+
auto* managed_tensor = ConsumeDLManagedTensor(
306290
capsule, kDLGPU, CuDevice::GetCurrentDeviceId(), 2);
291+
#else
292+
// Kaldi is not compiled with GPU, so we expect the passed capsule
293+
// to be a CPU tensor; if not, `ConsumeDLManagedTensor` will throw
294+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 2);
295+
#endif
296+
307297
auto* tensor = &managed_tensor->dl_tensor;
308298

309299
// DLPack assumes row major, so we use strides[0]
310300
return new DLPackCuSubMatrix<float>(reinterpret_cast<float*>(tensor->data),
311301
tensor->shape[0], tensor->shape[1],
312302
tensor->strides[0], managed_tensor);
313-
#else
314-
KALDI_ERR << "Kaldi is not compiled with GPU!";
315-
return nullptr;
316-
#endif
317303
}
318304

319305
} // namespace kaldi

src/pybind/dlpack/dlpack_pybind.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,13 @@ void pybind_dlpack(py::module& m);
2828

2929
namespace kaldi {
3030

31-
// it is very cheap to copy a `py::object`,
32-
// so we pass a copy here
3331
// Inside the function, we will use
34-
// ```Vector<float>* v = obj.cast<Vector<float>*>();```
32+
// ```Vector<float>* v = obj->cast<Vector<float>*>();```
3533
// if it fails, it will throw.
36-
py::capsule VectorToDLPack(py::object obj);
37-
py::capsule MatrixToDLPack(py::object obj);
38-
py::capsule CuVectorToDLPack(py::object obj);
39-
py::capsule CuMatrixToDLPack(py::object obj);
34+
py::capsule VectorToDLPack(py::object* obj);
35+
py::capsule MatrixToDLPack(py::object* obj);
36+
py::capsule CuVectorToDLPack(py::object* obj);
37+
py::capsule CuMatrixToDLPack(py::object* obj);
4038

4139
DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule);
4240
DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule);

0 commit comments

Comments (0)