Skip to content

Commit 653595d

Browse files
csukuangfj authored and danpovey committed
[pybind] Support to construct CuSubMatrix/CuSubVector from DLPack without GPU support. (#3828)
1 parent aed291b commit 653595d

File tree

12 files changed

+526
-292
lines changed

12 files changed

+526
-292
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ addons:
2222
- liblapack-dev
2323
- clang-3.8
2424
- sox
25+
- python3
26+
- python3-dev
27+
- python3-pip
2528

2629
branches:
2730
only:

src/pybind/cudamatrix/cu_device_pybind.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,14 @@ void pybind_cu_device(py::module& m) {
4343
KALDI_LOG << "Kaldi is NOT compiled with GPU! Ignore it.";
4444
#endif
4545
});
46+
47+
m.def("CudaCompiled",
48+
[]() -> bool {
49+
#if HAVE_CUDA == 1
50+
return true;
51+
#else
52+
return false;
53+
#endif
54+
},
55+
"true if kaldi is compiled with GPU support; false otherwise");
4656
}

src/pybind/cudamatrix/cu_matrix_pybind.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ void pybind_cu_matrix(py::module& m) {
5555
py::arg("MatrixStrideType") = kDefaultStride)
5656
.def(py::init<const MatrixBase<float>&, MatrixTransposeType>(),
5757
py::arg("other"), py::arg("trans") = kNoTrans)
58-
.def("to_dlpack", [](py::object obj) { return CuMatrixToDLPack(obj); });
58+
.def("to_dlpack",
59+
[](py::object obj) { return CuMatrixToDLPack(&obj); });
5960
}
6061
{
6162
using PyClass = CuSubMatrix<float>;

src/pybind/cudamatrix/cu_vector_pybind.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ void pybind_cu_vector(py::module& m) {
4444
.def(py::init<MatrixIndexT, MatrixResizeType>(), py::arg("dim"),
4545
py::arg("MatrixResizeType") = kSetZero)
4646
.def(py::init<const VectorBase<float>&>(), py::arg("v"))
47-
.def("to_dlpack", [](py::object obj) { return CuVectorToDLPack(obj); });
47+
.def("to_dlpack",
48+
[](py::object obj) { return CuVectorToDLPack(&obj); });
4849
}
4950
{
5051
using PyClass = CuSubVector<float>;

src/pybind/dlpack/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11

22
test:
33
python3 ./dlpack_pybind_test.py
4+
python3 ./dlpack_pybind_test_gpu.py

src/pybind/dlpack/dlpack_pybind.cc

Lines changed: 71 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ const char* kDLPackTensorName = "dltensor";
4646
// PyTorch, TVM and CuPy name the used dltensor to be `used_dltensor`
4747
const char* kDLPackUsedTensorName = "used_dltensor";
4848

49-
DLManagedTensor* CreateDLManagedtensor(DLDeviceType device_type, int device_id,
49+
DLManagedTensor* CreateDLManagedTensor(DLDeviceType device_type, int device_id,
5050
void* data) {
51-
// As SubVector/SubMatrix/CuSubVector/CuSumMatrix
51+
// As SubVector/SubMatrix/CuSubVector/CuSubMatrix
5252
// all require a DLManagedTensor, we put the shared
5353
// code here to avoid duplicates
5454

@@ -79,7 +79,7 @@ DLManagedTensor* CreateDLManagedtensor(DLDeviceType device_type, int device_id,
7979
return managed_tensor;
8080
}
8181

82-
DLManagedTensor* ConsumeDLManagedtensor(py::capsule* capsule,
82+
DLManagedTensor* ConsumeDLManagedTensor(py::capsule* capsule,
8383
DLDeviceType device_type, int device_id,
8484
int ndim) {
8585
// check the name of the capsule
@@ -138,37 +138,10 @@ void DLPackCapsuleDestructor(PyObject* data) {
138138
}
139139
}
140140

141-
} // namespace
142-
143-
namespace kaldi {
144-
145-
py::capsule VectorToDLPack(py::object obj) {
146-
auto* v = obj.cast<Vector<float>*>();
147-
auto* managed_tensor = CreateDLManagedtensor(kDLCPU, 0, v->Data());
148-
auto* tensor = &managed_tensor->dl_tensor;
149-
150-
tensor->ndim = 1;
151-
152-
// `shape` and `strides` are freed in `DLManagedTensorDeleter`, so
153-
// no memory leak here .
154-
tensor->shape = new int64_t[1];
155-
tensor->shape[0] = v->Dim();
156-
157-
tensor->strides = new int64_t[1];
158-
tensor->strides[0] = 1;
159-
160-
managed_tensor->manager_ctx = obj.ptr();
161-
obj.inc_ref(); // increase it since the above line borrows it
162-
163-
PyObject* capsule =
164-
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
165-
bool is_borrowed = false;
166-
return py::object(capsule, is_borrowed);
167-
}
168-
169-
py::capsule MatrixToDLPack(py::object obj) {
170-
auto* m = obj.cast<Matrix<float>*>();
171-
auto* managed_tensor = CreateDLManagedtensor(kDLCPU, 0, m->Data());
141+
// Both Matrix and CuMatrix will share this template
142+
template <typename M>
143+
py::capsule MatrixToDLPackImpl(const M* m, py::object* obj,
144+
DLManagedTensor* managed_tensor) {
172145
auto* tensor = &managed_tensor->dl_tensor;
173146

174147
tensor->ndim = 2;
@@ -183,84 +156,89 @@ py::capsule MatrixToDLPack(py::object obj) {
183156
tensor->strides[0] = m->Stride();
184157
tensor->strides[1] = 1;
185158

186-
managed_tensor->manager_ctx = obj.ptr();
187-
obj.inc_ref(); // increase it since the above line borrows it
159+
managed_tensor->manager_ctx = obj->ptr();
160+
obj->inc_ref(); // increase it since the above line borrows it
188161

189162
PyObject* capsule =
190163
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
191164
bool is_borrowed = false;
192165
return py::object(capsule, is_borrowed);
193166
}
194167

195-
py::capsule CuVectorToDLPack(py::object obj) {
196-
#if HAVE_CUDA == 1
197-
auto* v = obj.cast<CuVector<float>*>();
198-
auto* managed_tensor =
199-
CreateDLManagedtensor(kDLGPU, CuDevice::GetCurrentDeviceId(), v->Data());
200-
168+
template <typename V>
169+
py::capsule VectorToDLPackImpl(const V* v, py::object* obj,
170+
DLManagedTensor* managed_tensor) {
201171
auto* tensor = &managed_tensor->dl_tensor;
202172

203173
tensor->ndim = 1;
204174

205-
// `shape` and `strides` are freed in `DLManagedTensorDeleter`,
206-
// so no memory leak here.
175+
// `shape` and `strides` are freed in `DLManagedTensorDeleter`, so
176+
// no memory leak here .
207177
tensor->shape = new int64_t[1];
208178
tensor->shape[0] = v->Dim();
209179

210180
tensor->strides = new int64_t[1];
211181
tensor->strides[0] = 1;
212182

213-
managed_tensor->manager_ctx = obj.ptr();
214-
obj.inc_ref(); // increase it since the above line borrows it
183+
managed_tensor->manager_ctx = obj->ptr();
184+
obj->inc_ref(); // increase it since the above line borrows it
215185

216186
PyObject* capsule =
217187
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
218188
bool is_borrowed = false;
219189
return py::object(capsule, is_borrowed);
220-
#else
221-
KALDI_ERR << "Kaldi is not compiled with GPU!";
222-
return py::none();
223-
#endif
224190
}
225191

226-
py::capsule CuMatrixToDLPack(py::object obj) {
227-
#if HAVE_CUDA == 1
228-
auto* m = obj.cast<CuMatrix<float>*>();
192+
} // namespace
229193

230-
auto* managed_tensor =
231-
CreateDLManagedtensor(kDLGPU, CuDevice::GetCurrentDeviceId(), m->Data());
194+
namespace kaldi {
232195

233-
auto* tensor = &managed_tensor->dl_tensor;
196+
py::capsule VectorToDLPack(py::object* obj) {
197+
auto* v = obj->cast<Vector<float>*>();
198+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, v->Data());
199+
return VectorToDLPackImpl(v, obj, managed_tensor);
200+
}
234201

235-
tensor->ndim = 2;
202+
py::capsule MatrixToDLPack(py::object* obj) {
203+
auto* m = obj->cast<Matrix<float>*>();
204+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, m->Data());
205+
return MatrixToDLPackImpl(m, obj, managed_tensor);
206+
}
236207

237-
// `shape` and `strides` are freed in `DLManagedTensorDeleter`,
238-
// so no memory leak here
239-
tensor->shape = new int64_t[2];
240-
tensor->shape[0] = m->NumRows();
241-
tensor->shape[1] = m->NumCols();
208+
py::capsule CuVectorToDLPack(py::object* obj) {
209+
auto* v = obj->cast<CuVector<float>*>();
210+
#if HAVE_CUDA == 1
211+
KALDI_ASSERT(CuDevice::Instantiate().Enabled());
242212

243-
tensor->strides = new int64_t[2];
244-
tensor->strides[0] = m->Stride();
245-
tensor->strides[1] = 1;
213+
auto* managed_tensor =
214+
CreateDLManagedTensor(kDLGPU, CuDevice::GetCurrentDeviceId(), v->Data());
215+
#else
216+
// kaldi is not compiled with GPU, return a CPU tensor
217+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, v->Data());
218+
#endif
246219

247-
managed_tensor->manager_ctx = obj.ptr();
248-
obj.inc_ref(); // increase it since the above line borrows it
220+
return VectorToDLPackImpl(v, obj, managed_tensor);
221+
}
249222

250-
PyObject* capsule =
251-
PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
252-
bool is_borrowed = false;
253-
return py::object(capsule, is_borrowed);
223+
py::capsule CuMatrixToDLPack(py::object* obj) {
224+
auto* m = obj->cast<CuMatrix<float>*>();
225+
#if HAVE_CUDA == 1
226+
KALDI_ASSERT(CuDevice::Instantiate().Enabled());
227+
228+
auto* managed_tensor =
229+
CreateDLManagedTensor(kDLGPU, CuDevice::GetCurrentDeviceId(), m->Data());
254230
#else
255-
KALDI_ERR << "Kaldi is not compiled with GPU!";
256-
return py::none();
231+
// kaldi is not compiled with GPU, return a CPU tensor
232+
auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, m->Data());
257233
#endif
234+
235+
return MatrixToDLPackImpl(m, obj, managed_tensor);
258236
}
259237

260238
// As the destructor of `VectorBase<float>` is not `virtual`
261239
// we cannot return a `VectorBase<float>*` or `SubVector<float>*`.
262240
DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule) {
263-
auto* managed_tensor = ConsumeDLManagedtensor(capsule, kDLCPU, 0, 1);
241+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 1);
264242
auto* tensor = &managed_tensor->dl_tensor;
265243

266244
// we use `py::return_value_policy::take_ownership`
@@ -277,7 +255,7 @@ DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule) {
277255
}
278256

279257
DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule) {
280-
auto* managed_tensor = ConsumeDLManagedtensor(capsule, kDLCPU, 0, 2);
258+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 2);
281259
auto* tensor = &managed_tensor->dl_tensor;
282260

283261
// DLPack assumes row major, so we use strides[0]
@@ -288,32 +266,40 @@ DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule) {
288266

289267
DLPackCuSubVector<float>* CuSubVectorFromDLPack(py::capsule* capsule) {
290268
#if HAVE_CUDA == 1
291-
auto* managed_tensor = ConsumeDLManagedtensor(
269+
// no need to check CuDevice::Instantiate().Enabled()
270+
// since `ConsumeDLManagedTensor` will check the device id
271+
auto* managed_tensor = ConsumeDLManagedTensor(
292272
capsule, kDLGPU, CuDevice::GetCurrentDeviceId(), 1);
273+
#else
274+
// Kaldi is not compiled with GPU, so we expect the passed capsule
275+
// to be a CPU tensor; if not, `ConsumeDLManagedTensor` will throw
276+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 1);
277+
#endif
278+
293279
auto* tensor = &managed_tensor->dl_tensor;
294280

295281
return new DLPackCuSubVector<float>(reinterpret_cast<float*>(tensor->data),
296282
tensor->shape[0], managed_tensor);
297-
#else
298-
KALDI_ERR << "Kaldi is not compiled with GPU!";
299-
return nullptr;
300-
#endif
301283
}
302284

303285
DLPackCuSubMatrix<float>* CuSubMatrixFromDLPack(py::capsule* capsule) {
304286
#if HAVE_CUDA == 1
305-
auto* managed_tensor = ConsumeDLManagedtensor(
287+
// no need to check CuDevice::Instantiate().Enabled()
288+
// since `ConsumeDLManagedTensor` will check the device id
289+
auto* managed_tensor = ConsumeDLManagedTensor(
306290
capsule, kDLGPU, CuDevice::GetCurrentDeviceId(), 2);
291+
#else
292+
// Kaldi is not compiled with GPU, so we expect the passed capsule
293+
// to be a CPU tensor; if not, `ConsumeDLManagedTensor` will throw
294+
auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 2);
295+
#endif
296+
307297
auto* tensor = &managed_tensor->dl_tensor;
308298

309299
// DLPack assumes row major, so we use strides[0]
310300
return new DLPackCuSubMatrix<float>(reinterpret_cast<float*>(tensor->data),
311301
tensor->shape[0], tensor->shape[1],
312302
tensor->strides[0], managed_tensor);
313-
#else
314-
KALDI_ERR << "Kaldi is not compiled with GPU!";
315-
return nullptr;
316-
#endif
317303
}
318304

319305
} // namespace kaldi

src/pybind/dlpack/dlpack_pybind.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,13 @@ void pybind_dlpack(py::module& m);
2828

2929
namespace kaldi {
3030

31-
// it is very cheap to copy a `py::object`,
32-
// so we pass a copy here
3331
// Inside the function, we will use
34-
// ```Vector<float>* v = obj.cast<Vector<float>*>();```
32+
// ```Vector<float>* v = obj->cast<Vector<float>*>();```
3533
// if it fails, it will throw.
36-
py::capsule VectorToDLPack(py::object obj);
37-
py::capsule MatrixToDLPack(py::object obj);
38-
py::capsule CuVectorToDLPack(py::object obj);
39-
py::capsule CuMatrixToDLPack(py::object obj);
34+
py::capsule VectorToDLPack(py::object* obj);
35+
py::capsule MatrixToDLPack(py::object* obj);
36+
py::capsule CuVectorToDLPack(py::object* obj);
37+
py::capsule CuMatrixToDLPack(py::object* obj);
4038

4139
DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule);
4240
DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule);

0 commit comments

Comments (0)