@@ -46,9 +46,9 @@ const char* kDLPackTensorName = "dltensor";
 // PyTorch, TVM and CuPy name the used dltensor to be `used_dltensor`
 const char* kDLPackUsedTensorName = "used_dltensor";
 
-DLManagedTensor* CreateDLManagedtensor(DLDeviceType device_type, int device_id,
+DLManagedTensor* CreateDLManagedTensor(DLDeviceType device_type, int device_id,
                                        void* data) {
-  // As SubVector/SubMatrix/CuSubVector/CuSumMatrix
+  // As SubVector/SubMatrix/CuSubVector/CuSubMatrix
   // all require a DLManagedTensor, we put the shared
   // code here to avoid duplicates
 
@@ -79,7 +79,7 @@ DLManagedTensor* CreateDLManagedtensor(DLDeviceType device_type, int device_id,
   return managed_tensor;
 }
 
-DLManagedTensor* ConsumeDLManagedtensor(py::capsule* capsule,
+DLManagedTensor* ConsumeDLManagedTensor(py::capsule* capsule,
                                         DLDeviceType device_type, int device_id,
                                         int ndim) {
   // check the name of the capsule
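
For readers new to the DLPack exchange protocol: a producer exports a capsule named `dltensor`, and the first consumer renames it to `used_dltensor` so the same tensor cannot be imported twice; that is what the two constants above encode and what `ConsumeDLManagedTensor` relies on when it checks the capsule name. A minimal sketch of that consume-once handshake using only the CPython capsule API (the function name below is illustrative, not code from this patch):

    #include <Python.h>

    #include "dlpack/dlpack.h"  // defines DLManagedTensor

    // Illustrative only: how a DLPack consumer typically validates and "uses up"
    // a capsule. The real ConsumeDLManagedTensor also verifies device and ndim.
    DLManagedTensor* ExampleConsumeCapsule(PyObject* capsule) {
      if (!PyCapsule_IsValid(capsule, "dltensor")) {
        return nullptr;  // already consumed (renamed) or not a DLPack capsule
      }
      auto* managed = static_cast<DLManagedTensor*>(
          PyCapsule_GetPointer(capsule, "dltensor"));
      // Rename so the capsule destructor knows ownership has been transferred.
      PyCapsule_SetName(capsule, "used_dltensor");
      return managed;
    }
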
@@ -138,37 +138,10 @@ void DLPackCapsuleDestructor(PyObject* data) {
   }
 }
 
-}  // namespace
-
-namespace kaldi {
-
-py::capsule VectorToDLPack(py::object obj) {
-  auto* v = obj.cast<Vector<float>*>();
-  auto* managed_tensor = CreateDLManagedtensor(kDLCPU, 0, v->Data());
-  auto* tensor = &managed_tensor->dl_tensor;
-
-  tensor->ndim = 1;
-
-  // `shape` and `strides` are freed in `DLManagedTensorDeleter`, so
-  // no memory leak here.
-  tensor->shape = new int64_t[1];
-  tensor->shape[0] = v->Dim();
-
-  tensor->strides = new int64_t[1];
-  tensor->strides[0] = 1;
-
-  managed_tensor->manager_ctx = obj.ptr();
-  obj.inc_ref();  // increase it since the above line borrows it
-
-  PyObject* capsule =
-      PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
-  bool is_borrowed = false;
-  return py::object(capsule, is_borrowed);
-}
-
-py::capsule MatrixToDLPack(py::object obj) {
-  auto* m = obj.cast<Matrix<float>*>();
-  auto* managed_tensor = CreateDLManagedtensor(kDLCPU, 0, m->Data());
+// Both Matrix and CuMatrix will share this template
+template <typename M>
+py::capsule MatrixToDLPackImpl(const M* m, py::object* obj,
+                               DLManagedTensor* managed_tensor) {
   auto* tensor = &managed_tensor->dl_tensor;
 
   tensor->ndim = 2;
@@ -183,84 +156,89 @@ py::capsule MatrixToDLPack(py::object obj) {
   tensor->strides[0] = m->Stride();
   tensor->strides[1] = 1;
 
-  managed_tensor->manager_ctx = obj.ptr();
-  obj.inc_ref();  // increase it since the above line borrows it
+  managed_tensor->manager_ctx = obj->ptr();
+  obj->inc_ref();  // increase it since the above line borrows it
 
   PyObject* capsule =
       PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
   bool is_borrowed = false;
   return py::object(capsule, is_borrowed);
 }
 
-py::capsule CuVectorToDLPack(py::object obj) {
-#if HAVE_CUDA == 1
-  auto* v = obj.cast<CuVector<float>*>();
-  auto* managed_tensor =
-      CreateDLManagedtensor(kDLGPU, CuDevice::GetCurrentDeviceId(), v->Data());
-
+template <typename V>
+py::capsule VectorToDLPackImpl(const V* v, py::object* obj,
+                               DLManagedTensor* managed_tensor) {
   auto* tensor = &managed_tensor->dl_tensor;
 
   tensor->ndim = 1;
 
-  // `shape` and `strides` are freed in `DLManagedTensorDeleter`,
-  // so no memory leak here.
+  // `shape` and `strides` are freed in `DLManagedTensorDeleter`, so
+  // no memory leak here.
   tensor->shape = new int64_t[1];
   tensor->shape[0] = v->Dim();
 
   tensor->strides = new int64_t[1];
   tensor->strides[0] = 1;
 
-  managed_tensor->manager_ctx = obj.ptr();
-  obj.inc_ref();  // increase it since the above line borrows it
+  managed_tensor->manager_ctx = obj->ptr();
+  obj->inc_ref();  // increase it since the above line borrows it
 
   PyObject* capsule =
       PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
   bool is_borrowed = false;
   return py::object(capsule, is_borrowed);
-#else
-  KALDI_ERR << "Kaldi is not compiled with GPU!";
-  return py::none();
-#endif
 }
 
-py::capsule CuMatrixToDLPack(py::object obj) {
-#if HAVE_CUDA == 1
-  auto* m = obj.cast<CuMatrix<float>*>();
+}  // namespace
 
-  auto* managed_tensor =
-      CreateDLManagedtensor(kDLGPU, CuDevice::GetCurrentDeviceId(), m->Data());
+namespace kaldi {
 
-  auto* tensor = &managed_tensor->dl_tensor;
+py::capsule VectorToDLPack(py::object* obj) {
+  auto* v = obj->cast<Vector<float>*>();
+  auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, v->Data());
+  return VectorToDLPackImpl(v, obj, managed_tensor);
+}
 
-  tensor->ndim = 2;
+py::capsule MatrixToDLPack(py::object* obj) {
+  auto* m = obj->cast<Matrix<float>*>();
+  auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, m->Data());
+  return MatrixToDLPackImpl(m, obj, managed_tensor);
+}
 
-  // `shape` and `strides` are freed in `DLManagedTensorDeleter`,
-  // so no memory leak here
-  tensor->shape = new int64_t[2];
-  tensor->shape[0] = m->NumRows();
-  tensor->shape[1] = m->NumCols();
+py::capsule CuVectorToDLPack(py::object* obj) {
+  auto* v = obj->cast<CuVector<float>*>();
+#if HAVE_CUDA == 1
+  KALDI_ASSERT(CuDevice::Instantiate().Enabled());
 
-  tensor->strides = new int64_t[2];
-  tensor->strides[0] = m->Stride();
-  tensor->strides[1] = 1;
+  auto* managed_tensor =
+      CreateDLManagedTensor(kDLGPU, CuDevice::GetCurrentDeviceId(), v->Data());
+#else
+  // kaldi is not compiled with GPU, return a CPU tensor
+  auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, v->Data());
+#endif
 
-  managed_tensor->manager_ctx = obj.ptr();
-  obj.inc_ref();  // increase it since the above line borrows it
+  return VectorToDLPackImpl(v, obj, managed_tensor);
+}
 
-  PyObject* capsule =
-      PyCapsule_New(managed_tensor, kDLPackTensorName, DLPackCapsuleDestructor);
-  bool is_borrowed = false;
-  return py::object(capsule, is_borrowed);
+py::capsule CuMatrixToDLPack(py::object* obj) {
+  auto* m = obj->cast<CuMatrix<float>*>();
+#if HAVE_CUDA == 1
+  KALDI_ASSERT(CuDevice::Instantiate().Enabled());
+
+  auto* managed_tensor =
+      CreateDLManagedTensor(kDLGPU, CuDevice::GetCurrentDeviceId(), m->Data());
 #else
-  KALDI_ERR << "Kaldi is not compiled with GPU!";
-  return py::none();
+  // kaldi is not compiled with GPU, return a CPU tensor
+  auto* managed_tensor = CreateDLManagedTensor(kDLCPU, 0, m->Data());
 #endif
+
+  return MatrixToDLPackImpl(m, obj, managed_tensor);
 }
 
 // As the destructor of `VectorBase<float>` is not `virtual`
 // we cannot return a `VectorBase<float>*` or `SubVector<float>*`.
 DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule) {
-  auto* managed_tensor = ConsumeDLManagedtensor(capsule, kDLCPU, 0, 1);
+  auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 1);
   auto* tensor = &managed_tensor->dl_tensor;
 
   // we use `py::return_value_policy::take_ownership`
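
The ownership comments in this hunk rely on two rules: `shape`/`strides` are heap arrays that `DLManagedTensorDeleter` frees, and `manager_ctx` stores a borrowed Python object whose refcount was bumped via `inc_ref()`. The deleter itself sits outside the hunks shown here; the sketch below (assumed, not copied from the patch) shows the shape such a deleter has to take for those comments to hold:

    #include <Python.h>

    #include "dlpack/dlpack.h"

    // Hypothetical counterpart to the allocations above; the actual Kaldi
    // deleter may differ in detail.
    static void ExampleDLManagedTensorDeleter(DLManagedTensor* managed_tensor) {
      delete[] managed_tensor->dl_tensor.shape;    // new int64_t[ndim] above
      delete[] managed_tensor->dl_tensor.strides;  // new int64_t[ndim] above
      // Balance the obj->inc_ref() done when the capsule was created.
      Py_XDECREF(reinterpret_cast<PyObject*>(managed_tensor->manager_ctx));
      delete managed_tensor;
    }
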
@@ -277,7 +255,7 @@ DLPackSubVector<float>* SubVectorFromDLPack(py::capsule* capsule) {
 }
 
 DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule) {
-  auto* managed_tensor = ConsumeDLManagedtensor(capsule, kDLCPU, 0, 2);
+  auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 2);
   auto* tensor = &managed_tensor->dl_tensor;
 
   // DLPack assumes row major, so we use strides[0]
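
The remark just before `SubVectorFromDLPack` above, that `VectorBase<float>`'s destructor is not `virtual`, boils down to a standard C++ rule: deleting a derived object through a base-class pointer with a non-virtual destructor is undefined behavior, so pybind11's `take_ownership` policy must be handed a pointer whose static type it can safely `delete`. A toy illustration of the hazard (class names hypothetical, unrelated to the patch):

    #include <cstdio>

    struct Base {            // non-virtual destructor, like VectorBase<float>
      ~Base() { std::puts("~Base"); }
    };

    struct Derived : Base {  // owns extra state, like DLPackSubVector
      ~Derived() { std::puts("~Derived"); }
    };

    int main() {
      Base* b = new Derived();
      delete b;  // undefined behavior: ~Derived() is not guaranteed to run
      return 0;
    }
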
@@ -288,32 +266,40 @@ DLPackSubMatrix<float>* SubMatrixFromDLPack(py::capsule* capsule) {
 
 DLPackCuSubVector<float>* CuSubVectorFromDLPack(py::capsule* capsule) {
 #if HAVE_CUDA == 1
-  auto* managed_tensor = ConsumeDLManagedtensor(
+  // no need to check CuDevice::Instantiate().Enabled()
+  // since `ConsumeDLManagedTensor` will check the device id
+  auto* managed_tensor = ConsumeDLManagedTensor(
       capsule, kDLGPU, CuDevice::GetCurrentDeviceId(), 1);
+#else
+  // Kaldi is not compiled with GPU, so we expect the passed capsule
+  // to be a CPU tensor; if not, `ConsumeDLManagedTensor` will throw
+  auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 1);
+#endif
+
   auto* tensor = &managed_tensor->dl_tensor;
 
   return new DLPackCuSubVector<float>(reinterpret_cast<float*>(tensor->data),
                                       tensor->shape[0], managed_tensor);
-#else
-  KALDI_ERR << "Kaldi is not compiled with GPU!";
-  return nullptr;
-#endif
 }
 
 DLPackCuSubMatrix<float>* CuSubMatrixFromDLPack(py::capsule* capsule) {
 #if HAVE_CUDA == 1
-  auto* managed_tensor = ConsumeDLManagedtensor(
+  // no need to check CuDevice::Instantiate().Enabled()
+  // since `ConsumeDLManagedTensor` will check the device id
+  auto* managed_tensor = ConsumeDLManagedTensor(
       capsule, kDLGPU, CuDevice::GetCurrentDeviceId(), 2);
+#else
+  // Kaldi is not compiled with GPU, so we expect the passed capsule
+  // to be a CPU tensor; if not, `ConsumeDLManagedTensor` will throw
+  auto* managed_tensor = ConsumeDLManagedTensor(capsule, kDLCPU, 0, 2);
+#endif
+
   auto* tensor = &managed_tensor->dl_tensor;
 
   // DLPack assumes row major, so we use strides[0]
   return new DLPackCuSubMatrix<float>(reinterpret_cast<float*>(tensor->data),
                                       tensor->shape[0], tensor->shape[1],
                                       tensor->strides[0], managed_tensor);
-#else
-  KALDI_ERR << "Kaldi is not compiled with GPU!";
-  return nullptr;
-#endif
 }
 
 }  // namespace kaldi