gpu_utils_py.cpp
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include <dlpack/dlpack.h>
#include <nanobind/nanobind.h>
#include <nanobind/stl/shared_ptr.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/vector.h>

#include <mscclpp/gpu_data_types.hpp>
#include <mscclpp/gpu_utils.hpp>

namespace nb = nanobind;
using namespace mscclpp;

constexpr int BYTE_BITS = 8;

// Report the DLPack device type matching the build target (ROCm or CUDA).
static DLDeviceType getDeviceType() {
#if defined(__HIP_PLATFORM_AMD__)
  return kDLROCM;
#else
  return kDLCUDA;
#endif
}

// Map a torch dtype name to the corresponding DLPack data type.
static DLDataType getDlType(std::string type) {
  if (type == "torch.float") {
    return DLDataType{kDLFloat, 32, 1};
  } else if (type == "torch.int") {
    return DLDataType{kDLInt, 32, 1};
  } else if (type == "torch.uint32") {
    return DLDataType{kDLUInt, 32, 1};
  } else if (type == "torch.bfloat16") {
    return DLDataType{kDLBfloat, 16, 1};
  } else if (type == "torch.float16") {
    return DLDataType{kDLFloat, 16, 1};
  } else {
    throw Error("Unsupported type: " + type, ErrorCode::InvalidUsage);
  }
}

// Wrap a GpuBuffer in a DLPack capsule so frameworks such as PyTorch can adopt the device
// memory without copying it.
static nb::capsule toDlpack(GpuBuffer<char> buffer, std::string dataType, std::vector<int64_t>& shape,
                            std::vector<int64_t>& strides) {
  DLDataType dtype = getDlType(dataType);
  int64_t* tensorShape = shape.size() > 0 ? new int64_t[shape.size()] : new int64_t[1];
  int64_t* tensorStrides = strides.size() > 0 ? new int64_t[strides.size()] : nullptr;
  if (shape.size() == 0) {
    // Without an explicit shape, expose the buffer as a 1-D tensor covering all elements of the requested dtype.
    tensorShape[0] = (int64_t)(buffer.nelems() / ((dtype.bits * dtype.lanes + 7) / BYTE_BITS));
  } else {
    for (size_t i = 0; i < shape.size(); ++i) {
      tensorShape[i] = shape[i];
    }
  }
  for (size_t i = 0; i < strides.size(); ++i) {
    tensorStrides[i] = strides[i];
  }

  DLManagedTensor* dlManagedTensor = new DLManagedTensor();
  dlManagedTensor->dl_tensor.data = buffer.data();
  dlManagedTensor->dl_tensor.device.device_type = getDeviceType();
  dlManagedTensor->dl_tensor.device.device_id = buffer.deviceId();
  dlManagedTensor->dl_tensor.ndim = shape.size() == 0 ? 1 : shape.size();
  dlManagedTensor->dl_tensor.strides = tensorStrides;
  dlManagedTensor->dl_tensor.shape = tensorShape;
  dlManagedTensor->dl_tensor.byte_offset = 0;
  dlManagedTensor->dl_tensor.dtype = dtype;
  // Keep a copy of the GpuBuffer in manager_ctx so the underlying allocation stays alive until
  // the consumer invokes the deleter.
  dlManagedTensor->manager_ctx = new GpuBuffer<char>(buffer);
  dlManagedTensor->deleter = [](DLManagedTensor* self) {
    delete static_cast<GpuBuffer<char>*>(self->manager_ctx);
    self->manager_ctx = nullptr;
    self->dl_tensor.data = nullptr;
    if (self->dl_tensor.shape != nullptr) {
      delete[] self->dl_tensor.shape;
      self->dl_tensor.shape = nullptr;
      if (self->dl_tensor.strides) {
        delete[] self->dl_tensor.strides;
        self->dl_tensor.strides = nullptr;
      }
    }
    delete self;
  };

  // Capsule destructor: if the consumer renamed the capsule to "used_dltensor", it has taken
  // ownership and will call the deleter itself; otherwise release the tensor here.
  PyObject* dlCapsule = PyCapsule_New(static_cast<void*>(dlManagedTensor), "dltensor", [](PyObject* capsule) {
    if (PyCapsule_IsValid(capsule, "used_dltensor")) {
      return;
    }
    if (!PyCapsule_IsValid(capsule, "dltensor")) {
      return;
    }
    DLManagedTensor* managedTensor = static_cast<DLManagedTensor*>(PyCapsule_GetPointer(capsule, "dltensor"));
    if (managedTensor == nullptr) {
      return;
    }
    if (managedTensor->deleter) {
      managedTensor->deleter(managedTensor);
    }
  });
  return nb::steal<nb::capsule>(dlCapsule);
}

// Expose GPU utilities and the raw device buffer type to Python.
void register_gpu_utils(nb::module_& m) {
  m.def("is_nvls_supported", &isNvlsSupported);

  nb::class_<GpuBuffer<char>>(m, "RawGpuBuffer")
      .def(nb::init<size_t>(), nb::arg("nelems"))
      .def("nelems", &GpuBuffer<char>::nelems)
      .def("bytes", &GpuBuffer<char>::bytes)
      .def("data", [](GpuBuffer<char>& self) { return reinterpret_cast<uintptr_t>(self.data()); })
      .def("device_id", &GpuBuffer<char>::deviceId)
      .def(
          "to_dlpack",
          [](GpuBuffer<char>& self, std::string dataType, std::vector<int64_t> shape, std::vector<int64_t> strides) {
            return toDlpack(self, dataType, shape, strides);
          },
          nb::arg("dataType"), nb::arg("shape") = std::vector<int64_t>(), nb::arg("strides") = std::vector<int64_t>());
}
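
// A minimal usage sketch from Python (illustrative only; it assumes the built mscclpp extension
// module exposes RawGpuBuffer under the mscclpp package and that PyTorch is available to consume
// the returned DLPack capsule):
//
//   import torch
//   import mscclpp
//
//   buf = mscclpp.RawGpuBuffer(4096)                     # 4096 bytes of device memory
//   t = torch.from_dlpack(buf.to_dlpack("torch.float"))  # 1-D float32 tensor with 1024 elements
//   t.zero_()                                            # operates in place on the shared device buffer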