Commit a3c5f3a

gongchensuzhuyue and zhuyue authored
issue/505 - Feature/add debug (#504)
* Add debug function in InfiniCore tensor.
* Refactor test scripts, remove txt write, and add large-scale and non-contiguous tensor I/O tests.
* Move debug.py out of the op operator test folder.

Co-authored-by: zhuyue <[email protected]>
1 parent 2c168b6 commit a3c5f3a

File tree

5 files changed, +776 -1 lines changed
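The user-facing surface of this commit is the new `Tensor.debug()` method. A minimal usage sketch, not part of the commit, assuming the `empty` factory shown below with its default dtype and device:

```python
import infinicore

# `empty` is the factory defined in python/infinicore/tensor.py.
t = infinicore.empty((2, 3))

t.debug()              # print tensor info and data to stdout
t.debug("t_dump.bin")  # write the raw element bytes to a binary file instead
```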

include/infinicore/tensor.hpp

Lines changed: 4 additions & 0 deletions

```diff
@@ -122,6 +122,10 @@ class TensorImpl : public std::enable_shared_from_this<TensorImpl> {
 
     std::string info() const;
 
+    void debug(const std::string &filename) const;
+
+    void debug() const;
+
     ///
     /// Data Transfer APIs
     ///
```

python/infinicore/tensor.py

Lines changed: 11 additions & 0 deletions

```diff
@@ -75,6 +75,17 @@ def permute(self, dims):
     def view(self, shape):
         return Tensor(self._underlying.view(shape))
 
+    def debug(self, filename=None):
+        """Print tensor data or save to file for debugging
+
+        Args:
+            filename: Optional filename to save raw binary data. If None, prints to stdout.
+        """
+        if filename is None:
+            self._underlying.debug()
+        else:
+            self._underlying.debug(filename)
+
 
 def empty(size, *, dtype=None, device=None, pin_memory=False):
     return Tensor(
```
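Since `debug(filename)` writes only the raw element bytes (no header), a dump can be inspected by reading it back with NumPy, provided the caller knows the dtype and shape. A minimal sketch, assuming a float32 tensor of shape (4, 5) was dumped to `t_dump.bin` (both names are illustrative, not from the commit):

```python
import numpy as np

# Read the headerless dump back; dtype and shape must be supplied by the caller.
data = np.fromfile("t_dump.bin", dtype=np.float32).reshape(4, 5)
print(data)
```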

src/infinicore/pybind11/tensor.hpp

Lines changed: 3 additions & 1 deletion

```diff
@@ -17,14 +17,16 @@ inline void bind(py::module &m) {
         .def_property_readonly("dtype", [](const Tensor &tensor) { return tensor->dtype(); })
         .def_property_readonly("device", [](const Tensor &tensor) { return tensor->device(); })
 
-        .def("data_ptr", [](const Tensor &tensor) { return tensor->data(); })
+        .def("data_ptr", [](const Tensor &tensor) { return reinterpret_cast<uintptr_t>(tensor->data()); })
         .def("size", [](const Tensor &tensor, std::size_t dim) { return tensor->size(dim); })
         .def("stride", [](const Tensor &tensor, std::size_t dim) { return tensor->stride(dim); })
         .def("numel", [](const Tensor &tensor) { return tensor->numel(); })
 
         .def("is_contiguous", [](const Tensor &tensor) { return tensor->is_contiguous(); })
         .def("is_pinned", [](const Tensor &tensor) { return tensor->is_pinned(); })
         .def("info", [](const Tensor &tensor) { return tensor->info(); })
+        .def("debug", [](const Tensor &tensor) { return tensor->debug(); })
+        .def("debug", [](const Tensor &tensor, const std::string &filename) { return tensor->debug(filename); })
 
         .def("copy_", [](Tensor &tensor, const Tensor &other) { tensor->copy_from(other); })
         .def("to", [](const Tensor &tensor, const Device &device) { return tensor->to(device); })
```

src/infinicore/tensor/debug.cc

Lines changed: 294 additions & 0 deletions (new file)

```cpp
#include "infinicore/context/context.hpp"
#include "infinicore/dtype.hpp"
#include "infinicore/tensor.hpp"

#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>

namespace infinicore {

inline float f16_to_f32(uint16_t h) {
    uint32_t sign = (h & 0x8000) << 16;
    int32_t exponent = (h >> 10) & 0x1F;
    uint32_t mantissa = h & 0x3FF;

    uint32_t f32;
    if (exponent == 31) {
        if (mantissa != 0) {
            f32 = sign | 0x7F800000 | (mantissa << 13);
        } else {
            f32 = sign | 0x7F800000;
        }
    } else if (exponent == 0) {
        if (mantissa == 0) {
            f32 = sign;
        } else {
            exponent = -14;
            while ((mantissa & 0x400) == 0) {
                mantissa <<= 1;
                exponent--;
            }
            mantissa &= 0x3FF;
            f32 = sign | ((exponent + 127) << 23) | (mantissa << 13);
        }
    } else {
        f32 = sign | ((exponent + 127 - 15) << 23) | (mantissa << 13);
    }

    float result;
    std::memcpy(&result, &f32, sizeof(result));
    return result;
}

inline float bf16_to_f32(uint16_t val) {
    uint32_t bits32 = static_cast<uint32_t>(val) << 16;
    float out;
    std::memcpy(&out, &bits32, sizeof(out));
    return out;
}

// Template function for printing data recursively
template <typename T>
void print_data(const T *data, const Shape &shape, const Strides &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << data[i * strides[dim]] << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data(data + i * strides[dim], shape, strides, dim + 1);
        }
    }
}

// Specialization for F16 (uint16_t)
template <>
void print_data<uint16_t>(const uint16_t *data, const Shape &shape, const Strides &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << f16_to_f32(data[i * strides[dim]]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data(data + i * strides[dim], shape, strides, dim + 1);
        }
    }
}

// Function for printing BF16 data
void print_data_bf16(const uint16_t *data, const Shape &shape, const Strides &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << bf16_to_f32(data[i * strides[dim]]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data_bf16(data + i * strides[dim], shape, strides, dim + 1);
        }
    }
}

// Template function for writing data recursively to binary file (handles non-contiguous tensors)
template <typename T>
void write_binary_data(std::ofstream &out, const T *data, const Shape &shape, const Strides &strides, size_t dim) {
    if (dim == shape.size() - 1) {
        // Write the innermost dimension
        for (size_t i = 0; i < shape[dim]; i++) {
            out.write(reinterpret_cast<const char *>(&data[i * strides[dim]]), sizeof(T));
        }
    } else {
        // Recursively process higher dimensions
        for (size_t i = 0; i < shape[dim]; i++) {
            write_binary_data(out, data + i * strides[dim], shape, strides, dim + 1);
        }
    }
}

void TensorImpl::debug(const std::string &filename) const {
    // Synchronize device if needed
    context::syncDevice();

    std::cout << info() << std::endl;

    const std::byte *cpu_data = nullptr;
    std::unique_ptr<std::byte[]> allocated_memory; // RAII: memory is managed automatically

    // Copy data to CPU if not already on CPU
    if (this->device().getType() != Device::Type::CPU) {
        size_t numel = this->numel();
        size_t element_size = dsize(this->dtype());

        // Check for multiplication overflow
        if (numel > 0 && element_size > std::numeric_limits<size_t>::max() / numel) {
            std::cerr << "Error: Memory size calculation overflow for tensor with "
                      << numel << " elements of size " << element_size << "\n";
            return;
        }

        size_t mem_size = numel * element_size;
        allocated_memory = std::make_unique<std::byte[]>(mem_size);
        context::memcpyD2H(allocated_memory.get(), this->data(), mem_size);
        cpu_data = allocated_memory.get();
    } else {
        cpu_data = this->data();
    }

    // If filename is provided, save to binary file
    if (!filename.empty()) {
        std::ofstream outFile(filename, std::ios::binary);
        if (!outFile) {
            std::cerr << "Error opening file for writing: " << filename << "\n";
            return; // allocated_memory is released automatically (RAII)
        }

        // Check if tensor is contiguous - for optimization
        if (this->is_contiguous()) {
            // Fast path: contiguous tensor, write in one go
            size_t mem_size = this->numel() * dsize(this->dtype());
            outFile.write(reinterpret_cast<const char *>(cpu_data), mem_size);
        } else {
            // Slow path: non-contiguous tensor, write element by element using strides
            switch (this->dtype()) {
            case DataType::F16:
            case DataType::BF16:
                write_binary_data(outFile, reinterpret_cast<const uint16_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::F32:
                write_binary_data(outFile, reinterpret_cast<const float *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::F64:
                write_binary_data(outFile, reinterpret_cast<const double *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::U64:
                write_binary_data(outFile, reinterpret_cast<const uint64_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::I64:
                write_binary_data(outFile, reinterpret_cast<const int64_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::U32:
                write_binary_data(outFile, reinterpret_cast<const uint32_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::I32:
                write_binary_data(outFile, reinterpret_cast<const int32_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::U16:
                write_binary_data(outFile, reinterpret_cast<const uint16_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::I16:
                write_binary_data(outFile, reinterpret_cast<const int16_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::U8:
                write_binary_data(outFile, reinterpret_cast<const uint8_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::I8:
                write_binary_data(outFile, reinterpret_cast<const int8_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            case DataType::BOOL:
                // Special handling for bool: write as uint8_t for cross-platform consistency
                write_binary_data(outFile, reinterpret_cast<const uint8_t *>(cpu_data),
                                  this->shape(), this->strides(), 0);
                break;
            default:
                std::cerr << "Unsupported data type for binary output\n";
                return;
            }
        }

        // Explicitly close the file and check that the write succeeded
        outFile.close();
        if (!outFile) {
            std::cerr << "Error: Failed to write data to file: " << filename << "\n";
            return;
        }

        std::cout << "Data written to binary file: " << filename;
        if (!this->is_contiguous()) {
            std::cout << " (non-contiguous tensor, wrote " << this->numel() << " elements)";
        }
        std::cout << "\n";
        return;
    }

    // Print data based on dtype
    switch (this->dtype()) {
    case DataType::F16:
        print_data(reinterpret_cast<const uint16_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::F32:
        print_data(reinterpret_cast<const float *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::F64:
        print_data(reinterpret_cast<const double *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::U64:
        print_data(reinterpret_cast<const uint64_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::I64:
        print_data(reinterpret_cast<const int64_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::U32:
        print_data(reinterpret_cast<const uint32_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::I32:
        print_data(reinterpret_cast<const int32_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::U16:
        print_data(reinterpret_cast<const uint16_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::I16:
        print_data(reinterpret_cast<const int16_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::U8:
        print_data(reinterpret_cast<const uint8_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::I8:
        print_data(reinterpret_cast<const int8_t *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    case DataType::BF16:
        print_data_bf16(reinterpret_cast<const uint16_t *>(cpu_data),
                        this->shape(), this->strides(), 0);
        break;
    case DataType::BOOL:
        print_data(reinterpret_cast<const bool *>(cpu_data),
                   this->shape(), this->strides(), 0);
        break;
    default:
        std::cout << "Unsupported data type for debug" << std::endl;
        break;
    }
}

void TensorImpl::debug() const {
    this->debug("");
}

} // namespace infinicore
```
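`f16_to_f32` above is a standard IEEE 754 binary16-to-binary32 expansion (rebias the exponent from 15 to 127, shift the mantissa left by 13 bits, with separate branches for denormals and Inf/NaN), and `bf16_to_f32` simply places the 16 stored bits in the upper half of a float32. One way to sanity-check such helpers is to mirror them in Python and compare against NumPy's native float16 handling; the sketch below is not part of the commit:

```python
import struct

import numpy as np


def f16_to_f32(h: int) -> float:
    """Python mirror of the C++ f16_to_f32 bit manipulation."""
    sign = (h & 0x8000) << 16
    exponent = (h >> 10) & 0x1F
    mantissa = h & 0x3FF
    if exponent == 31:                       # Inf (mantissa == 0) or NaN
        f32 = sign | 0x7F800000 | (mantissa << 13)
    elif exponent == 0:
        if mantissa == 0:                    # signed zero
            f32 = sign
        else:                                # denormal: renormalize the mantissa
            exponent = -14
            while (mantissa & 0x400) == 0:
                mantissa <<= 1
                exponent -= 1
            mantissa &= 0x3FF
            f32 = sign | ((exponent + 127) << 23) | (mantissa << 13)
    else:                                    # normal number: rebias 15 -> 127
        f32 = sign | ((exponent + 127 - 15) << 23) | (mantissa << 13)
    return struct.unpack("<f", struct.pack("<I", f32))[0]


def bf16_to_f32(bits: int) -> float:
    """bfloat16 shares float32's exponent; zero-padding the low 16 bits is exact."""
    return struct.unpack("<f", struct.pack("<I", bits << 16))[0]


# Exhaustively compare f16_to_f32 with NumPy over all 65536 bit patterns.
for bits in range(0x10000):
    expected = float(np.array([bits], dtype=np.uint16).view(np.float16)[0])
    got = f16_to_f32(bits)
    assert (np.isnan(expected) and np.isnan(got)) or expected == got, hex(bits)

# Spot-check bf16_to_f32 against known encodings: 1.0, -1.0, 2.0, +inf.
assert bf16_to_f32(0x3F80) == 1.0
assert bf16_to_f32(0xBF80) == -1.0
assert bf16_to_f32(0x4000) == 2.0
assert bf16_to_f32(0x7F80) == float("inf")
```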
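Because `write_binary_data` recurses over the shape and applies the strides at every level, a non-contiguous tensor is dumped in logical index order (the order its elements appear when iterating over the shape), not in storage order. A hypothetical check of that behaviour from Python, assuming `permute` accepts a dims tuple and the default element type is float32 (both are assumptions, not stated by the commit):

```python
import numpy as np

import infinicore

a = infinicore.empty((2, 3))       # factory from python/infinicore/tensor.py
b = a.permute((1, 0))              # transposed, non-contiguous view
b.debug("b_dump.bin")              # slow path: elements written via strides

# The dump reads back in b's logical (3, 2) order, as if b were contiguous.
dumped = np.fromfile("b_dump.bin", dtype=np.float32).reshape(3, 2)
print(dumped)
```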
