Commit c300b1b (parent 0d9d25d)
Author: wopeizl

Tensor index (#16223)

* extend the slice function for python test=develop

5 files changed: 588 additions, 1 deletion

paddle/fluid/pybind/pybind.cc

Lines changed: 9 additions & 1 deletion
@@ -347,7 +347,8 @@ PYBIND11_MODULE(core, m) {
       .def("_set_double_element", TensorSetElement<double>)
       .def("_get_double_element", TensorGetElement<double>)
       .def("_place", [](Tensor &self) { return self.place(); })
-      .def("_dtype", [](Tensor &self) { return self.type(); });
+      .def("_dtype", [](Tensor &self) { return self.type(); })
+      .def("__getitem__", PySliceTensor, py::return_value_policy::reference);
 
   py::class_<LoDTensor, Tensor>(m, "LoDTensor", R"DOC(
     LoDTensor is a Tensor with optional LoD information.
@@ -499,6 +500,13 @@ PYBIND11_MODULE(core, m) {
 
            Returns:
                out (bool): whether the lod is valid.
+           )DOC")
+      .def("__getitem__", PySliceTensor, py::return_value_policy::reference,
+           R"DOC(
+           Slice the original Tensor, and remove the LoD information.
+
+           Returns:
+               out (Tensor): new Tensor(NOT LoDTensor).
            )DOC");
 
   py::class_<SelectedRows>(m, "SelectedRows")
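
For context, the two bindings above are what make Tensor and LoDTensor objects indexable from Python; slicing a LoDTensor returns a plain Tensor with the LoD information dropped, as the docstring states. A minimal usage sketch follows. Only __getitem__ comes from this commit; the surrounding calls (fluid.core.LoDTensor, set, set_recursive_sequence_lengths, shape, fluid.CPUPlace) are assumed from the paddle.fluid 1.x API of that era.

    # Sketch of the Python-side behavior enabled by the bindings above.
    # Assumption: paddle.fluid 1.x API; only __getitem__ is added by this commit.
    import numpy as np
    import paddle.fluid as fluid

    t = fluid.core.LoDTensor()
    t.set(np.arange(12).reshape(3, 4).astype("float32"), fluid.CPUPlace())
    t.set_recursive_sequence_lengths([[1, 2]])  # attach LoD info

    rows = t[1:3]   # slice along dim 0 -> a new plain Tensor (NOT LoDTensor)
    cell = t[0, 2]  # tuple index -> each component keeps a size-1 dim, shape [1, 1]
    print(rows.shape())  # expected [2, 4]; the LoD information is not carried over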

paddle/fluid/pybind/tensor_py.h

Lines changed: 253 additions & 0 deletions
@@ -14,16 +14,22 @@ limitations under the License. */
 
 #pragma once
 #include <Python.h>
+#include <algorithm>
+#include <memory>
 #include <string>
 #include <tuple>
 #include <vector>
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/operators/math/concat_and_split.h"
+#include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 
+namespace py = pybind11;
+
 namespace paddle {
 namespace pybind {
 namespace details {
@@ -191,6 +197,253 @@ inline void PyCPUTensorSetFromArray(
   std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
 }
 
+template <typename T, size_t D>
+void _sliceCompute(const framework::Tensor *in, framework::Tensor *out,
+                   const platform::CPUDeviceContext &ctx,
+                   const std::vector<int> &axes,
+                   const std::vector<int> &starts) {
+  auto &eigen_place = *ctx.eigen_device();
+  auto place = in->place();
+  auto out_dims = out->dims();
+  auto in_dims = in->dims();
+
+  auto offsets = Eigen::array<int, D>();
+  auto extents = Eigen::array<int, D>();
+  for (size_t i = 0; i < D; ++i) {
+    offsets[i] = 0;
+    extents[i] = out_dims[i];
+  }
+  int start;
+  for (size_t i = 0; i < axes.size(); ++i) {
+    start = starts[i];
+    if (start < 0) {
+      start = (start + in_dims[axes[i]]);
+    }
+    start = std::max(start, 0);
+    offsets[axes[i]] = start;
+  }
+  auto in_t =
+      framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+          *in);
+  auto out_t =
+      framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+          *out);
+  out_t.device(eigen_place) = in_t.slice(offsets, extents);
+}
+
+template <typename T>
+void _concatCompute(const std::vector<paddle::framework::Tensor> &ins,
+                    paddle::framework::Tensor *out,
+                    const platform::CPUDeviceContext &ctx, int64_t axis) {
+  if (axis == 0 && ins.size() < 10) {
+    size_t output_offset = 0;
+    for (auto &in : ins) {
+      auto in_stride = framework::stride_numel(in.dims());
+      auto out_stride = framework::stride_numel(out->dims());
+      paddle::operators::StridedNumelCopyWithAxis<T>(
+          ctx, axis, out->data<T>() + output_offset, out_stride, in.data<T>(),
+          in_stride, in_stride[axis]);
+      output_offset += in_stride[axis];
+    }
+  } else {
+    paddle::operators::math::ConcatFunctor<platform::CPUDeviceContext, T>
+        concat_functor;
+    concat_functor(ctx, ins, static_cast<int>(axis), out);
+  }
+}
+
+void _getSliceinfo(const framework::Tensor &self, py::object obj,
+                   const int64_t dim, int64_t *pstart, int64_t *pstop,
+                   int64_t *pstep, int64_t *pslicelength) {
+  auto &start = *pstart;
+  auto &stop = *pstop;
+  auto &step = *pstep;
+  auto &slicelength = *pslicelength;
+  const framework::DDim &srcDDim = self.dims();
+  if (dim < 0 || dim >= srcDDim.size()) {
+    throw py::index_error();
+  }
+  if (py::isinstance<py::slice>(obj)) {
+    size_t lstart, lstop, lstep, lslicelength;
+    py::slice s = static_cast<py::slice>(obj);
+    if (!s.compute(srcDDim[dim], &lstart, &lstop, &lstep, &lslicelength)) {
+      throw py::index_error();
+    }
+    start = static_cast<int64_t>(lstart);
+    stop = static_cast<int64_t>(lstop);
+    step = static_cast<int64_t>(lstep);
+    slicelength = static_cast<int64_t>(lslicelength);
+  } else if (py::isinstance<py::int_>(obj)) {
+    start = static_cast<int64_t>(static_cast<py::int_>(obj));
+    if (std::abs(start) >= srcDDim[dim]) {
+      throw py::index_error();
+    }
+    start = (start >= 0) ? start : srcDDim[dim] - start;
+    stop = start + 1;
+    step = 1;
+    slicelength = 1;
+  } else {
+    throw py::index_error();
+  }
+}
+
+inline framework::Tensor *_getTensor(const framework::Tensor &self,
+                                     const framework::DDim &ddim) {
+  framework::Tensor *output = new framework::Tensor();
+  output->Resize(ddim);
+  auto place = self.place();
+  if (platform::is_cpu_place(place)) {
+    output->mutable_data(boost::get<platform::CPUPlace>(place), self.type());
+#ifdef PADDLE_WITH_CUDA
+  } else {
+    if (platform::is_cuda_pinned_place(place)) {
+      output->mutable_data(boost::get<platform::CUDAPinnedPlace>(place),
+                           self.type());
+    } else if ((platform::is_gpu_place(place))) {
+      output->mutable_data(boost::get<platform::CUDAPlace>(place), self.type());
+    }
+#endif
+  }
+  return output;
+}
+
+template <typename T>
+void _sliceDapper(const framework::Tensor *in, framework::Tensor *out,
+                  const platform::CPUDeviceContext &ctx,
+                  const std::vector<int> &axes, const std::vector<int> &starts,
+                  int size) {
+  switch (size) {
+    case 1:
+      _sliceCompute<T, 1>(in, out, ctx, axes, starts);
+      break;
+    case 2:
+      _sliceCompute<T, 2>(in, out, ctx, axes, starts);
+      break;
+    case 3:
+      _sliceCompute<T, 3>(in, out, ctx, axes, starts);
+      break;
+    case 4:
+      _sliceCompute<T, 4>(in, out, ctx, axes, starts);
+      break;
+    case 5:
+      _sliceCompute<T, 5>(in, out, ctx, axes, starts);
+      break;
+    case 6:
+      _sliceCompute<T, 6>(in, out, ctx, axes, starts);
+      break;
+    case 7:
+      _sliceCompute<T, 7>(in, out, ctx, axes, starts);
+      break;
+    case 8:
+      _sliceCompute<T, 8>(in, out, ctx, axes, starts);
+      break;
+    case 9:
+      _sliceCompute<T, 9>(in, out, ctx, axes, starts);
+      break;
+    default:
+      PADDLE_THROW("dim size not exepected, current is %d", size);
+      break;
+  }
+}
+
+template <typename T>
+inline framework::Tensor *_sliceWrapper(const framework::Tensor &self,
+                                        const platform::CPUDeviceContext &ctx,
+                                        py::object obj, int dim, int64_t start,
+                                        int64_t slicelength) {
+  framework::DDim dstDDim = self.dims();
+  dstDDim[dim] = static_cast<int64_t>(slicelength);
+  std::vector<int> axes({dim});
+  std::vector<int> starts({static_cast<int>(start)});
+  framework::Tensor *output = _getTensor(self, dstDDim);
+  _sliceDapper<T>(&self, output, ctx, axes, starts, dstDDim.size());
+  return output;
+}
+
+template <typename T>
+inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self,
+                                          py::object obj, int dim) {
+  platform::CPUDeviceContext ctx;
+  int64_t start, stop, step, slicelength;
+  _getSliceinfo(self, obj, dim, &start, &stop, &step, &slicelength);
+  if (step == 1 || slicelength == 1) {
+    return _sliceWrapper<T>(self, ctx, obj, dim, start, slicelength);
+  } else {
+    std::vector<framework::Tensor> ins;
+    for (auto i = 0; i < slicelength; ++i, start += step) {
+      ins.emplace_back(*_sliceWrapper<T>(self, ctx, obj, dim, start, 1));
+    }
+
+    // do the concat operation
+    framework::DDim dstDDim = self.dims();
+    dstDDim[dim] = static_cast<int64_t>(slicelength);
+    framework::Tensor *output1 = _getTensor(self, dstDDim);
+    _concatCompute<T>(ins, output1, ctx, dim);
+    return output1;
+  }
+}
+
+inline framework::Tensor *_sliceTensor(const framework::Tensor &self,
+                                       py::object obj, int dim) {
+  auto src_type = self.type();
+  switch (src_type) {
+    case framework::proto::VarType::FP16:
+      return _sliceAndConcat<paddle::platform::float16>(self, obj, dim);
+    case framework::proto::VarType::FP32:
+      return _sliceAndConcat<float>(self, obj, dim);
+    case framework::proto::VarType::FP64:
+      return _sliceAndConcat<double>(self, obj, dim);
+    case framework::proto::VarType::INT32:
+      return _sliceAndConcat<int>(self, obj, dim);
+    case framework::proto::VarType::INT64:
+      return _sliceAndConcat<int64_t>(self, obj, dim);
+    case framework::proto::VarType::BOOL:
+      return _sliceAndConcat<bool>(self, obj, dim);
+    case framework::proto::VarType::INT16:
+      return _sliceAndConcat<bool>(self, obj, dim);
+    case framework::proto::VarType::UINT8:
+      return _sliceAndConcat<bool>(self, obj, dim);
+    default:
+      PADDLE_THROW("Not support type %d", src_type);
+  }
+}
+
+inline framework::Tensor *_pySliceTensor(const framework::Tensor &self,
+                                         py::object obj) {
+  if (py::isinstance<py::tuple>(obj)) {
+    py::list l = static_cast<py::list>(obj);
+    std::unique_ptr<framework::Tensor> target;
+    framework::Tensor *src = const_cast<framework::Tensor *>(&self);
+    for (auto i = 0; i < static_cast<int>(l.size()); ++i) {
+      src = _sliceTensor(*src, l[i], i);
+      if (i + 1 == static_cast<int>(l.size())) {
+        return src;
+      } else {
+        target.reset(src);
+      }
+    }
+    return nullptr;
+  } else {
+    return _sliceTensor(self, obj, 0);
+  }
+}
+
+inline framework::Tensor *PySliceTensor(const framework::Tensor &self,
+                                        py::object obj) {
+  if (platform::is_gpu_place(self.place())) {
+    std::unique_ptr<framework::Tensor> holder;
+    framework::Tensor src;
+    framework::TensorCopySync(self, platform::CPUPlace(), &src);
+    framework::Tensor *output = _pySliceTensor(src, obj);
+    holder.reset(output);
+    framework::Tensor *dst = _getTensor(*output, output->dims());
+    framework::TensorCopySync(*output, self.place(), dst);
+    return dst;
+  } else {
+    return _pySliceTensor(self, obj);
+  }
+}
+
 #ifdef PADDLE_WITH_CUDA
 template <typename T>
 void PyCUDATensorSetFromArray(
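
Taken together, the helpers added above form one dispatch path: _getSliceinfo normalizes an int or slice index into (start, stop, step, slicelength); a step of 1 is handled by a single _sliceCompute call; any other step is built as per-element slices joined by _concatCompute; a tuple is applied component by component to successive dimensions in _pySliceTensor; and PySliceTensor copies GPU tensors through CPU and back. A sketch of the Python index forms these branches serve, with the same assumed fluid 1.x calls as in the earlier example:

    # Index forms covered by the C++ dispatch above (shapes follow from the code;
    # the fluid 1.x API calls around __getitem__ are assumptions).
    import numpy as np
    import paddle.fluid as fluid

    t = fluid.core.LoDTensor()
    t.set(np.arange(24).reshape(4, 6).astype("float32"), fluid.CPUPlace())

    a = t[1]         # int index  -> slicelength 1, shape [1, 6]
    b = t[1:4]       # step == 1  -> one Eigen slice, shape [3, 6]
    c = t[0:4:2]     # step != 1  -> per-element slices concatenated, shape [2, 6]
    d = t[1:3, 2:5]  # tuple      -> dim 0 then dim 1, shape [2, 3]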
