Skip to content

Commit 453da78

Browse files
authored
Add Intel HPU custom op index_copy (#1491)
1 parent 1fba424 commit 453da78

File tree

5 files changed

+1038
-0
lines changed

5 files changed

+1038
-0
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env python3

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Copyright (c) 2024 Baidu.com, Inc. All Rights Reserved.

Build and set up the Intel HPU custom ops extension.

Compiles src/index_copy.cc into the `paddlenlp_ops` extension module,
linking against the paddle-intel-hpu custom-device runtime and the
Habana Synapse library.
"""

from paddle.utils.cpp_extension import CppExtension, setup

setup(
    name="paddlenlp_ops",
    ext_modules=[
        CppExtension(
            sources=[
                "./src/index_copy.cc",
            ],
            # Headers from the custom-device repo root plus third-party
            # build artifacts (oneDNN, glog, gflags).
            include_dirs=[
                "../",
                "../build/third_party/install/onednn/include/",
                "../build/third_party/install/glog/include/",
                "../build/third_party/install/gflags/include/",
            ],
            library_dirs=[
                "../build/python/paddle_custom_device/",
                "/usr/lib/habanalabs/",
            ],
            libraries=[
                "paddle-intel-hpu",
                "Synapse",
            ],
        )
    ],
)
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"); you may
4+
// not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <stdexcept>
#include <string>
#include <vector>

#include "habanalabs/perf_lib_layer_params.h"
#include "kernels/funcs.h"
#include "kernels/hpu_operator.h"
#include "paddle/extension.h"
#include "utils/utils.h"
20+
21+
namespace custom_kernel {
22+
23+
class IndexCopy : public HpuOperator {
24+
public:
25+
explicit IndexCopy(synDataType dtype)
26+
: HpuOperator("index_copy_fwd"), dtype_(dtype) {}
27+
28+
void AddNode(ConvertTensors& ct, ns_IndexCopy::Params params) {
29+
auto inputs = ct.GetTensors();
30+
auto outputs = ct.GetTensors(false);
31+
32+
std::vector<synTensor> syn_inputs;
33+
for (size_t i = 0; i < inputs.size(); i++) {
34+
syn_inputs.push_back(createTensor(inputs[i].dims.size(),
35+
inputs[i].type,
36+
inputs[i].dims,
37+
true,
38+
inputs[i].name));
39+
}
40+
41+
std::vector<synTensor> syn_outputs;
42+
for (size_t i = 0; i < outputs.size(); i++) {
43+
syn_outputs.push_back(createTensor(outputs[i].dims.size(),
44+
outputs[i].type,
45+
outputs[i].dims,
46+
true,
47+
outputs[i].name));
48+
}
49+
50+
std::string guid = guid_ + "_" + SynDataTypeToStr(outputs[0].type);
51+
synStatus status = synNodeCreate(graphHandle_,
52+
syn_inputs.data(),
53+
syn_outputs.data(),
54+
syn_inputs.size(),
55+
syn_outputs.size(),
56+
&params,
57+
sizeof(params),
58+
guid.c_str(),
59+
"index_copy",
60+
nullptr,
61+
nullptr);
62+
63+
PD_CHECK(
64+
status == synSuccess, "[RUNTIME] synNodeCreate () failed = %d", status);
65+
}
66+
67+
protected:
68+
synDataType dtype_;
69+
};
70+
71+
template <typename T, typename Context>
72+
void IndexCopyKernel(const Context& dev_ctx,
73+
const phi::DenseTensor& input,
74+
const phi::Scalar& dim,
75+
const phi::DenseTensor& index,
76+
const phi::DenseTensor& source,
77+
phi::DenseTensor* out) {
78+
dev_ctx.template Alloc<T>(out);
79+
if (out->numel() == 0) {
80+
return;
81+
}
82+
83+
ConvertTensors ct;
84+
ct.Add(input);
85+
ct.Add(index);
86+
ct.Add(source);
87+
88+
ct.Add(out, false);
89+
90+
std::vector<DIMS> inputs_dims = ct.GetDims();
91+
ns_IndexCopy::Params params{};
92+
params.axis = dim.to<unsigned>();
93+
94+
OpCacheOperator op_info;
95+
op_info.prepareOpInfo<T, ns_IndexCopy::Params>(
96+
"index_copy_kernel", inputs_dims, &params);
97+
98+
auto recipe = op_info.GetRecipe();
99+
100+
if (recipe == nullptr) {
101+
IndexCopy op(op_info.datatype_);
102+
op.AddNode(ct, params);
103+
op.Compile();
104+
op_info.setOp(op);
105+
recipe = op_info.GetRecipe();
106+
}
107+
108+
RecipeRunner runner(recipe);
109+
auto tensors = ct.GetDeviceAddr();
110+
runner.Run(reinterpret_cast<C_Stream>(dev_ctx.stream()), tensors);
111+
}
112+
113+
} // namespace custom_kernel
114+
115+
template <typename Context>
116+
void CallIndexCopyKernel(const Context& dev_ctx,
117+
const phi::DenseTensor& input,
118+
const phi::Scalar& dim,
119+
const phi::DenseTensor& index,
120+
const phi::DenseTensor& source,
121+
phi::DenseTensor* out) {
122+
if (input.dtype() == phi::DataType::FLOAT32) {
123+
custom_kernel::IndexCopyKernel<float>(
124+
dev_ctx, input, dim, index, source, out);
125+
} else if (input.dtype() == phi::DataType::FLOAT16) {
126+
custom_kernel::IndexCopyKernel<phi::dtype::float16>(
127+
dev_ctx, input, dim, index, source, out);
128+
} else if (input.dtype() == phi::DataType::BFLOAT16) {
129+
custom_kernel::IndexCopyKernel<phi::dtype::bfloat16>(
130+
dev_ctx, input, dim, index, source, out);
131+
} else {
132+
throw std::runtime_error("Unsupported data type for IndexCopyKernel");
133+
}
134+
}
135+
136+
// Custom-op entry point: allocates an output tensor shaped like `input`
// and invokes the dtype-dispatched HPU kernel. Returns {out}.
std::vector<paddle::Tensor> IndexCopyForward(const paddle::Tensor& input,
                                             const int dim,
                                             const paddle::Tensor& index,
                                             const paddle::Tensor& source) {
  auto dev_ctx = static_cast<const phi::CustomContext*>(
      paddle::experimental::DeviceContextPool::Instance().Get(input.place()));

  // FIX: all three inputs are read-only here, so cast them uniformly to
  // const. Previously `input` was cast to a mutable DenseTensor* even
  // though it is never written.
  auto input_tensor = static_cast<const phi::DenseTensor*>(input.impl().get());
  auto index_tensor = static_cast<const phi::DenseTensor*>(index.impl().get());
  auto source_tensor =
      static_cast<const phi::DenseTensor*>(source.impl().get());

  auto out_tensor = std::make_shared<phi::DenseTensor>();
  out_tensor->Resize(input_tensor->dims());

  CallIndexCopyKernel(*dev_ctx,
                      *input_tensor,
                      phi::Scalar(dim),
                      *index_tensor,
                      *source_tensor,
                      out_tensor.get());

  return {paddle::Tensor(out_tensor)};
}
159+
160+
// Register `index_copy` as a Paddle custom operator:
//
//   out = paddlenlp_ops.index_copy(input, index, source, dim)
//
// `dim` is an int attribute; the kernel function is IndexCopyForward.
PD_BUILD_OP(index_copy)
    .Inputs({"input", "index", "source"})
    .Outputs({"out"})
    .Attrs({"dim: int"})
    .SetKernelFn(PD_KERNEL(IndexCopyForward));

0 commit comments

Comments
 (0)