Skip to content

Commit 7dddf3e

Browse files
authored
[INTEL_HPU] add one hot op kernel (#1609)
1 parent 43f245e commit 7dddf3e

File tree

2 files changed

+291
-0
lines changed

2 files changed

+291
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"); you may
4+
// not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "habanalabs/perf_lib_layer_params.h"
16+
#include "habanalabs/synapse_api.h"
17+
#include "habanalabs/synapse_common_types.h"
18+
#include "kernels/funcs.h"
19+
#include "kernels/hpu_operator.h"
20+
#include "utils/utils.h"
21+
22+
namespace custom_kernel {
23+
24+
class OneHotOperator : public HpuOperator {
25+
public:
26+
OneHotOperator(std::string guid_prefix, std::string node_name)
27+
: HpuOperator(guid_prefix), pName_(node_name) {}
28+
void AddNode(ConvertTensors& ct, ns_OneHotKernel::Params params) {
29+
auto inputs = ct.GetTensors();
30+
auto outputs = ct.GetTensors(false);
31+
std::vector<synTensor> syn_inputs;
32+
if (inputs[0].type == syn_type_int32) {
33+
for (size_t i = 0; i < inputs.size(); i++) {
34+
syn_inputs.push_back(createTensor(inputs[i].dims.size(),
35+
inputs[i].type,
36+
inputs[i].dims,
37+
true,
38+
inputs[i].name));
39+
}
40+
} else {
41+
for (size_t i = 0; i < inputs.size(); i++) {
42+
std::vector<synTensor> x_i64;
43+
x_i64.push_back(createTensor(inputs[i].dims.size(),
44+
inputs[i].type,
45+
inputs[i].dims,
46+
true,
47+
inputs[i].name));
48+
std::vector<synTensor> x_i32;
49+
auto x_cast = createTensor(inputs[i].dims.size(),
50+
syn_type_int32,
51+
inputs[i].dims,
52+
false,
53+
"x_cast");
54+
x_i32.push_back(x_cast);
55+
56+
std::string guid_cast = "cast_i64_to_i32";
57+
synStatus status = synNodeCreate(graphHandle_,
58+
x_i64.data(),
59+
x_i32.data(),
60+
x_i64.size(),
61+
x_i32.size(),
62+
nullptr,
63+
0,
64+
guid_cast.c_str(),
65+
"cast_x",
66+
nullptr,
67+
nullptr);
68+
PD_CHECK(status == synSuccess,
69+
"[RUNTIME] synNodeCreate cast_x failed = ",
70+
status);
71+
syn_inputs.push_back(x_cast);
72+
}
73+
}
74+
75+
std::vector<synTensor> syn_outputs;
76+
for (size_t i = 0; i < outputs.size(); i++) {
77+
syn_outputs.push_back(createTensor(outputs[i].dims.size(),
78+
outputs[i].type,
79+
outputs[i].dims,
80+
true,
81+
outputs[i].name));
82+
}
83+
84+
synStatus status = synNodeCreate(graphHandle_,
85+
syn_inputs.data(),
86+
syn_outputs.data(),
87+
inputs.size(),
88+
outputs.size(),
89+
&params,
90+
sizeof(params),
91+
guid_.c_str(),
92+
pName_.c_str(),
93+
nullptr,
94+
nullptr);
95+
PD_CHECK(
96+
status == synSuccess, "[RUNTIME] synNodeCreate () failed = %d", status);
97+
}
98+
std::string pName_;
99+
};
100+
101+
template <typename T, typename Context>
102+
void OneHotRawKernel(const Context& dev_ctx,
103+
const phi::DenseTensor& x,
104+
const phi::Scalar& num_classes,
105+
phi::DataType dtype,
106+
bool allow_out_of_range,
107+
phi::DenseTensor* out) {
108+
// allocate memory on device.
109+
ConvertTensors ct;
110+
ct.Add(x);
111+
dev_ctx.template Alloc<float>(out);
112+
int depth = num_classes.to<int>();
113+
auto out_dims = out->dims();
114+
out_dims[out_dims.size() - 1] = depth;
115+
out->Resize(out_dims);
116+
ct.Add(out, false);
117+
118+
std::vector<DIMS> inputs_dims = ct.GetDims();
119+
ns_OneHotKernel::Params params{-1, depth, 1, 0};
120+
121+
OpCacheOperator op_info;
122+
op_info.prepareOpInfo<float, ns_OneHotKernel::Params>(
123+
"one_hot_fwd", {inputs_dims}, &params);
124+
125+
auto recipe = op_info.GetRecipe();
126+
if (recipe == nullptr) {
127+
// compile
128+
OneHotOperator op(op_info.guid_, "one_hot_op");
129+
op.AddNode(ct, params);
130+
op.Compile();
131+
op_info.setOp(op);
132+
recipe = op_info.GetRecipe();
133+
}
134+
135+
RecipeRunner runner(recipe);
136+
auto tensors = ct.GetDeviceAddr();
137+
runner.Run(reinterpret_cast<C_Stream>(dev_ctx.stream()), tensors);
138+
}
139+
140+
template <typename T, typename Context>
141+
void OneHotKernel(const Context& dev_ctx,
142+
const phi::DenseTensor& x,
143+
const phi::Scalar& num_classes_s,
144+
phi::DenseTensor* out) {
145+
custom_kernel::OneHotRawKernel<T, Context>(
146+
dev_ctx, x, num_classes_s, phi::DataType::FLOAT32, false, out);
147+
}
148+
149+
} // namespace custom_kernel
150+
151+
// Registers custom_kernel::OneHotRawKernel under the kernel name `one_hot_raw`
// for the `intel_hpu` backend with ALL_LAYOUT, listing int32_t and int64_t as
// the data types. The trailing body is empty, so no extra kernel attributes
// are configured here.
PD_REGISTER_PLUGIN_KERNEL(one_hot_raw,
152+
intel_hpu,
153+
ALL_LAYOUT,
154+
custom_kernel::OneHotRawKernel,
155+
int32_t,
156+
int64_t) {}
157+
158+
// Registers custom_kernel::OneHotKernel under the kernel name `one_hot` for
// the `intel_hpu` backend with ALL_LAYOUT, listing int32_t and int64_t as the
// data types. The body marks output 0 as FLOAT32, which matches the FLOAT32
// dtype OneHotKernel passes on to OneHotRawKernel.
PD_REGISTER_PLUGIN_KERNEL(one_hot,
159+
intel_hpu,
160+
ALL_LAYOUT,
161+
custom_kernel::OneHotKernel,
162+
int32_t,
163+
int64_t) {
164+
kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);
165+
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
from __future__ import print_function
15+
16+
import unittest
17+
import numpy as np
18+
from tests.op_test import OpTest
19+
import paddle
20+
import paddle.base.core as core
21+
22+
paddle.enable_static()
23+
24+
import os
25+
26+
# Device index passed to paddle.CustomPlace("intel_hpu", ...) by the tests
# below; read from the FLAGS_selected_intel_hpus environment variable and
# falling back to 0 when it is unset.
intel_hpus_module_id = os.environ.get("FLAGS_selected_intel_hpus", 0)
27+
28+
29+
class TestOneHotOp(OpTest):
    """one_hot_v2 on intel_hpu: LoD input, depth supplied as a tensor input."""

    def set_hpu(self):
        # Flag read by the OpTest base to run against the custom device.
        self.__class__.use_custom_device = True

    def setUp(self):
        self.set_hpu()
        self.op_type = "one_hot_v2"
        self.dtype = np.int32
        depth = 10
        depth_np = np.array(depth).astype("int32")
        x_lod = [[4, 1, 3, 3]]
        num_ids = sum(x_lod[0])

        # np.random.randint excludes its upper bound, so `depth` (not
        # `depth - 1`) is required for every class index 0..depth-1 to be
        # reachable.
        x = np.random.randint(0, depth, size=num_ids).astype("int32")

        # The op is asked for FP32 output (see attrs), so the reference is
        # built as float32 as well.
        out = np.zeros(shape=(num_ids, depth), dtype="float32")
        out[np.arange(num_ids), x] = 1.0

        self.inputs = {"X": (x, x_lod), "depth_tensor": depth_np}
        self.attrs = {"dtype": int(core.VarDesc.VarType.FP32)}
        self.outputs = {"Out": (out, x_lod)}

    def test_check_output(self):
        self.check_output_with_place(
            paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        )

    def test_check_grad(self):
        # No gradient check is performed for this op.
        pass
59+
60+
61+
class TestOneHotOpAttr(OpTest):
    """one_hot_v2 on intel_hpu: LoD input, depth supplied as an attribute."""

    def set_hpu(self):
        # Flag read by the OpTest base to run against the custom device.
        self.__class__.use_custom_device = True

    def setUp(self):
        self.set_hpu()
        self.op_type = "one_hot_v2"
        self.dtype = np.int32
        depth = 10
        x_lod = [[4, 1, 3, 3]]
        num_ids = sum(x_lod[0])

        # np.random.randint excludes its upper bound, so `depth` (not
        # `depth - 1`) is required for every class index 0..depth-1 to be
        # reachable.
        x = np.random.randint(0, depth, size=num_ids).astype("int32")

        # The op is asked for FP32 output (see attrs), so the reference is
        # built as float32 as well.
        out = np.zeros(shape=(num_ids, depth), dtype="float32")
        out[np.arange(num_ids), x] = 1.0

        self.inputs = {"X": (x, x_lod)}
        self.attrs = {"dtype": int(core.VarDesc.VarType.FP32), "depth": depth}
        self.outputs = {"Out": (out, x_lod)}

    def test_check_output(self):
        self.check_output_with_place(
            paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        )

    def test_check_grad(self):
        # No gradient check is performed for this op.
        pass
91+
92+
93+
class TestOneHotOpNoLod(OpTest):
    """one_hot_v2 on intel_hpu: plain (non-LoD) input, depth as a tensor."""

    def set_hpu(self):
        # Flag read by the OpTest base to run against the custom device.
        self.__class__.use_custom_device = True

    def setUp(self):
        self.set_hpu()
        self.op_type = "one_hot_v2"
        self.dtype = np.int32
        depth = 10
        depth_np = np.array(depth).astype("int32")
        # Same element count as the LoD variants (4 + 1 + 3 + 3).
        num_ids = 11

        # np.random.randint excludes its upper bound, so `depth` (not
        # `depth - 1`) is required for every class index 0..depth-1 to be
        # reachable.
        x = np.random.randint(0, depth, size=num_ids).astype("int32")

        # The op is asked for FP32 output (see attrs), so the reference is
        # built as float32 as well.
        out = np.zeros(shape=(num_ids, depth), dtype="float32")
        out[np.arange(num_ids), x] = 1.0

        self.inputs = {"X": x, "depth_tensor": depth_np}
        self.attrs = {"dtype": int(core.VarDesc.VarType.FP32)}
        self.outputs = {"Out": out}

    def test_check_output(self):
        self.check_output_with_place(
            paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        )

    def test_check_grad(self):
        # No gradient check is performed for this op.
        pass
123+
124+
125+
if __name__ == "__main__":
    # Run every test case defined in this module when executed as a script.
    unittest.main()

0 commit comments

Comments
 (0)