Skip to content

Commit 33b4920

Browse files
authored
Merge pull request #14057 from velconia/continue_hash_op
[1.1] Add hash_op implementation
2 parents 209f24a + 2fec8c5 commit 33b4920

File tree

12 files changed

+312
-36
lines changed

12 files changed

+312
-36
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ include(external/eigen) # download eigen3
179179
include(external/pybind11) # download pybind11
180180
include(external/cares)
181181
include(external/cub)
182+
include(external/xxhash) # download xxhash
182183

183184
if (NOT WIN32)
184185
# there is no official support of snappystream, warpctc, nccl, cupti in windows

cmake/external/xxhash.cmake

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Builds xxHash v0.6.5 from source as an ExternalProject and exposes the
# result to the rest of the build as the imported static library `xxhash`.
#
# Variables defined here:
#   XXHASH_SOURCE_DIR   - ExternalProject prefix (download and build tree)
#   XXHASH_INSTALL_DIR  - prefix that `make install` installs into
#   XXHASH_INCLUDE_DIR  - header directory added to the include path
#   XXHASH_LIBRARIES    - full path of the installed static archive
#
# NOTE(review): the build and install steps invoke `sed`, `make` and `export`,
# so this script presumably only works on Unix-like hosts -- confirm before
# including it in a Windows build.
include(ExternalProject)

set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash)
set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash)
set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")

if(WITH_STATIC_LIB)
  set(BUILD_CMD make lib)
else()
  # Append -fPIC to the upstream Makefile's warning flags before building.
  # NOTE(review): the search pattern still matches after the substitution has
  # been applied, so re-running the build step appends another -fPIC each
  # time; the duplicates look harmless but confirm.
  set(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/src/extern_xxhash/Makefile && make lib)
endif()

ExternalProject_Add(
  extern_xxhash
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/Cyan4973/xxHash"
  GIT_TAG           "v0.6.5"
  PREFIX            ${XXHASH_SOURCE_DIR}
  DOWNLOAD_NAME     "xxhash"
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  BUILD_IN_SOURCE   1
  # Explicitly empty: the original left this keyword without a value.
  PATCH_COMMAND     ""
  BUILD_COMMAND     ${BUILD_CMD}
  INSTALL_COMMAND   export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
  TEST_COMMAND      ""
)

# Imported target wrapping the installed archive; the dependency on
# extern_xxhash makes consumers wait until the external build has run.
add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
add_dependencies(xxhash extern_xxhash)

# The headers only exist after the external project has been installed, so
# the path is added at directory scope (once) instead of on the target.
include_directories(${XXHASH_INCLUDE_DIR})

list(APPEND external_project_dependencies xxhash)

if(WITH_C_API)
  install(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash)
  if(ANDROID)
    install(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI})
  else()
    install(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib)
  endif()
endif()

cmake/inference_lib.cmake

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ function(copy TARGET)
3131
foreach(index RANGE ${len})
3232
list(GET copy_lib_SRCS ${index} src)
3333
list(GET copy_lib_DSTS ${index} dst)
34-
add_custom_command(TARGET ${TARGET} PRE_BUILD
34+
add_custom_command(TARGET ${TARGET} PRE_BUILD
3535
COMMAND mkdir -p "${dst}"
3636
COMMAND cp -r "${src}" "${dst}"
3737
COMMENT "copying ${src} -> ${dst}")
@@ -67,6 +67,13 @@ copy(boost_lib
6767
DEPS boost
6868
)
6969

70+
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
71+
copy(xxhash_lib
72+
SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
73+
DSTS ${dst_dir} ${dst_dir}/lib
74+
DEPS xxhash
75+
)
76+
7077
if(NOT PROTOBUF_FOUND)
7178
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
7279
copy(protobuf_lib
@@ -186,7 +193,7 @@ copy(cmake_cache
186193
DSTS ${FLUID_INSTALL_DIR})
187194

188195
# This command generates a complete fluid library for both train and inference
189-
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
196+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
190197

191198
# Following commands generate a inference-only fluid library
192199
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label'
176176
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
177177
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
178178
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
179+
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
179180
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
180181
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
181182
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)

paddle/fluid/inference/api/demo_ci/CMakeLists.txt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ include_directories("${PADDLE_LIB}")
5252
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
5353
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
5454
include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
55+
include_directories("${PADDLE_LIB}/third_party/install/xxhash/include")
5556
if (NOT WIN32)
5657
include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
5758
include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
@@ -61,8 +62,8 @@ endif(NOT WIN32)
6162
include_directories("${PADDLE_LIB}/third_party/boost")
6263
include_directories("${PADDLE_LIB}/third_party/eigen3")
6364

64-
if (NOT WIN32)
65-
if (USE_TENSORRT AND WITH_GPU)
65+
if (NOT WIN32)
66+
if (USE_TENSORRT AND WITH_GPU)
6667
include_directories("${TENSORRT_INCLUDE_DIR}")
6768
link_directories("${TENSORRT_LIB_DIR}")
6869
endif()
@@ -77,13 +78,14 @@ endif(NOT WIN32)
7778
link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
7879
link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
7980
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
81+
link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
8082
link_directories("${PADDLE_LIB}/paddle/lib")
8183

8284
add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
8385

8486
if(WITH_MKL)
8587
include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
86-
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
88+
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
8789
${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
8890
set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
8991
if(EXISTS ${MKLDNN_PATH})
@@ -107,7 +109,7 @@ if (NOT WIN32)
107109
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
108110
set(DEPS ${DEPS}
109111
${MATH_LIB} ${MKLDNN_LIB}
110-
glog gflags protobuf snappystream snappy z
112+
glog gflags protobuf snappystream snappy z xxhash
111113
${EXTERNAL_LIB})
112114
else()
113115
set(DEPS ${DEPS}
@@ -120,7 +122,7 @@ endif(NOT WIN32)
120122

121123
if(WITH_GPU)
122124
if(NOT WIN32)
123-
if (USE_TENSORRT)
125+
if (USE_TENSORRT)
124126
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
125127
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
126128
endif()

paddle/fluid/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
268268
else()
269269
set(DEPS_OPS ${DEPS_OPS} tensorrt_engine_op)
270270
endif()
271+
op_library(hash_op DEPS xxhash)
271272
op_library(clip_by_norm_op DEPS selected_rows_functor selected_rows)
272273
op_library(sum_op DEPS selected_rows_functor)
273274
op_library(sgd_op DEPS selected_rows_functor)

paddle/fluid/operators/hash_op.cc

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/operators/hash_op.h"
16+
#include <string>
17+
#include <vector>
18+
19+
namespace paddle {
20+
namespace operators {
21+
22+
class HashOp : public framework::OperatorWithKernel {
23+
public:
24+
HashOp(const std::string &type, const framework::VariableNameMap &inputs,
25+
const framework::VariableNameMap &outputs,
26+
const framework::AttributeMap &attrs)
27+
: OperatorWithKernel(type, inputs, outputs, attrs) {}
28+
29+
void InferShape(framework::InferShapeContext *ctx) const override {
30+
PADDLE_ENFORCE(ctx->HasInput("X"),
31+
"Input(X) of HashOp should not be null.");
32+
PADDLE_ENFORCE(ctx->HasOutput("Out"),
33+
"Output(Out) of HashOp should not be null.");
34+
35+
auto dims = ctx->GetInputDim("X");
36+
PADDLE_ENFORCE_EQ(dims.size(), 2UL,
37+
"The input of hash_op's dimensions must be 2");
38+
std::vector<int64_t> out_dims;
39+
out_dims.reserve(dims.size() + 1);
40+
// copy all dims except the last one
41+
for (size_t i = 0u; i != dims.size() - 1; ++i) {
42+
out_dims.emplace_back(dims[i]);
43+
}
44+
int num_hash = ctx->Attrs().Get<int>("num_hash");
45+
out_dims.emplace_back(num_hash);
46+
// keep the last dim to 1
47+
out_dims.emplace_back(1);
48+
49+
ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
50+
ctx->ShareLoD("X", /*->*/ "Out");
51+
}
52+
};
53+
54+
// Declares the proto of the hash operator: one input, one output and the two
// integer attributes read by the kernel. The original descriptions were
// copied from the scale operator and the attribute descriptions were empty.
class HashOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(LoDTensor) Input tensor of hash operator. It must be a 2-D "
             "tensor of int32 or int64 values; every row is hashed as a "
             "whole.");
    AddOutput("Out",
              "(LoDTensor) Output tensor of hash operator, with shape "
              "[X.dims[0], num_hash, 1]. It shares the LoD of X.");
    AddComment(R"DOC(
**Hash Operator**

For every row of X the operator computes `num_hash` hash values with the
XXH64 function, using the seeds 0, 1, ..., num_hash - 1, and reduces each
value modulo `mod_by`:

$$Out[i, j, 0] = XXH64(X[i, :], seed = j) \% mod\_by$$

)DOC");
    AddAttr<int>("num_hash",
                 "(int, default 1) The number of hash values computed for "
                 "every row of X.")
        .SetDefault(1);
    AddAttr<int>("mod_by",
                 "(int, default 100000) The modulus applied to every hash "
                 "value, i.e. the size of the hash space.")
        .SetDefault(100000);
  }
};
67+
68+
} // namespace operators
69+
} // namespace paddle
70+
71+
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
72+
73+
// Registers the `hash` operator with its shape inference (HashOp) and proto
// maker (HashOpMaker); no gradient operator is registered for it.
REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker);
74+
// Registers the CPU kernel for int and int64_t element types.
// NOTE(review): "HashKerel" is the class name exactly as declared in
// hash_op.h; it looks like a misspelling of "HashKernel" -- renaming it would
// have to be done in both files at once.
REGISTER_OP_CPU_KERNEL(hash, ops::HashKerel<int>, ops::HashKerel<int64_t>);

paddle/fluid/operators/hash_op.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
extern "C" {
18+
#include <xxhash.h>
19+
}
20+
#include "paddle/fluid/framework/eigen.h"
21+
#include "paddle/fluid/framework/op_registry.h"
22+
23+
namespace paddle {
24+
namespace operators {
25+
// template <typename DeviceContext, typename T>
26+
template <typename T>
27+
class HashKerel : public framework::OpKernel<T> {
28+
public:
29+
virtual void Compute(const framework::ExecutionContext& context) const {
30+
auto* out_t = context.Output<framework::LoDTensor>("Out");
31+
auto* in_t = context.Input<framework::LoDTensor>("X");
32+
int mod_by = context.Attr<int>("mod_by");
33+
int num_hash = context.Attr<int>("num_hash");
34+
auto* output = out_t->mutable_data<T>(context.GetPlace());
35+
36+
auto in_dims = in_t->dims();
37+
auto in_lod = in_t->lod();
38+
PADDLE_ENFORCE_EQ(
39+
static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
40+
"The actual input data's size mismatched with LoD information.");
41+
42+
auto seq_length = in_dims[0];
43+
auto last_dim = in_dims[in_dims.size() - 1];
44+
auto* input = in_t->data<T>();
45+
for (int idx = 0; idx < seq_length; ++idx) {
46+
for (int ihash = 0; ihash != num_hash; ++ihash) {
47+
output[idx * num_hash + ihash] =
48+
XXH64(input, sizeof(int) * last_dim, ihash) % mod_by;
49+
}
50+
input += last_dim;
51+
}
52+
}
53+
};
54+
55+
} // namespace operators
56+
} // namespace paddle

paddle/fluid/train/demo/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ include_directories("${PADDLE_LIB}")
1515
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
1616
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
1717
include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
18+
include_directories("${PADDLE_LIB}/third_party/install/xxhash/include")
1819
include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
1920
include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
2021
include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
@@ -27,6 +28,7 @@ link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
2728
link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
2829
link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
2930
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
31+
link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
3032
link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
3133

3234
add_executable(demo_trainer demo_trainer.cc)
@@ -62,5 +64,5 @@ target_link_libraries(demo_trainer
6264
${ARCHIVE_END}
6365
${MATH_LIB}
6466
${MKLDNN_LIB}
65-
glog gflags protobuf snappystream snappy z
67+
glog gflags protobuf snappystream snappy z xxhash
6668
${EXTERNAL_LIB})

paddle/scripts/paddle_build.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ function cmake_gen() {
9595
exit 1
9696
fi
9797
fi
98-
else
98+
else
9999
if [ "$1" != "" ]; then
100-
echo "using python abi: $1"
100+
echo "using python abi: $1"
101101
if [ "$1" == "cp27-cp27m" ]; then
102102
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
103103
export PATH=/opt/python/cp27-cp27m/bin/:${PATH}
@@ -119,15 +119,15 @@ function cmake_gen() {
119119
fi
120120
fi
121121
fi
122-
122+
123123
if [ "$SYSTEM" == "Darwin" ]; then
124124
WITH_DISTRIBUTE=${WITH_DISTRIBUTE:-ON}
125125
WITH_AVX=${WITH_AVX:-ON}
126126
INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-~/.cache/inference_demo}
127127
else
128128
INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-/root/.cache/inference_demo}
129129
fi
130-
130+
131131
cat <<EOF
132132
========================================
133133
Configuring cmake in /paddle/build ...
@@ -394,8 +394,8 @@ EOF
394394
export http_proxy=
395395
export https_proxy=
396396
# TODO: jiabin need to refine this part when these tests fixed on mac
397-
ctest --output-on-failure -j $1
398-
# make install should also be test when unittest
397+
ctest --output-on-failure -j $1
398+
# make install should also be test when unittest
399399
make install -j 8
400400
pip install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
401401
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then

0 commit comments

Comments
 (0)