PaddlePaddle
diff --git a/‎paddlenlp/ops/CMakeLists.txt‎
Lines changed: 2 additions & 2 deletions b/‎paddlenlp/ops/CMakeLists.txt‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎paddlenlp/ops/README.md‎
Lines changed: 14 additions & 12 deletions b/‎paddlenlp/ops/README.md‎
Lines changed: 14 additions & 12 deletions
diff --git a/‎paddlenlp/ops/__init__.py‎
Lines changed: 2 additions & 2 deletions b/‎paddlenlp/ops/__init__.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎paddlenlp/ops/faster_transformer/CMakeLists.txt‎
Lines changed: 14 additions & 0 deletions b/‎paddlenlp/ops/faster_transformer/CMakeLists.txt‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎paddlenlp/ops/transformer/__init__.py‎ renamed to ‎paddlenlp/ops/faster_transformer/__init__.py‎ b/‎paddlenlp/ops/transformer/__init__.py‎ renamed to ‎paddlenlp/ops/faster_transformer/__init__.py‎
diff --git a/‎paddlenlp/ops/sample/config/decoding.sample.yaml‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/config/decoding.sample.yaml‎ b/‎paddlenlp/ops/sample/config/decoding.sample.yaml‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/config/decoding.sample.yaml‎
diff --git a/‎paddlenlp/ops/sample/decoding_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/decoding_sample.py‎ b/‎paddlenlp/ops/sample/decoding_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/decoding_sample.py‎
diff --git a/‎paddlenlp/ops/sample/encoder_decoding_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/encoder_decoding_sample.py‎ b/‎paddlenlp/ops/sample/encoder_decoding_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/encoder_decoding_sample.py‎
diff --git a/‎paddlenlp/ops/sample/gpt_export_model_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/gpt_export_model_sample.py‎ b/‎paddlenlp/ops/sample/gpt_export_model_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/gpt_export_model_sample.py‎
diff --git a/‎paddlenlp/ops/sample/gpt_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/gpt_sample.py‎ b/‎paddlenlp/ops/sample/gpt_sample.py‎ renamed to ‎paddlenlp/ops/faster_transformer/sample/gpt_sample.py‎
@@ -18,7 +18,7 @@ find_package(CUDA 10.1 REQUIRED)
 
 INCLUDE(ExternalProject)
 
-set(CXX_STD "11" CACHE STRING "C++ standard")
+set(CXX_STD "14" CACHE STRING "C++ standard")
 
 option(ON_INFER         "Compile with inference. "                                OFF)
 option(WITH_GPU         "Compile with GPU/CPU, default use CPU."                  ON)
@@ -223,4 +223,4 @@ if(ON_INFER AND WITH_GPT)
   )
 endif()
 
-add_subdirectory(src)
+add_subdirectory(faster_transformer)
@@ -4,9 +4,11 @@
 
 ```text
 .
-├── sample/                 # 基于 Transformer 机器翻译使用样例（beam search）
-├── src/                    # 自定义 OP C++ CUDA 代码
-└── transformer/            # Python API 封装脚本
+├── faster_transformer/       # 基于自定义 op Faster Transformer 子路径
+│ ├── sample/                 # 基于 Faster Transformer 使用样例
+│ ├── src/                    # 自定义 OP C++ CUDA 代码
+│ └── transformer/            # Python API 封装脚本
+└── patches/                  # 自定义 op 第三方库自定义补丁代码
 ```
 
 ## 使用环境说明
@@ -95,7 +97,7 @@ transformer = FasterTransformer(
     use_fp16_decoding=args.use_fp16_decoding)
 ```
 
-更详细的例子可以参考 `./sample/decoding_sample.py` 以及 `./sample/encoder_decoding_sample.py`，我们提供了更详细用例。
+更详细的例子可以参考 `./faster_transformer/sample/decoding_sample.py` 以及 `./faster_transformer/sample/encoder_decoding_sample.py`，我们提供了更详细用例。
 
 #### 执行 Transformer decoding on PaddlePaddle
 
@@ -105,7 +107,7 @@ transformer = FasterTransformer(
 export CUDA_VISIBLE_DEVICES=0
 export FLAGS_fraction_of_gpu_memory_to_use=0.1
 ./build/third-party/build/bin/decoding_gemm 32 4 8 64 30000 32 512 0
-python sample/decoding_sample.py --config ./sample/config/decoding.sample.yaml --decoding_lib ./build/lib/libdecoding_op.so
+python ./faster_transformer/sample/decoding_sample.py --config ./faster_transformer/sample/config/decoding.sample.yaml --decoding_lib ./build/lib/libdecoding_op.so
 ```
 
 使用 PaddlePaddle 仅执行 decoding 测试（float16）：
@@ -115,7 +117,7 @@ python sample/decoding_sample.py --config ./sample/config/decoding.sample.yaml -
 export CUDA_VISIBLE_DEVICES=0
 export FLAGS_fraction_of_gpu_memory_to_use=0.1
 ./build/third-party/build/bin/decoding_gemm 32 4 8 64 30000 32 512 1
-python sample/decoding_sample.py --config ./sample/config/decoding.sample.yaml --decoding_lib ./build/lib/libdecoding_op.so --use_fp16_decoding
+python ./faster_transformer/sample/decoding_sample.py --config ./faster_transformer/sample/config/decoding.sample.yaml --decoding_lib ./build/lib/libdecoding_op.so --use_fp16_decoding
 ```
 
 其中，`decoding_gemm` 不同参数的意义可以参考 [FasterTransformer 文档](https://github.com/NVIDIA/FasterTransformer/tree/v3.1#execute-the-decoderdecoding-demos)。
@@ -151,15 +153,15 @@ gpt = FasterGPT(
 
 目前，GPT-2 的例子仅支持 `batch size` 为 `1` 或是 batch 内输入的序列长度相等的情况。并且，仅支持 topk-sampling 和 topp-sampling，不支持 beam-search。
 
-更详细的例子可以参考 `./sample/gpt_sample.py`，我们提供了更详细用例。
+更详细的例子可以参考 `./faster_transformer/sample/gpt_sample.py`，我们提供了更详细用例。
 
 #### 执行 GPT-2 decoding on PaddlePaddle
 
 使用 PaddlePaddle 仅执行 decoding 测试（float32）：
 
 ``` sh
 export CUDA_VISIBLE_DEVICES=0
-python sample/gpt_sample.py --model_name_or_path gpt2-medium-en --decoding_lib ./build/lib/libdecoding_op.so --batch_size 1 --topk 4 --topp 0.0 --max_out_len 32 --start_token "<|endoftext|>" --end_token "<|endoftext|>" --temperature 1.0
+python ./faster_transformer/sample/gpt_sample.py --model_name_or_path gpt2-medium-en --decoding_lib ./build/lib/libdecoding_op.so --batch_size 1 --topk 4 --topp 0.0 --max_out_len 32 --start_token "<|endoftext|>" --end_token "<|endoftext|>" --temperature 1.0
 ```
 
 其中，各个选项的意义如下：
@@ -204,7 +206,7 @@ cd PaddleNLP/paddlenlp/ops/
 ``` sh
 mkdir build
 cd build/
-cmake .. -DSM=xx -DCMAKE_BUILD_TYPE=Release -DPADDLE_LIB=/path/to/paddle_inference_lib/ -DDEMO=./demo/transformer_e2e.cc -DWITH_STATIC_LIB=OFF -DON_INFER=ON -DWITH_MKL=ON
+cmake .. -DSM=xx -DCMAKE_BUILD_TYPE=Release -DPADDLE_LIB=/path/to/paddle_inference_lib/ -DDEMO=./faster_transformer/src/demo/transformer_e2e.cc -DWITH_STATIC_LIB=OFF -DON_INFER=ON -DWITH_MKL=ON
 make -j
 cd ../
 ```
@@ -224,7 +226,7 @@ cd ../
     └── threadpool/
   └── version.txt
   ```
-* `-DDEMO` 说明预测库使用 demo 的位置。比如指定 -DDEMO=./demo/transformer_e2e.cc 或是 -DDEMO=./demo/gpt.cc。
+* `-DDEMO` 说明预测库使用 demo 的位置。比如指定 -DDEMO=./faster_transformer/src/demo/transformer_e2e.cc 或是 -DDEMO=./faster_transformer/src/demo/gpt.cc。
 * `-DWITH_GPT`，如果是编译 GPT 的预测库可执行文件，需要加上 `-DWITH_GPT=ON`。
 * **当使用预测库的自定义 op 的时候，请务必开启 `-DON_INFER=ON` 选项，否则，不会得到预测库的可执行文件。**
 
@@ -253,10 +255,10 @@ cd bin/
 
 #### 执行 GPT decoding on PaddlePaddle
 
-如果需要使用 Paddle Inference 预测库针对 GPT 进行预测，首先，需要导出预测模型，可以通过 `sample/gpt_export_model_sample.py` 脚本获取预测库用模型，执行方式如下所示：
+如果需要使用 Paddle Inference 预测库针对 GPT 进行预测，首先，需要导出预测模型，可以通过 `./faster_transformer/sample/gpt_export_model_sample.py` 脚本获取预测库用模型，执行方式如下所示：
 
 ``` sh
-python sample/gpt_export_model_sample.py --model_name_or_path gpt2-medium-en --decoding_lib ./build/lib/libdecoding_op.so --batch_size 1 --topk 4 --topp 0.0 --max_out_len 32 --start_token "<|endoftext|>" --end_token "<|endoftext|>" --temperature 1.0 --inference_model_dir ./infer_model/
+python ./faster_transformer/sample/gpt_export_model_sample.py --model_name_or_path gpt2-medium-en --decoding_lib ./build/lib/libdecoding_op.so --topk 4 --topp 0.0 --max_out_len 32 --start_token "<|endoftext|>" --end_token "<|endoftext|>" --temperature 1.0 --inference_model_dir ./infer_model/
 ```
 
 各个选项的意义与上文的 `gpt_sample.py` 的选项相同。额外新增一个 `--inference_model_dir` 选项用于指定保存的模型文件、词表等文件。若是使用的模型是 gpt2-medium-en，保存之后，`./infer_model/` 目录下组织的结构如下：
 
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .transformer.decoding import *
-from .transformer.faster_transformer import *
+from .faster_transformer.transformer.decoding import *
+from .faster_transformer.transformer.faster_transformer import *
 from .einsum import *
 from .distributed import *
 from . import optimizer
@@ -0,0 +1,14 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+add_subdirectory(src)