
Commit f99e25f

[llama] Build the runner with tiktoken by default
Differential Revision: D61830302
Pull Request resolved: #4921
1 parent ff4a736 commit f99e25f
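
This change removes the compile-time tokenizer switch everywhere: the `EXECUTORCH_USE_TIKTOKEN` CMake option, the `ET_USE_TIKTOKEN` preprocessor flag, and the Buck `llama.use_tiktoken` config are deleted, and the runner now compiles and links both the BPE and Tiktoken tokenizers. Tokenizer selection moves to runtime: the runner tries to load the artifact as BPE first and falls back to Tiktoken, so a single binary serves both Llama2- and Llama3-style tokenizer files.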

File tree

16 files changed (+108 -91 lines)


.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 # LICENSE file in the root directory of this source tree.
 
 set -eux
+set -o xtrace
 
 build_qnn_backend() {
   echo "Start building qnn backend."

backends/qualcomm/scripts/build.sh

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 set -e
+set -o xtrace
 
 if [[ -z ${QNN_SDK_ROOT} ]]; then
   echo "Please export QNN_SDK_ROOT=/path/to/qnn_sdk"

build/build_android_llm_demo.sh

Lines changed: 0 additions & 7 deletions
@@ -20,11 +20,6 @@ build_android_native_library() {
   TOKENIZER="$2"
   ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}"
   CMAKE_OUT="cmake-out-android-${ANDROID_ABI}"
-  if [[ $TOKENIZER == "tiktoken" ]]; then
-    EXECUTORCH_USE_TIKTOKEN=ON
-  else
-    EXECUTORCH_USE_TIKTOKEN=OFF
-  fi
 
   cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
@@ -54,7 +49,6 @@ build_android_native_library() {
     -DANDROID_ABI="$ANDROID_ABI" \
     -DANDROID_PLATFORM=android-23 \
     -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-    -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -72,7 +66,6 @@
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_LOG_LEVEL=Info \
     -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
-    -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
     -DCMAKE_BUILD_TYPE=Release \
     -B"${CMAKE_OUT}"/extension/android

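Note that `TOKENIZER="$2"` is still read at the top of `build_android_native_library()`; it just no longer maps to a compile-time flag. With the `EXECUTORCH_USE_TIKTOKEN` plumbing gone, the tokenizer choice is deferred to runtime inside the runner.
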
examples/demo-apps/android/LlamaDemo/README.md

Lines changed: 2 additions & 10 deletions
@@ -64,22 +64,14 @@ Note: `<path_to_android_ndk>` is the root for the NDK, which is usually under
 `~/Library/Android/sdk/ndk/XX.Y.ZZZZZ` for macOS, and contains NOTICE and README.md.
 We use `<path_to_android_ndk>/build/cmake/android.toolchain.cmake` for CMake to cross-compile.
 
-3. (Optional) If you need to use tiktoken as the tokenizer (for LLaMA3), set
-`EXECUTORCH_USE_TIKTOKEN=ON` and later CMake will use it as the tokenizer.
-If you need to run other models like LLaMA2, skip this step.
-
-```bash
-export EXECUTORCH_USE_TIKTOKEN=ON # Only for LLaMA3
-```
-
-4. Build the Android Java extension code:
+3. Build the Android Java extension code:
 ```bash
 pushd extension/android
 ./gradlew build
 popd
 ```
 
-5. Run the following command to set up the required JNI library:
+4. Run the following command to set up the required JNI library:
 ```bash
 pushd examples/demo-apps/android/LlamaDemo
 ./gradlew :app:setup
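
With the tiktoken export step removed, the remaining steps renumber to 3 and 4, and the flow reduces to the commands this diff keeps:

```bash
# Step 3: build the Android Java extension code
pushd extension/android
./gradlew build
popd

# Step 4: set up the required JNI library
pushd examples/demo-apps/android/LlamaDemo
./gradlew :app:setup
```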

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 0 additions & 2 deletions
@@ -35,7 +35,6 @@ cmake examples/models/llama2 \
   -DANDROID_ABI="$ANDROID_ABI" \
   -DANDROID_PLATFORM=android-23 \
   -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-  -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
   -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
   -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -50,7 +49,6 @@ cmake extension/android \
   -DANDROID_PLATFORM=android-23 \
   -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
   -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
-  -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
   -DCMAKE_BUILD_TYPE=Release \
   -B"${CMAKE_OUT}"/extension/android

examples/models/llama2/CMakeLists.txt

Lines changed: 0 additions & 19 deletions
@@ -21,8 +21,6 @@ project(llama_runner)
 # Duplicating options as root CMakeLists.txt
 option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)
 
-option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)
-
 include(CMakeDependentOption)
 #
 # pthreadpool: build pthreadpool library. Disable on unsupported platforms
@@ -94,23 +92,6 @@ endif()
 
 # llama_runner library
 add_subdirectory(runner)
-if(EXECUTORCH_USE_TIKTOKEN)
-  # find RE2 for tokenizer
-  set(ABSL_ENABLE_INSTALL ON)
-  set(ABSL_PROPAGATE_CXX_STD ON)
-  set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
-  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/abseil-cpp
-    ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
-  )
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/re2
-    ${CMAKE_CURRENT_BINARY_DIR}/re2
-  )
-  set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
-  target_link_libraries(llama_runner PUBLIC re2::re2)
-endif()
 
 set(link_libraries gflags)
 set(_srcs main.cpp)
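
The RE2/abseil setup deleted here does not disappear from the build: the same block is re-added unconditionally in `examples/models/llama2/runner/CMakeLists.txt` further below, so the tokenizer dependencies now live where the `llama_runner` target is actually defined.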

examples/models/llama2/README.md

Lines changed: 0 additions & 3 deletions
@@ -227,8 +227,6 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the
 cmake --build cmake-out/examples/models/llama2 -j16 --config Release
 ```
 
-For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner.
-
 3. Run model. Run options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/llama2/main.cpp#L18-L40).
 ```
 cmake-out/examples/models/llama2/llama_main --model_path=<model pte file> --tokenizer_path=<tokenizer.bin> --prompt=<prompt>
@@ -283,7 +281,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
 
 cmake --build cmake-out-android/examples/models/llama2 -j16 --config Release
 ```
-For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner.
 
 **2. Run on Android via adb shell**
 
examples/models/llama2/runner/CMakeLists.txt

Lines changed: 25 additions & 11 deletions
@@ -41,16 +41,13 @@ target_include_directories(
   extension_module INTERFACE ${_common_include_directories}
 )
 
-if(EXECUTORCH_USE_TIKTOKEN)
-  list(
-    APPEND _llama_runner__srcs
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
-  )
-  list(APPEND _llama_runner__srcs
-    ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
-  )
-  set(_preprocessor_flag -DET_USE_TIKTOKEN)
-endif()
+list(
+  APPEND _llama_runner__srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
+)
+list(APPEND _llama_runner__srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
+)
 
 if(CMAKE_TOOLCHAIN_IOS
    OR ANDROID
@@ -63,7 +60,24 @@ else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()
 
-set(llama_runner_deps executorch extension_module extension_data_loader)
+# find RE2 for tokenizer, build tiktoken
+set(ABSL_ENABLE_INSTALL ON)
+set(ABSL_PROPAGATE_CXX_STD ON)
+set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/third-party/abseil-cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
+)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/third-party/re2
+  ${CMAKE_CURRENT_BINARY_DIR}/re2
+)
+set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
+
+set(llama_runner_deps executorch extension_module extension_data_loader
+    re2::re2
+)
 
 target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
 
examples/models/llama2/runner/runner.cpp

Lines changed: 13 additions & 12 deletions
@@ -16,11 +16,8 @@
 #include <executorch/extension/llm/runner/util.h>
 #include <executorch/extension/runner_util/managed_tensor.h>
 
-#if ET_USE_TIKTOKEN
 #include <executorch/examples/models/llama2/tokenizer/llama_tiktoken.h>
-#else /* BPE */
 #include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#endif /* ET_USE_TIKTOKEN */
 
 namespace torch::executor {
 namespace {
@@ -46,13 +43,6 @@ Runner::Runner(
     : temperature_(temperature),
       module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
       tokenizer_path_(tokenizer_path),
-      tokenizer_(
-#if ET_USE_TIKTOKEN
-          get_tiktoken_for_llama()
-#else
-          std::make_unique<BPETokenizer>()
-#endif
-          ),
       metadata_({
           {kAppendEosToPrompt, false},
           {kEnableDynamicShape, false},
@@ -79,8 +69,19 @@ Error Runner::load() {
     return Error::Ok;
   }
   ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
-
-  tokenizer_->load(tokenizer_path_);
+  // load tokenizer
+  tokenizer_ = nullptr;
+  tokenizer_ = std::make_unique<BPETokenizer>();
+  Error err = tokenizer_->load(tokenizer_path_);
+  if (err == Error::InvalidArgument) {
+    ET_LOG(
+        Info,
+        "Failed to load %s as a BPETokenizer artifact, trying Tiktoken",
+        tokenizer_path_.c_str());
+    tokenizer_.reset();
+    tokenizer_ = get_tiktoken_for_llama();
+    tokenizer_->load(tokenizer_path_);
+  }
 
   ET_LOG(Info, "Reading metadata from model");
 
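The runner now selects the tokenizer at runtime: `load()` first tries `BPETokenizer`, and if the artifact is rejected with `Error::InvalidArgument` it retries with the Tiktoken tokenizer from `get_tiktoken_for_llama()`. The practical effect is that one `llama_main` binary handles both artifact styles; a hedged sketch using the run flags from the README above (model and tokenizer file names are placeholders):

```bash
# Llama2-style BPE artifact: loaded by BPETokenizer on the first attempt
cmake-out/examples/models/llama2/llama_main \
  --model_path=llama2.pte --tokenizer_path=tokenizer.bin --prompt="Hello"

# Llama3-style Tiktoken artifact: BPE load fails, runner falls back to Tiktoken
cmake-out/examples/models/llama2/llama_main \
  --model_path=llama3.pte --tokenizer_path=tokenizer.model --prompt="Hello"
```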
examples/models/llama2/runner/targets.bzl

Lines changed: 1 addition & 7 deletions
@@ -8,9 +8,6 @@ def _get_operator_lib(aten = False):
     else:
         return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"]
 
-def use_tiktoken():
-    return native.read_config("llama", "use_tiktoken", "0") == "1"
-
 def define_common_targets():
     for aten in (True, False):
         aten_suffix = "_aten" if aten else ""
@@ -26,7 +23,6 @@ def define_common_targets():
             preprocessor_flags = [
                 "-DUSE_ATEN_LIB",
             ] if aten else [],
-            exported_preprocessor_flags = ["-DET_USE_TIKTOKEN"] if use_tiktoken() else [],
             visibility = [
                 "@EXECUTORCH_CLIENTS",
             ],
@@ -43,11 +39,9 @@ def define_common_targets():
                 "//executorch/kernels/quantized:generated_lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
-            ] + ([
                 "//executorch/examples/models/llama2/tokenizer:tiktoken",
-            ] if use_tiktoken() else [
                 "//executorch/extension/llm/tokenizer:bpe_tokenizer",
-            ]) + (_get_operator_lib(aten)) + ([
+            ] + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests
                 "//executorch/backends/vulkan:vulkan_backend_lib",
