Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions examples/models/llama/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ DEFINE_int32(
-1,
"Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");

// Command-line flag `num_bos` (32-bit integer, default 0): the number of BOS
// tokens to prepend to the prompt.
// NOTE(review): where this flag is consumed is not visible in this hunk;
// presumably main() copies it into the generation config that is handed to the
// runner -- confirm.
// NOTE(review): nothing here rejects negative values -- confirm that the
// consumer treats values <= 0 as "add no BOS token".
DEFINE_int32(
num_bos,
0,
"Number of BOS tokens to prepend to the prompt. Defaults to 0. If > 0, the prompt will be prepended with BOS tokens. This is useful for models that expect one or more BOS token at the start.");

// Command-line flag `num_eos` (32-bit integer, default 0): the number of EOS
// tokens to append to the prompt.
// NOTE(review): where this flag is consumed is not visible in this hunk;
// presumably main() copies it into the generation config that is handed to the
// runner -- confirm.
// NOTE(review): nothing here rejects negative values -- confirm that the
// consumer treats values <= 0 as "add no EOS token".
DEFINE_int32(
num_eos,
0,
"Number of EOS tokens to append to the prompt. Defaults to 0. If > 0, the prompt will be appended with EOS tokens. This is useful for models that expect one or more EOS token at the end.");

// Command-line flag `warmup` (boolean, default false): requests a warmup run.
// NOTE(review): what the warmup run consists of is decided in main(), which is
// not visible in this hunk.
DEFINE_bool(warmup, false, "Whether to run a warmup run.");

int32_t main(int32_t argc, char** argv) {
Expand Down
20 changes: 7 additions & 13 deletions examples/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ find_package(gflags REQUIRED)
set(_common_compile_options -Wno-deprecated-declarations -fPIC)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include)
set(_common_include_directories
${EXECUTORCH_ROOT}/..
${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include
)

#
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
Expand Down Expand Up @@ -72,20 +75,11 @@ target_include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
)

# find RE2 for tokenizer
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/abseil-cpp
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
)
# add tokenizers
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/re2
${CMAKE_CURRENT_BINARY_DIR}/re2
${EXECUTORCH_ROOT}/extension/llm/tokenizers
${CMAKE_CURRENT_BINARY_DIR}/../../extension/llm/tokenizers
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})

# build qnn_executor_runner
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/executor_runner)
Expand Down
26 changes: 11 additions & 15 deletions examples/qualcomm/oss_scripts/llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@

# model sharding with custom op
set(CUSTOM_OP_SRCS_FILE
"${EXECUTORCH_SOURCE_DIR}/extension/llm/custom_ops/op_fallback.cpp"
"${EXECUTORCH_SOURCE_DIR}/extension/llm/custom_ops/op_fallback.cpp"
)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../..)

add_library(custom_ops ${CUSTOM_OP_SRCS_FILE})
target_include_directories(custom_ops PUBLIC "${_common_include_directories}")
target_include_directories(
custom_ops PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/../../include"
)
target_link_libraries(
custom_ops PUBLIC full_portable_ops_lib
)
target_link_libraries(custom_ops PUBLIC full_portable_ops_lib)
target_link_options_shared_lib(custom_ops)

# preprocess qnn runner src files for llama
Expand Down Expand Up @@ -44,17 +44,15 @@ list(
${CMAKE_CURRENT_LIST_DIR}/runner/kv_manager.h
)

list(
APPEND
_llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/src/tiktoken.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama/tokenizer/llama_tiktoken.cpp
)
# No further entries are added to _llama_runner__srcs at this point. The former
# element-less `list(APPEND _llama_runner__srcs)` call appended nothing and was
# removed as dead code.

# build qnn llama runner
add_executable(qnn_llama_runner ${_llama_runner__srcs})
target_include_directories(
qnn_llama_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
qnn_llama_runner
PUBLIC
${_common_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
)

target_link_options_shared_lib(quantized_ops_lib)
Expand All @@ -68,14 +66,12 @@ target_link_libraries(
extension_module
extension_tensor
gflags
re2::re2
custom_ops
quantized_ops_lib
quantized_kernels
tokenizers
)
target_compile_options(
qnn_llama_runner PUBLIC ${_common_compile_options}
)
target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options})
set_target_properties(
qnn_llama_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)
34 changes: 11 additions & 23 deletions examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

# preprocess qaihub runner src files for llama2,3
set(_qaihub_llama_runner__srcs ${_llama_runner__srcs})
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../..)

list(TRANSFORM _qaihub_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qaihub_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
list(
Expand All @@ -26,13 +28,11 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs

# build qaihub llama2 7b runner
add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs})

target_include_directories(
qaihub_llama2_7b_runner PUBLIC
${_common_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
qaihub_llama2_7b_runner
PUBLIC ${_common_include_directories}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)
target_link_libraries(
qaihub_llama2_7b_runner
Expand All @@ -43,7 +43,7 @@ target_link_libraries(
extension_module
extension_tensor
gflags
re2::re2
tokenizers
)
target_compile_options(
qaihub_llama2_7b_runner PUBLIC ${_common_compile_options}
Expand All @@ -62,25 +62,13 @@ list(PREPEND _qaihub_llama3_8b_runner__srcs
# Adding a compile option to differentiate llama2 with llama3 logic
list(APPEND _common_compile_options -DQAIHUB_LLAMA3_RUNNER)

list(
APPEND _qaihub_llama3_8b_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/src/tiktoken.cpp
)
list(
APPEND
_qaihub_llama3_8b_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama/tokenizer/llama_tiktoken.cpp
)

# build qaihub llama3 8b runner
add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
target_include_directories(
qaihub_llama3_8b_runner PUBLIC
${_common_include_directories}
qaihub_llama3_8b_runner
PUBLIC
${_common_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
)

target_link_libraries(
Expand All @@ -92,7 +80,7 @@ target_link_libraries(
extension_module
extension_tensor
gflags
re2::re2
tokenizers
)
target_compile_options(
qaihub_llama3_8b_runner PUBLIC ${_common_compile_options}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; };
F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; };
F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; };
F2E1B5172E03AC19002C9718 /* sentencepiece.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
Expand Down Expand Up @@ -110,6 +111,7 @@
F292B0292D88AF4800BE6839 /* result.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = result.h; sourceTree = "<group>"; };
F292B02B2D88AF4800BE6839 /* tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = "<group>"; };
F292B02D2D88AF4800BE6839 /* tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = "<group>"; };
F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = sentencepiece.cpp; path = src/sentencepiece.cpp; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
Expand Down Expand Up @@ -183,6 +185,7 @@
032A74022CAFBB7800932D36 /* tokenizers */ = {
isa = PBXGroup;
children = (
F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */,
3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */,
30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */,
30AA4B5A2DC0766800B1BE50 /* pcre2_regex.cpp */,
Expand Down Expand Up @@ -426,6 +429,7 @@
F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */,
F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */,
F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */,
F2E1B5172E03AC19002C9718 /* sentencepiece.cpp in Sources */,
03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */,
032A74232CAFC1B300932D36 /* runner.cpp in Sources */,
03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */,
Expand Down
4 changes: 3 additions & 1 deletion extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ OTHER_LDFLAGS = $(inherited) \
HEADER_SEARCH_PATHS = $(inherited) \
$(SRCROOT)/../../../../.. \
$(TEMP_DIR)/cmake/include \
$(SRCROOT)/../../../../extension/llm/tokenizers/include
$(SRCROOT)/../../../../extension/llm/tokenizers/include \
$(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece \
$(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece/src

LIBRARY_SEARCH_PATHS = $(inherited) \
$(TEMP_DIR)/cmake/lib
4 changes: 4 additions & 0 deletions extension/llm/runner/irunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ struct GenerationConfig {
// Temperature for sampling (higher = more random)
float temperature = 0.8f;

// Number of BOS and EOS tokens to add to the prompt
int32_t num_bos = 0;
int32_t num_eos = 0;

/**
* Resolve the maximum number of new tokens to generate based on constraints.
*
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/runner/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def define_common_targets():
":text_token_generator" + aten_suffix,
"//pytorch/tokenizers:hf_tokenizer",
"//pytorch/tokenizers:llama2c_tokenizer",
# "//pytorch/tokenizers:sentencepiece", # TODO(larryliu0820) Make sure this compiles in xplat.
"//pytorch/tokenizers:sentencepiece",
"//pytorch/tokenizers:tiktoken",
],
)
11 changes: 9 additions & 2 deletions extension/llm/runner/text_llm_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <executorch/extension/llm/runner/util.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>
#include <pytorch/tokenizers/sentencepiece.h>
#include <pytorch/tokenizers/tiktoken.h>

namespace executorch::extension::llm {
Expand Down Expand Up @@ -116,8 +117,8 @@ Error TextLLMRunner::generate_from_pos(

::tokenizers::Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
prompt,
/* bos */ 0,
/* eos */ 0);
/*bos=*/config.num_bos,
/*eos=*/config.num_eos);

ET_CHECK_TK_OK_OR_RETURN_ERROR(
encode_res.error(), "Failed to encode prompt %s", prompt.c_str());
Expand Down Expand Up @@ -278,6 +279,12 @@ std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
return tiktoken_tokenizer;
}

auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded Sentencepiece tokenizer");
return sp_tokenizer;
}

auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded BPE tokenizer");
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/tokenizers
Loading