Skip to content

Commit 04dbca7

Browse files
committed
Update on "[llm] Add a generic text only LLM runner"
Introducing `text_llm_runner`. This can be used to run all text-only, decoder-only LLM models supported by ExecuTorch. * Metadata is read from the .pte file and used to construct the runner object. * examples/models/llama/runner.h[.cpp] only contains a simple wrapper around `text_llm_runner.h[.cpp]`. In follow-up PRs I will move examples/models/phi-3-mini/runner to use the generic runner. I will look into the QNN and MediaTek runners as well. Differential Revision: [D75910889](https://our.internmc.facebook.com/intern/diff/D75910889/) [ghstack-poisoned]
2 parents 13125f4 + 009282a commit 04dbca7

File tree

3 files changed

+12
-5
lines changed

3 files changed

+12
-5
lines changed

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ cmake --build cmake-out -j16 --target install --config Release
4040

4141
# Install llama runner with torchao
4242
cmake -DPYTHON_EXECUTABLE=python \
43+
-DBUILD_TESTING=OFF \
4344
-DCMAKE_BUILD_TYPE=Release \
4445
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
4546
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \

examples/models/llama/runner/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,11 @@ else()
5353
endif()
5454

5555
# For extension_llm_runner
56-
if (NOT TARGET extension_llm_runner)
57-
add_subdirectory(
58-
${EXECUTORCH_ROOT}/extension/llm/runner
59-
${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
60-
)
56+
if(NOT TARGET extension_llm_runner)
57+
add_subdirectory(
58+
${EXECUTORCH_ROOT}/extension/llm/runner
59+
${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
60+
)
6161
endif()
6262

6363
set(llama_runner_deps executorch_core extension_data_loader extension_module

extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
30AA4B642DC0766800B1BE50 /* std_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */; };
3434
30AA4B652DC0766800B1BE50 /* pre_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5B2DC0766800B1BE50 /* pre_tokenizer.cpp */; };
3535
30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */; };
36+
F22E9E1A2DF2CBB900EC5425 /* text_llm_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */; };
3637
F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; };
3738
F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; };
3839
F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; };
@@ -94,6 +95,8 @@
9495
30AA4B5D2DC0766800B1BE50 /* regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = regex.cpp; path = src/regex.cpp; sourceTree = "<group>"; };
9596
30AA4B5E2DC0766800B1BE50 /* std_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = std_regex.cpp; path = src/std_regex.cpp; sourceTree = "<group>"; };
9697
30AA4B5F2DC0766800B1BE50 /* token_decoder.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = token_decoder.cpp; path = src/token_decoder.cpp; sourceTree = "<group>"; };
98+
F22E9E182DF2CBB900EC5425 /* text_llm_runner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = text_llm_runner.h; sourceTree = "<group>"; };
99+
F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = text_llm_runner.cpp; sourceTree = "<group>"; };
97100
F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = "<group>"; };
98101
F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = "<group>"; };
99102
F292B01A2D88AF3500BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = "<group>"; };
@@ -146,6 +149,8 @@
146149
032A73E02CAFBB7800932D36 /* runner */ = {
147150
isa = PBXGroup;
148151
children = (
152+
F22E9E182DF2CBB900EC5425 /* text_llm_runner.h */,
153+
F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */,
149154
032A73D42CAFBB7800932D36 /* image.h */,
150155
032A73D52CAFBB7800932D36 /* image_prefiller.h */,
151156
032A73D62CAFBB7800932D36 /* multimodal_runner.h */,
@@ -409,6 +414,7 @@
409414
isa = PBXSourcesBuildPhase;
410415
buildActionMask = 2147483647;
411416
files = (
417+
F22E9E1A2DF2CBB900EC5425 /* text_llm_runner.cpp in Sources */,
412418
03B0118E2CAC567900054791 /* DynamicTestCase.m in Sources */,
413419
032A74182CAFBB7800932D36 /* text_decoder_runner.cpp in Sources */,
414420
032A741D2CAFBB7800932D36 /* text_prefiller.cpp in Sources */,

0 commit comments

Comments
 (0)