diff --git a/examples/models/llama2/runner/runner.h b/examples/models/llama2/runner/runner.h index ca843427a7b..4524aa81aab 100644 --- a/examples/models/llama2/runner/runner.h +++ b/examples/models/llama2/runner/runner.h @@ -26,7 +26,7 @@ namespace example { -class Runner { +class ET_EXPERIMENTAL Runner { public: explicit Runner( const std::string& model_path, diff --git a/examples/models/llava/runner/llava_image_prefiller.h b/examples/models/llava/runner/llava_image_prefiller.h index ace28ac2c1f..b4b1ef420ce 100644 --- a/examples/models/llava/runner/llava_image_prefiller.h +++ b/examples/models/llava/runner/llava_image_prefiller.h @@ -15,7 +15,7 @@ namespace example { -class LlavaImagePrefiller +class ET_EXPERIMENTAL LlavaImagePrefiller : public ::executorch::extension::llm::ImagePrefiller { public: LlavaImagePrefiller(::executorch::extension::Module* module) diff --git a/examples/models/llava/runner/llava_runner.h b/examples/models/llava/runner/llava_runner.h index 79cc22fb240..7898d644aa8 100644 --- a/examples/models/llava/runner/llava_runner.h +++ b/examples/models/llava/runner/llava_runner.h @@ -21,7 +21,8 @@ namespace example { -class LlavaRunner : public ::executorch::extension::llm::MultimodalRunner { +class ET_EXPERIMENTAL LlavaRunner + : public ::executorch::extension::llm::MultimodalRunner { public: explicit LlavaRunner( const std::string& model_path, diff --git a/examples/models/llava/runner/llava_text_decoder_runner.h b/examples/models/llava/runner/llava_text_decoder_runner.h index b2ee56f321a..236d4129102 100644 --- a/examples/models/llava/runner/llava_text_decoder_runner.h +++ b/examples/models/llava/runner/llava_text_decoder_runner.h @@ -14,7 +14,7 @@ namespace example { -class LlavaTextDecoderRunner +class ET_EXPERIMENTAL LlavaTextDecoderRunner : public executorch::extension::llm::TextDecoderRunner { public: LlavaTextDecoderRunner( diff --git a/extension/llm/runner/image.h b/extension/llm/runner/image.h index 32a9f878187..d3c4e8d945d 100644 --- a/extension/llm/runner/image.h +++ b/extension/llm/runner/image.h @@ -11,13 +11,14 @@ #pragma once #include // patternlint-disable-next-line executorch-cpp-nostdinc +#include #include namespace executorch { namespace extension { namespace llm { -struct Image { +struct ET_EXPERIMENTAL Image { // Assuming NCHW format std::vector data; int32_t width; diff --git a/extension/llm/runner/image_prefiller.h b/extension/llm/runner/image_prefiller.h index 89ed0263575..826cea24db9 100644 --- a/extension/llm/runner/image_prefiller.h +++ b/extension/llm/runner/image_prefiller.h @@ -12,13 +12,14 @@ #include #include +#include namespace executorch { namespace extension { namespace llm { // Assuming kv cache and parallel prefill are enabled. -class ImagePrefiller { +class ET_EXPERIMENTAL ImagePrefiller { public: explicit ImagePrefiller(::executorch::extension::Module* module) : module_(module) {} diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h index 6798f648a0c..1af9cb6fa29 100644 --- a/extension/llm/runner/multimodal_runner.h +++ b/extension/llm/runner/multimodal_runner.h @@ -36,7 +36,7 @@ namespace executorch { namespace extension { namespace llm { -class MultimodalRunner { +class ET_EXPERIMENTAL MultimodalRunner { public: explicit MultimodalRunner( const std::string& model_path, diff --git a/extension/llm/runner/stats.h b/extension/llm/runner/stats.h index 573b688f3fe..28abc41854a 100644 --- a/extension/llm/runner/stats.h +++ b/extension/llm/runner/stats.h @@ -19,7 +19,7 @@ namespace executorch { namespace extension { namespace llm { -struct Stats { +struct ET_EXPERIMENTAL Stats { // Scaling factor for timestamps - in this case, we use ms. const long SCALING_FACTOR_UNITS_PER_SECOND = 1000; // Time stamps for the different stages of the execution diff --git a/extension/llm/runner/text_decoder_runner.h b/extension/llm/runner/text_decoder_runner.h index 9d5a8112e52..c6f8a0ca676 100644 --- a/extension/llm/runner/text_decoder_runner.h +++ b/extension/llm/runner/text_decoder_runner.h @@ -13,6 +13,7 @@ #include #include #include +#include // patternlint-disable-next-line executorch-cpp-nostdinc #include @@ -20,7 +21,7 @@ namespace executorch { namespace extension { namespace llm { -class TextDecoderRunner { +class ET_EXPERIMENTAL TextDecoderRunner { public: TextDecoderRunner( Module* module, diff --git a/extension/llm/runner/text_prefiller.h b/extension/llm/runner/text_prefiller.h index 0ea126f32d6..9dbaec40e63 100644 --- a/extension/llm/runner/text_prefiller.h +++ b/extension/llm/runner/text_prefiller.h @@ -20,7 +20,7 @@ namespace executorch { namespace extension { namespace llm { -class TextPrefiller { +class ET_EXPERIMENTAL TextPrefiller { public: TextPrefiller( TextDecoderRunner* text_decoder_runner, diff --git a/extension/llm/runner/text_token_generator.h b/extension/llm/runner/text_token_generator.h index 122395c46b1..62b924a57d8 100644 --- a/extension/llm/runner/text_token_generator.h +++ b/extension/llm/runner/text_token_generator.h @@ -18,7 +18,7 @@ namespace executorch { namespace extension { namespace llm { -class TextTokenGenerator { +class ET_EXPERIMENTAL TextTokenGenerator { public: TextTokenGenerator( Tokenizer* tokenizer, diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h index 2f1d084811e..04d4eccc4a7 100644 --- a/extension/llm/runner/util.h +++ b/extension/llm/runner/util.h @@ -7,6 +7,7 @@ */ #pragma once +#include #include #include #include @@ -18,7 +19,7 @@ namespace executorch { namespace extension { namespace llm { -void inline safe_printf(const char* piece) { +ET_EXPERIMENTAL void inline safe_printf(const char* piece) { // piece might be a raw byte token, and we only want to print printable chars // or whitespace because some of the other bytes can be various control codes, // backspace, etc. @@ -40,7 +41,7 @@ void inline safe_printf(const char* piece) { // ---------------------------------------------------------------------------- // utilities: time -long inline time_in_ms() { +ET_EXPERIMENTAL long inline time_in_ms() { // return time in milliseconds, for benchmarking the model speed struct timespec time; clock_gettime(CLOCK_REALTIME, &time); @@ -54,7 +55,7 @@ long inline time_in_ms() { // RSS: Resident Set Size, the amount of memory currently in the RAM for this // process. These values are approximate, and are only used for logging // purposes. -size_t inline get_rss_bytes() { +ET_EXPERIMENTAL size_t inline get_rss_bytes() { #if defined(__linux__) || defined(__ANDROID__) || defined(__unix__) struct rusage r_usage; if (getrusage(RUSAGE_SELF, &r_usage) == 0) { diff --git a/extension/llm/sampler/sampler.h b/extension/llm/sampler/sampler.h index 9d6d742e590..759eb6c88a7 100644 --- a/extension/llm/sampler/sampler.h +++ b/extension/llm/sampler/sampler.h @@ -19,6 +19,7 @@ #endif #include +#include namespace executorch { namespace extension { @@ -26,12 +27,12 @@ namespace llm { // A simple llama2 sampler. template -struct ProbIndex { +struct ET_EXPERIMENTAL ProbIndex { T prob; int32_t index; }; // struct used when sorting probabilities during top-p sampling -class Sampler { +class ET_EXPERIMENTAL Sampler { public: Sampler( int32_t vocab_size, diff --git a/extension/llm/sampler/targets.bzl b/extension/llm/sampler/targets.bzl index b947ba13812..77cac2e7d96 100644 --- a/extension/llm/sampler/targets.bzl +++ b/extension/llm/sampler/targets.bzl @@ -23,5 +23,6 @@ def define_common_targets(): ] if aten else [], exported_deps = [ "//executorch/runtime/core/exec_aten:lib" + aten_suffix, + "//executorch/runtime/platform:compiler", ], ) diff --git a/extension/llm/tokenizer/bpe_tokenizer.h b/extension/llm/tokenizer/bpe_tokenizer.h index 7fc7306c100..5b47cf4142e 100644 --- a/extension/llm/tokenizer/bpe_tokenizer.h +++ b/extension/llm/tokenizer/bpe_tokenizer.h @@ -22,7 +22,7 @@ struct TokenIndex { // A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code // won't work with this class, it needs to go through tokenizer.py first. -class BPETokenizer : public Tokenizer { +class ET_EXPERIMENTAL BPETokenizer : public Tokenizer { public: explicit BPETokenizer(); ~BPETokenizer() override; diff --git a/extension/llm/tokenizer/tiktoken.h b/extension/llm/tokenizer/tiktoken.h index 7d78f8b60da..5201c07a184 100644 --- a/extension/llm/tokenizer/tiktoken.h +++ b/extension/llm/tokenizer/tiktoken.h @@ -22,7 +22,7 @@ using Encoder = std::unordered_map; using Decoder = std::unordered_map; using Re2UPtr = std::unique_ptr; -class Tiktoken : public Tokenizer { +class ET_EXPERIMENTAL Tiktoken : public Tokenizer { public: /** * @param[in] special_tokens List of special tokens including bos, eos; diff --git a/extension/llm/tokenizer/tokenizer.h b/extension/llm/tokenizer/tokenizer.h index 3115cbdff70..52983d90753 100644 --- a/extension/llm/tokenizer/tokenizer.h +++ b/extension/llm/tokenizer/tokenizer.h @@ -16,13 +16,14 @@ #include #include +#include namespace executorch { namespace extension { namespace llm { // A tokenizer interface. -class Tokenizer { +class ET_EXPERIMENTAL Tokenizer { public: explicit Tokenizer() {} virtual ~Tokenizer() {}